From 04e8d7bc60c0ad2892ca834d4e42e98e3c027668 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Fri, 5 Dec 2025 18:06:30 -0300 Subject: [PATCH] Add chromium para suporte no ARM --- Dockerfile | 10 ++--- docker-compose.yml | 3 ++ src/scrapers.py | 106 +++++++++++++++++++++++++++++---------------- 3 files changed, 75 insertions(+), 44 deletions(-) diff --git a/Dockerfile b/Dockerfile index f72a044..e350fdf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,12 +47,10 @@ RUN apt-get update && apt-get install -y \ xdg-utils \ && rm -rf /var/lib/apt/lists/* -RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \ - && gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \ - && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \ - && apt-get update \ - && apt-get install -y google-chrome-stable \ - && rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg +RUN apt-get update && apt-get install -y \ + chromium \ + chromium-driver \ + && rm -rf /var/lib/apt/lists/* WORKDIR /app diff --git a/docker-compose.yml b/docker-compose.yml index aa78dae..3455339 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,6 +3,9 @@ services: build: context: . dockerfile: Dockerfile + platforms: + - linux/amd64 + - linux/arm64 container_name: proxy-scraper environment: # PostgreSQL Connection diff --git a/src/scrapers.py b/src/scrapers.py index aab084b..eb2e99c 100644 --- a/src/scrapers.py +++ b/src/scrapers.py @@ -207,7 +207,10 @@ class SeleniumScraper(ProxyScraper): self.driver: Optional[webdriver.Chrome] = None def _init_driver(self): - """Initialize Chrome WebDriver with headless options.""" + """Initialize Chrome/Chromium WebDriver with headless options.""" + import os + import shutil + chrome_options = Options() chrome_options.add_argument("--headless") chrome_options.add_argument("--no-sandbox") @@ -218,49 +221,76 @@ class SeleniumScraper(ProxyScraper): "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" ) - # Get chromedriver path and fix webdriver-manager bug - import os - import stat + # Try to find chromium or chrome binary + chromium_paths = [ + "/usr/bin/chromium", + "/usr/bin/chromium-browser", + "/usr/bin/google-chrome", + "/usr/bin/google-chrome-stable", + ] - chromedriver_path = ChromeDriverManager().install() - logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}") + chrome_binary = None + for path in chromium_paths: + if os.path.exists(path): + chrome_binary = path + logger.info(f"Found browser binary: {chrome_binary}") + break - # Fix for webdriver-manager bug that may return wrong file - # Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE) - filename = os.path.basename(chromedriver_path) + if chrome_binary: + chrome_options.binary_location = chrome_binary - # If filename contains anything other than just "chromedriver" or "chromedriver.exe", it's wrong - if filename not in ("chromedriver", "chromedriver.exe"): - logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...") + # Try to find chromedriver binary + chromedriver_paths = [ + "/usr/bin/chromedriver", + shutil.which("chromedriver"), + ] - # Navigate to the base directory containing the chromedriver - base_dir = os.path.dirname(chromedriver_path) + chromedriver_path = None + for path in chromedriver_paths: + if path and os.path.exists(path): + chromedriver_path = path + logger.info(f"Found chromedriver: {chromedriver_path}") + break - # Look for chromedriver in the same directory - for potential_file in ["chromedriver", "chromedriver.exe"]: - potential_path = os.path.join(base_dir, potential_file) - if os.path.exists(potential_path): - chromedriver_path = potential_path - logger.info(f"Found correct chromedriver: {chromedriver_path}") - break + # If not found, try to use webdriver-manager as fallback + if not chromedriver_path: + logger.info("Chromedriver not found in system paths, using webdriver-manager...") + import stat + + chromedriver_path = ChromeDriverManager().install() + logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}") + + # Fix for webdriver-manager bug that may return wrong file + filename = os.path.basename(chromedriver_path) + + if filename not in ("chromedriver", "chromedriver.exe"): + logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...") + + base_dir = os.path.dirname(chromedriver_path) + + for potential_file in ["chromedriver", "chromedriver.exe"]: + potential_path = os.path.join(base_dir, potential_file) + if os.path.exists(potential_path): + chromedriver_path = potential_path + logger.info(f"Found correct chromedriver: {chromedriver_path}") + break + else: + parent_dir = os.path.dirname(base_dir) + for root, dirs, files in os.walk(parent_dir): + for file in files: + if file == "chromedriver" or file == "chromedriver.exe": + chromedriver_path = os.path.join(root, file) + logger.info(f"Found chromedriver at: {chromedriver_path}") + break + + logger.info(f"Using chromedriver: {chromedriver_path}") + + # Ensure the file has execute permissions + if os.path.exists(chromedriver_path): + current_permissions = os.stat(chromedriver_path).st_mode + os.chmod(chromedriver_path, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) else: - # If not found, search in parent directory or subdirectories - parent_dir = os.path.dirname(base_dir) - for root, dirs, files in os.walk(parent_dir): - for file in files: - if file == "chromedriver" or file == "chromedriver.exe": - chromedriver_path = os.path.join(root, file) - logger.info(f"Found chromedriver at: {chromedriver_path}") - break - - logger.info(f"Using chromedriver: {chromedriver_path}") - - # Ensure the file has execute permissions - if os.path.exists(chromedriver_path): - current_permissions = os.stat(chromedriver_path).st_mode - os.chmod(chromedriver_path, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) - else: - raise FileNotFoundError(f"Chromedriver not found at: {chromedriver_path}") + raise FileNotFoundError(f"Chromedriver not found at: {chromedriver_path}") service = Service(chromedriver_path) self.driver = webdriver.Chrome(service=service, options=chrome_options)