Add Chromium for ARM support

This commit is contained in:
LeoMortari
2025-12-05 18:06:30 -03:00
parent bf16df3b6c
commit 04e8d7bc60
3 changed files with 75 additions and 44 deletions

View File

@@ -47,12 +47,10 @@ RUN apt-get update && apt-get install -y \
xdg-utils \ xdg-utils \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \ RUN apt-get update && apt-get install -y \
&& gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \ chromium \
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \ chromium-driver \
&& apt-get update \ && rm -rf /var/lib/apt/lists/*
&& apt-get install -y google-chrome-stable \
&& rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg
WORKDIR /app WORKDIR /app

View File

@@ -3,6 +3,9 @@ services:
build: build:
context: . context: .
dockerfile: Dockerfile dockerfile: Dockerfile
platforms:
- linux/amd64
- linux/arm64
container_name: proxy-scraper container_name: proxy-scraper
environment: environment:
# PostgreSQL Connection # PostgreSQL Connection

View File

@@ -207,7 +207,10 @@ class SeleniumScraper(ProxyScraper):
self.driver: Optional[webdriver.Chrome] = None self.driver: Optional[webdriver.Chrome] = None
def _init_driver(self): def _init_driver(self):
"""Initialize Chrome WebDriver with headless options.""" """Initialize Chrome/Chromium WebDriver with headless options."""
import os
import shutil
chrome_options = Options() chrome_options = Options()
chrome_options.add_argument("--headless") chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--no-sandbox")
@@ -218,25 +221,53 @@ class SeleniumScraper(ProxyScraper):
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
) )
# Get chromedriver path and fix webdriver-manager bug # Try to find chromium or chrome binary
import os chromium_paths = [
"/usr/bin/chromium",
"/usr/bin/chromium-browser",
"/usr/bin/google-chrome",
"/usr/bin/google-chrome-stable",
]
chrome_binary = None
for path in chromium_paths:
if os.path.exists(path):
chrome_binary = path
logger.info(f"Found browser binary: {chrome_binary}")
break
if chrome_binary:
chrome_options.binary_location = chrome_binary
# Try to find chromedriver binary
chromedriver_paths = [
"/usr/bin/chromedriver",
shutil.which("chromedriver"),
]
chromedriver_path = None
for path in chromedriver_paths:
if path and os.path.exists(path):
chromedriver_path = path
logger.info(f"Found chromedriver: {chromedriver_path}")
break
# If not found, try to use webdriver-manager as fallback
if not chromedriver_path:
logger.info("Chromedriver not found in system paths, using webdriver-manager...")
import stat import stat
chromedriver_path = ChromeDriverManager().install() chromedriver_path = ChromeDriverManager().install()
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}") logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
# Fix for webdriver-manager bug that may return wrong file # Fix for webdriver-manager bug that may return wrong file
# Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE)
filename = os.path.basename(chromedriver_path) filename = os.path.basename(chromedriver_path)
# If filename contains anything other than just "chromedriver" or "chromedriver.exe", it's wrong
if filename not in ("chromedriver", "chromedriver.exe"): if filename not in ("chromedriver", "chromedriver.exe"):
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...") logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
# Navigate to the base directory containing the chromedriver
base_dir = os.path.dirname(chromedriver_path) base_dir = os.path.dirname(chromedriver_path)
# Look for chromedriver in the same directory
for potential_file in ["chromedriver", "chromedriver.exe"]: for potential_file in ["chromedriver", "chromedriver.exe"]:
potential_path = os.path.join(base_dir, potential_file) potential_path = os.path.join(base_dir, potential_file)
if os.path.exists(potential_path): if os.path.exists(potential_path):
@@ -244,7 +275,6 @@ class SeleniumScraper(ProxyScraper):
logger.info(f"Found correct chromedriver: {chromedriver_path}") logger.info(f"Found correct chromedriver: {chromedriver_path}")
break break
else: else:
# If not found, search in parent directory or subdirectories
parent_dir = os.path.dirname(base_dir) parent_dir = os.path.dirname(base_dir)
for root, dirs, files in os.walk(parent_dir): for root, dirs, files in os.walk(parent_dir):
for file in files: for file in files: