Add Chromium for ARM support

This commit is contained in:
LeoMortari
2025-12-05 18:06:30 -03:00
parent bf16df3b6c
commit 04e8d7bc60
3 changed files with 75 additions and 44 deletions

View File

@@ -47,12 +47,10 @@ RUN apt-get update && apt-get install -y \
xdg-utils \
&& rm -rf /var/lib/apt/lists/*
RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \
&& gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
&& apt-get update \
&& apt-get install -y google-chrome-stable \
&& rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg
RUN apt-get update && apt-get install -y \
chromium \
chromium-driver \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app

View File

@@ -3,6 +3,9 @@ services:
build:
context: .
dockerfile: Dockerfile
platforms:
- linux/amd64
- linux/arm64
container_name: proxy-scraper
environment:
# PostgreSQL Connection

View File

@@ -207,7 +207,10 @@ class SeleniumScraper(ProxyScraper):
self.driver: Optional[webdriver.Chrome] = None
def _init_driver(self):
"""Initialize Chrome WebDriver with headless options."""
"""Initialize Chrome/Chromium WebDriver with headless options."""
import os
import shutil
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
@@ -218,49 +221,76 @@ class SeleniumScraper(ProxyScraper):
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
)
# Get chromedriver path and fix webdriver-manager bug
import os
import stat
# Try to find chromium or chrome binary
chromium_paths = [
"/usr/bin/chromium",
"/usr/bin/chromium-browser",
"/usr/bin/google-chrome",
"/usr/bin/google-chrome-stable",
]
chromedriver_path = ChromeDriverManager().install()
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
chrome_binary = None
for path in chromium_paths:
if os.path.exists(path):
chrome_binary = path
logger.info(f"Found browser binary: {chrome_binary}")
break
# Fix for webdriver-manager bug that may return wrong file
# Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE)
filename = os.path.basename(chromedriver_path)
if chrome_binary:
chrome_options.binary_location = chrome_binary
# If filename contains anything other than just "chromedriver" or "chromedriver.exe", it's wrong
if filename not in ("chromedriver", "chromedriver.exe"):
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
# Try to find chromedriver binary
chromedriver_paths = [
"/usr/bin/chromedriver",
shutil.which("chromedriver"),
]
# Navigate to the base directory containing the chromedriver
base_dir = os.path.dirname(chromedriver_path)
chromedriver_path = None
for path in chromedriver_paths:
if path and os.path.exists(path):
chromedriver_path = path
logger.info(f"Found chromedriver: {chromedriver_path}")
break
# Look for chromedriver in the same directory
for potential_file in ["chromedriver", "chromedriver.exe"]:
potential_path = os.path.join(base_dir, potential_file)
if os.path.exists(potential_path):
chromedriver_path = potential_path
logger.info(f"Found correct chromedriver: {chromedriver_path}")
break
# If not found, try to use webdriver-manager as fallback
if not chromedriver_path:
logger.info("Chromedriver not found in system paths, using webdriver-manager...")
import stat
chromedriver_path = ChromeDriverManager().install()
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
# Fix for webdriver-manager bug that may return wrong file
filename = os.path.basename(chromedriver_path)
if filename not in ("chromedriver", "chromedriver.exe"):
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
base_dir = os.path.dirname(chromedriver_path)
for potential_file in ["chromedriver", "chromedriver.exe"]:
potential_path = os.path.join(base_dir, potential_file)
if os.path.exists(potential_path):
chromedriver_path = potential_path
logger.info(f"Found correct chromedriver: {chromedriver_path}")
break
else:
parent_dir = os.path.dirname(base_dir)
for root, dirs, files in os.walk(parent_dir):
for file in files:
if file == "chromedriver" or file == "chromedriver.exe":
chromedriver_path = os.path.join(root, file)
logger.info(f"Found chromedriver at: {chromedriver_path}")
break
logger.info(f"Using chromedriver: {chromedriver_path}")
# Ensure the file has execute permissions
if os.path.exists(chromedriver_path):
current_permissions = os.stat(chromedriver_path).st_mode
os.chmod(chromedriver_path, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
else:
# If not found, search in parent directory or subdirectories
parent_dir = os.path.dirname(base_dir)
for root, dirs, files in os.walk(parent_dir):
for file in files:
if file == "chromedriver" or file == "chromedriver.exe":
chromedriver_path = os.path.join(root, file)
logger.info(f"Found chromedriver at: {chromedriver_path}")
break
logger.info(f"Using chromedriver: {chromedriver_path}")
# Ensure the file has execute permissions
if os.path.exists(chromedriver_path):
current_permissions = os.stat(chromedriver_path).st_mode
os.chmod(chromedriver_path, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
else:
raise FileNotFoundError(f"Chromedriver not found at: {chromedriver_path}")
raise FileNotFoundError(f"Chromedriver not found at: {chromedriver_path}")
service = Service(chromedriver_path)
self.driver = webdriver.Chrome(service=service, options=chrome_options)