Add Chromium for ARM support
This commit is contained in:
10
Dockerfile
10
Dockerfile
@@ -47,12 +47,10 @@ RUN apt-get update && apt-get install -y \
|
||||
xdg-utils \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \
|
||||
&& gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \
|
||||
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y google-chrome-stable \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg
|
||||
RUN apt-get update && apt-get install -y \
|
||||
chromium \
|
||||
chromium-driver \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
||||
@@ -3,6 +3,9 @@ services:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
platforms:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
container_name: proxy-scraper
|
||||
environment:
|
||||
# PostgreSQL Connection
|
||||
|
||||
@@ -207,7 +207,10 @@ class SeleniumScraper(ProxyScraper):
|
||||
self.driver: Optional[webdriver.Chrome] = None
|
||||
|
||||
def _init_driver(self):
|
||||
"""Initialize Chrome WebDriver with headless options."""
|
||||
"""Initialize Chrome/Chromium WebDriver with headless options."""
|
||||
import os
|
||||
import shutil
|
||||
|
||||
chrome_options = Options()
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("--no-sandbox")
|
||||
@@ -218,25 +221,53 @@ class SeleniumScraper(ProxyScraper):
|
||||
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
)
|
||||
|
||||
# Get chromedriver path and fix webdriver-manager bug
|
||||
import os
|
||||
# Try to find chromium or chrome binary
|
||||
chromium_paths = [
|
||||
"/usr/bin/chromium",
|
||||
"/usr/bin/chromium-browser",
|
||||
"/usr/bin/google-chrome",
|
||||
"/usr/bin/google-chrome-stable",
|
||||
]
|
||||
|
||||
chrome_binary = None
|
||||
for path in chromium_paths:
|
||||
if os.path.exists(path):
|
||||
chrome_binary = path
|
||||
logger.info(f"Found browser binary: {chrome_binary}")
|
||||
break
|
||||
|
||||
if chrome_binary:
|
||||
chrome_options.binary_location = chrome_binary
|
||||
|
||||
# Try to find chromedriver binary
|
||||
chromedriver_paths = [
|
||||
"/usr/bin/chromedriver",
|
||||
shutil.which("chromedriver"),
|
||||
]
|
||||
|
||||
chromedriver_path = None
|
||||
for path in chromedriver_paths:
|
||||
if path and os.path.exists(path):
|
||||
chromedriver_path = path
|
||||
logger.info(f"Found chromedriver: {chromedriver_path}")
|
||||
break
|
||||
|
||||
# If not found, try to use webdriver-manager as fallback
|
||||
if not chromedriver_path:
|
||||
logger.info("Chromedriver not found in system paths, using webdriver-manager...")
|
||||
import stat
|
||||
|
||||
chromedriver_path = ChromeDriverManager().install()
|
||||
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
|
||||
|
||||
# Fix for webdriver-manager bug that may return wrong file
|
||||
# Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE)
|
||||
filename = os.path.basename(chromedriver_path)
|
||||
|
||||
# If the filename is not exactly "chromedriver" or "chromedriver.exe", WDM returned the wrong file
|
||||
if filename not in ("chromedriver", "chromedriver.exe"):
|
||||
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
|
||||
|
||||
# Navigate to the base directory containing the chromedriver
|
||||
base_dir = os.path.dirname(chromedriver_path)
|
||||
|
||||
# Look for chromedriver in the same directory
|
||||
for potential_file in ["chromedriver", "chromedriver.exe"]:
|
||||
potential_path = os.path.join(base_dir, potential_file)
|
||||
if os.path.exists(potential_path):
|
||||
@@ -244,7 +275,6 @@ class SeleniumScraper(ProxyScraper):
|
||||
logger.info(f"Found correct chromedriver: {chromedriver_path}")
|
||||
break
|
||||
else:
|
||||
# If not found, search in parent directory or subdirectories
|
||||
parent_dir = os.path.dirname(base_dir)
|
||||
for root, dirs, files in os.walk(parent_dir):
|
||||
for file in files:
|
||||
|
||||
Reference in New Issue
Block a user