Add chromium para suporte no ARM
This commit is contained in:
10
Dockerfile
10
Dockerfile
@@ -47,12 +47,10 @@ RUN apt-get update && apt-get install -y \
|
|||||||
xdg-utils \
|
xdg-utils \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \
|
RUN apt-get update && apt-get install -y \
|
||||||
&& gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \
|
chromium \
|
||||||
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
chromium-driver \
|
||||||
&& apt-get update \
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
&& apt-get install -y google-chrome-stable \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,9 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
|
platforms:
|
||||||
|
- linux/amd64
|
||||||
|
- linux/arm64
|
||||||
container_name: proxy-scraper
|
container_name: proxy-scraper
|
||||||
environment:
|
environment:
|
||||||
# PostgreSQL Connection
|
# PostgreSQL Connection
|
||||||
|
|||||||
@@ -207,7 +207,10 @@ class SeleniumScraper(ProxyScraper):
|
|||||||
self.driver: Optional[webdriver.Chrome] = None
|
self.driver: Optional[webdriver.Chrome] = None
|
||||||
|
|
||||||
def _init_driver(self):
|
def _init_driver(self):
|
||||||
"""Initialize Chrome WebDriver with headless options."""
|
"""Initialize Chrome/Chromium WebDriver with headless options."""
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
chrome_options = Options()
|
chrome_options = Options()
|
||||||
chrome_options.add_argument("--headless")
|
chrome_options.add_argument("--headless")
|
||||||
chrome_options.add_argument("--no-sandbox")
|
chrome_options.add_argument("--no-sandbox")
|
||||||
@@ -218,25 +221,53 @@ class SeleniumScraper(ProxyScraper):
|
|||||||
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get chromedriver path and fix webdriver-manager bug
|
# Try to find chromium or chrome binary
|
||||||
import os
|
chromium_paths = [
|
||||||
|
"/usr/bin/chromium",
|
||||||
|
"/usr/bin/chromium-browser",
|
||||||
|
"/usr/bin/google-chrome",
|
||||||
|
"/usr/bin/google-chrome-stable",
|
||||||
|
]
|
||||||
|
|
||||||
|
chrome_binary = None
|
||||||
|
for path in chromium_paths:
|
||||||
|
if os.path.exists(path):
|
||||||
|
chrome_binary = path
|
||||||
|
logger.info(f"Found browser binary: {chrome_binary}")
|
||||||
|
break
|
||||||
|
|
||||||
|
if chrome_binary:
|
||||||
|
chrome_options.binary_location = chrome_binary
|
||||||
|
|
||||||
|
# Try to find chromedriver binary
|
||||||
|
chromedriver_paths = [
|
||||||
|
"/usr/bin/chromedriver",
|
||||||
|
shutil.which("chromedriver"),
|
||||||
|
]
|
||||||
|
|
||||||
|
chromedriver_path = None
|
||||||
|
for path in chromedriver_paths:
|
||||||
|
if path and os.path.exists(path):
|
||||||
|
chromedriver_path = path
|
||||||
|
logger.info(f"Found chromedriver: {chromedriver_path}")
|
||||||
|
break
|
||||||
|
|
||||||
|
# If not found, try to use webdriver-manager as fallback
|
||||||
|
if not chromedriver_path:
|
||||||
|
logger.info("Chromedriver not found in system paths, using webdriver-manager...")
|
||||||
import stat
|
import stat
|
||||||
|
|
||||||
chromedriver_path = ChromeDriverManager().install()
|
chromedriver_path = ChromeDriverManager().install()
|
||||||
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
|
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
|
||||||
|
|
||||||
# Fix for webdriver-manager bug that may return wrong file
|
# Fix for webdriver-manager bug that may return wrong file
|
||||||
# Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE)
|
|
||||||
filename = os.path.basename(chromedriver_path)
|
filename = os.path.basename(chromedriver_path)
|
||||||
|
|
||||||
# If filename contains anything other than just "chromedriver" or "chromedriver.exe", it's wrong
|
|
||||||
if filename not in ("chromedriver", "chromedriver.exe"):
|
if filename not in ("chromedriver", "chromedriver.exe"):
|
||||||
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
|
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
|
||||||
|
|
||||||
# Navigate to the base directory containing the chromedriver
|
|
||||||
base_dir = os.path.dirname(chromedriver_path)
|
base_dir = os.path.dirname(chromedriver_path)
|
||||||
|
|
||||||
# Look for chromedriver in the same directory
|
|
||||||
for potential_file in ["chromedriver", "chromedriver.exe"]:
|
for potential_file in ["chromedriver", "chromedriver.exe"]:
|
||||||
potential_path = os.path.join(base_dir, potential_file)
|
potential_path = os.path.join(base_dir, potential_file)
|
||||||
if os.path.exists(potential_path):
|
if os.path.exists(potential_path):
|
||||||
@@ -244,7 +275,6 @@ class SeleniumScraper(ProxyScraper):
|
|||||||
logger.info(f"Found correct chromedriver: {chromedriver_path}")
|
logger.info(f"Found correct chromedriver: {chromedriver_path}")
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# If not found, search in parent directory or subdirectories
|
|
||||||
parent_dir = os.path.dirname(base_dir)
|
parent_dir = os.path.dirname(base_dir)
|
||||||
for root, dirs, files in os.walk(parent_dir):
|
for root, dirs, files in os.walk(parent_dir):
|
||||||
for file in files:
|
for file in files:
|
||||||
|
|||||||
Reference in New Issue
Block a user