Faz ajustes de tempo entre testes de Proxy e ajusta compose

This commit is contained in:
LeoMortari
2025-12-02 00:38:09 -03:00
parent 23c8133f3d
commit d5eebb5cc4
7 changed files with 167 additions and 16 deletions

View File

@@ -5,6 +5,11 @@ POSTGRES_DB=proxies
POSTGRES_USER=postgres POSTGRES_USER=postgres
POSTGRES_PASSWORD=your_secure_password_here POSTGRES_PASSWORD=your_secure_password_here
# Development Mode
# Set to True to disable scheduler and run scraping job once then exit
# Set to False for production mode with daily scheduled jobs
DEVELOPMENT=False
# Proxy Validation Settings # Proxy Validation Settings
PROXY_TIMEOUT=10 PROXY_TIMEOUT=10
VALIDATION_URL=http://httpbin.org/ip VALIDATION_URL=http://httpbin.org/ip

View File

@@ -47,11 +47,12 @@ RUN apt-get update && apt-get install -y \
xdg-utils \ xdg-utils \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \
&& echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list \ && gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
&& apt-get update \ && apt-get update \
&& apt-get install -y google-chrome-stable \ && apt-get install -y google-chrome-stable \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg
WORKDIR /app WORKDIR /app

View File

@@ -6,11 +6,14 @@ services:
container_name: proxy-scraper container_name: proxy-scraper
environment: environment:
# PostgreSQL Connection # PostgreSQL Connection
POSTGRES_HOST: postgres POSTGRES_HOST: ${POSTGRES_HOST}
POSTGRES_PORT: 5432 POSTGRES_PORT: ${POSTGRES_PORT:}
POSTGRES_DB: ${POSTGRES_DB:-proxies} POSTGRES_DB: ${POSTGRES_DB}
POSTGRES_USER: ${POSTGRES_USER:-postgres} POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
# Development Mode
DEVELOPMENT: False
# Proxy Validation # Proxy Validation
PROXY_TIMEOUT: ${PROXY_TIMEOUT:-10} PROXY_TIMEOUT: ${PROXY_TIMEOUT:-10}
@@ -34,7 +37,7 @@ services:
- scraper_logs:/app/logs - scraper_logs:/app/logs
restart: unless-stopped restart: unless-stopped
networks: networks:
- proxy-network - dokploy-network
networks: networks:
dokploy-network: dokploy-network:

View File

@@ -32,12 +32,13 @@ class Settings(BaseSettings):
# File paths # File paths
proxies_file: str = Field(default="/app/proxies.txt", alias="PROXIES_FILE") proxies_file: str = Field(default="/app/proxies.txt", alias="PROXIES_FILE")
# Development mode
development: bool = Field(default=False, alias="DEVELOPMENT")
# Logging # Logging
log_level: str = Field(default="INFO", alias="LOG_LEVEL") log_level: str = Field(default="INFO", alias="LOG_LEVEL")
class Config: class Config:
env_file = ".env"
env_file_encoding = "utf-8"
case_sensitive = False case_sensitive = False

View File

@@ -231,16 +231,20 @@ def main():
) )
logger.info(f" Proxy Timeout: {settings.proxy_timeout}s") logger.info(f" Proxy Timeout: {settings.proxy_timeout}s")
logger.info(f" Validation URL: {settings.validation_url}") logger.info(f" Validation URL: {settings.validation_url}")
logger.info(f" Development Mode: {settings.development}")
service = ProxyScrapingService() service = ProxyScrapingService()
# Check for command line arguments # Check for development mode or command line arguments
if len(sys.argv) > 1 and sys.argv[1] == "--immediate": if settings.development or (len(sys.argv) > 1 and sys.argv[1] == "--immediate"):
# Run immediately and exit # Run immediately and exit (development mode)
if settings.development:
logger.info("Running in DEVELOPMENT mode - executing scraping job and exiting")
service.run_immediate() service.run_immediate()
service.db.close() service.db.close()
logger.info("Development run completed. Exiting.")
else: else:
# Start scheduler for recurring jobs # Start scheduler for recurring jobs (production mode)
service.start_scheduler() service.start_scheduler()

View File

@@ -218,7 +218,51 @@ class SeleniumScraper(ProxyScraper):
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
) )
service = Service(ChromeDriverManager().install()) # Get chromedriver path and fix webdriver-manager bug
import os
import stat
chromedriver_path = ChromeDriverManager().install()
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
# Fix for webdriver-manager bug that may return wrong file
# Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE)
filename = os.path.basename(chromedriver_path)
# If filename contains anything other than just "chromedriver" or "chromedriver.exe", it's wrong
if filename not in ("chromedriver", "chromedriver.exe"):
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
# Navigate to the base directory containing the chromedriver
base_dir = os.path.dirname(chromedriver_path)
# Look for chromedriver in the same directory
for potential_file in ["chromedriver", "chromedriver.exe"]:
potential_path = os.path.join(base_dir, potential_file)
if os.path.exists(potential_path):
chromedriver_path = potential_path
logger.info(f"Found correct chromedriver: {chromedriver_path}")
break
else:
# If not found, search in parent directory or subdirectories
parent_dir = os.path.dirname(base_dir)
for root, dirs, files in os.walk(parent_dir):
for file in files:
if file == "chromedriver" or file == "chromedriver.exe":
chromedriver_path = os.path.join(root, file)
logger.info(f"Found chromedriver at: {chromedriver_path}")
break
logger.info(f"Using chromedriver: {chromedriver_path}")
# Ensure the file has execute permissions
if os.path.exists(chromedriver_path):
current_permissions = os.stat(chromedriver_path).st_mode
os.chmod(chromedriver_path, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
else:
raise FileNotFoundError(f"Chromedriver not found at: {chromedriver_path}")
service = Service(chromedriver_path)
self.driver = webdriver.Chrome(service=service, options=chrome_options) self.driver = webdriver.Chrome(service=service, options=chrome_options)
self.driver.implicitly_wait(10) self.driver.implicitly_wait(10)

93
test_connection.py Normal file
View File

@@ -0,0 +1,93 @@
"""Test database connection and list available databases."""
import os
import sys
from contextlib import closing

import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

# Connection details are read from the environment (the same POSTGRES_*
# variables the compose file passes to the scraper) so that no host or
# credential is ever committed to the repository.
HOST = os.environ.get("POSTGRES_HOST") or "localhost"
PORT = int(os.environ.get("POSTGRES_PORT") or 5432)
USER = os.environ.get("POSTGRES_USER") or "postgres"
# Left as None when unset; NOTE(review): psycopg2 is expected to drop None
# keyword arguments so libpq's own password lookup applies - confirm.
PASSWORD = os.environ.get("POSTGRES_PASSWORD")


def _connect(database):
    """Open a psycopg2 connection to *database* using the module settings."""
    return psycopg2.connect(
        host=HOST,
        port=PORT,
        database=database,
        user=USER,
        password=PASSWORD,
    )


def _inspect_proxies_database():
    """Connect to the 'proxies' database and report on its 'proxies' table."""
    print("\n3. Connecting to 'proxies' database...")
    # closing() guarantees both handles are released even if a query raises.
    with closing(_connect("proxies")) as conn, closing(conn.cursor()) as cursor:
        cursor.execute(
            """
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = 'proxies'
            );
            """
        )
        table_exists = cursor.fetchone()[0]

        if not table_exists:
            print("✗ Table 'proxies' DOES NOT EXIST")
            print("\nYou need to run init-db.sql")
            return

        print("✓ Table 'proxies' EXISTS")
        cursor.execute("SELECT COUNT(*) FROM proxies;")
        count = cursor.fetchone()[0]
        print(f"✓ Table has {count} rows")


def main():
    """Run the connection diagnostics.

    Returns:
        0 when every step completed, 1 when a connection or query failed,
        so the result can be used as the process exit status.
    """
    print(f"Testing connection to {HOST}:{PORT}")
    print(f"User: {USER}")
    print("-" * 50)

    try:
        # First, connect to the maintenance database to list all databases.
        print("\n1. Connecting to 'postgres' database...")
        with closing(_connect("postgres")) as conn:
            conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
            with closing(conn.cursor()) as cursor:
                print("\n2. Available databases:")
                cursor.execute(
                    "SELECT datname FROM pg_database WHERE datistemplate = false;"
                )
                for db in cursor.fetchall():
                    print(f" - {db[0]}")

                cursor.execute("SELECT 1 FROM pg_database WHERE datname = 'proxies';")
                exists = cursor.fetchone()

        if exists:
            print("\n✓ Database 'proxies' EXISTS")
            _inspect_proxies_database()
        else:
            print("\n✗ Database 'proxies' DOES NOT EXIST")
            print("\nTo create it, run:")
            print(" CREATE DATABASE proxies;")

        print("\n" + "=" * 50)
        print("Connection test completed successfully!")
    except psycopg2.OperationalError as e:
        print(f"\n✗ Connection ERROR: {e}")
        return 1
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"\n✗ Unexpected ERROR: {e}")
        return 1
    return 0


# Guarded so that importing this module (e.g. pytest collecting test_*.py)
# does not open network connections.
if __name__ == "__main__":
    sys.exit(main())