Faz ajustes de tempo entre testes de Proxy e ajusta compose
This commit is contained in:
@@ -5,6 +5,11 @@ POSTGRES_DB=proxies
|
||||
POSTGRES_USER=postgres
|
||||
POSTGRES_PASSWORD=your_secure_password_here
|
||||
|
||||
# Development Mode
|
||||
# Set to True to disable scheduler and run scraping job once then exit
|
||||
# Set to False for production mode with daily scheduled jobs
|
||||
DEVELOPMENT=False
|
||||
|
||||
# Proxy Validation Settings
|
||||
PROXY_TIMEOUT=10
|
||||
VALIDATION_URL=http://httpbin.org/ip
|
||||
|
||||
@@ -47,11 +47,12 @@ RUN apt-get update && apt-get install -y \
|
||||
xdg-utils \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
|
||||
&& echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list \
|
||||
RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \
|
||||
&& gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \
|
||||
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y google-chrome-stable \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
||||
@@ -6,11 +6,14 @@ services:
|
||||
container_name: proxy-scraper
|
||||
environment:
|
||||
# PostgreSQL Connection
|
||||
POSTGRES_HOST: postgres
|
||||
POSTGRES_PORT: 5432
|
||||
POSTGRES_DB: ${POSTGRES_DB:-proxies}
|
||||
POSTGRES_USER: ${POSTGRES_USER:-postgres}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
|
||||
POSTGRES_HOST: ${POSTGRES_HOST}
|
||||
# Fall back to the standard PostgreSQL port when POSTGRES_PORT is unset or empty.
POSTGRES_PORT: ${POSTGRES_PORT:-5432}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
|
||||
# Development Mode
|
||||
# Take the value from the host environment / .env file; default to production mode.
DEVELOPMENT: ${DEVELOPMENT:-False}
|
||||
|
||||
# Proxy Validation
|
||||
PROXY_TIMEOUT: ${PROXY_TIMEOUT:-10}
|
||||
@@ -34,7 +37,7 @@ services:
|
||||
- scraper_logs:/app/logs
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- proxy-network
|
||||
- dokploy-network
|
||||
|
||||
networks:
|
||||
dokploy-network:
|
||||
|
||||
@@ -32,12 +32,13 @@ class Settings(BaseSettings):
|
||||
# File paths
|
||||
proxies_file: str = Field(default="/app/proxies.txt", alias="PROXIES_FILE")
|
||||
|
||||
# Development mode
|
||||
development: bool = Field(default=False, alias="DEVELOPMENT")
|
||||
|
||||
# Logging
|
||||
log_level: str = Field(default="INFO", alias="LOG_LEVEL")
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
env_file_encoding = "utf-8"
|
||||
case_sensitive = False
|
||||
|
||||
|
||||
|
||||
12
src/main.py
12
src/main.py
@@ -231,16 +231,20 @@ def main():
|
||||
)
|
||||
logger.info(f" Proxy Timeout: {settings.proxy_timeout}s")
|
||||
logger.info(f" Validation URL: {settings.validation_url}")
|
||||
logger.info(f" Development Mode: {settings.development}")
|
||||
|
||||
service = ProxyScrapingService()
|
||||
|
||||
# Check for command line arguments
|
||||
if len(sys.argv) > 1 and sys.argv[1] == "--immediate":
|
||||
# Run immediately and exit
|
||||
# Check for development mode or command line arguments
|
||||
if settings.development or (len(sys.argv) > 1 and sys.argv[1] == "--immediate"):
|
||||
# Run immediately and exit (development mode)
|
||||
if settings.development:
|
||||
logger.info("Running in DEVELOPMENT mode - executing scraping job and exiting")
|
||||
service.run_immediate()
|
||||
service.db.close()
|
||||
logger.info("Development run completed. Exiting.")
|
||||
else:
|
||||
# Start scheduler for recurring jobs
|
||||
# Start scheduler for recurring jobs (production mode)
|
||||
service.start_scheduler()
|
||||
|
||||
|
||||
|
||||
@@ -218,7 +218,51 @@ class SeleniumScraper(ProxyScraper):
|
||||
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||
)
|
||||
|
||||
service = Service(ChromeDriverManager().install())
|
||||
# Get chromedriver path and fix webdriver-manager bug
|
||||
import os
|
||||
import stat
|
||||
|
||||
chromedriver_path = ChromeDriverManager().install()
|
||||
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
|
||||
|
||||
# Fix for webdriver-manager bug that may return wrong file
|
||||
# Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE)
|
||||
filename = os.path.basename(chromedriver_path)
|
||||
|
||||
# If filename contains anything other than just "chromedriver" or "chromedriver.exe", it's wrong
|
||||
if filename not in ("chromedriver", "chromedriver.exe"):
|
||||
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
|
||||
|
||||
# Navigate to the base directory containing the chromedriver
|
||||
base_dir = os.path.dirname(chromedriver_path)
|
||||
|
||||
# Look for chromedriver in the same directory
|
||||
for potential_file in ["chromedriver", "chromedriver.exe"]:
|
||||
potential_path = os.path.join(base_dir, potential_file)
|
||||
if os.path.exists(potential_path):
|
||||
chromedriver_path = potential_path
|
||||
logger.info(f"Found correct chromedriver: {chromedriver_path}")
|
||||
break
|
||||
else:
|
||||
# If not found, search in parent directory or subdirectories
|
||||
parent_dir = os.path.dirname(base_dir)
|
||||
for root, dirs, files in os.walk(parent_dir):
|
||||
for file in files:
|
||||
if file == "chromedriver" or file == "chromedriver.exe":
|
||||
chromedriver_path = os.path.join(root, file)
|
||||
logger.info(f"Found chromedriver at: {chromedriver_path}")
|
||||
break
|
||||
|
||||
logger.info(f"Using chromedriver: {chromedriver_path}")
|
||||
|
||||
# Ensure the file has execute permissions
|
||||
if os.path.exists(chromedriver_path):
|
||||
current_permissions = os.stat(chromedriver_path).st_mode
|
||||
os.chmod(chromedriver_path, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
||||
else:
|
||||
raise FileNotFoundError(f"Chromedriver not found at: {chromedriver_path}")
|
||||
|
||||
service = Service(chromedriver_path)
|
||||
self.driver = webdriver.Chrome(service=service, options=chrome_options)
|
||||
self.driver.implicitly_wait(10)
|
||||
|
||||
|
||||
93
test_connection.py
Normal file
93
test_connection.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Test database connection and list available databases."""
|
||||
import os
import sys
from contextlib import closing

import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

# Connection details are read from the environment so that no host address or
# credentials are committed to the repository. The variable names match the
# ones used by the docker-compose service definition.
HOST = os.environ.get("POSTGRES_HOST", "localhost")
PORT = int(os.environ.get("POSTGRES_PORT") or 5432)
USER = os.environ.get("POSTGRES_USER", "postgres")
# None lets the driver fall back to its own password sources.
PASSWORD = os.environ.get("POSTGRES_PASSWORD")


def _connect(database):
    """Open a new connection to *database* using the module-level settings."""
    return psycopg2.connect(
        host=HOST,
        port=PORT,
        database=database,
        user=USER,
        password=PASSWORD,
    )


def _proxies_database_exists():
    """List the non-template databases and report whether 'proxies' exists.

    Returns:
        True when a database named 'proxies' is present, False otherwise.
    """
    print("\n1. Connecting to 'postgres' database...")
    # closing() guarantees the connection is released even if a query fails;
    # psycopg2's own ``with conn`` only ends the transaction.
    with closing(_connect("postgres")) as conn:
        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        with conn.cursor() as cursor:
            print("\n2. Available databases:")
            cursor.execute("SELECT datname FROM pg_database WHERE datistemplate = false;")
            for (name,) in cursor.fetchall():
                print(f" - {name}")

            cursor.execute("SELECT 1 FROM pg_database WHERE datname = 'proxies';")
            return cursor.fetchone() is not None


def _inspect_proxies_table():
    """Connect to the 'proxies' database and report on the 'proxies' table."""
    print("\n3. Connecting to 'proxies' database...")
    with closing(_connect("proxies")) as conn, conn.cursor() as cursor:
        cursor.execute("""
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = 'proxies'
            );
        """)
        table_exists = cursor.fetchone()[0]

        if not table_exists:
            print("✗ Table 'proxies' DOES NOT EXIST")
            print("\nYou need to run init-db.sql")
            return

        print("✓ Table 'proxies' EXISTS")
        cursor.execute("SELECT COUNT(*) FROM proxies;")
        count = cursor.fetchone()[0]
        print(f"✓ Table has {count} rows")


def main():
    """Run the connection checks and print a human-readable report.

    Returns:
        0 when every check ran without a database error, 1 otherwise, so the
        script can be used from shell scripts and CI.
    """
    print(f"Testing connection to {HOST}:{PORT}")
    print(f"User: {USER}")
    print("-" * 50)

    try:
        if _proxies_database_exists():
            print("\n✓ Database 'proxies' EXISTS")
            _inspect_proxies_table()
        else:
            print("\n✗ Database 'proxies' DOES NOT EXIST")
            print("\nTo create it, run:")
            print(" CREATE DATABASE proxies;")
    except psycopg2.OperationalError as e:
        print(f"\n✗ Connection ERROR: {e}")
        return 1
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"\n✗ Unexpected ERROR: {e}")
        return 1

    print("\n" + "=" * 50)
    print("Connection test completed successfully!")
    return 0


# Guarded so that importing the module (e.g. pytest collecting test_*.py files)
# does not open database connections.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user