Faz ajustes de tempo entre testes de Proxy e ajusta compose
This commit is contained in:
@@ -5,6 +5,11 @@ POSTGRES_DB=proxies
|
|||||||
POSTGRES_USER=postgres
|
POSTGRES_USER=postgres
|
||||||
POSTGRES_PASSWORD=your_secure_password_here
|
POSTGRES_PASSWORD=your_secure_password_here
|
||||||
|
|
||||||
|
# Development Mode
|
||||||
|
# Set to True to disable scheduler and run scraping job once then exit
|
||||||
|
# Set to False for production mode with daily scheduled jobs
|
||||||
|
DEVELOPMENT=False
|
||||||
|
|
||||||
# Proxy Validation Settings
|
# Proxy Validation Settings
|
||||||
PROXY_TIMEOUT=10
|
PROXY_TIMEOUT=10
|
||||||
VALIDATION_URL=http://httpbin.org/ip
|
VALIDATION_URL=http://httpbin.org/ip
|
||||||
|
|||||||
@@ -47,11 +47,12 @@ RUN apt-get update && apt-get install -y \
|
|||||||
xdg-utils \
|
xdg-utils \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
|
RUN wget -q -O /tmp/google-chrome.gpg https://dl-ssl.google.com/linux/linux_signing_key.pub \
|
||||||
&& echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list \
|
&& gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg /tmp/google-chrome.gpg \
|
||||||
|
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
||||||
&& apt-get update \
|
&& apt-get update \
|
||||||
&& apt-get install -y google-chrome-stable \
|
&& apt-get install -y google-chrome-stable \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/* /tmp/google-chrome.gpg
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
|||||||
@@ -6,11 +6,14 @@ services:
|
|||||||
container_name: proxy-scraper
|
container_name: proxy-scraper
|
||||||
environment:
|
environment:
|
||||||
# PostgreSQL Connection
|
# PostgreSQL Connection
|
||||||
POSTGRES_HOST: postgres
|
POSTGRES_HOST: ${POSTGRES_HOST}
|
||||||
POSTGRES_PORT: 5432
|
# Use the PostgreSQL default port when POSTGRES_PORT is unset or empty.
POSTGRES_PORT: ${POSTGRES_PORT:-5432}
|
||||||
POSTGRES_DB: ${POSTGRES_DB:-proxies}
|
POSTGRES_DB: ${POSTGRES_DB}
|
||||||
POSTGRES_USER: ${POSTGRES_USER:-postgres}
|
POSTGRES_USER: ${POSTGRES_USER}
|
||||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
|
|
||||||
|
# Development Mode
|
||||||
|
# Read the flag from the environment/.env so it can be toggled without editing
# this file; defaults to False (production mode with scheduled jobs).
DEVELOPMENT: ${DEVELOPMENT:-False}
|
||||||
|
|
||||||
# Proxy Validation
|
# Proxy Validation
|
||||||
PROXY_TIMEOUT: ${PROXY_TIMEOUT:-10}
|
PROXY_TIMEOUT: ${PROXY_TIMEOUT:-10}
|
||||||
@@ -34,7 +37,7 @@ services:
|
|||||||
- scraper_logs:/app/logs
|
- scraper_logs:/app/logs
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
networks:
|
networks:
|
||||||
- proxy-network
|
- dokploy-network
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
dokploy-network:
|
dokploy-network:
|
||||||
|
|||||||
@@ -32,12 +32,13 @@ class Settings(BaseSettings):
|
|||||||
# File paths
|
# File paths
|
||||||
proxies_file: str = Field(default="/app/proxies.txt", alias="PROXIES_FILE")
|
proxies_file: str = Field(default="/app/proxies.txt", alias="PROXIES_FILE")
|
||||||
|
|
||||||
|
# Development mode
|
||||||
|
development: bool = Field(default=False, alias="DEVELOPMENT")
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
log_level: str = Field(default="INFO", alias="LOG_LEVEL")
|
log_level: str = Field(default="INFO", alias="LOG_LEVEL")
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
env_file = ".env"
|
|
||||||
env_file_encoding = "utf-8"
|
|
||||||
case_sensitive = False
|
case_sensitive = False
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
12
src/main.py
12
src/main.py
@@ -231,16 +231,20 @@ def main():
|
|||||||
)
|
)
|
||||||
logger.info(f" Proxy Timeout: {settings.proxy_timeout}s")
|
logger.info(f" Proxy Timeout: {settings.proxy_timeout}s")
|
||||||
logger.info(f" Validation URL: {settings.validation_url}")
|
logger.info(f" Validation URL: {settings.validation_url}")
|
||||||
|
logger.info(f" Development Mode: {settings.development}")
|
||||||
|
|
||||||
service = ProxyScrapingService()
|
service = ProxyScrapingService()
|
||||||
|
|
||||||
# Check for command line arguments
|
# Check for development mode or command line arguments
|
||||||
if len(sys.argv) > 1 and sys.argv[1] == "--immediate":
|
if settings.development or (len(sys.argv) > 1 and sys.argv[1] == "--immediate"):
|
||||||
# Run immediately and exit
|
# Run immediately and exit (development mode)
|
||||||
|
if settings.development:
|
||||||
|
logger.info("Running in DEVELOPMENT mode - executing scraping job and exiting")
|
||||||
service.run_immediate()
|
service.run_immediate()
|
||||||
service.db.close()
|
service.db.close()
|
||||||
|
logger.info("Development run completed. Exiting.")
|
||||||
else:
|
else:
|
||||||
# Start scheduler for recurring jobs
|
# Start scheduler for recurring jobs (production mode)
|
||||||
service.start_scheduler()
|
service.start_scheduler()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -218,7 +218,51 @@ class SeleniumScraper(ProxyScraper):
|
|||||||
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
"user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||||
)
|
)
|
||||||
|
|
||||||
service = Service(ChromeDriverManager().install())
|
# Get chromedriver path and fix webdriver-manager bug
|
||||||
|
import os
|
||||||
|
import stat
|
||||||
|
|
||||||
|
chromedriver_path = ChromeDriverManager().install()
|
||||||
|
logger.debug(f"Initial chromedriver path from WDM: {chromedriver_path}")
|
||||||
|
|
||||||
|
# Fix for webdriver-manager bug that may return wrong file
|
||||||
|
# Check if the returned file is the actual binary (not THIRD_PARTY_NOTICES or LICENSE)
|
||||||
|
filename = os.path.basename(chromedriver_path)
|
||||||
|
|
||||||
|
# If filename contains anything other than just "chromedriver" or "chromedriver.exe", it's wrong
|
||||||
|
if filename not in ("chromedriver", "chromedriver.exe"):
|
||||||
|
logger.warning(f"WDM returned wrong file: {filename}. Searching for correct chromedriver...")
|
||||||
|
|
||||||
|
# Navigate to the base directory containing the chromedriver
|
||||||
|
base_dir = os.path.dirname(chromedriver_path)
|
||||||
|
|
||||||
|
# Look for chromedriver in the same directory
|
||||||
|
for potential_file in ["chromedriver", "chromedriver.exe"]:
|
||||||
|
potential_path = os.path.join(base_dir, potential_file)
|
||||||
|
if os.path.exists(potential_path):
|
||||||
|
chromedriver_path = potential_path
|
||||||
|
logger.info(f"Found correct chromedriver: {chromedriver_path}")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
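# If not found next to the returned file, walk the parent directory and everything beneath it.
# NOTE(review): the only `break` below appears to exit the inner `for file in files` loop, so os.walk keeps going and a later match can overwrite chromedriver_path — confirm, and stop the walk on the first hit.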
|
||||||
|
parent_dir = os.path.dirname(base_dir)
|
||||||
|
for root, dirs, files in os.walk(parent_dir):
|
||||||
|
for file in files:
|
||||||
|
if file == "chromedriver" or file == "chromedriver.exe":
|
||||||
|
chromedriver_path = os.path.join(root, file)
|
||||||
|
logger.info(f"Found chromedriver at: {chromedriver_path}")
|
||||||
|
break
|
||||||
|
|
||||||
|
logger.info(f"Using chromedriver: {chromedriver_path}")
|
||||||
|
|
||||||
|
# Ensure the file has execute permissions
|
||||||
|
if os.path.exists(chromedriver_path):
|
||||||
|
current_permissions = os.stat(chromedriver_path).st_mode
|
||||||
|
os.chmod(chromedriver_path, current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
||||||
|
else:
|
||||||
|
raise FileNotFoundError(f"Chromedriver not found at: {chromedriver_path}")
|
||||||
|
|
||||||
|
service = Service(chromedriver_path)
|
||||||
self.driver = webdriver.Chrome(service=service, options=chrome_options)
|
self.driver = webdriver.Chrome(service=service, options=chrome_options)
|
||||||
self.driver.implicitly_wait(10)
|
self.driver.implicitly_wait(10)
|
||||||
|
|
||||||
|
|||||||
93
test_connection.py
Normal file
93
test_connection.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
"""Test database connection and list available databases."""
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
|
||||||
|
|
||||||
|
# Connection details
|
||||||
|
HOST = "154.12.229.181"
|
||||||
|
PORT = 5666
|
||||||
|
USER = "leolitas"
|
||||||
|
PASSWORD = "L@l321321321"
|
||||||
|
|
||||||
|
print(f"Testing connection to {HOST}:{PORT}")
|
||||||
|
print(f"User: {USER}")
|
||||||
|
print("-" * 50)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# First, connect to postgres database to list all databases
|
||||||
|
print("\n1. Connecting to 'postgres' database...")
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
host=HOST,
|
||||||
|
port=PORT,
|
||||||
|
database="postgres",
|
||||||
|
user=USER,
|
||||||
|
password=PASSWORD
|
||||||
|
)
|
||||||
|
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
|
||||||
|
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# List all databases
|
||||||
|
print("\n2. Available databases:")
|
||||||
|
cursor.execute("SELECT datname FROM pg_database WHERE datistemplate = false;")
|
||||||
|
databases = cursor.fetchall()
|
||||||
|
for db in databases:
|
||||||
|
print(f" - {db[0]}")
|
||||||
|
|
||||||
|
# Check if 'proxies' database exists
|
||||||
|
cursor.execute("SELECT 1 FROM pg_database WHERE datname = 'proxies';")
|
||||||
|
exists = cursor.fetchone()
|
||||||
|
|
||||||
|
if exists:
|
||||||
|
print("\n✓ Database 'proxies' EXISTS")
|
||||||
|
|
||||||
|
# Try to connect to proxies database
|
||||||
|
print("\n3. Connecting to 'proxies' database...")
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
conn_proxies = psycopg2.connect(
|
||||||
|
host=HOST,
|
||||||
|
port=PORT,
|
||||||
|
database="proxies",
|
||||||
|
user=USER,
|
||||||
|
password=PASSWORD
|
||||||
|
)
|
||||||
|
cursor_proxies = conn_proxies.cursor()
|
||||||
|
|
||||||
|
# Check if table exists
|
||||||
|
cursor_proxies.execute("""
|
||||||
|
SELECT EXISTS (
|
||||||
|
SELECT FROM information_schema.tables
|
||||||
|
WHERE table_name = 'proxies'
|
||||||
|
);
|
||||||
|
""")
|
||||||
|
table_exists = cursor_proxies.fetchone()[0]
|
||||||
|
|
||||||
|
if table_exists:
|
||||||
|
print("✓ Table 'proxies' EXISTS")
|
||||||
|
|
||||||
|
# Get row count
|
||||||
|
cursor_proxies.execute("SELECT COUNT(*) FROM proxies;")
|
||||||
|
count = cursor_proxies.fetchone()[0]
|
||||||
|
print(f"✓ Table has {count} rows")
|
||||||
|
else:
|
||||||
|
print("✗ Table 'proxies' DOES NOT EXIST")
|
||||||
|
print("\nYou need to run init-db.sql")
|
||||||
|
|
||||||
|
cursor_proxies.close()
|
||||||
|
conn_proxies.close()
|
||||||
|
|
||||||
|
else:
|
||||||
|
print("\n✗ Database 'proxies' DOES NOT EXIST")
|
||||||
|
print("\nTo create it, run:")
|
||||||
|
print(f" CREATE DATABASE proxies;")
|
||||||
|
|
||||||
|
cursor.close()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
print("\n" + "=" * 50)
|
||||||
|
print("Connection test completed successfully!")
|
||||||
|
|
||||||
|
except psycopg2.OperationalError as e:
|
||||||
|
print(f"\n✗ Connection ERROR: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"\n✗ Unexpected ERROR: {e}")
|
||||||
Reference in New Issue
Block a user