Cria novos componentes

This commit is contained in:
LeoMortari
2025-10-20 17:56:36 -03:00
parent 2b99d2ad78
commit b090f7c2cb
38 changed files with 1391 additions and 1024 deletions

269
main.py
View File

@@ -1,265 +1,16 @@
"""Entry point for the video processing pipeline.
This script listens to a RabbitMQ queue for new video processing tasks. When
a message arrives, it performs the following steps:
1. Creates a working directory for the video based off of its filename.
2. Extracts the audio track with FFMPEG and runs Faster-Whisper to produce
a transcription with word-level timestamps.
3. Uses the Gemini model to determine which parts of the video have the
highest potential for engagement. These highlight segments are
represented as a list of objects containing start/end timestamps and
text.
4. Uses the OpenRouter model to generate a sensational title for each
highlight. Only the ``topText`` field is kept; the description is
intentionally omitted since the caption will be burned into the video.
5. Cuts the original video into individual clips corresponding to each
highlight and renders them vertically with a title above and a dynamic
caption below.
6. Publishes a message to the upload queue with information about the
generated clips. On success, this message contains the list of output
files. On failure, ``hasError`` will be set to ``True`` and the
``error`` field will describe what went wrong.
7. Cleans up temporary files (audio, transcript, working directory) and
deletes the original source video from the ``videos`` directory to
conserve disk space.
The queue names and RabbitMQ credentials are configured via environment
variables. See the accompanying ``docker-compose.yml`` for defaults.
"""
from __future__ import annotations
import json
import os
import shutil
import time
import traceback
from typing import Any, Dict, List
import pika
from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds
from .transcribe import transcribe
from .llm import LLMError, select_highlights, generate_titles
from .render import render_clip
from video_render.config import load_settings
from video_render.logging_utils import setup_logging
from video_render.messaging import RabbitMQWorker
from video_render.pipeline import VideoPipeline
# Environment variables with sensible defaults
RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq")
RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672))
RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin")
# No default on purpose: main() raises when the password is missing.
RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS")
# Queue consumed for new render tasks.
RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render")
# Queue where success/error payloads are published.
RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")
# NOTE(review): this diff view contains two definitions of main(); this one
# appears to be the new version (see the VideoPipeline/RabbitMQWorker wiring
# near the end of the file) — confirm against the full new file.
def main() -> None:
    """Initialise logging and settings, then validate broker credentials."""
    setup_logging()
    settings = load_settings()
    # Fail fast: the worker cannot authenticate without a password.
    if not RABBITMQ_PASS:
        raise RuntimeError("RABBITMQ_PASS não definido no ambiente")
def get_next_message() -> Any:
    """Fetch and acknowledge a single message from ``RABBITMQ_QUEUE``.

    Returns the raw message body (bytes), or ``None`` when the queue is
    empty. A fresh connection is opened per call to avoid keeping stale
    connections alive; the ``finally`` guarantees it is closed even if
    the broker interaction raises (the original leaked the connection
    on error).

    NOTE(review): the message is acked *before* processing, so a crash
    during processing loses it — confirm at-most-once delivery is the
    intended semantics.
    """
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    try:
        channel = connection.channel()
        method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE)
        if method_frame:
            channel.basic_ack(method_frame.delivery_tag)
            return body
        return None
    finally:
        connection.close()
def publish_to_queue(payload: Dict[str, Any]) -> None:
    """Publish a JSON-serialisable payload to ``RABBITMQ_UPLOAD_QUEUE``.

    Opens a short-lived connection, declares the queue as durable, and
    publishes with ``delivery_mode=2`` (persistent message). The
    ``finally`` guarantees the connection is closed even when the
    declare/publish raises (the original leaked it on error).
    """
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    try:
        channel = connection.channel()
        channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True)
        channel.basic_publish(
            exchange="",
            routing_key=RABBITMQ_UPLOAD_QUEUE,
            body=json.dumps(payload),
            properties=pika.BasicProperties(delivery_mode=2),
        )
    finally:
        connection.close()
def build_srt(segments: List[Dict[str, Any]]) -> str:
    """Render transcript segments as an SRT-like block of text.

    Each segment must provide ``start``, ``end`` and ``text``. Start/end
    are formatted as ``HH:MM:SS,mmm`` (the shape the Gemini prompt
    expects) and entries are separated by a blank line.
    """
    entries = [
        f"{seconds_to_timestamp(segment['start'])} --> "
        f"{seconds_to_timestamp(segment['end'])}\n{segment['text']}"
        for segment in segments
    ]
    return "\n\n".join(entries)
def _clip_words(
    words: List[Dict[str, Any]], start_sec: float, end_sec: float
) -> List[Dict[str, Any]]:
    """Return caption words overlapping ``[start_sec, end_sec]``, re-based
    so timestamps are relative to the clip start.

    Falls back to a single empty placeholder word spanning the whole clip
    so the renderer never receives an empty list (e.g. for silent clips).
    """
    relative: List[Dict[str, Any]] = []
    for w in words:
        # Keep only words whose interval overlaps the clip window.
        if w["end"] <= start_sec or w["start"] >= end_sec:
            continue
        relative.append(
            {
                "start": max(0.0, w["start"] - start_sec),
                "end": max(0.0, w["end"] - start_sec),
                "word": w["word"],
            }
        )
    if not relative:
        relative.append({"start": 0.0, "end": end_sec - start_sec, "word": ""})
    return relative


def process_message(data: Dict[str, Any]) -> Dict[str, Any]:
    """Process a single video task described in ``data``.

    Pipeline: transcribe the source video, select highlight windows via
    the LLM helpers, generate a title per highlight, render one vertical
    clip per highlight, then clean up the working files.

    Returns the payload destined for the upload queue. Raises on failure
    (``ValueError`` for a missing filename, ``FileNotFoundError`` for a
    missing source video, plus whatever the helpers raise); the caller is
    responsible for catching and publishing an error payload.
    """
    filename = data.get("filename")
    if not filename:
        raise ValueError("Campo 'filename' ausente na mensagem")
    url = data.get("url")
    video_id = data.get("videoId")

    # n8n drops source videos into the 'videos' directory.
    video_path = os.path.join("videos", filename)
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}")

    # Sanitized basename doubles as the working/output directory name.
    base_no_ext = os.path.splitext(filename)[0]
    sanitized = sanitize_filename(base_no_ext)
    work_dir = os.path.join("app", "videos", sanitized)

    segments, words = transcribe(video_path, work_dir)
    srt_str = build_srt(segments)

    # Ask Gemini which windows have the highest engagement potential.
    highlights = select_highlights(srt_str)
    # Normalise whitespace only; the raw "HH:MM:SS,mmm" strings are kept
    # as-is for the title-generation prompt. (The original comment claimed
    # a float conversion here, but none happens.)
    for item in highlights:
        item["start"] = item["start"].strip()
        item["end"] = item["end"].strip()

    titles = generate_titles(highlights)

    output_dir = os.path.join("outputs", sanitized)
    processed_files: List[str] = []
    for idx, item in enumerate(titles, start=1):
        start_sec = timestamp_to_seconds(item.get("start"))
        end_sec = timestamp_to_seconds(item.get("end"))
        relative_words = _clip_words(words, start_sec, end_sec)
        out_path = render_clip(
            video_path=video_path,
            start=start_sec,
            end=end_sec,
            top_text=item.get("topText", ""),
            words=relative_words,
            out_dir=output_dir,
            base_name=sanitized,
            idx=idx,
        )
        processed_files.append(out_path)

    payload = {
        "videosProcessedQuantity": len(processed_files),
        "filename": filename,
        "processedFiles": processed_files,
        "url": url,
        "videoId": video_id,
        "hasError": False,
        "error": None,
    }

    # Reclaim disk space: drop the working dir and the original upload.
    shutil.rmtree(work_dir, ignore_errors=True)
    try:
        os.remove(video_path)
    except FileNotFoundError:
        pass
    return payload
def main():
    """Poll ``RABBITMQ_QUEUE`` forever, processing one video task at a time.

    Invalid messages are skipped; processing failures are cleaned up and
    reported to the upload queue with ``hasError=True`` instead of
    crashing the loop.

    NOTE(review): this diff view contains two definitions of main(); this
    one appears to be the old version being replaced.
    """
    print(" [*] Esperando mensagens. Para sair: CTRL+C")
    while True:
        body = get_next_message()
        if body is None:
            # Queue empty — back off before polling again.
            time.sleep(5)
            continue
        try:
            data = json.loads(body)
        except Exception:
            print("⚠️ Mensagem inválida recebida (não é JSON)")
            continue
        # json.loads may yield a list/str/number; calling .get() on those
        # would raise AttributeError and crash the error handler below.
        if not isinstance(data, dict):
            print("⚠️ Mensagem inválida recebida (JSON não é um objeto)")
            continue
        try:
            result = process_message(data)
        except Exception as exc:
            # Top-level boundary: log the stack trace for debugging.
            traceback.print_exc()
            # Best-effort cleanup of any directories derived from the filename.
            filename = data.get("filename")
            sanitized = (
                sanitize_filename(os.path.splitext(filename)[0]) if filename else ""
            )
            if sanitized:
                shutil.rmtree(os.path.join("app", "videos", sanitized), ignore_errors=True)
                shutil.rmtree(os.path.join("outputs", sanitized), ignore_errors=True)
            # Remove the original video if present.
            if filename:
                video_path = os.path.join("videos", filename)
                if os.path.exists(video_path):
                    try:
                        os.remove(video_path)
                    except Exception:
                        pass
            # Report the failure downstream instead of dropping it silently.
            error_payload = {
                "videosProcessedQuantity": 0,
                "filename": filename,
                "processedFiles": [],
                "url": data.get("url"),
                "videoId": data.get("videoId"),
                "hasError": True,
                "error": str(exc),
            }
            try:
                publish_to_queue(error_payload)
                print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
            except Exception as publish_err:
                print(f"Erro ao publicar mensagem de erro: {publish_err}")
            continue
        # Success: publish the result payload.
        try:
            publish_to_queue(result)
            print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
        except Exception as publish_err:
            print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}")
        # Loop continues
# Wire the refactored components: VideoPipeline encapsulates the processing
# steps and RabbitMQWorker drives message consumption. NOTE(review): in the
# new file these lines presumably sit inside main() (they use `settings`
# defined there) — confirm against the full post-change file.
pipeline = VideoPipeline(settings)
worker = RabbitMQWorker(settings)
worker.consume_forever(pipeline.process_message)
if __name__ == "__main__":