Create new components

LeoMortari
2025-10-20 17:56:36 -03:00
parent 2b99d2ad78
commit b090f7c2cb
38 changed files with 1391 additions and 1024 deletions

.gitignore

@@ -1,98 +0,0 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
*.manifest
*.spec
pip-log.txt
pip-delete-this-directory.txt
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
*.mo
*.pot
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
instance/
.webassets-cache
.scrapy
docs/_build/
.pybuilder/
target/
.ipynb_checkpoints
profile_default/
ipython_config.py
.pdm.toml
__pypackages__/
celerybeat-schedule
celerybeat.pid
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.spyderproject
.spyproject
.ropeproject
/site
.mypy_cache/
.dmypy.json
dmypy.json
.pyre/
.pytype/
cython_debug/
.idea/
.vscode/
*.code-workspace
*.local
*.mp4
*.wav
*.mp3
*.srt
*.vtt
*.json
*.csv
*.xlsx
*.db
*.sqlite3


@@ -1 +0,0 @@
"""Top-level package for the video processing pipeline."""

4 binary files changed (contents not shown).

docker-compose.yml

@@ -1,3 +1,8 @@
+# GEMINI_API_KEY="AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw"
+# YOUTUBE_API="https://totally-real-dingo.ngrok-free.app"
+# OPENROUTER_API_KEY="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"
+# OPENROUTER_MODEL="openai/gpt-oss-20b:free"
 services:
   video-render-new:
     restart: unless-stopped
@@ -6,19 +11,13 @@ services:
     environment:
       # RabbitMQ credentials
       - RABBITMQ_PASS=${RABBITMQ_PASS}
-      - RABBITMQ_HOST=${RABBITMQ_HOST}
-      - RABBITMQ_USER=${RABBITMQ_USER}
-      - RABBITMQ_PORT=${RABBITMQ_PORT}
-      - RABBITMQ_QUEUE=${RABBITMQ_QUEUE}
-      - RABBITMQ_UPLOAD_QUEUE=${RABBITMQ_UPLOAD_QUEUE}
-      # API keys for the LLMs
       - GEMINI_API_KEY=${GEMINI_API_KEY}
-      - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro}
       - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
-      - OPENROUTER_MODEL=${OPENROUTER_MODEL}
+      - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free}
-      # Optional whisper settings
-      - WHISPER_MODEL=${WHISPER_MODEL}
-      - WHISPER_DEVICE=${WHISPER_DEVICE}
-      - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE}
+      - FASTER_WHISPER_MODEL_SIZE=${FASTER_WHISPER_MODEL_SIZE:-small}
+    ports:
+      - "5000:5000"
     volumes:
       # Mount host directories into the container so that videos can be
       # provided and outputs collected. These paths can be customised when
@@ -27,9 +26,18 @@ services:
       - "/root/videos:/app/videos"
       - "/root/outputs:/app/outputs"
     command: "python -u main.py"
-    networks:
-      - dokploy-network
-networks:
-  dokploy-network:
-    external: true
+    # runtime: nvidia
+    # networks:
+    #   - dokploy-network
+    # deploy:
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+# networks:
+#   dokploy-network:
+#     external: true

Dockerfile

@@ -21,6 +21,10 @@ RUN apt-get update && \
    xdg-utils \
    wget \
    unzip \
+   ffmpeg \
+   libgomp1 \
+   libpq-dev \
+   vim \
    libmagick++-dev \
    imagemagick \
    fonts-liberation \

llm.py

@@ -1,234 +0,0 @@
"""High-level helpers for interacting with the Gemini and OpenRouter APIs.
This module encapsulates all of the logic needed to call the LLM endpoints
used throughout the application. It uses the OpenAI Python client under the
hood because both Gemini and OpenRouter expose OpenAI-compatible APIs.
Two functions are exposed:
* ``select_highlights`` takes an SRT-like string (the transcription of a
video) and returns a list of highlight objects with start and end
timestamps and their corresponding text. It uses the Gemini model to
identify which parts of the video are most likely to engage viewers on
social media.
* ``generate_titles`` takes a list of highlight objects and returns a list
of the same objects enriched with a ``topText`` field, which contains a
sensational title for the clip. It uses the OpenRouter API with a model
specified via the ``OPENROUTER_MODEL`` environment variable.
Both functions are resilient to malformed outputs from the models. They try
to extract the first JSON array found in the model responses; if that
fails, a descriptive exception is raised. These exceptions should be
handled by callers to post appropriate error messages back to the queue.
"""
from __future__ import annotations
import json
import os
import re
from typing import Any, Dict, List
import openai
class LLMError(Exception):
"""Raised when the LLM response cannot be parsed into the expected format."""
def _extract_json_array(text: str) -> Any:
"""Extract the first JSON array from a string.
LLMs sometimes return explanatory text before or after the JSON. This
helper uses a regular expression to find the first substring that
resembles a JSON array (i.e. starts with '[' and ends with ']'). It
returns the corresponding Python object if successful, otherwise
raises a ``LLMError``.
"""
# Remove Markdown code fences and other formatting noise
cleaned = text.replace("`", "").replace("json", "")
# Find the first [ ... ] block
match = re.search(r"\[.*\]", cleaned, re.DOTALL)
if not match:
raise LLMError("Não foi possível encontrar um JSON válido na resposta da IA.")
json_str = match.group(0)
try:
return json.loads(json_str)
except json.JSONDecodeError as exc:
raise LLMError(f"Erro ao decodificar JSON: {exc}")
def select_highlights(srt_text: str) -> List[Dict[str, Any]]:
"""Call the Gemini API to select highlight segments from a transcription.
The input ``srt_text`` should be a string containing the transcription
formatted like an SRT file, with lines of the form
``00:00:10,140 --> 00:01:00,990`` followed by the spoken text.
Returns a list of dictionaries, each with ``start``, ``end`` and
``text`` keys. On failure to parse the response, a ``LLMError`` is
raised.
"""
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
raise ValueError("GEMINI_API_KEY não definido no ambiente")
model = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")
# Initialise client for Gemini. The base_url points to the
# generativelanguage API; see the official docs for details.
client = openai.OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
# System prompt: instructs Gemini how to behave.
system_prompt = (
"Você é um assistente especializado em selecionar **HIGHLIGHTS** de vídeo "
"a partir da transcrição com timestamps.\n"
"Sua única função é **selecionar os trechos** conforme solicitado.\n"
"- **Não resuma, não interprete, não gere comentários ou textos complementares.**\n"
"- **Retorne a resposta exatamente no formato proposto pelo usuário**, sem adicionar ou remover nada além do pedido.\n"
"- Cada trecho selecionado deve ter **no mínimo 60 segundos e no máximo 120 segundos** de duração.\n"
"- Sempre responda **em português (PT-BR)**."
)
# Base prompt: describes how to select highlights and the format to return.
base_prompt = (
"Você assumirá o papel de um especialista em Marketing e Social Media, "
"sua tarefa é selecionar as melhores partes de uma transcrição que irei fornecer.\n\n"
"## Critérios de Seleção\n\n"
"- Escolha trechos baseando-se em:\n"
" - **Picos de emoção ou impacto**\n"
" - **Viradas de assunto**\n"
" - **Punchlines** (frases de efeito, momentos de virada)\n"
" - **Informações-chave**\n\n"
"## Regras Rápidas\n\n"
"- Sempre devolver pelo menos 3 trechos, não possui limite máximo\n"
"- Garanta que cada trecho fique com no MÍNIMO 60 segundos e no MÁXIMO 120 segundos.\n"
"- Nenhum outro texto além do JSON final.\n\n"
"## Restrições de Duração\n\n"
"- **Duração mínima do trecho escolhido:** 60 segundos\n"
"- **Duração máxima do trecho escolhido:** 90 a 120 segundos\n\n"
"## Tarefa\n\n"
"- Proponha o **máximo de trechos** com potencial, mas **sempre devolva no mínimo 3 trechos**.\n"
"- Extraia os trechos **apenas** da transcrição fornecida abaixo.\n\n"
"## IMPORTANTE\n"
"- Cada trecho deve ter no mínimo 60 segundos, e no máximo 120 segundos. Isso é indiscutível\n\n"
"## Entrada\n\n"
"- Transcrição:\n\n"
f"{srt_text}\n\n"
"## Saída\n\n"
"- Retorne **somente** a lista de trechos selecionados em formato JSON, conforme o exemplo abaixo.\n"
"- **Não escreva comentários ou qualquer texto extra.**\n"
"- No atributo \"text\", inclua o texto presente no trecho escolhido.\n\n"
"### Exemplo de Conversão\n\n"
"#### De SRT:\n"
"00:00:10,140 --> 00:01:00,990\n"
"Exemplo de escrita presente no trecho\n\n"
"#### Para JSON:\n"
"[\n"
" {\n"
" \"start\": \"00:00:10,140\",\n"
" \"end\": \"00:01:00,990\",\n"
" \"text\": \"Exemplo de escrita presente no trecho\"\n"
" }\n"
"]\n"
)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": base_prompt},
]
try:
response = client.chat.completions.create(model=model, messages=messages)
except Exception as exc:
raise LLMError(f"Erro ao chamar a API Gemini: {exc}")
# Extract message content
content = response.choices[0].message.content if response.choices else None
if not content:
raise LLMError("A resposta da Gemini veio vazia.")
result = _extract_json_array(content)
if not isinstance(result, list):
raise LLMError("O JSON retornado pela Gemini não é uma lista.")
return result
def generate_titles(highlights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Call the OpenRouter API to generate a title (topText) for each highlight.
The ``highlights`` argument should be a list of dictionaries as returned
by ``select_highlights``, each containing ``start``, ``end`` and ``text``.
This function adds a ``topText`` field to each dictionary using the
OpenRouter model specified via the ``OPENROUTER_MODEL`` environment
variable. If parsing fails, an ``LLMError`` is raised.
"""
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
raise ValueError("OPENROUTER_API_KEY não definido no ambiente")
model = os.environ.get("OPENROUTER_MODEL")
if not model:
raise ValueError("OPENROUTER_MODEL não definido no ambiente")
# Create client for OpenRouter
client = openai.OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1")
# Compose prompt: instruct to generate titles only
prompt_header = (
"Você é um especialista em Marketing Digital e Criação de Conteúdo Viral.\n\n"
"Sua tarefa é criar **títulos sensacionalistas** (*topText*) para cada trecho "
"de transcrição recebido em formato JSON.\n\n"
"## Instruções\n\n"
"- O texto deve ser **chamativo, impactante** e com alto potencial de viralização "
"em redes sociais, **mas sem sair do contexto do trecho**.\n"
"- Use expressões fortes e curiosas, mas **nunca palavras de baixo calão**.\n"
"- Cada *topText* deve ter **no máximo 2 linhas**.\n"
"- Utilize **exclusivamente** o conteúdo do trecho; não invente fatos.\n"
"- Não adicione comentários, explicações, ou qualquer texto extra na resposta.\n"
"- Responda **apenas** no seguinte formato (mantendo as chaves e colchetes):\n\n"
"[\n {\n \"start\": \"00:00:10,140\",\n \"end\": \"00:01:00,990\",\n \"topText\": \"Título impactante\"\n }\n]\n\n"
"## Observações:\n\n"
"- Nunca fuja do contexto do trecho.\n"
"- Não invente informações.\n"
"- Não utilize palavrões.\n"
"- Não escreva nada além do JSON de saída.\n\n"
"Aqui estão os trechos em JSON:\n"
)
# Compose input JSON for the model
json_input = json.dumps(highlights, ensure_ascii=False)
full_message = prompt_header + json_input
messages = [
{
"role": "system",
"content": "Você é um assistente útil e objetivo."
},
{
"role": "user",
"content": full_message
},
]
try:
response = client.chat.completions.create(
model=model,
messages=messages,
temperature=0.7,
)
except Exception as exc:
raise LLMError(f"Erro ao chamar a API OpenRouter: {exc}")
content = response.choices[0].message.content if response.choices else None
if not content:
raise LLMError("A resposta da OpenRouter veio vazia.")
result = _extract_json_array(content)
if not isinstance(result, list):
raise LLMError("O JSON retornado pela OpenRouter não é uma lista.")
# Merge topText back into highlights
# We assume the result list has the same order and length as input highlights
enriched: List[Dict[str, Any]] = []
input_map = {(item["start"], item["end"]): item for item in highlights}
for item in result:
key = (item.get("start"), item.get("end"))
original = input_map.get(key)
if original is None:
# If the model returns unexpected entries, skip them
continue
enriched_item = original.copy()
# Only topText is expected
enriched_item["topText"] = item.get("topText", "").strip()
enriched.append(enriched_item)
return enriched

main.py

@@ -1,265 +1,16 @@
"""Entry point for the video processing pipeline. from video_render.config import load_settings
from video_render.logging_utils import setup_logging
This script listens to a RabbitMQ queue for new video processing tasks. When from video_render.messaging import RabbitMQWorker
a message arrives, it performs the following steps: from video_render.pipeline import VideoPipeline
1. Creates a working directory for the video based off of its filename.
2. Extracts the audio track with FFMPEG and runs Faster-Whisper to produce
a transcription with word-level timestamps.
3. Uses the Gemini model to determine which parts of the video have the
highest potential for engagement. These highlight segments are
represented as a list of objects containing start/end timestamps and
text.
4. Uses the OpenRouter model to generate a sensational title for each
highlight. Only the ``topText`` field is kept; the description is
intentionally omitted since the caption will be burned into the video.
5. Cuts the original video into individual clips corresponding to each
highlight and renders them vertically with a title above and a dynamic
caption below.
6. Publishes a message to the upload queue with information about the
generated clips. On success, this message contains the list of output
files. On failure, ``hasError`` will be set to ``True`` and the
``error`` field will describe what went wrong.
7. Cleans up temporary files (audio, transcript, working directory) and
deletes the original source video from the ``videos`` directory to
conserve disk space.
The queue names and RabbitMQ credentials are configured via environment
variables. See the accompanying ``docker-compose.yml`` for defaults.
"""
from __future__ import annotations
import json
import os
import shutil
import time
import traceback
from typing import Any, Dict, List
import pika
from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds
from .transcribe import transcribe
from .llm import LLMError, select_highlights, generate_titles
from .render import render_clip
+def main() -> None:
+    setup_logging()
+    settings = load_settings()
+    pipeline = VideoPipeline(settings)
+    worker = RabbitMQWorker(settings)
+    worker.consume_forever(pipeline.process_message)

# Environment variables with sensible defaults
RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq")
RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672))
RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin")
RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS")
RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render")
RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")

if not RABBITMQ_PASS:
    raise RuntimeError("RABBITMQ_PASS não definido no ambiente")
def get_next_message() -> Any:
"""Retrieve a single message from the RABBITMQ_QUEUE.
Returns ``None`` if no messages are available. This helper opens a new
connection for each call to avoid keeping stale connections alive.
"""
credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
parameters = pika.ConnectionParameters(
host=RABBITMQ_HOST,
port=RABBITMQ_PORT,
credentials=credentials,
heartbeat=60,
blocked_connection_timeout=300,
)
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE)
if method_frame:
channel.basic_ack(method_frame.delivery_tag)
connection.close()
return body
connection.close()
return None
def publish_to_queue(payload: Dict[str, Any]) -> None:
"""Publish a JSON-serialisable payload to the RABBITMQ_UPLOAD_QUEUE."""
credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
parameters = pika.ConnectionParameters(
host=RABBITMQ_HOST,
port=RABBITMQ_PORT,
credentials=credentials,
heartbeat=60,
blocked_connection_timeout=300,
)
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True)
channel.basic_publish(
exchange="",
routing_key=RABBITMQ_UPLOAD_QUEUE,
body=json.dumps(payload),
properties=pika.BasicProperties(delivery_mode=2),
)
connection.close()
def build_srt(segments: List[Dict[str, Any]]) -> str:
"""Build an SRT-like string from a list of segments.
Each segment should have ``start``, ``end`` and ``text`` fields. The
timestamps are converted to the ``HH:MM:SS,mmm`` format expected by
the Gemini prompt. Segments are separated by a blank line.
"""
lines = []
for seg in segments:
start_ts = seconds_to_timestamp(seg["start"])
end_ts = seconds_to_timestamp(seg["end"])
lines.append(f"{start_ts} --> {end_ts}\n{seg['text']}")
return "\n\n".join(lines)
def process_message(data: Dict[str, Any]) -> Dict[str, Any]:
"""Process a single video task described in ``data``.
Returns the payload to be sent to the upload queue. Raises an
exception on failure; the caller is responsible for catching it and
posting an error payload.
"""
filename = data.get("filename")
if not filename:
raise ValueError("Campo 'filename' ausente na mensagem")
url = data.get("url")
video_id = data.get("videoId")
# Determine source video path; n8n stores videos in the 'videos' directory
video_path = os.path.join("videos", filename)
if not os.path.exists(video_path):
raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}")
# Sanitize the filename to use as directory name
base_no_ext = os.path.splitext(filename)[0]
sanitized = sanitize_filename(base_no_ext)
work_dir = os.path.join("app", "videos", sanitized)
# Transcribe video
segments, words = transcribe(video_path, work_dir)
# Build SRT string
srt_str = build_srt(segments)
# Call Gemini to select highlights
highlights = select_highlights(srt_str)
# Convert start/end times to floats and keep original strings for openrouter
for item in highlights:
item["start"] = item["start"].strip()
item["end"] = item["end"].strip()
# Generate titles
titles = generate_titles(highlights)
# Render clips
output_dir = os.path.join("outputs", sanitized)
processed_files: List[str] = []
for idx, item in enumerate(titles, start=1):
start_sec = timestamp_to_seconds(item.get("start"))
end_sec = timestamp_to_seconds(item.get("end"))
# Extract relative words for caption
relative_words = []
for w in words:
# Word must overlap clip interval
if w["end"] <= start_sec or w["start"] >= end_sec:
continue
rel_start = max(0.0, w["start"] - start_sec)
rel_end = max(0.0, w["end"] - start_sec)
relative_words.append({
"start": rel_start,
"end": rel_end,
"word": w["word"],
})
# If no words found (e.g. silence), create a dummy word to avoid errors
if not relative_words:
relative_words.append({"start": 0.0, "end": end_sec - start_sec, "word": ""})
out_path = render_clip(
video_path=video_path,
start=start_sec,
end=end_sec,
top_text=item.get("topText", ""),
words=relative_words,
out_dir=output_dir,
base_name=sanitized,
idx=idx,
)
processed_files.append(out_path)
# Compose payload
payload = {
"videosProcessedQuantity": len(processed_files),
"filename": filename,
"processedFiles": processed_files,
"url": url,
"videoId": video_id,
"hasError": False,
"error": None,
}
# Clean up working directory and original video
shutil.rmtree(work_dir, ignore_errors=True)
try:
os.remove(video_path)
except FileNotFoundError:
pass
return payload
def main():
print(" [*] Esperando mensagens. Para sair: CTRL+C")
while True:
body = get_next_message()
if body is None:
time.sleep(5)
continue
try:
data = json.loads(body)
except Exception:
print("⚠️ Mensagem inválida recebida (não é JSON)")
continue
try:
result = process_message(data)
except Exception as exc:
# Print stack trace for debugging
traceback.print_exc()
# Attempt to clean up any directories based on filename
filename = data.get("filename")
sanitized = sanitize_filename(os.path.splitext(filename or "")[0]) if filename else ""
work_dir = os.path.join("app", "videos", sanitized) if sanitized else None
output_dir = os.path.join("outputs", sanitized) if sanitized else None
# Remove working and output directories
if work_dir:
shutil.rmtree(work_dir, ignore_errors=True)
if output_dir:
shutil.rmtree(output_dir, ignore_errors=True)
# Remove original video if present
video_path = os.path.join("videos", filename) if filename else None
if video_path and os.path.exists(video_path):
try:
os.remove(video_path)
except Exception:
pass
# Build error payload
error_payload = {
"videosProcessedQuantity": 0,
"filename": filename,
"processedFiles": [],
"url": data.get("url"),
"videoId": data.get("videoId"),
"hasError": True,
"error": str(exc),
}
try:
publish_to_queue(error_payload)
print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
except Exception as publish_err:
print(f"Erro ao publicar mensagem de erro: {publish_err}")
continue
# On success publish payload
try:
publish_to_queue(result)
print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
except Exception as publish_err:
print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}")
# Loop continues
if __name__ == "__main__":

prompts/generate.txt Normal file

@@ -0,0 +1,35 @@
Voce e um estrategista de conteudo especializado em identificar cortes curtos de videos longos que performam bem em redes sociais.
FUNCAO:
- Analisar a transcricao completa de um video.
- Escolher trechos curtos (entre 20s e 90s) com maior chance de engajamento.
- Responder APENAS em JSON valido.
FORMATO DA RESPOSTA:
{
"highlights": [
{
"start": <segundos_inicio_float>,
"end": <segundos_fim_float>,
"summary": "Resumo conciso do porque este trecho engaja"
}
]
}
REGRAS:
- Liste no maximo 6 destaques.
- Respeite a ordem cronologica.
- Nunca deixe listas vazias; se nada for relevante, inclua uma entrada com start = 0, end = 0 e summary explicando a ausencia de cortes.
- Utilize apenas valores numericos simples (ponto como separador decimal).
- Nao repita um mesmo trecho.
PERSPECTIVA DE ANALISE:
- Concentre-se em momentos com gatilhos emocionais, insights, storytelling ou chamadas para acao fortes.
- Prefira trechos com comeco, meio e fim claros.
- Evite partes redundantes, silenciosas ou extremamente tecnicas.
TAREFA:
- Leia a transcricao recebida no campo "transcript".
- Use a lista de marcas de tempo detalhadas no campo "segments" para embasar suas escolhas.
- Produza a saida JSON descrita acima.
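
For reference, a sketch of an answer that satisfies the contract above, together with the kind of sanity checks a consumer could run against it (values are illustrative only):

import json

raw = '{"highlights": [{"start": 12.5, "end": 58.0, "summary": "Virada emocional com storytelling forte"}]}'

data = json.loads(raw)
highlights = data["highlights"]
assert 1 <= len(highlights) <= 6                                   # at most 6 highlights, never an empty list
assert all(h["end"] >= h["start"] for h in highlights)             # valid intervals
assert highlights == sorted(highlights, key=lambda h: h["start"])  # chronological order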

render.py

@@ -1,205 +0,0 @@
"""Rendering logic for producing vertical clips with dynamic captions.
This module defines a single function ``render_clip`` which takes a video
segment and produces a vertical clip suitable for social media. Each clip
contains three regions:
* A top region (480px high) showing a title generated by an LLM.
* A middle region (960px high) containing the original video, scaled to
fit horizontally while preserving aspect ratio and centred vertically.
* A bottom region (480px high) showing a dynamic caption. The caption
displays a sliding window of three to five words from the transcript,
colouring the currently spoken word differently to draw the viewer's
attention.
The function uses the MoviePy library to compose the various elements and
writes the resulting video to disk. It returns the path to the created
file.
"""
from __future__ import annotations
import os
from typing import Dict, List
import numpy as np
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.video.VideoClip import ColorClip, VideoClip
from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
from moviepy.video.VideoClip import TextClip
from PIL import Image, ImageDraw, ImageFont
from .utils import wrap_text
def render_clip(
video_path: str,
start: float,
end: float,
top_text: str,
words: List[Dict[str, float]],
out_dir: str,
base_name: str,
idx: int,
# Use a widely available system font by default. DejaVuSans is installed
# in most Debian-based containers. The caller can override this path.
font_path: str = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
final_width: int = 1080,
final_height: int = 1920,
top_h: int = 480,
middle_h: int = 960,
bottom_h: int = 480,
video_codec: str = "libx264",
bitrate: str = "3000k",
) -> str:
"""Render a single clip with title and dynamic caption.
Parameters
----------
video_path: str
Path to the source video file.
start: float
Start time of the clip in seconds.
end: float
End time of the clip in seconds.
top_text: str
The title to display in the top region.
words: List[Dict[str, float]]
List of word-level timestamps for this clip. Each dict must have
``start``, ``end`` and ``word`` keys. The start and end values
should be relative to the beginning of this clip (i.e. start at 0).
out_dir: str
Directory where the output file should be saved. The function
creates this directory if it doesn't exist.
base_name: str
Base name of the original video (sanitized). Used to build the
output filename.
idx: int
Index of the clip. Output will be named ``clip_{idx}.mp4``.
font_path: str
Path to the TrueType font to use for both title and caption.
final_width: int
Width of the final video in pixels.
final_height: int
Height of the final video in pixels.
top_h: int
Height of the title area in pixels.
middle_h: int
Height of the video area in pixels.
bottom_h: int
Height of the caption area in pixels.
video_codec: str
FFmpeg codec to use when writing the video.
bitrate: str
Bitrate for the output video.
Returns
-------
str
The path to the rendered video file.
"""
os.makedirs(out_dir, exist_ok=True)
# Extract the segment from the source video
with VideoFileClip(video_path) as clip:
segment = clip.subclip(start, end)
dur = segment.duration
# Background
bg = ColorClip(size=(final_width, final_height), color=(0, 0, 0), duration=dur)
# Resize video to fit width
video_resized = segment.resize(width=final_width)
# Compute vertical position to centre in the middle region
y = top_h + (middle_h - video_resized.h) // 2
video_resized = video_resized.set_position((0, y))
# Build title clip
# Wrap the title to avoid overflow
wrapped_lines = wrap_text(top_text, max_chars=40)
wrapped_title = "\n".join(wrapped_lines)
title_clip = TextClip(
wrapped_title,
font=font_path,
fontsize=70,
color="white",
method="caption",
size=(final_width, top_h),
align="center",
).set_duration(dur).set_position((0, 0))
# Prepare font for caption rendering
pil_font = ImageFont.truetype(font_path, size=60)
default_color = (255, 255, 255) # white
highlight_color = (255, 215, 0) # gold-like yellow
# Precompute widths of a space and bounding box height for vertical centering
space_width = pil_font.getbbox(" ")[2] - pil_font.getbbox(" ")[0]
bbox = pil_font.getbbox("A")
text_height = bbox[3] - bbox[1]
def make_caption_frame(t: float):
"""Generate an image for the caption at time t."""
# Determine current word index
idx_cur = 0
for i, w in enumerate(words):
if w["start"] <= t < w["end"]:
idx_cur = i
break
if t >= w["end"]:
idx_cur = i
# Define window of words to display: show up to 5 words
start_idx = max(0, idx_cur - 2)
end_idx = min(len(words), idx_cur + 3)
window = words[start_idx:end_idx]
# Compute widths for each word
word_sizes = []
for w in window:
bbox = pil_font.getbbox(w["word"])
word_width = bbox[2] - bbox[0]
word_sizes.append(word_width)
total_width = sum(word_sizes) + space_width * (len(window) - 1 if window else 0)
# Create blank image for caption area
img = Image.new("RGB", (final_width, bottom_h), color=(0, 0, 0))
draw = ImageDraw.Draw(img)
x = int((final_width - total_width) / 2)
y_pos = int((bottom_h - text_height) / 2)
for j, w in enumerate(window):
color = highlight_color if (start_idx + j) == idx_cur else default_color
draw.text((x, y_pos), w["word"], font=pil_font, fill=color)
x += word_sizes[j] + space_width
return np.array(img)
caption_clip = VideoClip(make_frame=make_caption_frame, duration=dur)
caption_clip = caption_clip.set_position((0, final_height - bottom_h))
# Compose final clip
final = CompositeVideoClip([
bg,
video_resized,
title_clip,
caption_clip,
], size=(final_width, final_height))
# Use the original audio from the video segment
final_audio = segment.audio
if final_audio is not None:
final = final.set_audio(final_audio)
# Define output path
out_path = os.path.join(out_dir, f"clip_{idx}.mp4")
# Write to disk
final.write_videofile(
out_path,
codec=video_codec,
fps=30,
bitrate=bitrate,
audio_codec="aac",
preset="ultrafast",
ffmpeg_params=[
"-tune", "zerolatency",
"-pix_fmt", "yuv420p",
"-profile:v", "high",
"-level", "4.1",
],
threads=4,
)
# Close clips to free resources
final.close()
segment.close()
return out_path
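
A minimal sketch of how the removed render_clip was invoked; the import path, file paths and word timings below are illustrative only:

from render import render_clip

words = [
    {"start": 0.0, "end": 0.4, "word": "Exemplo"},
    {"start": 0.4, "end": 0.9, "word": "de"},
    {"start": 0.9, "end": 1.5, "word": "legenda"},
]
out_path = render_clip(
    video_path="videos/podcast.mp4",
    start=10.14,
    end=75.0,
    top_text="Título impactante",
    words=words,                 # timestamps relative to the clip start
    out_dir="outputs/podcast",
    base_name="podcast",
    idx=1,
)
print(out_path)                  # outputs/podcast/clip_1.mp4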

requirements.txt

@@ -1,7 +1,6 @@
-pika==1.3.2
-moviepy==2.0.0
-faster-whisper==1.2.0
-openai==1.16.0
-numpy==1.26.4
-Pillow==10.1.0
-unidecode==1.3.6
+moviepy==2.2.0
+pillow==10.3.0
+numpy>=1.26.0
+requests>=2.31.0
+pika>=1.3.2
+faster-whisper==1.0.0

transcribe.py

@@ -1,111 +0,0 @@
"""Utilities for extracting audio from video and generating transcriptions.
This module handles two tasks:
1. Use FFMPEG to extract the audio track from a video file into a WAV file
suitable for consumption by the Whisper model. The audio is resampled to
16 kHz mono PCM as required by Whisper.
2. Use the Faster-Whisper implementation to generate a transcription with
word-level timestamps. The transcription is returned both as a list of
segments (for building an SRT) and as a flattened list of words (for
building dynamic subtitles).
If FFMPEG is not installed or fails, a ``RuntimeError`` is raised. The caller
is responsible for cleaning up the temporary files created in the working
directory.
"""
from __future__ import annotations
import os
import subprocess
from typing import Dict, List, Tuple
from faster_whisper import WhisperModel
def extract_audio_ffmpeg(video_path: str, audio_path: str) -> None:
"""Use FFMPEG to extract audio from ``video_path`` into ``audio_path``.
The output will be a 16 kHz mono WAV file in PCM S16LE format. Any
existing file at ``audio_path`` will be overwritten. If ffmpeg returns
a non-zero exit code, a ``RuntimeError`` is raised with the stderr.
"""
cmd = [
"ffmpeg",
"-y", # overwrite output
"-i",
video_path,
"-vn", # disable video recording
"-acodec",
"pcm_s16le",
"-ar",
"16000",
"-ac",
"1",
audio_path,
]
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if proc.returncode != 0:
raise RuntimeError(f"FFMPEG error: {proc.stderr.decode(errors='ignore')}")
def load_whisper_model() -> WhisperModel:
"""Instantiate and cache a Faster-Whisper model.
The model name and device can be configured via the ``WHISPER_MODEL`` and
``WHISPER_DEVICE`` environment variables. The default model is
``large-v3`` for best accuracy. The device can be ``cuda`` or ``cpu``.
A module-level cache is used to prevent loading the model multiple times.
"""
if hasattr(load_whisper_model, "_cache"):
return load_whisper_model._cache # type: ignore[attr-defined]
model_name = os.environ.get("WHISPER_MODEL", "large-v3")
device = os.environ.get("WHISPER_DEVICE", "cpu")
# Compute type can be set via WHISPER_COMPUTE_TYPE; default to float16 on GPU
compute_type = os.environ.get("WHISPER_COMPUTE_TYPE")
# If not explicitly set, choose sensible defaults
if compute_type is None:
compute_type = "float16" if device == "cuda" else "int8"
model = WhisperModel(model_name, device=device, compute_type=compute_type)
load_whisper_model._cache = model # type: ignore[attr-defined]
return model
def transcribe(video_path: str, work_dir: str) -> Tuple[List[Dict[str, float]], List[Dict[str, float]]]:
"""Transcribe a video file using Faster-Whisper.
``video_path`` is the path to the video to transcribe. ``work_dir`` is a
directory where temporary files will be stored (audio file and
transcription). The function returns a tuple ``(segments, words)`` where
``segments`` is a list of dictionaries with ``start``, ``end`` and
``text`` fields, and ``words`` is a flat list of dictionaries with
``start``, ``end`` and ``word`` fields covering the entire video.
The timestamps are expressed in seconds as floats.
"""
os.makedirs(work_dir, exist_ok=True)
audio_path = os.path.join(work_dir, "audio.wav")
# Extract audio
extract_audio_ffmpeg(video_path, audio_path)
# Load Whisper model
model = load_whisper_model()
# Run transcription with word-level timestamps
segments, info = model.transcribe(audio_path, word_timestamps=True)
seg_list: List[Dict[str, float]] = []
words_list: List[Dict[str, float]] = []
for seg in segments:
seg_list.append({
"start": float(seg.start),
"end": float(seg.end),
"text": seg.text.strip(),
})
# Each segment may contain words attribute
for w in getattr(seg, "words", []) or []:
words_list.append({
"start": float(w.start),
"end": float(w.end),
"word": w.word,
})
# Sort words by start time to be safe
words_list.sort(key=lambda d: d["start"])
return seg_list, words_list
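
A short usage sketch for the removed transcribe helper (import path and file locations assumed):

from transcribe import transcribe

segments, words = transcribe("videos/podcast.mp4", "app/videos/podcast")
for seg in segments[:3]:
    print(f"{seg['start']:.2f}-{seg['end']:.2f}: {seg['text']}")
print(len(words), "word-level timestamps")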

utils.py

@@ -1,93 +0,0 @@
import re
import unicodedata
from typing import List, Tuple
def sanitize_filename(name: str) -> str:
"""Return a sanitized version of a filename.
This helper removes accents, converts to lowercase, replaces spaces
with underscores and removes any non alphanumeric characters except
underscores and dots. This makes the directory names safe to use on
most filesystems and matches the behaviour described in the spec.
"""
if not name:
return ""
# Decompose Unicode characters and strip accents
nfkd_form = unicodedata.normalize("NFKD", name)
no_accents = "".join(c for c in nfkd_form if not unicodedata.combining(c))
# Replace spaces with underscores
no_spaces = no_accents.replace(" ", "_")
# Lowercase and remove any character that is not a letter, digit, dot or underscore
sanitized = re.sub(r"[^A-Za-z0-9_.]+", "", no_spaces)
return sanitized
def timestamp_to_seconds(ts: str) -> float:
"""Convert a timestamp in HH:MM:SS,mmm format to seconds.
The Gemini and OpenRouter prompts use timestamps formatted with a comma
as the decimal separator. This helper splits the string into hours,
minutes and seconds and returns a float expressed in seconds.
"""
if ts is None:
return 0.0
ts = ts.strip()
if not ts:
return 0.0
# Replace comma by dot for decimal seconds
ts = ts.replace(",", ".")
parts = ts.split(":")
parts = [float(p) for p in parts]
if len(parts) == 3:
h, m, s = parts
return h * 3600 + m * 60 + s
elif len(parts) == 2:
m, s = parts
return m * 60 + s
else:
# only seconds
return parts[0]
def seconds_to_timestamp(seconds: float) -> str:
"""Convert a time in seconds to HH:MM:SS,mmm format expected by SRT."""
if seconds < 0:
seconds = 0
h = int(seconds // 3600)
m = int((seconds % 3600) // 60)
s = seconds % 60
# Format with comma as decimal separator and three decimal places
return f"{h:02d}:{m:02d}:{s:06.3f}".replace(".", ",")
def wrap_text(text: str, max_chars: int = 80) -> List[str]:
"""Simple word-wrap for a string.
Splits ``text`` into a list of lines, each at most ``max_chars``
characters long. This does not attempt to hyphenate words; a word
longer than ``max_chars`` will occupy its own line. The return value
is a list of lines without trailing whitespace.
"""
if not text:
return []
words = text.split()
lines: List[str] = []
current: List[str] = []
current_len = 0
for word in words:
# If adding this word would exceed the max, flush current line
if current and current_len + 1 + len(word) > max_chars:
lines.append(" ".join(current))
current = [word]
current_len = len(word)
else:
# Add to current line
if current:
current_len += 1 + len(word)
else:
current_len = len(word)
current.append(word)
if current:
lines.append(" ".join(current))
return lines
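
A few worked examples of the removed helpers (import path assumed):

from utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds, wrap_text

print(sanitize_filename("Meu Vídeo Épico.mp4"))         # Meu_Video_Epico.mp4
print(timestamp_to_seconds("00:01:30,500"))             # 90.5
print(seconds_to_timestamp(90.5))                       # 00:01:30,500
print(wrap_text("um dois tres quatro", max_chars=10))   # ['um dois', 'tres', 'quatro']

Note that, despite what the docstring says, this implementation never lowercases the sanitized name.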

video_render/__init__.py Normal file

@@ -0,0 +1,4 @@
"""
Core package for the revamped video rendering pipeline.
"""

11 binary files changed (contents not shown).

video_render/config.py Normal file

@@ -0,0 +1,103 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
BASE_DIR = Path(__file__).resolve().parent.parent
VIDEOS_ROOT = BASE_DIR / "videos"
OUTPUTS_ROOT = BASE_DIR / "outputs"
TEMP_ROOT = BASE_DIR / "temp"
@dataclass(frozen=True)
class RabbitMQSettings:
host: str = os.environ.get("RABBITMQ_HOST", "rabbitmq")
port: int = int(os.environ.get("RABBITMQ_PORT", 5672))
user: str = os.environ.get("RABBITMQ_USER", "admin")
password: str = os.environ.get("RABBITMQ_PASS", "")
consume_queue: str = os.environ.get("RABBITMQ_QUEUE", "to-render")
publish_queue: str = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")
prefetch_count: int = int(os.environ.get("RABBITMQ_PREFETCH", 1))
heartbeat: int = int(os.environ.get("RABBITMQ_HEARTBEAT", 60))
blocked_timeout: int = int(os.environ.get("RABBITMQ_BLOCKED_TIMEOUT", 300))
@dataclass(frozen=True)
class GeminiSettings:
api_key: str = os.environ.get("GEMINI_API_KEY", "")
model: str = os.environ.get("GEMINI_MODEL", "gemini-1.5-pro-latest")
safety_settings: str | None = os.environ.get("GEMINI_SAFETY_SETTINGS")
temperature: float = float(os.environ.get("GEMINI_TEMPERATURE", 0.2))
top_k: int | None = (
int(os.environ["GEMINI_TOP_K"]) if os.environ.get("GEMINI_TOP_K") else None
)
top_p: float | None = (
float(os.environ["GEMINI_TOP_P"]) if os.environ.get("GEMINI_TOP_P") else None
)
prompt_path: str = os.environ.get("GEMINI_PROMPT_PATH", "prompts/generate.txt")
@dataclass(frozen=True)
class OpenRouterSettings:
api_key: str = os.environ.get("OPENROUTER_API_KEY", "")
model: str = os.environ.get(
"OPENROUTER_MODEL", "anthropic/claude-3-haiku:beta"
)
temperature: float = float(os.environ.get("OPENROUTER_TEMPERATURE", 0.6))
max_output_tokens: int = int(os.environ.get("OPENROUTER_MAX_OUTPUT_TOKENS", 256))
@dataclass(frozen=True)
class WhisperSettings:
model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "medium")
device: str | None = os.environ.get("FASTER_WHISPER_DEVICE")
compute_type: str | None = os.environ.get("FASTER_WHISPER_COMPUTE_TYPE")
download_root: Path = Path(
os.environ.get("FASTER_WHISPER_DOWNLOAD_ROOT", str(BASE_DIR / ".whisper"))
)
@dataclass(frozen=True)
class RenderingSettings:
frame_width: int = int(os.environ.get("RENDER_WIDTH", 1080))
frame_height: int = int(os.environ.get("RENDER_HEIGHT", 1920))
fps: int = int(os.environ.get("RENDER_FPS", 30))
video_codec: str = os.environ.get("RENDER_CODEC", "libx264")
audio_codec: str = os.environ.get("RENDER_AUDIO_CODEC", "aac")
bitrate: str = os.environ.get("RENDER_BITRATE", "5000k")
preset: str = os.environ.get("RENDER_PRESET", "faster")
highlight_color: str = os.environ.get("SUBTITLE_HIGHLIGHT_COLOR", "#FFD200")
base_color: str = os.environ.get("SUBTITLE_BASE_COLOR", "#FFFFFF")
font_path: Path = Path(os.environ.get("RENDER_FONT_PATH", "./Montserrat.ttf"))
title_font_size: int = int(os.environ.get("RENDER_TITLE_FONT_SIZE", 110))
subtitle_font_size: int = int(os.environ.get("RENDER_SUBTITLE_FONT_SIZE", 64))
caption_min_words: int = int(os.environ.get("CAPTION_MIN_WORDS", 3))
caption_max_words: int = int(os.environ.get("CAPTION_MAX_WORDS", 4))
@dataclass(frozen=True)
class Settings:
rabbitmq: RabbitMQSettings = RabbitMQSettings()
gemini: GeminiSettings = GeminiSettings()
openrouter: OpenRouterSettings = OpenRouterSettings()
whisper: WhisperSettings = WhisperSettings()
rendering: RenderingSettings = RenderingSettings()
videos_dir: Path = VIDEOS_ROOT
outputs_dir: Path = OUTPUTS_ROOT
temp_dir: Path = TEMP_ROOT
def load_settings() -> Settings:
settings = Settings()
if not settings.rabbitmq.password:
raise RuntimeError("RABBITMQ_PASS must be provided")
settings.videos_dir.mkdir(parents=True, exist_ok=True)
settings.outputs_dir.mkdir(parents=True, exist_ok=True)
settings.temp_dir.mkdir(parents=True, exist_ok=True)
return settings
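
A minimal sketch of how these settings are consumed. Because the dataclass defaults read the environment when the module is imported, variables such as RABBITMQ_PASS must be exported before the import (the value below is a placeholder):

import os

os.environ.setdefault("RABBITMQ_PASS", "example-password")  # placeholder, must precede the import

from video_render.config import load_settings

settings = load_settings()   # also creates videos/, outputs/ and temp/ if missing
print(settings.rabbitmq.consume_queue)                                   # to-render (default)
print(settings.rendering.frame_width, settings.rendering.frame_height)   # 1080 1920 (defaults)
print(settings.whisper.model_size)                                       # medium unless FASTER_WHISPER_MODEL_SIZE is set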

video_render/ffmpeg.py Normal file

@@ -0,0 +1,54 @@
from __future__ import annotations
import logging
import shlex
import subprocess
from pathlib import Path
from typing import Sequence
logger = logging.getLogger(__name__)
def _run_ffmpeg(args: Sequence[str]) -> None:
cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", *args]
logger.debug("Executando ffmpeg: %s", " ".join(shlex.quote(part) for part in cmd))
completed = subprocess.run(cmd, check=False)
if completed.returncode != 0:
raise RuntimeError(f"ffmpeg falhou com exit code {completed.returncode}")
def extract_audio_to_wav(input_video: Path, output_wav: Path) -> Path:
_run_ffmpeg(
[
"-y",
"-i",
str(input_video),
"-ac",
"1",
"-ar",
"16000",
"-vn",
str(output_wav),
]
)
return output_wav
def create_video_segment(input_video: Path, start: float, end: float, output_path: Path) -> Path:
duration = max(0.01, end - start)
_run_ffmpeg(
[
"-y",
"-i",
str(input_video),
"-ss",
f"{start:.3f}",
"-t",
f"{duration:.3f}",
"-c",
"copy",
str(output_path),
]
)
return output_path
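
A usage sketch for these wrappers; the paths are illustrative, and note that create_video_segment uses stream copy (-c copy), so cut points snap to keyframes:

from pathlib import Path

from video_render.ffmpeg import create_video_segment, extract_audio_to_wav

work = Path("temp/example")
work.mkdir(parents=True, exist_ok=True)

source = Path("videos/podcast.mp4")                      # assumed to exist
wav = extract_audio_to_wav(source, work / "audio.wav")   # 16 kHz mono WAV for Whisper
clip = create_video_segment(source, 12.5, 58.0, work / "clip_1.mp4")
print(wav, clip)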

video_render/llm.py Normal file

@@ -0,0 +1,187 @@
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Dict, List
import requests
from .config import BASE_DIR, Settings
from .transcription import TranscriptionResult
logger = logging.getLogger(__name__)
GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"
class GeminiHighlighter:
def __init__(self, settings: Settings) -> None:
if not settings.gemini.api_key:
raise RuntimeError("GEMINI_API_KEY nao foi definido")
prompt_path = Path(settings.gemini.prompt_path)
if not prompt_path.is_absolute():
prompt_path = BASE_DIR / prompt_path
if not prompt_path.exists():
raise FileNotFoundError(f"Prompt do Gemini nao encontrado: {prompt_path}")
self.prompt_template = prompt_path.read_text(encoding="utf-8")
self.settings = settings
def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
payload = {
"transcript": transcription.full_text,
"segments": [
{
"start": segment.start,
"end": segment.end,
"text": segment.text,
}
for segment in transcription.segments
],
}
body = {
"contents": [
{
"role": "user",
"parts": [
{"text": self.prompt_template},
{"text": json.dumps(payload, ensure_ascii=False)},
],
}
]
}
if self.settings.gemini.temperature is not None:
body["generationConfig"] = {
"temperature": self.settings.gemini.temperature,
}
if self.settings.gemini.top_p is not None:
body["generationConfig"]["topP"] = self.settings.gemini.top_p
if self.settings.gemini.top_k is not None:
body["generationConfig"]["topK"] = self.settings.gemini.top_k
url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model)
params = {"key": self.settings.gemini.api_key}
response = requests.post(url, params=params, json=body, timeout=120)
response.raise_for_status()
data = response.json()
candidates = data.get("candidates") or []
if not candidates:
raise RuntimeError("Gemini nao retornou candidatos")
text_parts = candidates[0].get("content", {}).get("parts", [])
if not text_parts:
raise RuntimeError("Resposta do Gemini sem conteudo")
raw_text = text_parts[0].get("text")
if not raw_text:
raise RuntimeError("Resposta do Gemini sem texto")
parsed = self._extract_json(raw_text)
highlights = parsed.get("highlights")
if not isinstance(highlights, list):
raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente")
return highlights
@staticmethod
def _extract_json(response_text: str) -> Dict:
try:
return json.loads(response_text)
except json.JSONDecodeError:
start = response_text.find("{")
end = response_text.rfind("}")
if start == -1 or end == -1:
raise
subset = response_text[start : end + 1]
return json.loads(subset)
class OpenRouterCopywriter:
def __init__(self, settings: Settings) -> None:
if not settings.openrouter.api_key:
raise RuntimeError("OPENROUTER_API_KEY nao foi definido")
self.settings = settings
def generate_titles(self, highlights: List[Dict]) -> List[str]:
if not highlights:
return []
prompt = (
"Voce e um copywriter especializado em titulos curtos e virais para reels.\n"
"Recebera uma lista de trechos destacados de um video com resumo e tempo.\n"
"Produza um titulo envolvente (ate 60 caracteres) para cada item.\n"
"Responda apenas em JSON com a seguinte estrutura:\n"
'{"titles": ["titulo 1", "titulo 2"]}\n'
"Titulos devem ser em portugues, usar verbos fortes e refletir o resumo."
)
user_payload = {
"highlights": [
{
"start": item.get("start"),
"end": item.get("end"),
"summary": item.get("summary"),
}
for item in highlights
]
}
body = {
"model": self.settings.openrouter.model,
"temperature": self.settings.openrouter.temperature,
"max_tokens": self.settings.openrouter.max_output_tokens,
"messages": [
{"role": "system", "content": prompt},
{
"role": "user",
"content": json.dumps(user_payload, ensure_ascii=False),
},
],
}
headers = {
"Authorization": f"Bearer {self.settings.openrouter.api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "https://localhost",
"X-Title": "video-render-pipeline",
}
response = requests.post(
OPENROUTER_ENDPOINT, json=body, headers=headers, timeout=120
)
response.raise_for_status()
data = response.json()
choices = data.get("choices") or []
if not choices:
raise RuntimeError("OpenRouter nao retornou escolhas")
message = choices[0].get("message", {}).get("content")
if not message:
raise RuntimeError("Resposta do OpenRouter sem conteudo")
parsed = self._extract_json(message)
titles = parsed.get("titles")
if not isinstance(titles, list):
raise ValueError("Resposta do OpenRouter invalida: campo 'titles'")
return [str(title) for title in titles]
@staticmethod
def _extract_json(response_text: str) -> Dict:
try:
return json.loads(response_text)
except json.JSONDecodeError:
start = response_text.find("{")
end = response_text.rfind("}")
if start == -1 or end == -1:
raise
subset = response_text[start : end + 1]
return json.loads(subset)
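
The _extract_json fallback is what keeps chatty model replies usable; a small sketch of that behaviour, calling the static helper directly purely for illustration:

from video_render.llm import GeminiHighlighter

messy_reply = (
    "Claro! Segue o resultado:\n"
    '{"highlights": [{"start": 1.0, "end": 65.0, "summary": "Insight sobre carreira"}]}\n'
    "Espero ter ajudado."
)
parsed = GeminiHighlighter._extract_json(messy_reply)   # strips the surrounding prose
print(parsed["highlights"][0]["summary"])               # Insight sobre carreira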

video_render/logging_utils.py Normal file

@@ -0,0 +1,13 @@
from __future__ import annotations
import logging
import os
def setup_logging() -> None:
log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
level=log_level,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

video_render/media.py Normal file

@@ -0,0 +1,64 @@
from __future__ import annotations
import logging
import shutil
from dataclasses import dataclass
from pathlib import Path
from .config import Settings
from .ffmpeg import extract_audio_to_wav
from .utils import ensure_workspace, remove_paths, sanitize_filename
logger = logging.getLogger(__name__)
@dataclass
class VideoWorkspace:
original_filename: str
sanitized_name: str
workspace_dir: Path
output_dir: Path
source_path: Path
working_video_path: Path
audio_path: Path
class MediaPreparer:
def __init__(self, settings: Settings) -> None:
self.settings = settings
def prepare(self, filename: str) -> VideoWorkspace:
source_path = self.settings.videos_dir / filename
if not source_path.exists():
raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {source_path}")
sanitized_name = sanitize_filename(Path(filename).stem)
workspace_dir = ensure_workspace(self.settings.videos_dir, sanitized_name)
existing_children = list(workspace_dir.iterdir())
if existing_children:
logger.info("Limpando workspace existente para %s", sanitized_name)
remove_paths(existing_children)
destination_name = f"{sanitized_name}{source_path.suffix.lower()}"
working_video_path = workspace_dir / destination_name
shutil.copy2(source_path, working_video_path)
logger.info("Cópia do vídeo criada em %s", working_video_path)
output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name)
existing_outputs = list(output_dir.iterdir())
if existing_outputs:
remove_paths(existing_outputs)
audio_path = workspace_dir / "audio.wav"
extract_audio_to_wav(working_video_path, audio_path)
return VideoWorkspace(
original_filename=filename,
sanitized_name=sanitized_name,
workspace_dir=workspace_dir,
output_dir=output_dir,
source_path=source_path,
working_video_path=working_video_path,
audio_path=audio_path,
)
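
A sketch of how MediaPreparer is typically driven. The password is a placeholder, podcast.mp4 is assumed to sit in the videos/ directory, and ensure_workspace is assumed to create <videos_dir>/<sanitized_name> as the code above implies:

import os

os.environ.setdefault("RABBITMQ_PASS", "example-password")  # required before the config import

from video_render.config import load_settings
from video_render.media import MediaPreparer

preparer = MediaPreparer(load_settings())
workspace = preparer.prepare("podcast.mp4")
print(workspace.working_video_path)   # copy of the source inside the workspace
print(workspace.audio_path)           # <workspace>/audio.wav, extracted via ffmpeg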

video_render/messaging.py Normal file

@@ -0,0 +1,85 @@
from __future__ import annotations
import json
import logging
from typing import Any, Callable, Dict
import pika
from .config import Settings
logger = logging.getLogger(__name__)
MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]
class RabbitMQWorker:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self._params = pika.ConnectionParameters(
host=settings.rabbitmq.host,
port=settings.rabbitmq.port,
credentials=pika.PlainCredentials(
settings.rabbitmq.user, settings.rabbitmq.password
),
heartbeat=settings.rabbitmq.heartbeat,
blocked_connection_timeout=settings.rabbitmq.blocked_timeout,
)
def consume_forever(self, handler: MessageHandler) -> None:
while True:
try:
with pika.BlockingConnection(self._params) as connection:
channel = connection.channel()
channel.queue_declare(queue=self.settings.rabbitmq.consume_queue, durable=True)
channel.queue_declare(queue=self.settings.rabbitmq.publish_queue, durable=True)
channel.basic_qos(prefetch_count=self.settings.rabbitmq.prefetch_count)
def _on_message(ch: pika.adapters.blocking_connection.BlockingChannel, method, properties, body):
try:
message = json.loads(body)
except json.JSONDecodeError:
logger.error("Mensagem inválida recebida: %s", body)
ch.basic_ack(delivery_tag=method.delivery_tag)
return
logger.info("Mensagem recebida: %s", message.get("filename", "<sem_nome>"))
try:
response = handler(message)
except Exception:
logger.exception("Erro não tratado durante o processamento")
response = {
"hasError": True,
"error": "Erro não tratado no pipeline",
"filename": message.get("filename"),
"videoId": message.get("videoId"),
"url": message.get("url"),
"processedFiles": [],
}
try:
payload = json.dumps(response)
ch.basic_publish(
exchange="",
routing_key=self.settings.rabbitmq.publish_queue,
body=payload,
properties=pika.BasicProperties(delivery_mode=2),
)
logger.info("Resposta publicada para '%s'", self.settings.rabbitmq.publish_queue)
except Exception:
logger.exception("Falha ao publicar a resposta na fila de upload")
finally:
ch.basic_ack(delivery_tag=method.delivery_tag)
channel.basic_consume(
queue=self.settings.rabbitmq.consume_queue,
on_message_callback=_on_message,
auto_ack=False,
)
logger.info("Consumidor iniciado. Aguardando mensagens...")
channel.start_consuming()
except pika.exceptions.AMQPConnectionError:
logger.exception("Conexão com RabbitMQ perdida. Tentando reconectar...")
except KeyboardInterrupt:
logger.info("Encerrando consumidor por interrupção do usuário.")
break
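
The worker only needs a callable that maps an incoming message dict to a response dict; a minimal sketch with a placeholder password and a trivial handler:

import os

os.environ.setdefault("RABBITMQ_PASS", "example-password")  # must be set before the config import

from video_render.config import load_settings
from video_render.messaging import RabbitMQWorker


def handler(message: dict) -> dict:
    # Whatever is returned here is published as JSON to the upload queue.
    return {"hasError": False, "filename": message.get("filename"), "processedFiles": []}


worker = RabbitMQWorker(load_settings())
worker.consume_forever(handler)   # blocks; reconnects on AMQP errors, stops on Ctrl+C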

video_render/pipeline.py Normal file

@@ -0,0 +1,236 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from .config import Settings
from .llm import GeminiHighlighter, OpenRouterCopywriter
from .media import MediaPreparer, VideoWorkspace
from .transcription import TranscriptionResult, TranscriptionService
from .utils import remove_paths, sanitize_filename
from .rendering import VideoRenderer
logger = logging.getLogger(__name__)
@dataclass
class JobMessage:
filename: str
url: Optional[str]
video_id: Optional[str]
extras: Dict[str, Any] = field(default_factory=dict)
@dataclass
class HighlightWindow:
start: float
end: float
summary: str
title: Optional[str] = None
@dataclass
class RenderedClip:
path: Path
start: float
end: float
title: str
summary: str
index: int
@dataclass
class PipelineContext:
job: JobMessage
workspace: Optional[VideoWorkspace] = None
transcription: Optional[TranscriptionResult] = None
highlight_windows: List[HighlightWindow] = field(default_factory=list)
rendered_clips: List[RenderedClip] = field(default_factory=list)
class VideoPipeline:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self.media_preparer = MediaPreparer(settings)
self.transcriber = TranscriptionService(settings)
self.highlighter = GeminiHighlighter(settings)
self.copywriter = OpenRouterCopywriter(settings)
self.renderer = VideoRenderer(settings)
def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
context = PipelineContext(job=self._parse_job(message))
try:
self._prepare_workspace(context)
self._generate_transcription(context)
self._determine_highlights(context)
self._generate_titles(context)
self._render_clips(context)
return self._build_success_payload(context)
except Exception as exc:
logger.exception("Falha ao processar vídeo %s", context.job.filename)
return self._handle_failure(context, exc)
def _parse_job(self, message: Dict[str, Any]) -> JobMessage:
filename = message.get("filename")
if not filename:
raise ValueError("Mensagem inválida: 'filename' é obrigatório")
url = message.get("url")
video_id = message.get("videoId") or message.get("video_id")
extras = {
key: value
for key, value in message.items()
if key not in {"filename", "url", "videoId", "video_id"}
}
return JobMessage(filename=filename, url=url, video_id=video_id, extras=extras)
def _prepare_workspace(self, context: PipelineContext) -> None:
context.workspace = self.media_preparer.prepare(context.job.filename)
def _generate_transcription(self, context: PipelineContext) -> None:
if not context.workspace:
raise RuntimeError("Workspace não preparado")
transcription = self.transcriber.transcribe(context.workspace.audio_path)
TranscriptionService.persist(transcription, context.workspace.workspace_dir)
context.transcription = transcription
def _determine_highlights(self, context: PipelineContext) -> None:
if not context.transcription:
raise RuntimeError("Transcricao nao disponivel")
highlights_raw = self.highlighter.generate_highlights(context.transcription)
windows: List[HighlightWindow] = []
for item in highlights_raw:
try:
start = float(item.get("start", 0)) # type: ignore[arg-type]
end = float(item.get("end", start)) # type: ignore[arg-type]
except (TypeError, ValueError):
logger.warning("Highlight invalido ignorado: %s", item)
continue
summary = str(item.get("summary", "")).strip()
if end <= start:
logger.debug("Highlight com intervalo invalido ignorado: %s", item)
continue
windows.append(HighlightWindow(start=start, end=end, summary=summary))
if not windows:
last_end = (
context.transcription.segments[-1].end
if context.transcription.segments
else 0
)
windows.append(
HighlightWindow(
start=0.0,
end=max(last_end, 10.0),
summary="Sem destaque identificado; fallback automatico.",
)
)
context.highlight_windows = windows
def _generate_titles(self, context: PipelineContext) -> None:
if not context.highlight_windows:
return
highlight_dicts = [
{"start": window.start, "end": window.end, "summary": window.summary}
for window in context.highlight_windows
]
titles = self.copywriter.generate_titles(highlight_dicts)
for window, title in zip(context.highlight_windows, titles):
window.title = title.strip()
def _render_clips(self, context: PipelineContext) -> None:
if not context.workspace or not context.highlight_windows or not context.transcription:
return
titles = [
window.title or window.summary for window in context.highlight_windows
]
render_results = self.renderer.render(
workspace_path=str(context.workspace.working_video_path),
highlight_windows=context.highlight_windows,
transcription=context.transcription,
titles=titles,
output_dir=context.workspace.output_dir,
)
context.rendered_clips = [
RenderedClip(
path=Path(path),
start=start,
end=end,
title=title,
summary=summary,
index=index,
)
for path, start, end, title, summary, index in render_results
]
def _build_success_payload(self, context: PipelineContext) -> Dict[str, Any]:
return {
"hasError": False,
"videosProcessedQuantity": len(context.rendered_clips),
"filename": context.job.filename,
"videoId": context.job.video_id,
"url": context.job.url,
"workspaceFolder": context.workspace.sanitized_name if context.workspace else None,
"outputDirectory": self._relative_path(context.workspace.output_dir) if context.workspace else None,
"processedFiles": [
{
"path": self._relative_path(clip.path),
"start": clip.start,
"end": clip.end,
"title": clip.title,
"summary": clip.summary,
"clipIndex": clip.index,
}
for clip in context.rendered_clips
],
}
def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]:
logger.error("Erro no pipeline: %s", exc)
cleanup_targets: List[Path] = []
if context.workspace:
cleanup_targets.append(context.workspace.workspace_dir)
cleanup_targets.append(context.workspace.output_dir)
original_path = context.workspace.source_path
if original_path.exists():
cleanup_targets.append(original_path)
else:
sanitized = sanitize_filename(Path(context.job.filename).stem)
job_output_dir = self.settings.outputs_dir / sanitized
if job_output_dir.exists():
cleanup_targets.append(job_output_dir)
original_path = self.settings.videos_dir / context.job.filename
if original_path.exists():
cleanup_targets.append(original_path)
remove_paths(cleanup_targets)
return {
"hasError": True,
"error": str(exc),
"filename": context.job.filename,
"videoId": context.job.video_id,
"url": context.job.url,
"processedFiles": [],
}
def _relative_path(self, path: Path) -> str:
base = self.settings.videos_dir.parent
try:
return str(path.relative_to(base))
except ValueError:
return str(path)
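
For orientation, a minimal sketch of the message and payload shapes these helpers exchange over the queue. The field names are taken from _parse_job and _build_success_payload above; the concrete values are purely illustrative:

# Incoming job message (any extra keys end up in JobMessage.extras)
job_message = {
    "filename": "podcast_ep_42.mp4",
    "url": "https://example.com/watch?v=abc123",  # optional
    "videoId": "abc123",                          # "video_id" is also accepted
}

# Result published on success by _build_success_payload (illustrative values)
success_payload = {
    "hasError": False,
    "videosProcessedQuantity": 1,
    "filename": "podcast_ep_42.mp4",
    "videoId": "abc123",
    "url": "https://example.com/watch?v=abc123",
    "workspaceFolder": "podcast_ep_42",
    "outputDirectory": "outputs/podcast_ep_42",
    "processedFiles": [
        {
            "path": "outputs/podcast_ep_42/clip_01.mp4",
            "start": 12.5,
            "end": 47.0,
            "title": "...",
            "summary": "...",
            "clipIndex": 1,
        }
    ],
}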

406
video_render/rendering.py Normal file
View File

@@ -0,0 +1,406 @@
from __future__ import annotations
import logging
import math
import re
from dataclasses import dataclass
from typing import Iterable, List, Sequence, Tuple
import numpy as np
# MoviePy >= 2.0 exposes these classes at the package top level (the old
# moviepy.editor module was removed); the with_*/resized/subclipped calls
# used below are the 2.x API.
from moviepy import (
    ColorClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    VideoFileClip,
)
from PIL import Image, ImageColor, ImageDraw, ImageFont
from .config import Settings
from .transcription import TranscriptionResult, WordTiming
logger = logging.getLogger(__name__)
def clamp_time(value: float, minimum: float = 0.0) -> float:
return max(minimum, float(value))
@dataclass
class CaptionClipSet:
base: ImageClip
highlights: List[ImageClip]
class CaptionBuilder:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self.font_path = settings.rendering.font_path
if not self.font_path.exists():
raise FileNotFoundError(f"Fonte não encontrada: {self.font_path}")
self.font = ImageFont.truetype(
str(self.font_path), settings.rendering.subtitle_font_size
)
self.base_color = ImageColor.getrgb(settings.rendering.base_color)
self.highlight_color = ImageColor.getrgb(settings.rendering.highlight_color)
self.canvas_width = settings.rendering.frame_width - 160
self.canvas_height = int(settings.rendering.subtitle_font_size * 2.2)
self.min_words = settings.rendering.caption_min_words
self.max_words = settings.rendering.caption_max_words
bbox = self.font.getbbox("Ay")
self.text_height = bbox[3] - bbox[1]
self.baseline = (self.canvas_height - self.text_height) // 2 - bbox[1]
self.space_width = self.font.getbbox(" ")[2] - self.font.getbbox(" ")[0]
def build(self, words: Sequence[WordTiming], clip_start: float) -> List[CaptionClipSet]:
grouped = self._group_words(words)
clip_sets: List[CaptionClipSet] = []
for group in grouped:
group_start = clamp_time(group[0].start, minimum=clip_start)
group_end = clamp_time(group[-1].end, minimum=group_start + 0.05)
duration = max(0.05, group_end - group_start)
start_offset = group_start - clip_start
base_image, highlight_images = self._render_group(group)
base_clip = (
ImageClip(np.array(base_image))
.with_start(start_offset)
.with_duration(duration)
)
highlight_clips: List[ImageClip] = []
for word, image in zip(group, highlight_images):
h_start = clamp_time(word.start, minimum=clip_start) - clip_start
h_end = clamp_time(word.end, minimum=word.start + 0.02) - clip_start
h_duration = max(0.05, h_end - h_start)
highlight_clip = (
ImageClip(np.array(image))
.with_start(h_start)
.with_duration(h_duration)
)
highlight_clips.append(highlight_clip)
clip_sets.append(CaptionClipSet(base=base_clip, highlights=highlight_clips))
return clip_sets
def _render_group(self, group: Sequence[WordTiming]) -> Tuple[Image.Image, List[Image.Image]]:
texts = [self._clean_word(word.word) for word in group]
widths = []
for text in texts:
bbox = self.font.getbbox(text)
widths.append(bbox[2] - bbox[0])
total_width = sum(widths)
if len(widths) > 1:
total_width += self.space_width * (len(widths) - 1)
start_x = max(0, (self.canvas_width - total_width) // 2)
base_image = Image.new("RGBA", (self.canvas_width, self.canvas_height), (0, 0, 0, 0))
base_draw = ImageDraw.Draw(base_image)
highlight_images: List[Image.Image] = []
x = start_x
for text, width in zip(texts, widths):
base_draw.text((x, self.baseline), text, font=self.font, fill=self.base_color)
highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
highlight_draw = ImageDraw.Draw(highlight_image)
highlight_draw.text(
(x, self.baseline), text, font=self.font, fill=self.highlight_color
)
highlight_images.append(highlight_image)
x += width + self.space_width
return base_image, highlight_images
def _group_words(self, words: Sequence[WordTiming]) -> List[List[WordTiming]]:
if not words:
return []
grouped: List[List[WordTiming]] = []
buffer: List[WordTiming] = []
for word in words:
buffer.append(word)
if len(buffer) == self.max_words:
grouped.append(buffer)
buffer = []
if buffer:
if len(buffer) == 1 and grouped:
grouped[-1].extend(buffer)
else:
grouped.append(buffer)
# Rebalance groups to respect minimum size when possible
for idx, group in enumerate(grouped[:-1]):
if len(group) < self.min_words and len(grouped[idx + 1]) > self.min_words:
deficit = self.min_words - len(group)
transfer = grouped[idx + 1][:deficit]
grouped[idx] = group + transfer
grouped[idx + 1] = grouped[idx + 1][deficit:]
grouped = [grp for grp in grouped if grp]
return grouped
@staticmethod
def _clean_word(text: str) -> str:
text = text.strip()
text = re.sub(r"\s+", " ", text)
return text or "..."
class VideoRenderer:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self.captions = CaptionBuilder(settings)
def render(
self,
workspace_path: str,
highlight_windows: Sequence,
transcription: TranscriptionResult,
titles: Sequence[str],
output_dir,
) -> List[Tuple[str, float, float, str, str, int]]:
results: List[Tuple[str, float, float, str, str, int]] = []
with VideoFileClip(workspace_path) as base_clip:
video_duration = base_clip.duration or 0
for index, window in enumerate(highlight_windows, start=1):
start = clamp_time(window.start)
end = clamp_time(window.end)
start = min(start, video_duration)
end = min(end, video_duration)
if end <= start:
logger.info("Janela ignorada por intervalo invalido: %s", window)
continue
subclip = base_clip.subclipped(start, end)
try:
rendered_path = self._render_single_clip(
subclip=subclip,
start=start,
end=end,
title=titles[index - 1] if index - 1 < len(titles) else window.summary,
summary=window.summary,
index=index,
transcription=transcription,
output_dir=output_dir,
)
finally:
subclip.close()
results.append(
(
rendered_path,
float(start),
float(end),
titles[index - 1] if index - 1 < len(titles) else window.summary,
window.summary,
index,
)
)
return results
def _render_single_clip(
self,
subclip: VideoFileClip,
start: float,
end: float,
title: str,
summary: str,
index: int,
transcription: TranscriptionResult,
output_dir,
) -> str:
duration = end - start
frame_w = self.settings.rendering.frame_width
frame_h = self.settings.rendering.frame_height
top_h = int(frame_h * 0.18)
bottom_h = int(frame_h * 0.20)
video_area_h = frame_h - top_h - bottom_h
scale_factor = min(
frame_w / subclip.w,
video_area_h / subclip.h,
)
resized_clip = subclip.resized(scale_factor)
video_y = top_h + (video_area_h - resized_clip.h) // 2
video_clip = resized_clip.with_position(
((frame_w - resized_clip.w) // 2, video_y)
)
background = ColorClip(size=(frame_w, frame_h), color=(0, 0, 0)).with_duration(duration)
top_panel = (
ColorClip(size=(frame_w, top_h), color=(12, 12, 12))
.with_duration(duration)
.with_opacity(0.85)
)
bottom_panel = (
ColorClip(size=(frame_w, bottom_h), color=(12, 12, 12))
.with_position((0, frame_h - bottom_h))
.with_duration(duration)
.with_opacity(0.85)
)
title_text = title or summary
wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160)
title_clip = (
TextClip(
text=wrapped_title,
font=str(self.settings.rendering.font_path),
font_size=self.settings.rendering.title_font_size,
color=self.settings.rendering.base_color,
method="caption",
size=(frame_w - 160, top_h - 40),
)
.with_duration(duration)
)
title_clip = title_clip.with_position(
((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2)
)
words = self._collect_words(transcription, start, end)
caption_sets = self.captions.build(words, clip_start=start)
caption_clips = []
caption_resources: List[ImageClip] = []
caption_y = frame_h - bottom_h + (bottom_h - self.captions.canvas_height) // 2
for clip_set in caption_sets:
base_positioned = clip_set.base.with_position(("center", caption_y))
caption_clips.append(base_positioned)
caption_resources.append(clip_set.base)
for highlight in clip_set.highlights:
positioned = highlight.with_position(("center", caption_y))
caption_clips.append(positioned)
caption_resources.append(highlight)
if not caption_clips:
fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160)
caption_clips.append(
TextClip(
text=fallback_text,
font=str(self.settings.rendering.font_path),
font_size=self.settings.rendering.subtitle_font_size,
color=self.settings.rendering.base_color,
method="caption",
size=(frame_w - 160, bottom_h - 40),
)
.with_duration(duration)
.with_position(("center", caption_y))
)
composite = CompositeVideoClip(
[background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
size=(frame_w, frame_h),
)
output_path = output_dir / f"clip_{index:02d}.mp4"
composite.write_videofile(
str(output_path),
codec=self.settings.rendering.video_codec,
audio_codec=self.settings.rendering.audio_codec,
fps=self.settings.rendering.fps,
bitrate=self.settings.rendering.bitrate,
ffmpeg_params=[
"-preset",
self.settings.rendering.preset,
"-pix_fmt",
"yuv420p",
],
temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"),
remove_temp=True,
threads=4,
)
composite.close()
resized_clip.close()
video_clip.close()
title_clip.close()
background.close()
top_panel.close()
bottom_panel.close()
for clip in caption_clips:
clip.close()
for clip in caption_resources:
clip.close()
return str(output_path)
def _collect_words(
self, transcription: TranscriptionResult, start: float, end: float
) -> List[WordTiming]:
collected: List[WordTiming] = []
for segment in transcription.segments:
if segment.end < start or segment.start > end:
continue
if segment.words:
for word in segment.words:
if word.end < start or word.start > end:
continue
collected.append(
WordTiming(
start=max(start, word.start),
end=min(end, word.end),
word=word.word,
)
)
else:
collected.extend(self._fallback_words(segment.text, segment.start, segment.end, start, end))
collected.sort(key=lambda w: w.start)
return collected
def _fallback_words(
self,
text: str,
segment_start: float,
segment_end: float,
window_start: float,
window_end: float,
) -> Iterable[WordTiming]:
words = [w for w in re.split(r"\s+", text.strip()) if w]
if not words:
return []
seg_start = max(segment_start, window_start)
seg_end = min(segment_end, window_end)
duration = max(0.01, seg_end - seg_start)
step = duration / len(words)
timings: List[WordTiming] = []
for idx, word in enumerate(words):
w_start = seg_start + idx * step
w_end = min(seg_end, w_start + step)
timings.append(WordTiming(start=w_start, end=w_end, word=word))
return timings
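# Worked example: a segment whose text has three words, clipped to 10.0-11.5 s,
# is spread evenly with step = 1.5 / 3 = 0.5 s, yielding timings
# (10.0-10.5), (10.5-11.0) and (11.0-11.5).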
@staticmethod
def _wrap_text(text: str, max_width: int) -> str:
text = text.strip()
if not text:
return ""
words = text.split()
lines: List[str] = []
current: List[str] = []
for word in words:
current.append(word)
if len(" ".join(current)) > max_width // 18:
lines.append(" ".join(current[:-1]))
current = [current[-1]]
if current:
lines.append(" ".join(current))
return "\n".join(lines)

122
video_render/transcription.py Normal file
View File

@@ -0,0 +1,122 @@
from __future__ import annotations
import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional
from faster_whisper import WhisperModel
from .config import Settings
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class WordTiming:
start: float
end: float
word: str
@dataclass(frozen=True)
class TranscriptSegment:
id: int
start: float
end: float
text: str
words: List[WordTiming]
@dataclass(frozen=True)
class TranscriptionResult:
segments: List[TranscriptSegment]
full_text: str
class TranscriptionService:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self._model: Optional[WhisperModel] = None
def _load_model(self) -> WhisperModel:
if self._model is None:
logger.info(
"Carregando modelo Faster-Whisper '%s' (device=%s, compute_type=%s)",
self.settings.whisper.model_size,
self.settings.whisper.device or "auto",
self.settings.whisper.compute_type or "default",
)
self._model = WhisperModel(
self.settings.whisper.model_size,
device=self.settings.whisper.device or "auto",
compute_type=self.settings.whisper.compute_type or "default",
download_root=str(self.settings.whisper.download_root),
)
return self._model
def transcribe(self, audio_path: Path) -> TranscriptionResult:
model = self._load_model()
segments, _ = model.transcribe(
str(audio_path),
beam_size=5,
word_timestamps=True,
)
parsed_segments: List[TranscriptSegment] = []
full_text_parts: List[str] = []
for idx, segment in enumerate(segments):
words = [
WordTiming(start=w.start, end=w.end, word=w.word.strip())
for w in segment.words or []
if w.word.strip()
]
text = segment.text.strip()
full_text_parts.append(text)
parsed_segments.append(
TranscriptSegment(
id=idx,
start=segment.start,
end=segment.end,
text=text,
words=words,
)
)
return TranscriptionResult(
segments=parsed_segments,
full_text=" ".join(full_text_parts).strip(),
)
@staticmethod
def persist(result: TranscriptionResult, destination: Path) -> None:
json_path = destination / "transcription.json"
text_path = destination / "transcription.txt"
payload = {
"segments": [
{
"id": segment.id,
"start": segment.start,
"end": segment.end,
"text": segment.text,
"words": [
{"start": word.start, "end": word.end, "text": word.word}
for word in segment.words
],
}
for segment in result.segments
],
"full_text": result.full_text,
}
with json_path.open("w", encoding="utf-8") as fp:
json.dump(payload, fp, ensure_ascii=False, indent=2)
with text_path.open("w", encoding="utf-8") as fp:
fp.write(result.full_text)
logger.info("Transcrição salva em %s", destination)

38
video_render/utils.py Normal file
View File

@@ -0,0 +1,38 @@
from __future__ import annotations
import re
import unicodedata
from pathlib import Path
from typing import Iterable
def sanitize_filename(name: str) -> str:
normalized = unicodedata.normalize("NFKD", name)
ascii_text = normalized.encode("ASCII", "ignore").decode()
ascii_text = ascii_text.lower()
ascii_text = ascii_text.replace(" ", "_")
ascii_text = re.sub(r"[^a-z0-9_\-\.]", "", ascii_text)
ascii_text = re.sub(r"_+", "_", ascii_text)
return ascii_text.strip("_") or "video"
def ensure_workspace(root: Path, folder_name: str) -> Path:
workspace = root / folder_name
workspace.mkdir(parents=True, exist_ok=True)
return workspace
def remove_paths(paths: Iterable[Path]) -> None:
for path in paths:
if not path.exists():
continue
if path.is_file() or path.is_symlink():
path.unlink(missing_ok=True)
else:
for child in sorted(path.rglob("*"), reverse=True):
if child.is_file() or child.is_symlink():
child.unlink(missing_ok=True)
elif child.is_dir():
child.rmdir()
path.rmdir()
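
A couple of hand-worked examples of sanitize_filename, following the rules above:

sanitize_filename("Meu Vídeo Épico.mp4")  # -> "meu_video_epico.mp4" (accents dropped, spaces -> "_", lowercased)
sanitize_filename("???")                  # -> "video" (everything is stripped, so the fallback name is used)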