Create new components
.gitignore (vendored, 98 lines)
@@ -1,98 +0,0 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
*.manifest
*.spec
pip-log.txt
pip-delete-this-directory.txt
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
*.mo
*.pot
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
instance/
.webassets-cache
.scrapy
docs/_build/
.pybuilder/
target/
.ipynb_checkpoints
profile_default/
ipython_config.py

.pdm.toml

__pypackages__/

celerybeat-schedule
celerybeat.pid

.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.spyderproject
.spyproject
.ropeproject

/site

.mypy_cache/
.dmypy.json
dmypy.json

.pyre/

.pytype/

cython_debug/
.idea/
.vscode/
*.code-workspace
*.local
*.mp4
*.wav
*.mp3
*.srt
*.vtt
*.json
*.csv
*.xlsx
*.db
*.sqlite3
@@ -1 +0,0 @@
"""Top-level package for the video processing pipeline."""
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,3 +1,8 @@
# GEMINI_API_KEY="AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw"
# YOUTUBE_API="https://totally-real-dingo.ngrok-free.app"
# OPENROUTER_API_KEY="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"
# OPENROUTER_MODEL="openai/gpt-oss-20b:free"

services:
  video-render-new:
    restart: unless-stopped
@@ -6,19 +11,13 @@ services:
    environment:
      # RabbitMQ credentials
      - RABBITMQ_PASS=${RABBITMQ_PASS}
      - RABBITMQ_HOST=${RABBITMQ_HOST}
      - RABBITMQ_USER=${RABBITMQ_USER}
      - RABBITMQ_PORT=${RABBITMQ_PORT}
      - RABBITMQ_QUEUE=${RABBITMQ_QUEUE}
      - RABBITMQ_UPLOAD_QUEUE=${RABBITMQ_UPLOAD_QUEUE}
      # API keys for the LLMs
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro}
      - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
      - OPENROUTER_MODEL=${OPENROUTER_MODEL}
      # Optional whisper settings
      - WHISPER_MODEL=${WHISPER_MODEL}
      - WHISPER_DEVICE=${WHISPER_DEVICE}
      - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE}
      - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free}
      - FASTER_WHISPER_MODEL_SIZE=${FASTER_WHISPER_MODEL_SIZE:-small}
    ports:
      - "5000:5000"
    volumes:
      # Mount host directories into the container so that videos can be
      # provided and outputs collected. These paths can be customised when
@@ -27,9 +26,18 @@ services:
      - "/root/videos:/app/videos"
      - "/root/outputs:/app/outputs"
    command: "python -u main.py"
    networks:
      - dokploy-network
    # runtime: nvidia

networks:
  dokploy-network:
    external: true
# networks:
#   - dokploy-network

# deploy:
#   resources:
#     reservations:
#       devices:
#         - driver: nvidia
#           count: all
#           capabilities: [gpu]
# networks:
#   dokploy-network:
#     external: true
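For local runs, the ${VAR} references above are typically supplied through an .env file next to the compose file. A minimal sketch with placeholder values (variable names are taken from the compose file above; the values are illustrative):

    RABBITMQ_HOST=rabbitmq
    RABBITMQ_PORT=5672
    RABBITMQ_USER=admin
    RABBITMQ_PASS=change-me
    RABBITMQ_QUEUE=to-render
    RABBITMQ_UPLOAD_QUEUE=to-upload
    GEMINI_API_KEY=your-gemini-key
    OPENROUTER_API_KEY=your-openrouter-key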
@@ -21,6 +21,10 @@ RUN apt-get update && \
    xdg-utils \
    wget \
    unzip \
    ffmpeg \
    libgomp1 \
    libpq-dev \
    vim \
    libmagick++-dev \
    imagemagick \
    fonts-liberation \
llm.py (234 lines)
@@ -1,234 +0,0 @@
"""High-level helpers for interacting with the Gemini and OpenRouter APIs.

This module encapsulates all of the logic needed to call the LLM endpoints
used throughout the application. It uses the OpenAI Python client under the
hood because both Gemini and OpenRouter expose OpenAI-compatible APIs.

Two functions are exposed:

* ``select_highlights`` takes an SRT-like string (the transcription of a
  video) and returns a list of highlight objects with start and end
  timestamps and their corresponding text. It uses the Gemini model to
  identify which parts of the video are most likely to engage viewers on
  social media.
* ``generate_titles`` takes a list of highlight objects and returns a list
  of the same objects enriched with a ``topText`` field, which contains a
  sensational title for the clip. It uses the OpenRouter API with a model
  specified via the ``OPENROUTER_MODEL`` environment variable.

Both functions are resilient to malformed outputs from the models. They try
to extract the first JSON array found in the model responses; if that
fails, a descriptive exception is raised. These exceptions should be
handled by callers to post appropriate error messages back to the queue.
"""

from __future__ import annotations

import json
import os
import re
from typing import Any, Dict, List

import openai


class LLMError(Exception):
    """Raised when the LLM response cannot be parsed into the expected format."""


def _extract_json_array(text: str) -> Any:
    """Extract the first JSON array from a string.

    LLMs sometimes return explanatory text before or after the JSON. This
    helper uses a regular expression to find the first substring that
    resembles a JSON array (i.e. starts with '[' and ends with ']'). It
    returns the corresponding Python object if successful, otherwise
    raises a ``LLMError``.
    """
    # Remove Markdown code fences and other formatting noise
    cleaned = text.replace("`", "").replace("json", "")
    # Find the first [ ... ] block
    match = re.search(r"\[.*\]", cleaned, re.DOTALL)
    if not match:
        raise LLMError("Não foi possível encontrar um JSON válido na resposta da IA.")
    json_str = match.group(0)
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as exc:
        raise LLMError(f"Erro ao decodificar JSON: {exc}")


def select_highlights(srt_text: str) -> List[Dict[str, Any]]:
    """Call the Gemini API to select highlight segments from a transcription.

    The input ``srt_text`` should be a string containing the transcription
    formatted like an SRT file, with lines of the form
    ``00:00:10,140 --> 00:01:00,990`` followed by the spoken text.

    Returns a list of dictionaries, each with ``start``, ``end`` and
    ``text`` keys. On failure to parse the response, a ``LLMError`` is
    raised.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY não definido no ambiente")

    model = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")

    # Initialise client for Gemini. The base_url points to the
    # generativelanguage API; see the official docs for details.
    client = openai.OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")

    # System prompt: instructs Gemini how to behave.
    system_prompt = (
        "Você é um assistente especializado em selecionar **HIGHLIGHTS** de vídeo "
        "a partir da transcrição com timestamps.\n"
        "Sua única função é **selecionar os trechos** conforme solicitado.\n"
        "- **Não resuma, não interprete, não gere comentários ou textos complementares.**\n"
        "- **Retorne a resposta exatamente no formato proposto pelo usuário**, sem adicionar ou remover nada além do pedido.\n"
        "- Cada trecho selecionado deve ter **no mínimo 60 segundos e no máximo 120 segundos** de duração.\n"
        "- Sempre responda **em português (PT-BR)**."
    )

    # Base prompt: describes how to select highlights and the format to return.
    base_prompt = (
        "Você assumirá o papel de um especialista em Marketing e Social Media, "
        "sua tarefa é selecionar as melhores partes de uma transcrição que irei fornecer.\n\n"
        "## Critérios de Seleção\n\n"
        "- Escolha trechos baseando-se em:\n"
        "  - **Picos de emoção ou impacto**\n"
        "  - **Viradas de assunto**\n"
        "  - **Punchlines** (frases de efeito, momentos de virada)\n"
        "  - **Informações-chave**\n\n"
        "## Regras Rápidas\n\n"
        "- Sempre devolver pelo menos 3 trechos, não possui limite máximo\n"
        "- Garanta que cada trecho fique com no MÍNIMO 60 segundos e no MÁXIMO 120 segundos.\n"
        "- Nenhum outro texto além do JSON final.\n\n"
        "## Restrições de Duração\n\n"
        "- **Duração mínima do trecho escolhido:** 60 segundos\n"
        "- **Duração máxima do trecho escolhido:** 90 a 120 segundos\n\n"
        "## Tarefa\n\n"
        "- Proponha o **máximo de trechos** com potencial, mas **sempre devolva no mínimo 3 trechos**.\n"
        "- Extraia os trechos **apenas** da transcrição fornecida abaixo.\n\n"
        "## IMPORTANTE\n"
        "- Cada trecho deve ter no mínimo 60 segundos, e no máximo 120 segundos. Isso é indiscutível\n\n"
        "## Entrada\n\n"
        "- Transcrição:\n\n"
        f"{srt_text}\n\n"
        "## Saída\n\n"
        "- Retorne **somente** a lista de trechos selecionados em formato JSON, conforme o exemplo abaixo.\n"
        "- **Não escreva comentários ou qualquer texto extra.**\n"
        "- No atributo \"text\", inclua o texto presente no trecho escolhido.\n\n"
        "### Exemplo de Conversão\n\n"
        "#### De SRT:\n"
        "00:00:10,140 --> 00:01:00,990\n"
        "Exemplo de escrita presente no trecho\n\n"
        "#### Para JSON:\n"
        "[\n"
        "  {\n"
        "    \"start\": \"00:00:10,140\",\n"
        "    \"end\": \"00:01:00,990\",\n"
        "    \"text\": \"Exemplo de escrita presente no trecho\"\n"
        "  }\n"
        "]\n"
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": base_prompt},
    ]
    try:
        response = client.chat.completions.create(model=model, messages=messages)
    except Exception as exc:
        raise LLMError(f"Erro ao chamar a API Gemini: {exc}")
    # Extract message content
    content = response.choices[0].message.content if response.choices else None
    if not content:
        raise LLMError("A resposta da Gemini veio vazia.")
    result = _extract_json_array(content)
    if not isinstance(result, list):
        raise LLMError("O JSON retornado pela Gemini não é uma lista.")
    return result


def generate_titles(highlights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Call the OpenRouter API to generate a title (topText) for each highlight.

    The ``highlights`` argument should be a list of dictionaries as returned
    by ``select_highlights``, each containing ``start``, ``end`` and ``text``.
    This function adds a ``topText`` field to each dictionary using the
    OpenRouter model specified via the ``OPENROUTER_MODEL`` environment
    variable. If parsing fails, an ``LLMError`` is raised.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY não definido no ambiente")
    model = os.environ.get("OPENROUTER_MODEL")
    if not model:
        raise ValueError("OPENROUTER_MODEL não definido no ambiente")
    # Create client for OpenRouter
    client = openai.OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1")

    # Compose prompt: instruct to generate titles only
    prompt_header = (
        "Você é um especialista em Marketing Digital e Criação de Conteúdo Viral.\n\n"
        "Sua tarefa é criar **títulos sensacionalistas** (*topText*) para cada trecho "
        "de transcrição recebido em formato JSON.\n\n"
        "## Instruções\n\n"
        "- O texto deve ser **chamativo, impactante** e com alto potencial de viralização "
        "em redes sociais, **mas sem sair do contexto do trecho**.\n"
        "- Use expressões fortes e curiosas, mas **nunca palavras de baixo calão**.\n"
        "- Cada *topText* deve ter **no máximo 2 linhas**.\n"
        "- Utilize **exclusivamente** o conteúdo do trecho; não invente fatos.\n"
        "- Não adicione comentários, explicações, ou qualquer texto extra na resposta.\n"
        "- Responda **apenas** no seguinte formato (mantendo as chaves e colchetes):\n\n"
        "[\n  {\n    \"start\": \"00:00:10,140\",\n    \"end\": \"00:01:00,990\",\n    \"topText\": \"Título impactante\"\n  }\n]\n\n"
        "## Observações:\n\n"
        "- Nunca fuja do contexto do trecho.\n"
        "- Não invente informações.\n"
        "- Não utilize palavrões.\n"
        "- Não escreva nada além do JSON de saída.\n\n"
        "Aqui estão os trechos em JSON:\n"
    )
    # Compose input JSON for the model
    json_input = json.dumps(highlights, ensure_ascii=False)
    full_message = prompt_header + json_input
    messages = [
        {
            "role": "system",
            "content": "Você é um assistente útil e objetivo."
        },
        {
            "role": "user",
            "content": full_message
        },
    ]
    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.7,
        )
    except Exception as exc:
        raise LLMError(f"Erro ao chamar a API OpenRouter: {exc}")
    content = response.choices[0].message.content if response.choices else None
    if not content:
        raise LLMError("A resposta da OpenRouter veio vazia.")
    result = _extract_json_array(content)
    if not isinstance(result, list):
        raise LLMError("O JSON retornado pela OpenRouter não é uma lista.")
    # Merge topText back into highlights
    # We assume the result list has the same order and length as input highlights
    enriched: List[Dict[str, Any]] = []
    input_map = {(item["start"], item["end"]): item for item in highlights}
    for item in result:
        key = (item.get("start"), item.get("end"))
        original = input_map.get(key)
        if original is None:
            # If the model returns unexpected entries, skip them
            continue
        enriched_item = original.copy()
        # Only topText is expected
        enriched_item["topText"] = item.get("topText", "").strip()
        enriched.append(enriched_item)
    return enriched
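For context, a minimal sketch of how these two helpers chain together, as the old main.py did (environment variables assumed to be set; the SRT snippet is illustrative):

    from llm import LLMError, select_highlights, generate_titles

    srt_text = "00:00:10,140 --> 00:01:00,990\nExemplo de fala transcrita"
    try:
        highlights = select_highlights(srt_text)  # [{"start", "end", "text"}, ...]
        titled = generate_titles(highlights)      # same dicts plus a "topText" field
    except LLMError as exc:
        print(f"Falha no LLM: {exc}")             # callers report this back to the queue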
main.py (269 lines)
@@ -1,265 +1,16 @@
"""Entry point for the video processing pipeline.

This script listens to a RabbitMQ queue for new video processing tasks. When
a message arrives, it performs the following steps:

1. Creates a working directory for the video based off of its filename.
2. Extracts the audio track with FFMPEG and runs Faster-Whisper to produce
   a transcription with word-level timestamps.
3. Uses the Gemini model to determine which parts of the video have the
   highest potential for engagement. These highlight segments are
   represented as a list of objects containing start/end timestamps and
   text.
4. Uses the OpenRouter model to generate a sensational title for each
   highlight. Only the ``topText`` field is kept; the description is
   intentionally omitted since the caption will be burned into the video.
5. Cuts the original video into individual clips corresponding to each
   highlight and renders them vertically with a title above and a dynamic
   caption below.
6. Publishes a message to the upload queue with information about the
   generated clips. On success, this message contains the list of output
   files. On failure, ``hasError`` will be set to ``True`` and the
   ``error`` field will describe what went wrong.
7. Cleans up temporary files (audio, transcript, working directory) and
   deletes the original source video from the ``videos`` directory to
   conserve disk space.

The queue names and RabbitMQ credentials are configured via environment
variables. See the accompanying ``docker-compose.yml`` for defaults.
"""

from __future__ import annotations

import json
import os
import shutil
import time
import traceback
from typing import Any, Dict, List

import pika

from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds
from .transcribe import transcribe
from .llm import LLMError, select_highlights, generate_titles
from .render import render_clip
from video_render.config import load_settings
from video_render.logging_utils import setup_logging
from video_render.messaging import RabbitMQWorker
from video_render.pipeline import VideoPipeline


# Environment variables with sensible defaults
RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq")
RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672))
RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin")
RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS")
RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render")
RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")


def main() -> None:
    setup_logging()
    settings = load_settings()

if not RABBITMQ_PASS:
    raise RuntimeError("RABBITMQ_PASS não definido no ambiente")


def get_next_message() -> Any:
    """Retrieve a single message from the RABBITMQ_QUEUE.

    Returns ``None`` if no messages are available. This helper opens a new
    connection for each call to avoid keeping stale connections alive.
    """
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()
    method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE)
    if method_frame:
        channel.basic_ack(method_frame.delivery_tag)
        connection.close()
        return body
    connection.close()
    return None


def publish_to_queue(payload: Dict[str, Any]) -> None:
    """Publish a JSON-serialisable payload to the RABBITMQ_UPLOAD_QUEUE."""
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()
    channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True)
    channel.basic_publish(
        exchange="",
        routing_key=RABBITMQ_UPLOAD_QUEUE,
        body=json.dumps(payload),
        properties=pika.BasicProperties(delivery_mode=2),
    )
    connection.close()


def build_srt(segments: List[Dict[str, Any]]) -> str:
    """Build an SRT-like string from a list of segments.

    Each segment should have ``start``, ``end`` and ``text`` fields. The
    timestamps are converted to the ``HH:MM:SS,mmm`` format expected by
    the Gemini prompt. Segments are separated by a blank line.
    """
    lines = []
    for seg in segments:
        start_ts = seconds_to_timestamp(seg["start"])
        end_ts = seconds_to_timestamp(seg["end"])
        lines.append(f"{start_ts} --> {end_ts}\n{seg['text']}")
    return "\n\n".join(lines)


def process_message(data: Dict[str, Any]) -> Dict[str, Any]:
    """Process a single video task described in ``data``.

    Returns the payload to be sent to the upload queue. Raises an
    exception on failure; the caller is responsible for catching it and
    posting an error payload.
    """
    filename = data.get("filename")
    if not filename:
        raise ValueError("Campo 'filename' ausente na mensagem")
    url = data.get("url")
    video_id = data.get("videoId")
    # Determine source video path; n8n stores videos in the 'videos' directory
    video_path = os.path.join("videos", filename)
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}")
    # Sanitize the filename to use as directory name
    base_no_ext = os.path.splitext(filename)[0]
    sanitized = sanitize_filename(base_no_ext)
    work_dir = os.path.join("app", "videos", sanitized)
    # Transcribe video
    segments, words = transcribe(video_path, work_dir)
    # Build SRT string
    srt_str = build_srt(segments)
    # Call Gemini to select highlights
    highlights = select_highlights(srt_str)
    # Convert start/end times to floats and keep original strings for openrouter
    for item in highlights:
        item["start"] = item["start"].strip()
        item["end"] = item["end"].strip()
    # Generate titles
    titles = generate_titles(highlights)
    # Render clips
    output_dir = os.path.join("outputs", sanitized)
    processed_files: List[str] = []
    for idx, item in enumerate(titles, start=1):
        start_sec = timestamp_to_seconds(item.get("start"))
        end_sec = timestamp_to_seconds(item.get("end"))
        # Extract relative words for caption
        relative_words = []
        for w in words:
            # Word must overlap clip interval
            if w["end"] <= start_sec or w["start"] >= end_sec:
                continue
            rel_start = max(0.0, w["start"] - start_sec)
            rel_end = max(0.0, w["end"] - start_sec)
            relative_words.append({
                "start": rel_start,
                "end": rel_end,
                "word": w["word"],
            })
        # If no words found (e.g. silence), create a dummy word to avoid errors
        if not relative_words:
            relative_words.append({"start": 0.0, "end": end_sec - start_sec, "word": ""})
        out_path = render_clip(
            video_path=video_path,
            start=start_sec,
            end=end_sec,
            top_text=item.get("topText", ""),
            words=relative_words,
            out_dir=output_dir,
            base_name=sanitized,
            idx=idx,
        )
        processed_files.append(out_path)
    # Compose payload
    payload = {
        "videosProcessedQuantity": len(processed_files),
        "filename": filename,
        "processedFiles": processed_files,
        "url": url,
        "videoId": video_id,
        "hasError": False,
        "error": None,
    }
    # Clean up working directory and original video
    shutil.rmtree(work_dir, ignore_errors=True)
    try:
        os.remove(video_path)
    except FileNotFoundError:
        pass
    return payload


def main():
    print(" [*] Esperando mensagens. Para sair: CTRL+C")
    while True:
        body = get_next_message()
        if body is None:
            time.sleep(5)
            continue
        try:
            data = json.loads(body)
        except Exception:
            print("⚠️ Mensagem inválida recebida (não é JSON)")
            continue
        try:
            result = process_message(data)
        except Exception as exc:
            # Print stack trace for debugging
            traceback.print_exc()
            # Attempt to clean up any directories based on filename
            filename = data.get("filename")
            sanitized = sanitize_filename(os.path.splitext(filename or "")[0]) if filename else ""
            work_dir = os.path.join("app", "videos", sanitized) if sanitized else None
            output_dir = os.path.join("outputs", sanitized) if sanitized else None
            # Remove working and output directories
            if work_dir:
                shutil.rmtree(work_dir, ignore_errors=True)
            if output_dir:
                shutil.rmtree(output_dir, ignore_errors=True)
            # Remove original video if present
            video_path = os.path.join("videos", filename) if filename else None
            if video_path and os.path.exists(video_path):
                try:
                    os.remove(video_path)
                except Exception:
                    pass
            # Build error payload
            error_payload = {
                "videosProcessedQuantity": 0,
                "filename": filename,
                "processedFiles": [],
                "url": data.get("url"),
                "videoId": data.get("videoId"),
                "hasError": True,
                "error": str(exc),
            }
            try:
                publish_to_queue(error_payload)
                print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
            except Exception as publish_err:
                print(f"Erro ao publicar mensagem de erro: {publish_err}")
            continue
        # On success publish payload
        try:
            publish_to_queue(result)
            print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
        except Exception as publish_err:
            print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}")
        # Loop continues

    pipeline = VideoPipeline(settings)
    worker = RabbitMQWorker(settings)
    worker.consume_forever(pipeline.process_message)


if __name__ == "__main__":
    main()
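For reference, a sketch of the message shapes this entry point exchanges with the queues, written as plain Python dicts (field names follow process_message above; values are illustrative):

    incoming = {
        "filename": "episode.mp4",
        "url": "https://example.com/watch?v=abc123",
        "videoId": "abc123",
    }

    success_payload = {
        "videosProcessedQuantity": 2,
        "filename": "episode.mp4",
        "processedFiles": ["outputs/episode/clip_1.mp4", "outputs/episode/clip_2.mp4"],
        "url": "https://example.com/watch?v=abc123",
        "videoId": "abc123",
        "hasError": False,
        "error": None,
    }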
prompts/generate.txt (new file, 35 lines)
@@ -0,0 +1,35 @@
Voce e um estrategista de conteudo especializado em identificar cortes curtos de videos longos que performam bem em redes sociais.

FUNCAO:
- Analisar a transcricao completa de um video.
- Escolher trechos curtos (entre 20s e 90s) com maior chance de engajamento.
- Responder APENAS em JSON valido.

FORMATO DA RESPOSTA:
{
  "highlights": [
    {
      "start": <segundos_inicio_float>,
      "end": <segundos_fim_float>,
      "summary": "Resumo conciso do porque este trecho engaja"
    }
  ]
}

REGRAS:
- Liste no maximo 6 destaques.
- Respeite a ordem cronologica.
- Nunca deixe listas vazias; se nada for relevante, inclua uma entrada com start = 0, end = 0 e summary explicando a ausencia de cortes.
- Utilize apenas valores numericos simples (ponto como separador decimal).
- Nao repita um mesmo trecho.

PERSPECTIVA DE ANALISE:
- Concentre-se em momentos com gatilhos emocionais, insights, storytelling ou chamadas para acao fortes.
- Prefira trechos com comeco, meio e fim claros.
- Evite partes redundantes, silenciosas ou extremamente tecnicas.

TAREFA:
- Leia a transcricao recebida no campo "transcript".
- Use a lista de marcas de tempo detalhadas no campo "segments" para embasar suas escolhas.
- Produza a saida JSON descrita acima.
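For reference, a response that satisfies this prompt's contract would look like the following (the values are illustrative):

    {
      "highlights": [
        {"start": 12.5, "end": 58.0, "summary": "Historia pessoal com virada emocional forte"},
        {"start": 120.0, "end": 185.5, "summary": "Insight pratico com chamada para acao clara"}
      ]
    }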
render.py (205 lines)
@@ -1,205 +0,0 @@
"""Rendering logic for producing vertical clips with dynamic captions.

This module defines a single function ``render_clip`` which takes a video
segment and produces a vertical clip suitable for social media. Each clip
contains three regions:

* A top region (480px high) showing a title generated by an LLM.
* A middle region (960px high) containing the original video, scaled to
  fit horizontally while preserving aspect ratio and centred vertically.
* A bottom region (480px high) showing a dynamic caption. The caption
  displays a sliding window of three to five words from the transcript,
  colouring the currently spoken word differently to draw the viewer's
  attention.

The function uses the MoviePy library to compose the various elements and
writes the resulting video to disk. It returns the path to the created
file.
"""

from __future__ import annotations

import os
from typing import Dict, List

import numpy as np
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.video.VideoClip import ColorClip, VideoClip
from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
from moviepy.video.VideoClip import TextClip
from PIL import Image, ImageDraw, ImageFont

from .utils import wrap_text


def render_clip(
    video_path: str,
    start: float,
    end: float,
    top_text: str,
    words: List[Dict[str, float]],
    out_dir: str,
    base_name: str,
    idx: int,
    # Use a widely available system font by default. DejaVuSans is installed
    # in most Debian-based containers. The caller can override this path.
    font_path: str = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    final_width: int = 1080,
    final_height: int = 1920,
    top_h: int = 480,
    middle_h: int = 960,
    bottom_h: int = 480,
    video_codec: str = "libx264",
    bitrate: str = "3000k",
) -> str:
    """Render a single clip with title and dynamic caption.

    Parameters
    ----------
    video_path: str
        Path to the source video file.
    start: float
        Start time of the clip in seconds.
    end: float
        End time of the clip in seconds.
    top_text: str
        The title to display in the top region.
    words: List[Dict[str, float]]
        List of word-level timestamps for this clip. Each dict must have
        ``start``, ``end`` and ``word`` keys. The start and end values
        should be relative to the beginning of this clip (i.e. start at 0).
    out_dir: str
        Directory where the output file should be saved. The function
        creates this directory if it doesn't exist.
    base_name: str
        Base name of the original video (sanitized). Used to build the
        output filename.
    idx: int
        Index of the clip. Output will be named ``clip_{idx}.mp4``.
    font_path: str
        Path to the TrueType font to use for both title and caption.
    final_width: int
        Width of the final video in pixels.
    final_height: int
        Height of the final video in pixels.
    top_h: int
        Height of the title area in pixels.
    middle_h: int
        Height of the video area in pixels.
    bottom_h: int
        Height of the caption area in pixels.
    video_codec: str
        FFmpeg codec to use when writing the video.
    bitrate: str
        Bitrate for the output video.

    Returns
    -------
    str
        The path to the rendered video file.
    """
    os.makedirs(out_dir, exist_ok=True)
    # Extract the segment from the source video
    with VideoFileClip(video_path) as clip:
        segment = clip.subclip(start, end)
        dur = segment.duration
        # Background
        bg = ColorClip(size=(final_width, final_height), color=(0, 0, 0), duration=dur)
        # Resize video to fit width
        video_resized = segment.resize(width=final_width)
        # Compute vertical position to centre in the middle region
        y = top_h + (middle_h - video_resized.h) // 2
        video_resized = video_resized.set_position((0, y))

        # Build title clip
        # Wrap the title to avoid overflow
        wrapped_lines = wrap_text(top_text, max_chars=40)
        wrapped_title = "\n".join(wrapped_lines)
        title_clip = TextClip(
            wrapped_title,
            font=font_path,
            fontsize=70,
            color="white",
            method="caption",
            size=(final_width, top_h),
            align="center",
        ).set_duration(dur).set_position((0, 0))

        # Prepare font for caption rendering
        pil_font = ImageFont.truetype(font_path, size=60)
        default_color = (255, 255, 255)  # white
        highlight_color = (255, 215, 0)  # gold-like yellow

        # Precompute widths of a space and bounding box height for vertical centering
        space_width = pil_font.getbbox(" ")[2] - pil_font.getbbox(" ")[0]
        bbox = pil_font.getbbox("A")
        text_height = bbox[3] - bbox[1]

        def make_caption_frame(t: float):
            """Generate an image for the caption at time t."""
            # Determine current word index
            idx_cur = 0
            for i, w in enumerate(words):
                if w["start"] <= t < w["end"]:
                    idx_cur = i
                    break
                if t >= w["end"]:
                    idx_cur = i
            # Define window of words to display: show up to 5 words
            start_idx = max(0, idx_cur - 2)
            end_idx = min(len(words), idx_cur + 3)
            window = words[start_idx:end_idx]
            # Compute widths for each word
            word_sizes = []
            for w in window:
                bbox = pil_font.getbbox(w["word"])
                word_width = bbox[2] - bbox[0]
                word_sizes.append(word_width)
            total_width = sum(word_sizes) + space_width * (len(window) - 1 if window else 0)
            # Create blank image for caption area
            img = Image.new("RGB", (final_width, bottom_h), color=(0, 0, 0))
            draw = ImageDraw.Draw(img)
            x = int((final_width - total_width) / 2)
            y_pos = int((bottom_h - text_height) / 2)
            for j, w in enumerate(window):
                color = highlight_color if (start_idx + j) == idx_cur else default_color
                draw.text((x, y_pos), w["word"], font=pil_font, fill=color)
                x += word_sizes[j] + space_width
            return np.array(img)

        caption_clip = VideoClip(make_frame=make_caption_frame, duration=dur)
        caption_clip = caption_clip.set_position((0, final_height - bottom_h))

        # Compose final clip
        final = CompositeVideoClip([
            bg,
            video_resized,
            title_clip,
            caption_clip,
        ], size=(final_width, final_height))
        # Use the original audio from the video segment
        final_audio = segment.audio
        if final_audio is not None:
            final = final.set_audio(final_audio)
        # Define output path
        out_path = os.path.join(out_dir, f"clip_{idx}.mp4")
        # Write to disk
        final.write_videofile(
            out_path,
            codec=video_codec,
            fps=30,
            bitrate=bitrate,
            audio_codec="aac",
            preset="ultrafast",
            ffmpeg_params=[
                "-tune", "zerolatency",
                "-pix_fmt", "yuv420p",
                "-profile:v", "high",
                "-level", "4.1",
            ],
            threads=4,
        )
        # Close clips to free resources
        final.close()
        segment.close()
    return out_path
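A sketch of a typical call, as issued by the old main.py (paths and word timings are illustrative; word times are relative to the clip start):

    from render import render_clip

    out_path = render_clip(
        video_path="videos/episode.mp4",
        start=10.14,
        end=60.99,
        top_text="Título impactante",
        words=[{"start": 0.0, "end": 0.8, "word": "Exemplo"}],
        out_dir="outputs/episode",
        base_name="episode",
        idx=1,
    )
    # out_path == "outputs/episode/clip_1.mp4"

Note that this module uses the MoviePy 1.x API (subclip, resize, set_position), while the updated requirements below pin moviepy 2.x, which renamed those methods; the commit removes this file along with the old API usage.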
@@ -1,7 +1,6 @@
pika==1.3.2
moviepy==2.0.0
faster-whisper==1.2.0
openai==1.16.0
numpy==1.26.4
Pillow==10.1.0
unidecode==1.3.6
moviepy==2.2.0
pillow==10.3.0
numpy>=1.26.0
requests>=2.31.0
pika>=1.3.2
faster-whisper==1.0.0
transcribe.py (111 lines)
@@ -1,111 +0,0 @@
"""Utilities for extracting audio from video and generating transcriptions.

This module handles two tasks:

1. Use FFMPEG to extract the audio track from a video file into a WAV file
   suitable for consumption by the Whisper model. The audio is resampled to
   16 kHz mono PCM as required by Whisper.
2. Use the Faster-Whisper implementation to generate a transcription with
   word-level timestamps. The transcription is returned both as a list of
   segments (for building an SRT) and as a flattened list of words (for
   building dynamic subtitles).

If FFMPEG is not installed or fails, a ``RuntimeError`` is raised. The caller
is responsible for cleaning up the temporary files created in the working
directory.
"""

from __future__ import annotations

import os
import subprocess
from typing import Dict, List, Tuple

from faster_whisper import WhisperModel


def extract_audio_ffmpeg(video_path: str, audio_path: str) -> None:
    """Use FFMPEG to extract audio from ``video_path`` into ``audio_path``.

    The output will be a 16 kHz mono WAV file in PCM S16LE format. Any
    existing file at ``audio_path`` will be overwritten. If ffmpeg returns
    a non-zero exit code, a ``RuntimeError`` is raised with the stderr.
    """
    cmd = [
        "ffmpeg",
        "-y",  # overwrite output
        "-i",
        video_path,
        "-vn",  # disable video recording
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        "-ac",
        "1",
        audio_path,
    ]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if proc.returncode != 0:
        raise RuntimeError(f"FFMPEG error: {proc.stderr.decode(errors='ignore')}")


def load_whisper_model() -> WhisperModel:
    """Instantiate and cache a Faster-Whisper model.

    The model name and device can be configured via the ``WHISPER_MODEL`` and
    ``WHISPER_DEVICE`` environment variables. The default model is
    ``large-v3`` for best accuracy. The device can be ``cuda`` or ``cpu``.
    A module-level cache is used to prevent loading the model multiple times.
    """
    if hasattr(load_whisper_model, "_cache"):
        return load_whisper_model._cache  # type: ignore[attr-defined]
    model_name = os.environ.get("WHISPER_MODEL", "large-v3")
    device = os.environ.get("WHISPER_DEVICE", "cpu")
    # Compute type can be set via WHISPER_COMPUTE_TYPE; default to float16 on GPU
    compute_type = os.environ.get("WHISPER_COMPUTE_TYPE")
    # If not explicitly set, choose sensible defaults
    if compute_type is None:
        compute_type = "float16" if device == "cuda" else "int8"
    model = WhisperModel(model_name, device=device, compute_type=compute_type)
    load_whisper_model._cache = model  # type: ignore[attr-defined]
    return model


def transcribe(video_path: str, work_dir: str) -> Tuple[List[Dict[str, float]], List[Dict[str, float]]]:
    """Transcribe a video file using Faster-Whisper.

    ``video_path`` is the path to the video to transcribe. ``work_dir`` is a
    directory where temporary files will be stored (audio file and
    transcription). The function returns a tuple ``(segments, words)`` where
    ``segments`` is a list of dictionaries with ``start``, ``end`` and
    ``text`` fields, and ``words`` is a flat list of dictionaries with
    ``start``, ``end`` and ``word`` fields covering the entire video.
    The timestamps are expressed in seconds as floats.
    """
    os.makedirs(work_dir, exist_ok=True)
    audio_path = os.path.join(work_dir, "audio.wav")
    # Extract audio
    extract_audio_ffmpeg(video_path, audio_path)
    # Load Whisper model
    model = load_whisper_model()
    # Run transcription with word-level timestamps
    segments, info = model.transcribe(audio_path, word_timestamps=True)
    seg_list: List[Dict[str, float]] = []
    words_list: List[Dict[str, float]] = []
    for seg in segments:
        seg_list.append({
            "start": float(seg.start),
            "end": float(seg.end),
            "text": seg.text.strip(),
        })
        # Each segment may contain words attribute
        for w in getattr(seg, "words", []) or []:
            words_list.append({
                "start": float(w.start),
                "end": float(w.end),
                "word": w.word,
            })
    # Sort words by start time to be safe
    words_list.sort(key=lambda d: d["start"])
    return seg_list, words_list
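A minimal usage sketch (the paths are illustrative):

    from transcribe import transcribe

    segments, words = transcribe("videos/episode.mp4", "app/videos/episode")
    # segments -> [{"start": 0.0, "end": 4.2, "text": "Exemplo de fala"}, ...]
    # words    -> [{"start": 0.0, "end": 0.35, "word": " Exemplo"}, ...]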
utils.py (93 lines)
@@ -1,93 +0,0 @@
import re
import unicodedata
from typing import List, Tuple


def sanitize_filename(name: str) -> str:
    """Return a sanitized version of a filename.

    This helper removes accents, converts to lowercase, replaces spaces
    with underscores and removes any non alphanumeric characters except
    underscores and dots. This makes the directory names safe to use on
    most filesystems and matches the behaviour described in the spec.
    """
    if not name:
        return ""
    # Decompose Unicode characters and strip accents
    nfkd_form = unicodedata.normalize("NFKD", name)
    no_accents = "".join(c for c in nfkd_form if not unicodedata.combining(c))
    # Replace spaces with underscores
    no_spaces = no_accents.replace(" ", "_")
    # Lowercase and remove any character that is not a letter, digit, dot or underscore
    sanitized = re.sub(r"[^A-Za-z0-9_.]+", "", no_spaces)
    return sanitized


def timestamp_to_seconds(ts: str) -> float:
    """Convert a timestamp in HH:MM:SS,mmm format to seconds.

    The Gemini and OpenRouter prompts use timestamps formatted with a comma
    as the decimal separator. This helper splits the string into hours,
    minutes and seconds and returns a float expressed in seconds.
    """
    if ts is None:
        return 0.0
    ts = ts.strip()
    if not ts:
        return 0.0
    # Replace comma by dot for decimal seconds
    ts = ts.replace(",", ".")
    parts = ts.split(":")
    parts = [float(p) for p in parts]
    if len(parts) == 3:
        h, m, s = parts
        return h * 3600 + m * 60 + s
    elif len(parts) == 2:
        m, s = parts
        return m * 60 + s
    else:
        # only seconds
        return parts[0]


def seconds_to_timestamp(seconds: float) -> str:
    """Convert a time in seconds to HH:MM:SS,mmm format expected by SRT."""
    if seconds < 0:
        seconds = 0
    h = int(seconds // 3600)
    m = int((seconds % 3600) // 60)
    s = seconds % 60
    # Format with comma as decimal separator and three decimal places
    return f"{h:02d}:{m:02d}:{s:06.3f}".replace(".", ",")


def wrap_text(text: str, max_chars: int = 80) -> List[str]:
    """Simple word-wrap for a string.

    Splits ``text`` into a list of lines, each at most ``max_chars``
    characters long. This does not attempt to hyphenate words; a word
    longer than ``max_chars`` will occupy its own line. The return value
    is a list of lines without trailing whitespace.
    """
    if not text:
        return []
    words = text.split()
    lines: List[str] = []
    current: List[str] = []
    current_len = 0
    for word in words:
        # If adding this word would exceed the max, flush current line
        if current and current_len + 1 + len(word) > max_chars:
            lines.append(" ".join(current))
            current = [word]
            current_len = len(word)
        else:
            # Add to current line
            if current:
                current_len += 1 + len(word)
            else:
                current_len = len(word)
            current.append(word)
    if current:
        lines.append(" ".join(current))
    return lines
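A worked example of the timestamp helpers, round-tripping through both conversions:

    from utils import timestamp_to_seconds, seconds_to_timestamp

    timestamp_to_seconds("00:01:00,990")  # -> 60.99 (0*3600 + 1*60 + 0.990)
    seconds_to_timestamp(60.99)           # -> "00:01:00,990"
    seconds_to_timestamp(3725.5)          # -> "01:02:05,500"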
video_render/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
"""
Core package for the revamped video rendering pipeline.
"""
BIN video_render/__pycache__/__init__.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/config.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/ffmpeg.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/llm.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/logging_utils.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/media.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/messaging.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/pipeline.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/rendering.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/transcription.cpython-39.pyc (new file): binary file not shown.
BIN video_render/__pycache__/utils.cpython-39.pyc (new file): binary file not shown.
video_render/config.py (new file, 103 lines)
@@ -0,0 +1,103 @@
from __future__ import annotations

import os
from dataclasses import dataclass
from pathlib import Path


BASE_DIR = Path(__file__).resolve().parent.parent
VIDEOS_ROOT = BASE_DIR / "videos"
OUTPUTS_ROOT = BASE_DIR / "outputs"
TEMP_ROOT = BASE_DIR / "temp"


@dataclass(frozen=True)
class RabbitMQSettings:
    host: str = os.environ.get("RABBITMQ_HOST", "rabbitmq")
    port: int = int(os.environ.get("RABBITMQ_PORT", 5672))
    user: str = os.environ.get("RABBITMQ_USER", "admin")
    password: str = os.environ.get("RABBITMQ_PASS", "")
    consume_queue: str = os.environ.get("RABBITMQ_QUEUE", "to-render")
    publish_queue: str = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")
    prefetch_count: int = int(os.environ.get("RABBITMQ_PREFETCH", 1))
    heartbeat: int = int(os.environ.get("RABBITMQ_HEARTBEAT", 60))
    blocked_timeout: int = int(os.environ.get("RABBITMQ_BLOCKED_TIMEOUT", 300))


@dataclass(frozen=True)
class GeminiSettings:
    api_key: str = os.environ.get("GEMINI_API_KEY", "")
    model: str = os.environ.get("GEMINI_MODEL", "gemini-1.5-pro-latest")
    safety_settings: str | None = os.environ.get("GEMINI_SAFETY_SETTINGS")
    temperature: float = float(os.environ.get("GEMINI_TEMPERATURE", 0.2))
    top_k: int | None = (
        int(os.environ["GEMINI_TOP_K"]) if os.environ.get("GEMINI_TOP_K") else None
    )
    top_p: float | None = (
        float(os.environ["GEMINI_TOP_P"]) if os.environ.get("GEMINI_TOP_P") else None
    )
    prompt_path: str = os.environ.get("GEMINI_PROMPT_PATH", "prompts/generate.txt")


@dataclass(frozen=True)
class OpenRouterSettings:
    api_key: str = os.environ.get("OPENROUTER_API_KEY", "")
    model: str = os.environ.get(
        "OPENROUTER_MODEL", "anthropic/claude-3-haiku:beta"
    )
    temperature: float = float(os.environ.get("OPENROUTER_TEMPERATURE", 0.6))
    max_output_tokens: int = int(os.environ.get("OPENROUTER_MAX_OUTPUT_TOKENS", 256))


@dataclass(frozen=True)
class WhisperSettings:
    model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "medium")
    device: str | None = os.environ.get("FASTER_WHISPER_DEVICE")
    compute_type: str | None = os.environ.get("FASTER_WHISPER_COMPUTE_TYPE")
    download_root: Path = Path(
        os.environ.get("FASTER_WHISPER_DOWNLOAD_ROOT", str(BASE_DIR / ".whisper"))
    )


@dataclass(frozen=True)
class RenderingSettings:
    frame_width: int = int(os.environ.get("RENDER_WIDTH", 1080))
    frame_height: int = int(os.environ.get("RENDER_HEIGHT", 1920))
    fps: int = int(os.environ.get("RENDER_FPS", 30))
    video_codec: str = os.environ.get("RENDER_CODEC", "libx264")
    audio_codec: str = os.environ.get("RENDER_AUDIO_CODEC", "aac")
    bitrate: str = os.environ.get("RENDER_BITRATE", "5000k")
    preset: str = os.environ.get("RENDER_PRESET", "faster")
    highlight_color: str = os.environ.get("SUBTITLE_HIGHLIGHT_COLOR", "#FFD200")
    base_color: str = os.environ.get("SUBTITLE_BASE_COLOR", "#FFFFFF")
    font_path: Path = Path(os.environ.get("RENDER_FONT_PATH", "./Montserrat.ttf"))
    title_font_size: int = int(os.environ.get("RENDER_TITLE_FONT_SIZE", 110))
    subtitle_font_size: int = int(os.environ.get("RENDER_SUBTITLE_FONT_SIZE", 64))
    caption_min_words: int = int(os.environ.get("CAPTION_MIN_WORDS", 3))
    caption_max_words: int = int(os.environ.get("CAPTION_MAX_WORDS", 4))


@dataclass(frozen=True)
class Settings:
    rabbitmq: RabbitMQSettings = RabbitMQSettings()
    gemini: GeminiSettings = GeminiSettings()
    openrouter: OpenRouterSettings = OpenRouterSettings()
    whisper: WhisperSettings = WhisperSettings()
    rendering: RenderingSettings = RenderingSettings()

    videos_dir: Path = VIDEOS_ROOT
    outputs_dir: Path = OUTPUTS_ROOT
    temp_dir: Path = TEMP_ROOT


def load_settings() -> Settings:
    settings = Settings()

    if not settings.rabbitmq.password:
        raise RuntimeError("RABBITMQ_PASS must be provided")

    settings.videos_dir.mkdir(parents=True, exist_ok=True)
    settings.outputs_dir.mkdir(parents=True, exist_ok=True)
    settings.temp_dir.mkdir(parents=True, exist_ok=True)

    return settings
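Because every field defaults from the environment at import time, configuring the pipeline is a matter of exporting variables before the module is imported; a minimal sketch (the values are illustrative):

    import os

    os.environ["RABBITMQ_PASS"] = "change-me"      # required; load_settings() raises without it

    from video_render.config import load_settings

    settings = load_settings()                     # also creates videos/, outputs/ and temp/
    print(settings.rendering.frame_width)          # 1080 unless RENDER_WIDTH overrides it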
video_render/ffmpeg.py (new file, 54 lines)
@@ -0,0 +1,54 @@
from __future__ import annotations

import logging
import shlex
import subprocess
from pathlib import Path
from typing import Sequence

logger = logging.getLogger(__name__)


def _run_ffmpeg(args: Sequence[str]) -> None:
    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", *args]
    logger.debug("Executando ffmpeg: %s", " ".join(shlex.quote(part) for part in cmd))
    completed = subprocess.run(cmd, check=False)
    if completed.returncode != 0:
        raise RuntimeError(f"ffmpeg falhou com exit code {completed.returncode}")


def extract_audio_to_wav(input_video: Path, output_wav: Path) -> Path:
    _run_ffmpeg(
        [
            "-y",
            "-i",
            str(input_video),
            "-ac",
            "1",
            "-ar",
            "16000",
            "-vn",
            str(output_wav),
        ]
    )
    return output_wav


def create_video_segment(input_video: Path, start: float, end: float, output_path: Path) -> Path:
    duration = max(0.01, end - start)
    _run_ffmpeg(
        [
            "-y",
            "-i",
            str(input_video),
            "-ss",
            f"{start:.3f}",
            "-t",
            f"{duration:.3f}",
            "-c",
            "copy",
            str(output_path),
        ]
    )
    return output_path
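A usage sketch for both helpers (paths illustrative). Because create_video_segment uses stream copy ("-c", "copy"), cut points snap to the nearest keyframe rather than the exact timestamps, which is fast but slightly imprecise:

    from pathlib import Path
    from video_render.ffmpeg import extract_audio_to_wav, create_video_segment

    src = Path("videos/episode.mp4")
    extract_audio_to_wav(src, Path("temp/episode/audio.wav"))                 # 16 kHz mono WAV
    create_video_segment(src, 10.14, 60.99, Path("temp/episode/clip_1.mp4"))  # keyframe-aligned cut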
video_render/llm.py (new file, 187 lines)
@@ -0,0 +1,187 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from .config import BASE_DIR, Settings
|
||||
from .transcription import TranscriptionResult
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
|
||||
OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"
|
||||
|
||||
|
||||
class GeminiHighlighter:
|
||||
def __init__(self, settings: Settings) -> None:
|
||||
if not settings.gemini.api_key:
|
||||
raise RuntimeError("GEMINI_API_KEY nao foi definido")
|
||||
|
||||
prompt_path = Path(settings.gemini.prompt_path)
|
||||
|
||||
if not prompt_path.is_absolute():
|
||||
prompt_path = BASE_DIR / prompt_path
|
||||
|
||||
if not prompt_path.exists():
|
||||
raise FileNotFoundError(f"Prompt do Gemini nao encontrado: {prompt_path}")
|
||||
|
||||
self.prompt_template = prompt_path.read_text(encoding="utf-8")
|
||||
self.settings = settings
|
||||
|
||||
def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
|
||||
payload = {
|
||||
"transcript": transcription.full_text,
|
||||
"segments": [
|
||||
{
|
||||
"start": segment.start,
|
||||
"end": segment.end,
|
||||
"text": segment.text,
|
||||
}
|
||||
                for segment in transcription.segments
            ],
        }

        body = {
            "contents": [
                {
                    "role": "user",
                    "parts": [
                        {"text": self.prompt_template},
                        {"text": json.dumps(payload, ensure_ascii=False)},
                    ],
                }
            ]
        }

        # setdefault avoids a KeyError when only top_p/top_k are set
        # without temperature.
        if self.settings.gemini.temperature is not None:
            body.setdefault("generationConfig", {})["temperature"] = self.settings.gemini.temperature
        if self.settings.gemini.top_p is not None:
            body.setdefault("generationConfig", {})["topP"] = self.settings.gemini.top_p
        if self.settings.gemini.top_k is not None:
            body.setdefault("generationConfig", {})["topK"] = self.settings.gemini.top_k

        url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model)
        params = {"key": self.settings.gemini.api_key}

        response = requests.post(url, params=params, json=body, timeout=120)
        response.raise_for_status()
        data = response.json()

        candidates = data.get("candidates") or []
        if not candidates:
            raise RuntimeError("Gemini não retornou candidatos")

        text_parts = candidates[0].get("content", {}).get("parts", [])
        if not text_parts:
            raise RuntimeError("Resposta do Gemini sem conteúdo")

        raw_text = text_parts[0].get("text")
        if not raw_text:
            raise RuntimeError("Resposta do Gemini sem texto")

        parsed = self._extract_json(raw_text)
        highlights = parsed.get("highlights")
        if not isinstance(highlights, list):
            raise ValueError("Resposta do Gemini inválida: campo 'highlights' ausente")
        return highlights

    @staticmethod
    def _extract_json(response_text: str) -> Dict:
        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            # Models sometimes wrap the JSON in prose; fall back to the
            # outermost {...} block.
            start = response_text.find("{")
            end = response_text.rfind("}")
            if start == -1 or end == -1:
                raise
            subset = response_text[start : end + 1]
            return json.loads(subset)


class OpenRouterCopywriter:
    def __init__(self, settings: Settings) -> None:
        if not settings.openrouter.api_key:
            raise RuntimeError("OPENROUTER_API_KEY não foi definido")
        self.settings = settings

    def generate_titles(self, highlights: List[Dict]) -> List[str]:
        if not highlights:
            return []

        prompt = (
            "Você é um copywriter especializado em títulos curtos e virais para reels.\n"
            "Receberá uma lista de trechos destacados de um vídeo com resumo e tempo.\n"
            "Produza um título envolvente (até 60 caracteres) para cada item.\n"
            "Responda apenas em JSON com a seguinte estrutura:\n"
            '{"titles": ["titulo 1", "titulo 2"]}\n'
            "Títulos devem ser em português, usar verbos fortes e refletir o resumo."
        )

        user_payload = {
            "highlights": [
                {
                    "start": item.get("start"),
                    "end": item.get("end"),
                    "summary": item.get("summary"),
                }
                for item in highlights
            ]
        }

        body = {
            "model": self.settings.openrouter.model,
            "temperature": self.settings.openrouter.temperature,
            "max_tokens": self.settings.openrouter.max_output_tokens,
            "messages": [
                {"role": "system", "content": prompt},
                {
                    "role": "user",
                    "content": json.dumps(user_payload, ensure_ascii=False),
                },
            ],
        }

        headers = {
            "Authorization": f"Bearer {self.settings.openrouter.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://localhost",
            "X-Title": "video-render-pipeline",
        }

        response = requests.post(
            OPENROUTER_ENDPOINT, json=body, headers=headers, timeout=120
        )
        response.raise_for_status()
        data = response.json()

        choices = data.get("choices") or []
        if not choices:
            raise RuntimeError("OpenRouter não retornou escolhas")

        message = choices[0].get("message", {}).get("content")
        if not message:
            raise RuntimeError("Resposta do OpenRouter sem conteúdo")

        parsed = self._extract_json(message)
        titles = parsed.get("titles")
        if not isinstance(titles, list):
            raise ValueError("Resposta do OpenRouter inválida: campo 'titles'")
        return [str(title) for title in titles]

    @staticmethod
    def _extract_json(response_text: str) -> Dict:
        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            start = response_text.find("{")
            end = response_text.rfind("}")
            if start == -1 or end == -1:
                raise
            subset = response_text[start : end + 1]
            return json.loads(subset)
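Both classes share the same tolerant `_extract_json` fallback, so the only hard contract is the JSON shape each model returns. A minimal standalone sketch of that contract, with every timestamp, summary, and title invented for illustration:

import json

# Shape GeminiHighlighter.generate_highlights is expected to return:
gemini_reply = '{"highlights": [{"start": 12.4, "end": 38.9, "summary": "Convidado explica o erro mais comum"}]}'
highlights = json.loads(gemini_reply)["highlights"]

# OpenRouter replies may wrap the JSON in prose; the fallback slices the
# outermost {...} block, exactly as _extract_json does above:
openrouter_reply = 'Claro! {"titles": ["O erro que todo iniciante comete"]}'
start, end = openrouter_reply.find("{"), openrouter_reply.rfind("}")
titles = json.loads(openrouter_reply[start : end + 1])["titles"]

for item, title in zip(highlights, titles):
    print(f"[{item['start']:.1f}s-{item['end']:.1f}s] {title}")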
13
video_render/logging_utils.py
Normal file
@@ -0,0 +1,13 @@
from __future__ import annotations

import logging
import os


def setup_logging() -> None:
    log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )
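A hypothetical entrypoint would call this once before anything logs; LOG_LEVEL comes from the environment and defaults to INFO:

import logging
import os

from video_render.logging_utils import setup_logging

os.environ.setdefault("LOG_LEVEL", "DEBUG")  # illustration only
setup_logging()
logging.getLogger("video_render").debug("logging configurado")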
64
video_render/media.py
Normal file
@@ -0,0 +1,64 @@
from __future__ import annotations

import logging
import shutil
from dataclasses import dataclass
from pathlib import Path

from .config import Settings
from .ffmpeg import extract_audio_to_wav
from .utils import ensure_workspace, remove_paths, sanitize_filename

logger = logging.getLogger(__name__)


@dataclass
class VideoWorkspace:
    original_filename: str
    sanitized_name: str
    workspace_dir: Path
    output_dir: Path
    source_path: Path
    working_video_path: Path
    audio_path: Path


class MediaPreparer:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings

    def prepare(self, filename: str) -> VideoWorkspace:
        source_path = self.settings.videos_dir / filename
        if not source_path.exists():
            raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {source_path}")

        sanitized_name = sanitize_filename(Path(filename).stem)
        workspace_dir = ensure_workspace(self.settings.videos_dir, sanitized_name)

        existing_children = list(workspace_dir.iterdir())
        if existing_children:
            logger.info("Limpando workspace existente para %s", sanitized_name)
            remove_paths(existing_children)

        destination_name = f"{sanitized_name}{source_path.suffix.lower()}"
        working_video_path = workspace_dir / destination_name
        shutil.copy2(source_path, working_video_path)
        logger.info("Cópia do vídeo criada em %s", working_video_path)

        output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name)
        existing_outputs = list(output_dir.iterdir())
        if existing_outputs:
            remove_paths(existing_outputs)

        audio_path = workspace_dir / "audio.wav"
        extract_audio_to_wav(working_video_path, audio_path)

        return VideoWorkspace(
            original_filename=filename,
            sanitized_name=sanitized_name,
            workspace_dir=workspace_dir,
            output_dir=output_dir,
            source_path=source_path,
            working_video_path=working_video_path,
            audio_path=audio_path,
        )
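A sketch of what prepare() yields for a job file, assuming the default container mounts (/app/videos, /app/outputs) from the compose file; how Settings is constructed lives in video_render/config.py, so the bare constructor below is an assumption:

from video_render.config import Settings
from video_render.media import MediaPreparer

settings = Settings()  # assumed default construction
ws = MediaPreparer(settings).prepare("Meu Vídeo Épico.mp4")
print(ws.sanitized_name)      # -> "meu_video_epico"
print(ws.working_video_path)  # -> /app/videos/meu_video_epico/meu_video_epico.mp4
print(ws.audio_path)          # -> /app/videos/meu_video_epico/audio.wav
print(ws.output_dir)          # -> /app/outputs/meu_video_epico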
85
video_render/messaging.py
Normal file
@@ -0,0 +1,85 @@
from __future__ import annotations

import json
import logging
from typing import Any, Callable, Dict

import pika

from .config import Settings

logger = logging.getLogger(__name__)

MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]


class RabbitMQWorker:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self._params = pika.ConnectionParameters(
            host=settings.rabbitmq.host,
            port=settings.rabbitmq.port,
            credentials=pika.PlainCredentials(
                settings.rabbitmq.user, settings.rabbitmq.password
            ),
            heartbeat=settings.rabbitmq.heartbeat,
            blocked_connection_timeout=settings.rabbitmq.blocked_timeout,
        )

    def consume_forever(self, handler: MessageHandler) -> None:
        while True:
            try:
                with pika.BlockingConnection(self._params) as connection:
                    channel = connection.channel()
                    channel.queue_declare(queue=self.settings.rabbitmq.consume_queue, durable=True)
                    channel.queue_declare(queue=self.settings.rabbitmq.publish_queue, durable=True)
                    channel.basic_qos(prefetch_count=self.settings.rabbitmq.prefetch_count)

                    def _on_message(ch: pika.adapters.blocking_connection.BlockingChannel, method, properties, body):
                        try:
                            message = json.loads(body)
                        except json.JSONDecodeError:
                            logger.error("Mensagem inválida recebida: %s", body)
                            ch.basic_ack(delivery_tag=method.delivery_tag)
                            return

                        logger.info("Mensagem recebida: %s", message.get("filename", "<sem_nome>"))
                        try:
                            response = handler(message)
                        except Exception:
                            logger.exception("Erro não tratado durante o processamento")
                            response = {
                                "hasError": True,
                                "error": "Erro não tratado no pipeline",
                                "filename": message.get("filename"),
                                "videoId": message.get("videoId"),
                                "url": message.get("url"),
                                "processedFiles": [],
                            }

                        try:
                            payload = json.dumps(response)
                            ch.basic_publish(
                                exchange="",
                                routing_key=self.settings.rabbitmq.publish_queue,
                                body=payload,
                                properties=pika.BasicProperties(delivery_mode=2),
                            )
                            logger.info("Resposta publicada para '%s'", self.settings.rabbitmq.publish_queue)
                        except Exception:
                            logger.exception("Falha ao publicar a resposta na fila de upload")
                        finally:
                            ch.basic_ack(delivery_tag=method.delivery_tag)

                    channel.basic_consume(
                        queue=self.settings.rabbitmq.consume_queue,
                        on_message_callback=_on_message,
                        auto_ack=False,
                    )
                    logger.info("Consumidor iniciado. Aguardando mensagens...")
                    channel.start_consuming()
            except pika.exceptions.AMQPConnectionError:
                logger.exception("Conexão com RabbitMQ perdida. Tentando reconectar...")
            except KeyboardInterrupt:
                logger.info("Encerrando consumidor por interrupção do usuário.")
                break
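The worker enforces a fixed reply contract even on unhandled failures: the message is always acked and a reply is always published. A standalone illustration of both shapes, with field values invented:

import json

incoming = json.loads(
    '{"filename": "entrevista.mp4", "videoId": "abc123", "url": "https://example.com/v/abc123"}'
)

# Reply published to the upload queue when the handler raises,
# mirroring the fallback built in _on_message above:
failure_reply = {
    "hasError": True,
    "error": "Erro não tratado no pipeline",
    "filename": incoming.get("filename"),
    "videoId": incoming.get("videoId"),
    "url": incoming.get("url"),
    "processedFiles": [],
}
print(json.dumps(failure_reply, ensure_ascii=False))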
236
video_render/pipeline.py
Normal file
@@ -0,0 +1,236 @@
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional

from .config import Settings
from .llm import GeminiHighlighter, OpenRouterCopywriter
from .media import MediaPreparer, VideoWorkspace
from .rendering import VideoRenderer
from .transcription import TranscriptionResult, TranscriptionService
from .utils import remove_paths, sanitize_filename

logger = logging.getLogger(__name__)


@dataclass
class JobMessage:
    filename: str
    url: Optional[str]
    video_id: Optional[str]
    extras: Dict[str, Any] = field(default_factory=dict)


@dataclass
class HighlightWindow:
    start: float
    end: float
    summary: str
    title: Optional[str] = None


@dataclass
class RenderedClip:
    path: Path
    start: float
    end: float
    title: str
    summary: str
    index: int


@dataclass
class PipelineContext:
    job: JobMessage
    workspace: Optional[VideoWorkspace] = None
    transcription: Optional[TranscriptionResult] = None
    highlight_windows: List[HighlightWindow] = field(default_factory=list)
    rendered_clips: List[RenderedClip] = field(default_factory=list)


class VideoPipeline:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self.media_preparer = MediaPreparer(settings)
        self.transcriber = TranscriptionService(settings)
        self.highlighter = GeminiHighlighter(settings)
        self.copywriter = OpenRouterCopywriter(settings)
        self.renderer = VideoRenderer(settings)

    def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        context = PipelineContext(job=self._parse_job(message))
        try:
            self._prepare_workspace(context)
            self._generate_transcription(context)
            self._determine_highlights(context)
            self._generate_titles(context)
            self._render_clips(context)
            return self._build_success_payload(context)
        except Exception as exc:
            logger.exception("Falha ao processar vídeo %s", context.job.filename)
            return self._handle_failure(context, exc)

    def _parse_job(self, message: Dict[str, Any]) -> JobMessage:
        filename = message.get("filename")
        if not filename:
            raise ValueError("Mensagem inválida: 'filename' é obrigatório")

        url = message.get("url")
        video_id = message.get("videoId") or message.get("video_id")
        extras = {
            key: value
            for key, value in message.items()
            if key not in {"filename", "url", "videoId", "video_id"}
        }
        return JobMessage(filename=filename, url=url, video_id=video_id, extras=extras)

    def _prepare_workspace(self, context: PipelineContext) -> None:
        context.workspace = self.media_preparer.prepare(context.job.filename)

    def _generate_transcription(self, context: PipelineContext) -> None:
        if not context.workspace:
            raise RuntimeError("Workspace não preparado")
        transcription = self.transcriber.transcribe(context.workspace.audio_path)
        TranscriptionService.persist(transcription, context.workspace.workspace_dir)
        context.transcription = transcription

    def _determine_highlights(self, context: PipelineContext) -> None:
        if not context.transcription:
            raise RuntimeError("Transcrição não disponível")

        highlights_raw = self.highlighter.generate_highlights(context.transcription)
        windows: List[HighlightWindow] = []

        for item in highlights_raw:
            try:
                start = float(item.get("start", 0))  # type: ignore[arg-type]
                end = float(item.get("end", start))  # type: ignore[arg-type]
            except (TypeError, ValueError):
                logger.warning("Highlight inválido ignorado: %s", item)
                continue

            summary = str(item.get("summary", "")).strip()
            if end <= start:
                logger.debug("Highlight com intervalo inválido ignorado: %s", item)
                continue

            windows.append(HighlightWindow(start=start, end=end, summary=summary))

        if not windows:
            last_end = (
                context.transcription.segments[-1].end
                if context.transcription.segments
                else 0
            )
            windows.append(
                HighlightWindow(
                    start=0.0,
                    end=max(last_end, 10.0),
                    summary="Sem destaque identificado; fallback automático.",
                )
            )

        context.highlight_windows = windows

    def _generate_titles(self, context: PipelineContext) -> None:
        if not context.highlight_windows:
            return

        highlight_dicts = [
            {"start": window.start, "end": window.end, "summary": window.summary}
            for window in context.highlight_windows
        ]
        titles = self.copywriter.generate_titles(highlight_dicts)

        for window, title in zip(context.highlight_windows, titles):
            window.title = title.strip()

    def _render_clips(self, context: PipelineContext) -> None:
        if not context.workspace or not context.highlight_windows or not context.transcription:
            return

        titles = [
            window.title or window.summary for window in context.highlight_windows
        ]

        render_results = self.renderer.render(
            workspace_path=str(context.workspace.working_video_path),
            highlight_windows=context.highlight_windows,
            transcription=context.transcription,
            titles=titles,
            output_dir=context.workspace.output_dir,
        )

        context.rendered_clips = [
            RenderedClip(
                path=Path(path),
                start=start,
                end=end,
                title=title,
                summary=summary,
                index=index,
            )
            for path, start, end, title, summary, index in render_results
        ]

    def _build_success_payload(self, context: PipelineContext) -> Dict[str, Any]:
        return {
            "hasError": False,
            "videosProcessedQuantity": len(context.rendered_clips),
            "filename": context.job.filename,
            "videoId": context.job.video_id,
            "url": context.job.url,
            "workspaceFolder": context.workspace.sanitized_name if context.workspace else None,
            "outputDirectory": self._relative_path(context.workspace.output_dir) if context.workspace else None,
            "processedFiles": [
                {
                    "path": self._relative_path(clip.path),
                    "start": clip.start,
                    "end": clip.end,
                    "title": clip.title,
                    "summary": clip.summary,
                    "clipIndex": clip.index,
                }
                for clip in context.rendered_clips
            ],
        }

    def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]:
        logger.error("Erro no pipeline: %s", exc)
        cleanup_targets: List[Path] = []

        if context.workspace:
            cleanup_targets.append(context.workspace.workspace_dir)
            cleanup_targets.append(context.workspace.output_dir)
            original_path = context.workspace.source_path
            if original_path.exists():
                cleanup_targets.append(original_path)
        else:
            sanitized = sanitize_filename(Path(context.job.filename).stem)
            job_output_dir = self.settings.outputs_dir / sanitized
            if job_output_dir.exists():
                cleanup_targets.append(job_output_dir)
            original_path = self.settings.videos_dir / context.job.filename
            if original_path.exists():
                cleanup_targets.append(original_path)

        remove_paths(cleanup_targets)

        return {
            "hasError": True,
            "error": str(exc),
            "filename": context.job.filename,
            "videoId": context.job.video_id,
            "url": context.job.url,
            "processedFiles": [],
        }

    def _relative_path(self, path: Path) -> str:
        base = self.settings.videos_dir.parent
        try:
            return str(path.relative_to(base))
        except ValueError:
            return str(path)
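For reference, the success payload _build_success_payload assembles, with invented values; paths are made relative to the parent of videos_dir, so under the compose mounts they come out as "outputs/...":

# Invented example of a one-clip success payload:
success_payload = {
    "hasError": False,
    "videosProcessedQuantity": 1,
    "filename": "entrevista.mp4",
    "videoId": "abc123",
    "url": "https://example.com/v/abc123",
    "workspaceFolder": "entrevista",
    "outputDirectory": "outputs/entrevista",
    "processedFiles": [
        {
            "path": "outputs/entrevista/clip_01.mp4",
            "start": 12.4,
            "end": 38.9,
            "title": "O erro que todo iniciante comete",
            "summary": "Convidado explica o erro mais comum",
            "clipIndex": 1,
        }
    ],
}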
406
video_render/rendering.py
Normal file
@@ -0,0 +1,406 @@
from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from typing import Iterable, List, Sequence, Tuple

import numpy as np
# MoviePy >= 2.0: moviepy.editor was removed, and the with_*/resized/
# subclipped methods used below belong to the 2.x API.
from moviepy import (
    ColorClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    VideoFileClip,
)
from PIL import Image, ImageColor, ImageDraw, ImageFont

from .config import Settings
from .transcription import TranscriptionResult, WordTiming

logger = logging.getLogger(__name__)


def clamp_time(value: float, minimum: float = 0.0) -> float:
    return max(minimum, float(value))


@dataclass
class CaptionClipSet:
    base: ImageClip
    highlights: List[ImageClip]


class CaptionBuilder:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self.font_path = settings.rendering.font_path
        if not self.font_path.exists():
            raise FileNotFoundError(f"Fonte não encontrada: {self.font_path}")

        self.font = ImageFont.truetype(
            str(self.font_path), settings.rendering.subtitle_font_size
        )
        self.base_color = ImageColor.getrgb(settings.rendering.base_color)
        self.highlight_color = ImageColor.getrgb(settings.rendering.highlight_color)
        self.canvas_width = settings.rendering.frame_width - 160
        self.canvas_height = int(settings.rendering.subtitle_font_size * 2.2)
        self.min_words = settings.rendering.caption_min_words
        self.max_words = settings.rendering.caption_max_words

        bbox = self.font.getbbox("Ay")
        self.text_height = bbox[3] - bbox[1]
        self.baseline = (self.canvas_height - self.text_height) // 2 - bbox[1]
        self.space_width = self.font.getbbox(" ")[2] - self.font.getbbox(" ")[0]

    def build(self, words: Sequence[WordTiming], clip_start: float) -> List[CaptionClipSet]:
        grouped = self._group_words(words)
        clip_sets: List[CaptionClipSet] = []

        for group in grouped:
            group_start = clamp_time(group[0].start, minimum=clip_start)
            group_end = clamp_time(group[-1].end, minimum=group_start + 0.05)
            duration = max(0.05, group_end - group_start)
            start_offset = group_start - clip_start

            base_image, highlight_images = self._render_group(group)

            base_clip = (
                ImageClip(np.array(base_image))
                .with_start(start_offset)
                .with_duration(duration)
            )

            highlight_clips: List[ImageClip] = []
            for word, image in zip(group, highlight_images):
                h_start = clamp_time(word.start, minimum=clip_start) - clip_start
                h_end = clamp_time(word.end, minimum=word.start + 0.02) - clip_start
                h_duration = max(0.05, h_end - h_start)
                highlight_clip = (
                    ImageClip(np.array(image))
                    .with_start(h_start)
                    .with_duration(h_duration)
                )
                highlight_clips.append(highlight_clip)

            clip_sets.append(CaptionClipSet(base=base_clip, highlights=highlight_clips))

        return clip_sets

    def _render_group(self, group: Sequence[WordTiming]) -> Tuple[Image.Image, List[Image.Image]]:
        texts = [self._clean_word(word.word) for word in group]

        widths = []
        for text in texts:
            bbox = self.font.getbbox(text)
            widths.append(bbox[2] - bbox[0])

        total_width = sum(widths)
        if len(widths) > 1:
            total_width += self.space_width * (len(widths) - 1)

        start_x = max(0, (self.canvas_width - total_width) // 2)

        base_image = Image.new("RGBA", (self.canvas_width, self.canvas_height), (0, 0, 0, 0))
        base_draw = ImageDraw.Draw(base_image)
        highlight_images: List[Image.Image] = []

        x = start_x
        for text, width in zip(texts, widths):
            base_draw.text((x, self.baseline), text, font=self.font, fill=self.base_color)

            highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
            highlight_draw = ImageDraw.Draw(highlight_image)
            highlight_draw.text(
                (x, self.baseline), text, font=self.font, fill=self.highlight_color
            )
            highlight_images.append(highlight_image)

            x += width + self.space_width

        return base_image, highlight_images

    def _group_words(self, words: Sequence[WordTiming]) -> List[List[WordTiming]]:
        if not words:
            return []

        grouped: List[List[WordTiming]] = []
        buffer: List[WordTiming] = []

        for word in words:
            buffer.append(word)
            if len(buffer) == self.max_words:
                grouped.append(buffer)
                buffer = []

        if buffer:
            if len(buffer) == 1 and grouped:
                grouped[-1].extend(buffer)
            else:
                grouped.append(buffer)

        # Rebalance groups to respect minimum size when possible
        for idx, group in enumerate(grouped[:-1]):
            if len(group) < self.min_words and len(grouped[idx + 1]) > self.min_words:
                deficit = self.min_words - len(group)
                transfer = grouped[idx + 1][:deficit]
                grouped[idx] = group + transfer
                grouped[idx + 1] = grouped[idx + 1][deficit:]

        grouped = [grp for grp in grouped if grp]
        return grouped

    @staticmethod
    def _clean_word(text: str) -> str:
        text = text.strip()
        text = re.sub(r"\s+", " ", text)
        return text or "..."


class VideoRenderer:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self.captions = CaptionBuilder(settings)

    def render(
        self,
        workspace_path: str,
        highlight_windows: Sequence,
        transcription: TranscriptionResult,
        titles: Sequence[str],
        output_dir,
    ) -> List[Tuple[str, float, float, str, str, int]]:
        results: List[Tuple[str, float, float, str, str, int]] = []

        with VideoFileClip(workspace_path) as base_clip:
            video_duration = base_clip.duration or 0
            for index, window in enumerate(highlight_windows, start=1):
                start = clamp_time(window.start)
                end = clamp_time(window.end)
                start = min(start, video_duration)
                end = min(end, video_duration)
                if end <= start:
                    logger.info("Janela ignorada por intervalo inválido: %s", window)
                    continue

                subclip = base_clip.subclipped(start, end)
                try:
                    rendered_path = self._render_single_clip(
                        subclip=subclip,
                        start=start,
                        end=end,
                        title=titles[index - 1] if index - 1 < len(titles) else window.summary,
                        summary=window.summary,
                        index=index,
                        transcription=transcription,
                        output_dir=output_dir,
                    )
                finally:
                    subclip.close()

                results.append(
                    (
                        rendered_path,
                        float(start),
                        float(end),
                        titles[index - 1] if index - 1 < len(titles) else window.summary,
                        window.summary,
                        index,
                    )
                )

        return results

    def _render_single_clip(
        self,
        subclip: VideoFileClip,
        start: float,
        end: float,
        title: str,
        summary: str,
        index: int,
        transcription: TranscriptionResult,
        output_dir,
    ) -> str:
        duration = end - start
        frame_w = self.settings.rendering.frame_width
        frame_h = self.settings.rendering.frame_height
        top_h = int(frame_h * 0.18)
        bottom_h = int(frame_h * 0.20)
        video_area_h = frame_h - top_h - bottom_h

        scale_factor = min(
            frame_w / subclip.w,
            video_area_h / subclip.h,
        )
        resized_clip = subclip.resized(scale_factor)
        video_y = top_h + (video_area_h - resized_clip.h) // 2

        video_clip = resized_clip.with_position(
            ((frame_w - resized_clip.w) // 2, video_y)
        )

        background = ColorClip(size=(frame_w, frame_h), color=(0, 0, 0)).with_duration(duration)
        top_panel = (
            ColorClip(size=(frame_w, top_h), color=(12, 12, 12))
            .with_duration(duration)
            .with_opacity(0.85)
        )
        bottom_panel = (
            ColorClip(size=(frame_w, bottom_h), color=(12, 12, 12))
            .with_position((0, frame_h - bottom_h))
            .with_duration(duration)
            .with_opacity(0.85)
        )

        title_text = title or summary
        wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160)
        title_clip = TextClip(
            text=wrapped_title,
            font=str(self.settings.rendering.font_path),
            font_size=self.settings.rendering.title_font_size,
            color=self.settings.rendering.base_color,
            method="caption",
            size=(frame_w - 160, top_h - 40),
        ).with_duration(duration)
        title_clip = title_clip.with_position(
            ((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2)
        )

        words = self._collect_words(transcription, start, end)
        caption_sets = self.captions.build(words, clip_start=start)

        caption_clips = []
        caption_resources: List[ImageClip] = []
        caption_y = frame_h - bottom_h + (bottom_h - self.captions.canvas_height) // 2
        for clip_set in caption_sets:
            base_positioned = clip_set.base.with_position(("center", caption_y))
            caption_clips.append(base_positioned)
            caption_resources.append(clip_set.base)
            for highlight in clip_set.highlights:
                positioned = highlight.with_position(("center", caption_y))
                caption_clips.append(positioned)
                caption_resources.append(highlight)

        if not caption_clips:
            fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160)
            caption_clips.append(
                TextClip(
                    text=fallback_text,
                    font=str(self.settings.rendering.font_path),
                    font_size=self.settings.rendering.subtitle_font_size,
                    color=self.settings.rendering.base_color,
                    method="caption",
                    size=(frame_w - 160, bottom_h - 40),
                )
                .with_duration(duration)
                .with_position(("center", caption_y))
            )

        composite = CompositeVideoClip(
            [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
            size=(frame_w, frame_h),
        )

        output_path = output_dir / f"clip_{index:02d}.mp4"
        composite.write_videofile(
            str(output_path),
            codec=self.settings.rendering.video_codec,
            audio_codec=self.settings.rendering.audio_codec,
            fps=self.settings.rendering.fps,
            bitrate=self.settings.rendering.bitrate,
            ffmpeg_params=[
                "-preset",
                self.settings.rendering.preset,
                "-pix_fmt",
                "yuv420p",
            ],
            temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"),
            remove_temp=True,
            threads=4,
        )

        composite.close()
        resized_clip.close()
        video_clip.close()
        title_clip.close()
        background.close()
        top_panel.close()
        bottom_panel.close()
        for clip in caption_clips:
            clip.close()
        for clip in caption_resources:
            clip.close()

        return str(output_path)

    def _collect_words(
        self, transcription: TranscriptionResult, start: float, end: float
    ) -> List[WordTiming]:
        collected: List[WordTiming] = []
        for segment in transcription.segments:
            if segment.end < start or segment.start > end:
                continue

            if segment.words:
                for word in segment.words:
                    if word.end < start or word.start > end:
                        continue
                    collected.append(
                        WordTiming(
                            start=max(start, word.start),
                            end=min(end, word.end),
                            word=word.word,
                        )
                    )
            else:
                collected.extend(self._fallback_words(segment.text, segment.start, segment.end, start, end))

        collected.sort(key=lambda w: w.start)
        return collected

    def _fallback_words(
        self,
        text: str,
        segment_start: float,
        segment_end: float,
        window_start: float,
        window_end: float,
    ) -> Iterable[WordTiming]:
        words = [w for w in re.split(r"\s+", text.strip()) if w]
        if not words:
            return []

        seg_start = max(segment_start, window_start)
        seg_end = min(segment_end, window_end)
        duration = max(0.01, seg_end - seg_start)
        step = duration / len(words)

        timings: List[WordTiming] = []
        for idx, word in enumerate(words):
            w_start = seg_start + idx * step
            w_end = min(seg_end, w_start + step)
            timings.append(WordTiming(start=w_start, end=w_end, word=word))
        return timings

    @staticmethod
    def _wrap_text(text: str, max_width: int) -> str:
        text = text.strip()
        if not text:
            return ""

        words = text.split()
        lines: List[str] = []
        current: List[str] = []
        for word in words:
            current.append(word)
            if len(" ".join(current)) > max_width // 18:
                lines.append(" ".join(current[:-1]))
                current = [current[-1]]
        if current:
            lines.append(" ".join(current))
        return "\n".join(lines)
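A quick sanity check of the layout split in _render_single_clip, assuming the common 1080x1920 vertical frame (the real values come from settings.rendering):

# Assumed frame size; illustration only.
frame_w, frame_h = 1080, 1920
top_h = int(frame_h * 0.18)                 # 345 px title panel
bottom_h = int(frame_h * 0.20)              # 384 px caption panel
video_area_h = frame_h - top_h - bottom_h   # 1191 px left for the video

# A 1920x1080 landscape source scales width-first:
src_w, src_h = 1920, 1080
scale = min(frame_w / src_w, video_area_h / src_h)
print(scale, src_w * scale, src_h * scale)  # 0.5625 -> 1080.0 x 607.5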
122
video_render/transcription.py
Normal file
@@ -0,0 +1,122 @@
from __future__ import annotations

import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional

from faster_whisper import WhisperModel

from .config import Settings

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class WordTiming:
    start: float
    end: float
    word: str


@dataclass(frozen=True)
class TranscriptSegment:
    id: int
    start: float
    end: float
    text: str
    words: List[WordTiming]


@dataclass(frozen=True)
class TranscriptionResult:
    segments: List[TranscriptSegment]
    full_text: str


class TranscriptionService:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self._model: Optional[WhisperModel] = None

    def _load_model(self) -> WhisperModel:
        if self._model is None:
            logger.info(
                "Carregando modelo Faster-Whisper '%s' (device=%s, compute_type=%s)",
                self.settings.whisper.model_size,
                self.settings.whisper.device or "auto",
                self.settings.whisper.compute_type or "default",
            )
            self._model = WhisperModel(
                self.settings.whisper.model_size,
                device=self.settings.whisper.device or "auto",
                compute_type=self.settings.whisper.compute_type or "default",
                download_root=str(self.settings.whisper.download_root),
            )
        return self._model

    def transcribe(self, audio_path: Path) -> TranscriptionResult:
        model = self._load_model()
        segments, _ = model.transcribe(
            str(audio_path),
            beam_size=5,
            word_timestamps=True,
        )

        parsed_segments: List[TranscriptSegment] = []
        full_text_parts: List[str] = []

        for idx, segment in enumerate(segments):
            words = [
                WordTiming(start=w.start, end=w.end, word=w.word.strip())
                for w in segment.words or []
                if w.word.strip()
            ]
            text = segment.text.strip()
            full_text_parts.append(text)
            parsed_segments.append(
                TranscriptSegment(
                    id=idx,
                    start=segment.start,
                    end=segment.end,
                    text=text,
                    words=words,
                )
            )

        return TranscriptionResult(
            segments=parsed_segments,
            full_text=" ".join(full_text_parts).strip(),
        )

    @staticmethod
    def persist(result: TranscriptionResult, destination: Path) -> None:
        json_path = destination / "transcription.json"
        text_path = destination / "transcription.txt"

        payload = {
            "segments": [
                {
                    "id": segment.id,
                    "start": segment.start,
                    "end": segment.end,
                    "text": segment.text,
                    "words": [
                        {"start": word.start, "end": word.end, "text": word.word}
                        for word in segment.words
                    ],
                }
                for segment in result.segments
            ],
            "full_text": result.full_text,
        }

        with json_path.open("w", encoding="utf-8") as fp:
            json.dump(payload, fp, ensure_ascii=False, indent=2)

        with text_path.open("w", encoding="utf-8") as fp:
            fp.write(result.full_text)

        logger.info("Transcrição salva em %s", destination)
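An invented excerpt of the transcription.json that persist() writes next to the working copy (content and timings are illustrative; the structure matches the payload built above):

{
  "segments": [
    {
      "id": 0,
      "start": 0.0,
      "end": 1.4,
      "text": "Bem-vindos ao canal",
      "words": [
        {"start": 0.0, "end": 0.62, "text": "Bem-vindos"},
        {"start": 0.62, "end": 0.9, "text": "ao"},
        {"start": 0.9, "end": 1.4, "text": "canal"}
      ]
    }
  ],
  "full_text": "Bem-vindos ao canal"
}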
38
video_render/utils.py
Normal file
@@ -0,0 +1,38 @@
from __future__ import annotations

import re
import unicodedata
from pathlib import Path
from typing import Iterable


def sanitize_filename(name: str) -> str:
    normalized = unicodedata.normalize("NFKD", name)
    ascii_text = normalized.encode("ASCII", "ignore").decode()
    ascii_text = ascii_text.lower()
    ascii_text = ascii_text.replace(" ", "_")
    ascii_text = re.sub(r"[^a-z0-9_\-\.]", "", ascii_text)
    ascii_text = re.sub(r"_+", "_", ascii_text)
    return ascii_text.strip("_") or "video"


def ensure_workspace(root: Path, folder_name: str) -> Path:
    workspace = root / folder_name
    workspace.mkdir(parents=True, exist_ok=True)
    return workspace


def remove_paths(paths: Iterable[Path]) -> None:
    for path in paths:
        if not path.exists():
            continue
        if path.is_file() or path.is_symlink():
            path.unlink(missing_ok=True)
        else:
            for child in sorted(path.rglob("*"), reverse=True):
                if child.is_file() or child.is_symlink():
                    child.unlink(missing_ok=True)
                elif child.is_dir():
                    child.rmdir()
            path.rmdir()
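sanitize_filename in a nutshell, with a couple of hypothetical inputs traced through the rules above (NFKD fold, lowercase, spaces to underscores, strip everything outside [a-z0-9_-.], collapse underscores, fall back to "video"):

from video_render.utils import sanitize_filename

print(sanitize_filename("Meu Vídeo Épico (FINAL)"))  # -> meu_video_epico_final
print(sanitize_filename("Aula 01 - Introdução"))     # -> aula_01_-_introducao
print(sanitize_filename("???"))                      # -> video (fallback)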