Cria novos componentes

This commit is contained in:
LeoMortari
2025-10-20 17:56:36 -03:00
parent 2b99d2ad78
commit b090f7c2cb
38 changed files with 1391 additions and 1024 deletions

269
main.py
View File

@@ -1,265 +1,16 @@
"""Entry point for the video processing pipeline.
This script listens to a RabbitMQ queue for new video processing tasks. When
a message arrives, it performs the following steps:
1. Creates a working directory for the video based off of its filename.
2. Extracts the audio track with FFMPEG and runs Faster-Whisper to produce
a transcription with word-level timestamps.
3. Uses the Gemini model to determine which parts of the video have the
highest potential for engagement. These highlight segments are
represented as a list of objects containing start/end timestamps and
text.
4. Uses the OpenRouter model to generate a sensational title for each
highlight. Only the ``topText`` field is kept; the description is
intentionally omitted since the caption will be burned into the video.
5. Cuts the original video into individual clips corresponding to each
highlight and renders them vertically with a title above and a dynamic
caption below.
6. Publishes a message to the upload queue with information about the
generated clips. On success, this message contains the list of output
files. On failure, ``hasError`` will be set to ``True`` and the
``error`` field will describe what went wrong.
7. Cleans up temporary files (audio, transcript, working directory) and
deletes the original source video from the ``videos`` directory to
conserve disk space.
The queue names and RabbitMQ credentials are configured via environment
variables. See the accompanying ``docker-compose.yml`` for defaults.
"""
from __future__ import annotations
import json
import os
import shutil
import time
import traceback
from typing import Any, Dict, List
import pika
from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds
from .transcribe import transcribe
from .llm import LLMError, select_highlights, generate_titles
from .render import render_clip
from video_render.config import load_settings
from video_render.logging_utils import setup_logging
from video_render.messaging import RabbitMQWorker
from video_render.pipeline import VideoPipeline
# Environment variables with sensible defaults
RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq")
RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672))
RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin")
# No default on purpose: main() raises when the password is missing.
RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS")
# Queue consumed for new render tasks.
RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render")
# Queue where success/error payloads are published.
RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")
# NOTE(review): this diff view contains two definitions of main(); this one
# appears to be the new version (see the VideoPipeline/RabbitMQWorker wiring
# near the end of the file) — confirm against the full new file.
def main() -> None:
    """Initialise logging and settings, then validate broker credentials."""
    setup_logging()
    settings = load_settings()
    # Fail fast: the worker cannot authenticate without a password.
    if not RABBITMQ_PASS:
        raise RuntimeError("RABBITMQ_PASS não definido no ambiente")
def get_next_message() -> Any:
    """Fetch and acknowledge a single message from ``RABBITMQ_QUEUE``.

    Returns the raw message body (bytes), or ``None`` when the queue is
    empty. A fresh connection is opened per call to avoid keeping stale
    connections alive; the ``finally`` guarantees it is closed even if
    the broker interaction raises (the original leaked the connection
    on error).

    NOTE(review): the message is acked *before* processing, so a crash
    during processing loses it — confirm at-most-once delivery is the
    intended semantics.
    """
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    try:
        channel = connection.channel()
        method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE)
        if method_frame:
            channel.basic_ack(method_frame.delivery_tag)
            return body
        return None
    finally:
        connection.close()
def publish_to_queue(payload: Dict[str, Any]) -> None:
    """Publish a JSON-serialisable payload to ``RABBITMQ_UPLOAD_QUEUE``.

    Opens a short-lived connection, declares the queue as durable, and
    publishes with ``delivery_mode=2`` (persistent message). The
    ``finally`` guarantees the connection is closed even when the
    declare/publish raises (the original leaked it on error).
    """
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    try:
        channel = connection.channel()
        channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True)
        channel.basic_publish(
            exchange="",
            routing_key=RABBITMQ_UPLOAD_QUEUE,
            body=json.dumps(payload),
            properties=pika.BasicProperties(delivery_mode=2),
        )
    finally:
        connection.close()
def build_srt(segments: List[Dict[str, Any]]) -> str:
    """Render transcript segments as an SRT-like block of text.

    Each segment must provide ``start``, ``end`` and ``text``. Start/end
    are formatted as ``HH:MM:SS,mmm`` (the shape the Gemini prompt
    expects) and entries are separated by a blank line.
    """
    entries = [
        f"{seconds_to_timestamp(segment['start'])} --> "
        f"{seconds_to_timestamp(segment['end'])}\n{segment['text']}"
        for segment in segments
    ]
    return "\n\n".join(entries)
def _clip_words(
    words: List[Dict[str, Any]], start_sec: float, end_sec: float
) -> List[Dict[str, Any]]:
    """Return caption words overlapping ``[start_sec, end_sec]``, re-based
    so timestamps are relative to the clip start.

    Falls back to a single empty placeholder word spanning the whole clip
    so the renderer never receives an empty list (e.g. for silent clips).
    """
    relative: List[Dict[str, Any]] = []
    for w in words:
        # Keep only words whose interval overlaps the clip window.
        if w["end"] <= start_sec or w["start"] >= end_sec:
            continue
        relative.append(
            {
                "start": max(0.0, w["start"] - start_sec),
                "end": max(0.0, w["end"] - start_sec),
                "word": w["word"],
            }
        )
    if not relative:
        relative.append({"start": 0.0, "end": end_sec - start_sec, "word": ""})
    return relative


def process_message(data: Dict[str, Any]) -> Dict[str, Any]:
    """Process a single video task described in ``data``.

    Pipeline: transcribe the source video, select highlight windows via
    the LLM helpers, generate a title per highlight, render one vertical
    clip per highlight, then clean up the working files.

    Returns the payload destined for the upload queue. Raises on failure
    (``ValueError`` for a missing filename, ``FileNotFoundError`` for a
    missing source video, plus whatever the helpers raise); the caller is
    responsible for catching and publishing an error payload.
    """
    filename = data.get("filename")
    if not filename:
        raise ValueError("Campo 'filename' ausente na mensagem")
    url = data.get("url")
    video_id = data.get("videoId")

    # n8n drops source videos into the 'videos' directory.
    video_path = os.path.join("videos", filename)
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}")

    # Sanitized basename doubles as the working/output directory name.
    base_no_ext = os.path.splitext(filename)[0]
    sanitized = sanitize_filename(base_no_ext)
    work_dir = os.path.join("app", "videos", sanitized)

    segments, words = transcribe(video_path, work_dir)
    srt_str = build_srt(segments)

    # Ask Gemini which windows have the highest engagement potential.
    highlights = select_highlights(srt_str)
    # Normalise whitespace only; the raw "HH:MM:SS,mmm" strings are kept
    # as-is for the title-generation prompt. (The original comment claimed
    # a float conversion here, but none happens.)
    for item in highlights:
        item["start"] = item["start"].strip()
        item["end"] = item["end"].strip()

    titles = generate_titles(highlights)

    output_dir = os.path.join("outputs", sanitized)
    processed_files: List[str] = []
    for idx, item in enumerate(titles, start=1):
        start_sec = timestamp_to_seconds(item.get("start"))
        end_sec = timestamp_to_seconds(item.get("end"))
        relative_words = _clip_words(words, start_sec, end_sec)
        out_path = render_clip(
            video_path=video_path,
            start=start_sec,
            end=end_sec,
            top_text=item.get("topText", ""),
            words=relative_words,
            out_dir=output_dir,
            base_name=sanitized,
            idx=idx,
        )
        processed_files.append(out_path)

    payload = {
        "videosProcessedQuantity": len(processed_files),
        "filename": filename,
        "processedFiles": processed_files,
        "url": url,
        "videoId": video_id,
        "hasError": False,
        "error": None,
    }

    # Reclaim disk space: drop the working dir and the original upload.
    shutil.rmtree(work_dir, ignore_errors=True)
    try:
        os.remove(video_path)
    except FileNotFoundError:
        pass
    return payload
def main():
    """Poll ``RABBITMQ_QUEUE`` forever, processing one video task at a time.

    Invalid messages are skipped; processing failures are cleaned up and
    reported to the upload queue with ``hasError=True`` instead of
    crashing the loop.

    NOTE(review): this diff view contains two definitions of main(); this
    one appears to be the old version being replaced.
    """
    print(" [*] Esperando mensagens. Para sair: CTRL+C")
    while True:
        body = get_next_message()
        if body is None:
            # Queue empty — back off before polling again.
            time.sleep(5)
            continue
        try:
            data = json.loads(body)
        except Exception:
            print("⚠️ Mensagem inválida recebida (não é JSON)")
            continue
        # json.loads may yield a list/str/number; calling .get() on those
        # would raise AttributeError and crash the error handler below.
        if not isinstance(data, dict):
            print("⚠️ Mensagem inválida recebida (JSON não é um objeto)")
            continue
        try:
            result = process_message(data)
        except Exception as exc:
            # Top-level boundary: log the stack trace for debugging.
            traceback.print_exc()
            # Best-effort cleanup of any directories derived from the filename.
            filename = data.get("filename")
            sanitized = (
                sanitize_filename(os.path.splitext(filename)[0]) if filename else ""
            )
            if sanitized:
                shutil.rmtree(os.path.join("app", "videos", sanitized), ignore_errors=True)
                shutil.rmtree(os.path.join("outputs", sanitized), ignore_errors=True)
            # Remove the original video if present.
            if filename:
                video_path = os.path.join("videos", filename)
                if os.path.exists(video_path):
                    try:
                        os.remove(video_path)
                    except Exception:
                        pass
            # Report the failure downstream instead of dropping it silently.
            error_payload = {
                "videosProcessedQuantity": 0,
                "filename": filename,
                "processedFiles": [],
                "url": data.get("url"),
                "videoId": data.get("videoId"),
                "hasError": True,
                "error": str(exc),
            }
            try:
                publish_to_queue(error_payload)
                print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
            except Exception as publish_err:
                print(f"Erro ao publicar mensagem de erro: {publish_err}")
            continue
        # Success: publish the result payload.
        try:
            publish_to_queue(result)
            print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
        except Exception as publish_err:
            print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}")
        # Loop continues
# Wire the refactored components: VideoPipeline encapsulates the processing
# steps and RabbitMQWorker drives message consumption. NOTE(review): in the
# new file these lines presumably sit inside main() (they use `settings`
# defined there) — confirm against the full post-change file.
pipeline = VideoPipeline(settings)
worker = RabbitMQWorker(settings)
worker.consume_forever(pipeline.process_message)
if __name__ == "__main__":