Cria novos componentes
This commit is contained in:
269
main.py
269
main.py
@@ -1,265 +1,16 @@
|
||||
"""Entry point for the video processing pipeline.
|
||||
|
||||
This script listens to a RabbitMQ queue for new video processing tasks. When
|
||||
a message arrives, it performs the following steps:
|
||||
|
||||
1. Creates a working directory for the video based off of its filename.
|
||||
2. Extracts the audio track with FFMPEG and runs Faster-Whisper to produce
|
||||
a transcription with word-level timestamps.
|
||||
3. Uses the Gemini model to determine which parts of the video have the
|
||||
highest potential for engagement. These highlight segments are
|
||||
represented as a list of objects containing start/end timestamps and
|
||||
text.
|
||||
4. Uses the OpenRouter model to generate a sensational title for each
|
||||
highlight. Only the ``topText`` field is kept; the description is
|
||||
intentionally omitted since the caption will be burned into the video.
|
||||
5. Cuts the original video into individual clips corresponding to each
|
||||
highlight and renders them vertically with a title above and a dynamic
|
||||
caption below.
|
||||
6. Publishes a message to the upload queue with information about the
|
||||
generated clips. On success, this message contains the list of output
|
||||
files. On failure, ``hasError`` will be set to ``True`` and the
|
||||
``error`` field will describe what went wrong.
|
||||
7. Cleans up temporary files (audio, transcript, working directory) and
|
||||
deletes the original source video from the ``videos`` directory to
|
||||
conserve disk space.
|
||||
|
||||
The queue names and RabbitMQ credentials are configured via environment
|
||||
variables. See the accompanying ``docker-compose.yml`` for defaults.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
import traceback
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pika
|
||||
|
||||
from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds
|
||||
from .transcribe import transcribe
|
||||
from .llm import LLMError, select_highlights, generate_titles
|
||||
from .render import render_clip
|
||||
from video_render.config import load_settings
|
||||
from video_render.logging_utils import setup_logging
|
||||
from video_render.messaging import RabbitMQWorker
|
||||
from video_render.pipeline import VideoPipeline
|
||||
|
||||
|
||||
# Environment variables with sensible defaults
# Broker location; the default hostname matches the docker-compose service.
RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq")
RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672))
RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin")
# Deliberately no default: main() refuses to start when this is unset.
RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS")
# Queue consumed for incoming render tasks.
RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render")
# Queue where success/error result payloads are published.
RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")
|
||||
def main() -> None:
    """Initialise logging and settings, then validate required configuration.

    Raises:
        RuntimeError: if ``RABBITMQ_PASS`` is not set in the environment.

    NOTE(review): a second ``def main()`` appears later in this file and
    rebinds the name, so this definition is shadowed and never invoked —
    this looks like a merge/diff artifact; confirm which entry point is
    the intended one.
    """
    setup_logging()
    settings = load_settings()

    if not RABBITMQ_PASS:
        raise RuntimeError("RABBITMQ_PASS não definido no ambiente")
||||
def get_next_message() -> Any:
    """Retrieve a single message from the RABBITMQ_QUEUE.

    Returns:
        The raw message body (bytes) if a message was available, otherwise
        ``None``. This helper opens a new connection for each call to avoid
        keeping stale connections alive.

    Fix: the connection is now closed in a ``finally`` block, so an exception
    raised by ``basic_get``/``basic_ack`` no longer leaks the connection.
    """
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    try:
        channel = connection.channel()
        # NOTE(review): the queue is not declared here (unlike
        # publish_to_queue); this assumes the producer declared it — confirm.
        method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE)
        if method_frame:
            # Ack on receipt: a crash during later processing loses the
            # message (at-most-once delivery). Kept as-is to preserve the
            # existing contract.
            channel.basic_ack(method_frame.delivery_tag)
            return body
        return None
    finally:
        # Always close, even if basic_get/basic_ack raised.
        connection.close()
|
||||
|
||||
|
||||
def publish_to_queue(payload: Dict[str, Any]) -> None:
    """Publish a JSON-serialisable payload to the RABBITMQ_UPLOAD_QUEUE.

    The queue is declared durable (idempotent) and the message is marked
    persistent (``delivery_mode=2``) so it survives broker restarts.

    Fix: the connection is now closed in a ``finally`` block, so a failure
    in ``queue_declare``/``basic_publish`` no longer leaks the connection.
    """
    credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS)
    parameters = pika.ConnectionParameters(
        host=RABBITMQ_HOST,
        port=RABBITMQ_PORT,
        credentials=credentials,
        heartbeat=60,
        blocked_connection_timeout=300,
    )
    connection = pika.BlockingConnection(parameters)
    try:
        channel = connection.channel()
        channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True)
        channel.basic_publish(
            exchange="",
            routing_key=RABBITMQ_UPLOAD_QUEUE,
            body=json.dumps(payload),
            # delivery_mode=2 marks the message persistent.
            properties=pika.BasicProperties(delivery_mode=2),
        )
    finally:
        connection.close()
|
||||
|
||||
|
||||
def build_srt(segments: List[Dict[str, Any]]) -> str:
    """Render transcription segments as an SRT-like block of text.

    Each segment must provide ``start``, ``end`` and ``text``. Timestamps
    are formatted as ``HH:MM:SS,mmm`` (the shape the Gemini prompt expects)
    and segments are joined with a blank line between them.
    """
    blocks = [
        f"{seconds_to_timestamp(segment['start'])} --> "
        f"{seconds_to_timestamp(segment['end'])}\n{segment['text']}"
        for segment in segments
    ]
    return "\n\n".join(blocks)
|
||||
|
||||
|
||||
def process_message(data: Dict[str, Any]) -> Dict[str, Any]:
    """Run the full pipeline for one video task and return the upload payload.

    ``data`` is the decoded queue message; it must contain ``filename`` and
    may carry ``url`` and ``videoId``, which are echoed back in the result.

    Raises on any failure; the caller is responsible for catching the
    exception and publishing an error payload instead.
    """
    filename = data.get("filename")
    if not filename:
        raise ValueError("Campo 'filename' ausente na mensagem")
    url = data.get("url")
    video_id = data.get("videoId")

    # n8n drops source files into the 'videos' directory.
    video_path = os.path.join("videos", filename)
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}")

    # The sanitized basename doubles as the per-video working-directory name.
    sanitized = sanitize_filename(os.path.splitext(filename)[0])
    work_dir = os.path.join("app", "videos", sanitized)

    # Transcribe, then hand an SRT-like transcript to the highlight selector.
    segments, words = transcribe(video_path, work_dir)
    highlights = select_highlights(build_srt(segments))

    # Trim whitespace on the timestamp strings before title generation.
    for highlight in highlights:
        highlight["start"] = highlight["start"].strip()
        highlight["end"] = highlight["end"].strip()
    titles = generate_titles(highlights)

    output_dir = os.path.join("outputs", sanitized)
    processed_files: List[str] = []
    for clip_number, clip in enumerate(titles, start=1):
        clip_start = timestamp_to_seconds(clip.get("start"))
        clip_end = timestamp_to_seconds(clip.get("end"))
        # Keep only the words overlapping the clip window, re-based so the
        # caption timeline starts at zero.
        relative_words = [
            {
                "start": max(0.0, word["start"] - clip_start),
                "end": max(0.0, word["end"] - clip_start),
                "word": word["word"],
            }
            for word in words
            if not (word["end"] <= clip_start or word["start"] >= clip_end)
        ]
        # A silent clip still gets one empty dummy word so rendering never
        # fails on an empty caption list.
        if not relative_words:
            relative_words = [{"start": 0.0, "end": clip_end - clip_start, "word": ""}]
        processed_files.append(
            render_clip(
                video_path=video_path,
                start=clip_start,
                end=clip_end,
                top_text=clip.get("topText", ""),
                words=relative_words,
                out_dir=output_dir,
                base_name=sanitized,
                idx=clip_number,
            )
        )

    payload = {
        "videosProcessedQuantity": len(processed_files),
        "filename": filename,
        "processedFiles": processed_files,
        "url": url,
        "videoId": video_id,
        "hasError": False,
        "error": None,
    }

    # Conserve disk space: drop the working directory and the source video.
    shutil.rmtree(work_dir, ignore_errors=True)
    try:
        os.remove(video_path)
    except FileNotFoundError:
        pass
    return payload
|
||||
|
||||
|
||||
def main():
    """Poll the task queue forever, processing each message as it arrives.

    Non-JSON messages are skipped with a warning. When processing fails, any
    partial artefacts are removed and an error payload is published to the
    upload queue; on success the result payload is published. The loop never
    returns.

    Fix: removed the unreachable tail (``pipeline = VideoPipeline(settings)``,
    worker setup) that sat after the ``while True`` loop — the loop has no
    ``break``, so that code could never run, and it referenced names
    (``settings``) not defined in this scope. It was a merge/diff artifact.
    """
    print(" [*] Esperando mensagens. Para sair: CTRL+C")
    while True:
        body = get_next_message()
        if body is None:
            # Nothing queued; back off briefly before polling again.
            time.sleep(5)
            continue
        try:
            data = json.loads(body)
        except Exception:
            print("⚠️ Mensagem inválida recebida (não é JSON)")
            continue
        try:
            result = process_message(data)
        except Exception as exc:
            # Print stack trace for debugging
            traceback.print_exc()
            # Best-effort cleanup of any directories left by the failed run.
            filename = data.get("filename")
            sanitized = sanitize_filename(os.path.splitext(filename or "")[0]) if filename else ""
            work_dir = os.path.join("app", "videos", sanitized) if sanitized else None
            output_dir = os.path.join("outputs", sanitized) if sanitized else None
            if work_dir:
                shutil.rmtree(work_dir, ignore_errors=True)
            if output_dir:
                shutil.rmtree(output_dir, ignore_errors=True)
            # Remove the original video if it is still present.
            video_path = os.path.join("videos", filename) if filename else None
            if video_path and os.path.exists(video_path):
                try:
                    os.remove(video_path)
                except Exception:
                    pass
            # Tell the upload queue what went wrong.
            error_payload = {
                "videosProcessedQuantity": 0,
                "filename": filename,
                "processedFiles": [],
                "url": data.get("url"),
                "videoId": data.get("videoId"),
                "hasError": True,
                "error": str(exc),
            }
            try:
                publish_to_queue(error_payload)
                print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
            except Exception as publish_err:
                print(f"Erro ao publicar mensagem de erro: {publish_err}")
            continue
        # On success publish the result payload; failures to publish are
        # logged and the loop keeps running.
        try:
            publish_to_queue(result)
            print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
        except Exception as publish_err:
            print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The guard body was truncated in this copy of the file; a bare guard is
    # a SyntaxError, and calling main() is the standard entry-point idiom.
    main()