Creates new components

LeoMortari
2025-10-20 17:56:36 -03:00
parent 2b99d2ad78
commit b090f7c2cb
38 changed files with 1391 additions and 1024 deletions

4
video_render/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
"""
Core package for the revamped video rendering pipeline.
"""

Binary files not shown (11 files).

103
video_render/config.py Normal file
View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
BASE_DIR = Path(__file__).resolve().parent.parent
VIDEOS_ROOT = BASE_DIR / "videos"
OUTPUTS_ROOT = BASE_DIR / "outputs"
TEMP_ROOT = BASE_DIR / "temp"
@dataclass(frozen=True)
class RabbitMQSettings:
host: str = os.environ.get("RABBITMQ_HOST", "rabbitmq")
port: int = int(os.environ.get("RABBITMQ_PORT", 5672))
user: str = os.environ.get("RABBITMQ_USER", "admin")
password: str = os.environ.get("RABBITMQ_PASS", "")
consume_queue: str = os.environ.get("RABBITMQ_QUEUE", "to-render")
publish_queue: str = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")
prefetch_count: int = int(os.environ.get("RABBITMQ_PREFETCH", 1))
heartbeat: int = int(os.environ.get("RABBITMQ_HEARTBEAT", 60))
blocked_timeout: int = int(os.environ.get("RABBITMQ_BLOCKED_TIMEOUT", 300))
@dataclass(frozen=True)
class GeminiSettings:
api_key: str = os.environ.get("GEMINI_API_KEY", "")
model: str = os.environ.get("GEMINI_MODEL", "gemini-1.5-pro-latest")
safety_settings: str | None = os.environ.get("GEMINI_SAFETY_SETTINGS")
temperature: float = float(os.environ.get("GEMINI_TEMPERATURE", 0.2))
top_k: int | None = (
int(os.environ["GEMINI_TOP_K"]) if os.environ.get("GEMINI_TOP_K") else None
)
top_p: float | None = (
float(os.environ["GEMINI_TOP_P"]) if os.environ.get("GEMINI_TOP_P") else None
)
prompt_path: str = os.environ.get("GEMINI_PROMPT_PATH", "prompts/generate.txt")
@dataclass(frozen=True)
class OpenRouterSettings:
api_key: str = os.environ.get("OPENROUTER_API_KEY", "")
model: str = os.environ.get(
"OPENROUTER_MODEL", "anthropic/claude-3-haiku:beta"
)
temperature: float = float(os.environ.get("OPENROUTER_TEMPERATURE", 0.6))
max_output_tokens: int = int(os.environ.get("OPENROUTER_MAX_OUTPUT_TOKENS", 256))
@dataclass(frozen=True)
class WhisperSettings:
model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "medium")
device: str | None = os.environ.get("FASTER_WHISPER_DEVICE")
compute_type: str | None = os.environ.get("FASTER_WHISPER_COMPUTE_TYPE")
download_root: Path = Path(
os.environ.get("FASTER_WHISPER_DOWNLOAD_ROOT", str(BASE_DIR / ".whisper"))
)
@dataclass(frozen=True)
class RenderingSettings:
frame_width: int = int(os.environ.get("RENDER_WIDTH", 1080))
frame_height: int = int(os.environ.get("RENDER_HEIGHT", 1920))
fps: int = int(os.environ.get("RENDER_FPS", 30))
video_codec: str = os.environ.get("RENDER_CODEC", "libx264")
audio_codec: str = os.environ.get("RENDER_AUDIO_CODEC", "aac")
bitrate: str = os.environ.get("RENDER_BITRATE", "5000k")
preset: str = os.environ.get("RENDER_PRESET", "faster")
highlight_color: str = os.environ.get("SUBTITLE_HIGHLIGHT_COLOR", "#FFD200")
base_color: str = os.environ.get("SUBTITLE_BASE_COLOR", "#FFFFFF")
font_path: Path = Path(os.environ.get("RENDER_FONT_PATH", "./Montserrat.ttf"))
title_font_size: int = int(os.environ.get("RENDER_TITLE_FONT_SIZE", 110))
subtitle_font_size: int = int(os.environ.get("RENDER_SUBTITLE_FONT_SIZE", 64))
caption_min_words: int = int(os.environ.get("CAPTION_MIN_WORDS", 3))
caption_max_words: int = int(os.environ.get("CAPTION_MAX_WORDS", 4))
@dataclass(frozen=True)
class Settings:
rabbitmq: RabbitMQSettings = RabbitMQSettings()
gemini: GeminiSettings = GeminiSettings()
openrouter: OpenRouterSettings = OpenRouterSettings()
whisper: WhisperSettings = WhisperSettings()
rendering: RenderingSettings = RenderingSettings()
videos_dir: Path = VIDEOS_ROOT
outputs_dir: Path = OUTPUTS_ROOT
temp_dir: Path = TEMP_ROOT
def load_settings() -> Settings:
settings = Settings()
if not settings.rabbitmq.password:
raise RuntimeError("RABBITMQ_PASS must be provided")
settings.videos_dir.mkdir(parents=True, exist_ok=True)
settings.outputs_dir.mkdir(parents=True, exist_ok=True)
settings.temp_dir.mkdir(parents=True, exist_ok=True)
return settings
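
A minimal usage sketch (my own example, not part of this commit). It assumes RABBITMQ_PASS is exported before video_render.config is imported, since the dataclass defaults read the environment at import time:

import os

os.environ.setdefault("RABBITMQ_PASS", "change-me")  # must be set before the module is imported

from video_render.config import load_settings

settings = load_settings()  # also creates the videos/, outputs/ and temp/ directories
print(settings.rabbitmq.host, settings.rendering.frame_width)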

54
video_render/ffmpeg.py Normal file
View File

@@ -0,0 +1,54 @@
from __future__ import annotations
import logging
import shlex
import subprocess
from pathlib import Path
from typing import Sequence
logger = logging.getLogger(__name__)
def _run_ffmpeg(args: Sequence[str]) -> None:
cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", *args]
logger.debug("Executando ffmpeg: %s", " ".join(shlex.quote(part) for part in cmd))
completed = subprocess.run(cmd, check=False)
if completed.returncode != 0:
raise RuntimeError(f"ffmpeg falhou com exit code {completed.returncode}")
def extract_audio_to_wav(input_video: Path, output_wav: Path) -> Path:
_run_ffmpeg(
[
"-y",
"-i",
str(input_video),
"-ac",
"1",
"-ar",
"16000",
"-vn",
str(output_wav),
]
)
return output_wav
def create_video_segment(input_video: Path, start: float, end: float, output_path: Path) -> Path:
duration = max(0.01, end - start)
_run_ffmpeg(
[
"-y",
"-i",
str(input_video),
"-ss",
f"{start:.3f}",
"-t",
f"{duration:.3f}",
"-c",
"copy",
str(output_path),
]
)
return output_path
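
create_video_segment is not called anywhere else in this commit; an illustrative invocation follows (paths are my own examples). Because the segment is stream-copied (-c copy), cuts land on keyframes rather than at exact timestamps:

from pathlib import Path

from video_render.ffmpeg import create_video_segment

create_video_segment(Path("videos/input.mp4"), start=5.0, end=20.0, output_path=Path("temp/clip_01.mp4"))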

187
video_render/llm.py Normal file
View File

@@ -0,0 +1,187 @@
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Dict, List
import requests
from .config import BASE_DIR, Settings
from .transcription import TranscriptionResult
logger = logging.getLogger(__name__)
GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"
class GeminiHighlighter:
def __init__(self, settings: Settings) -> None:
if not settings.gemini.api_key:
raise RuntimeError("GEMINI_API_KEY nao foi definido")
prompt_path = Path(settings.gemini.prompt_path)
if not prompt_path.is_absolute():
prompt_path = BASE_DIR / prompt_path
if not prompt_path.exists():
raise FileNotFoundError(f"Prompt do Gemini nao encontrado: {prompt_path}")
self.prompt_template = prompt_path.read_text(encoding="utf-8")
self.settings = settings
def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
payload = {
"transcript": transcription.full_text,
"segments": [
{
"start": segment.start,
"end": segment.end,
"text": segment.text,
}
for segment in transcription.segments
],
}
body = {
"contents": [
{
"role": "user",
"parts": [
{"text": self.prompt_template},
{"text": json.dumps(payload, ensure_ascii=False)},
],
}
]
}
if self.settings.gemini.temperature is not None:
body["generationConfig"] = {
"temperature": self.settings.gemini.temperature,
}
if self.settings.gemini.top_p is not None:
body["generationConfig"]["topP"] = self.settings.gemini.top_p
if self.settings.gemini.top_k is not None:
body["generationConfig"]["topK"] = self.settings.gemini.top_k
url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model)
params = {"key": self.settings.gemini.api_key}
response = requests.post(url, params=params, json=body, timeout=120)
response.raise_for_status()
data = response.json()
candidates = data.get("candidates") or []
if not candidates:
raise RuntimeError("Gemini nao retornou candidatos")
text_parts = candidates[0].get("content", {}).get("parts", [])
if not text_parts:
raise RuntimeError("Resposta do Gemini sem conteudo")
raw_text = text_parts[0].get("text")
if not raw_text:
raise RuntimeError("Resposta do Gemini sem texto")
parsed = self._extract_json(raw_text)
highlights = parsed.get("highlights")
if not isinstance(highlights, list):
raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente")
return highlights
@staticmethod
def _extract_json(response_text: str) -> Dict:
try:
return json.loads(response_text)
except json.JSONDecodeError:
start = response_text.find("{")
end = response_text.rfind("}")
if start == -1 or end == -1:
raise
subset = response_text[start : end + 1]
return json.loads(subset)
class OpenRouterCopywriter:
def __init__(self, settings: Settings) -> None:
if not settings.openrouter.api_key:
raise RuntimeError("OPENROUTER_API_KEY nao foi definido")
self.settings = settings
def generate_titles(self, highlights: List[Dict]) -> List[str]:
if not highlights:
return []
prompt = (
"Voce e um copywriter especializado em titulos curtos e virais para reels.\n"
"Recebera uma lista de trechos destacados de um video com resumo e tempo.\n"
"Produza um titulo envolvente (ate 60 caracteres) para cada item.\n"
"Responda apenas em JSON com a seguinte estrutura:\n"
'{"titles": ["titulo 1", "titulo 2"]}\n'
"Titulos devem ser em portugues, usar verbos fortes e refletir o resumo."
)
user_payload = {
"highlights": [
{
"start": item.get("start"),
"end": item.get("end"),
"summary": item.get("summary"),
}
for item in highlights
]
}
body = {
"model": self.settings.openrouter.model,
"temperature": self.settings.openrouter.temperature,
"max_tokens": self.settings.openrouter.max_output_tokens,
"messages": [
{"role": "system", "content": prompt},
{
"role": "user",
"content": json.dumps(user_payload, ensure_ascii=False),
},
],
}
headers = {
"Authorization": f"Bearer {self.settings.openrouter.api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "https://localhost",
"X-Title": "video-render-pipeline",
}
response = requests.post(
OPENROUTER_ENDPOINT, json=body, headers=headers, timeout=120
)
response.raise_for_status()
data = response.json()
choices = data.get("choices") or []
if not choices:
raise RuntimeError("OpenRouter nao retornou escolhas")
message = choices[0].get("message", {}).get("content")
if not message:
raise RuntimeError("Resposta do OpenRouter sem conteudo")
parsed = self._extract_json(message)
titles = parsed.get("titles")
if not isinstance(titles, list):
raise ValueError("Resposta do OpenRouter invalida: campo 'titles'")
return [str(title) for title in titles]
@staticmethod
def _extract_json(response_text: str) -> Dict:
try:
return json.loads(response_text)
except json.JSONDecodeError:
start = response_text.find("{")
end = response_text.rfind("}")
if start == -1 or end == -1:
raise
subset = response_text[start : end + 1]
return json.loads(subset)
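
Both _extract_json helpers tolerate responses that wrap the JSON in surrounding prose. An illustrative call (the string is my own example; the real response shape depends on prompts/generate.txt, which is not part of this diff):

from video_render.llm import GeminiHighlighter

raw = 'Segue o resultado: {"highlights": [{"start": 12.5, "end": 42.0, "summary": "Dica principal"}]}'
parsed = GeminiHighlighter._extract_json(raw)  # static method, no API key required
assert parsed["highlights"][0]["summary"] == "Dica principal"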

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
import logging
import os
def setup_logging() -> None:
log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
level=log_level,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

64
video_render/media.py Normal file
View File

@@ -0,0 +1,64 @@
from __future__ import annotations
import logging
import shutil
from dataclasses import dataclass
from pathlib import Path
from .config import Settings
from .ffmpeg import extract_audio_to_wav
from .utils import ensure_workspace, remove_paths, sanitize_filename
logger = logging.getLogger(__name__)
@dataclass
class VideoWorkspace:
original_filename: str
sanitized_name: str
workspace_dir: Path
output_dir: Path
source_path: Path
working_video_path: Path
audio_path: Path
class MediaPreparer:
def __init__(self, settings: Settings) -> None:
self.settings = settings
def prepare(self, filename: str) -> VideoWorkspace:
source_path = self.settings.videos_dir / filename
if not source_path.exists():
raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {source_path}")
sanitized_name = sanitize_filename(Path(filename).stem)
workspace_dir = ensure_workspace(self.settings.videos_dir, sanitized_name)
existing_children = list(workspace_dir.iterdir())
if existing_children:
logger.info("Limpando workspace existente para %s", sanitized_name)
remove_paths(existing_children)
destination_name = f"{sanitized_name}{source_path.suffix.lower()}"
working_video_path = workspace_dir / destination_name
shutil.copy2(source_path, working_video_path)
logger.info("Cópia do vídeo criada em %s", working_video_path)
output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name)
existing_outputs = list(output_dir.iterdir())
if existing_outputs:
remove_paths(existing_outputs)
audio_path = workspace_dir / "audio.wav"
extract_audio_to_wav(working_video_path, audio_path)
return VideoWorkspace(
original_filename=filename,
sanitized_name=sanitized_name,
workspace_dir=workspace_dir,
output_dir=output_dir,
source_path=source_path,
working_video_path=working_video_path,
audio_path=audio_path,
)

85
video_render/messaging.py Normal file
View File

@@ -0,0 +1,85 @@
from __future__ import annotations
import json
import logging
from typing import Any, Callable, Dict
import pika
from .config import Settings
logger = logging.getLogger(__name__)
MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]
class RabbitMQWorker:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self._params = pika.ConnectionParameters(
host=settings.rabbitmq.host,
port=settings.rabbitmq.port,
credentials=pika.PlainCredentials(
settings.rabbitmq.user, settings.rabbitmq.password
),
heartbeat=settings.rabbitmq.heartbeat,
blocked_connection_timeout=settings.rabbitmq.blocked_timeout,
)
def consume_forever(self, handler: MessageHandler) -> None:
while True:
try:
with pika.BlockingConnection(self._params) as connection:
channel = connection.channel()
channel.queue_declare(queue=self.settings.rabbitmq.consume_queue, durable=True)
channel.queue_declare(queue=self.settings.rabbitmq.publish_queue, durable=True)
channel.basic_qos(prefetch_count=self.settings.rabbitmq.prefetch_count)
def _on_message(ch: pika.adapters.blocking_connection.BlockingChannel, method, properties, body):
try:
message = json.loads(body)
except json.JSONDecodeError:
logger.error("Mensagem inválida recebida: %s", body)
ch.basic_ack(delivery_tag=method.delivery_tag)
return
logger.info("Mensagem recebida: %s", message.get("filename", "<sem_nome>"))
try:
response = handler(message)
except Exception:
logger.exception("Erro não tratado durante o processamento")
response = {
"hasError": True,
"error": "Erro não tratado no pipeline",
"filename": message.get("filename"),
"videoId": message.get("videoId"),
"url": message.get("url"),
"processedFiles": [],
}
try:
payload = json.dumps(response)
ch.basic_publish(
exchange="",
routing_key=self.settings.rabbitmq.publish_queue,
body=payload,
properties=pika.BasicProperties(delivery_mode=2),
)
logger.info("Resposta publicada para '%s'", self.settings.rabbitmq.publish_queue)
except Exception:
logger.exception("Falha ao publicar a resposta na fila de upload")
finally:
ch.basic_ack(delivery_tag=method.delivery_tag)
channel.basic_consume(
queue=self.settings.rabbitmq.consume_queue,
on_message_callback=_on_message,
auto_ack=False,
)
logger.info("Consumidor iniciado. Aguardando mensagens...")
channel.start_consuming()
except pika.exceptions.AMQPConnectionError:
logger.exception("Conexão com RabbitMQ perdida. Tentando reconectar...")
except KeyboardInterrupt:
logger.info("Encerrando consumidor por interrupção do usuário.")
break
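
The entrypoint that wires this worker to the pipeline is not included in this diff; a plausible sketch under that assumption (file name and wiring are mine):

# Hypothetical main.py (not in this commit)
from video_render.config import load_settings
from video_render.messaging import RabbitMQWorker
from video_render.pipeline import VideoPipeline


def main() -> None:
    settings = load_settings()
    pipeline = VideoPipeline(settings)
    RabbitMQWorker(settings).consume_forever(pipeline.process_message)


if __name__ == "__main__":
    main()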

236
video_render/pipeline.py Normal file
View File

@@ -0,0 +1,236 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
from .config import Settings
from .llm import GeminiHighlighter, OpenRouterCopywriter
from .media import MediaPreparer, VideoWorkspace
from .transcription import TranscriptionResult, TranscriptionService
from .utils import remove_paths, sanitize_filename
from .rendering import VideoRenderer
logger = logging.getLogger(__name__)
@dataclass
class JobMessage:
filename: str
url: Optional[str]
video_id: Optional[str]
extras: Dict[str, Any] = field(default_factory=dict)
@dataclass
class HighlightWindow:
start: float
end: float
summary: str
title: Optional[str] = None
@dataclass
class RenderedClip:
path: Path
start: float
end: float
title: str
summary: str
index: int
@dataclass
class PipelineContext:
job: JobMessage
workspace: Optional[VideoWorkspace] = None
transcription: Optional[TranscriptionResult] = None
highlight_windows: List[HighlightWindow] = field(default_factory=list)
rendered_clips: List[RenderedClip] = field(default_factory=list)
class VideoPipeline:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self.media_preparer = MediaPreparer(settings)
self.transcriber = TranscriptionService(settings)
self.highlighter = GeminiHighlighter(settings)
self.copywriter = OpenRouterCopywriter(settings)
self.renderer = VideoRenderer(settings)
def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
context = PipelineContext(job=self._parse_job(message))
try:
self._prepare_workspace(context)
self._generate_transcription(context)
self._determine_highlights(context)
self._generate_titles(context)
self._render_clips(context)
return self._build_success_payload(context)
except Exception as exc:
logger.exception("Falha ao processar vídeo %s", context.job.filename)
return self._handle_failure(context, exc)
def _parse_job(self, message: Dict[str, Any]) -> JobMessage:
filename = message.get("filename")
if not filename:
raise ValueError("Mensagem inválida: 'filename' é obrigatório")
url = message.get("url")
video_id = message.get("videoId") or message.get("video_id")
extras = {
key: value
for key, value in message.items()
if key not in {"filename", "url", "videoId", "video_id"}
}
return JobMessage(filename=filename, url=url, video_id=video_id, extras=extras)
def _prepare_workspace(self, context: PipelineContext) -> None:
context.workspace = self.media_preparer.prepare(context.job.filename)
def _generate_transcription(self, context: PipelineContext) -> None:
if not context.workspace:
raise RuntimeError("Workspace não preparado")
transcription = self.transcriber.transcribe(context.workspace.audio_path)
TranscriptionService.persist(transcription, context.workspace.workspace_dir)
context.transcription = transcription
def _determine_highlights(self, context: PipelineContext) -> None:
if not context.transcription:
raise RuntimeError("Transcricao nao disponivel")
highlights_raw = self.highlighter.generate_highlights(context.transcription)
windows: List[HighlightWindow] = []
for item in highlights_raw:
try:
start = float(item.get("start", 0)) # type: ignore[arg-type]
end = float(item.get("end", start)) # type: ignore[arg-type]
except (TypeError, ValueError):
logger.warning("Highlight invalido ignorado: %s", item)
continue
summary = str(item.get("summary", "")).strip()
if end <= start:
logger.debug("Highlight com intervalo invalido ignorado: %s", item)
continue
windows.append(HighlightWindow(start=start, end=end, summary=summary))
if not windows:
last_end = (
context.transcription.segments[-1].end
if context.transcription.segments
else 0
)
windows.append(
HighlightWindow(
start=0.0,
end=max(last_end, 10.0),
summary="Sem destaque identificado; fallback automatico.",
)
)
context.highlight_windows = windows
def _generate_titles(self, context: PipelineContext) -> None:
if not context.highlight_windows:
return
highlight_dicts = [
{"start": window.start, "end": window.end, "summary": window.summary}
for window in context.highlight_windows
]
titles = self.copywriter.generate_titles(highlight_dicts)
for window, title in zip(context.highlight_windows, titles):
window.title = title.strip()
def _render_clips(self, context: PipelineContext) -> None:
if not context.workspace or not context.highlight_windows or not context.transcription:
return
titles = [
window.title or window.summary for window in context.highlight_windows
]
render_results = self.renderer.render(
workspace_path=str(context.workspace.working_video_path),
highlight_windows=context.highlight_windows,
transcription=context.transcription,
titles=titles,
output_dir=context.workspace.output_dir,
)
context.rendered_clips = [
RenderedClip(
path=Path(path),
start=start,
end=end,
title=title,
summary=summary,
index=index,
)
for path, start, end, title, summary, index in render_results
]
def _build_success_payload(self, context: PipelineContext) -> Dict[str, Any]:
return {
"hasError": False,
"videosProcessedQuantity": len(context.rendered_clips),
"filename": context.job.filename,
"videoId": context.job.video_id,
"url": context.job.url,
"workspaceFolder": context.workspace.sanitized_name if context.workspace else None,
"outputDirectory": self._relative_path(context.workspace.output_dir) if context.workspace else None,
"processedFiles": [
{
"path": self._relative_path(clip.path),
"start": clip.start,
"end": clip.end,
"title": clip.title,
"summary": clip.summary,
"clipIndex": clip.index,
}
for clip in context.rendered_clips
],
}
def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]:
logger.error("Erro no pipeline: %s", exc)
cleanup_targets: List[Path] = []
if context.workspace:
cleanup_targets.append(context.workspace.workspace_dir)
cleanup_targets.append(context.workspace.output_dir)
original_path = context.workspace.source_path
if original_path.exists():
cleanup_targets.append(original_path)
else:
sanitized = sanitize_filename(Path(context.job.filename).stem)
job_output_dir = self.settings.outputs_dir / sanitized
if job_output_dir.exists():
cleanup_targets.append(job_output_dir)
original_path = self.settings.videos_dir / context.job.filename
if original_path.exists():
cleanup_targets.append(original_path)
remove_paths(cleanup_targets)
return {
"hasError": True,
"error": str(exc),
"filename": context.job.filename,
"videoId": context.job.video_id,
"url": context.job.url,
"processedFiles": [],
}
def _relative_path(self, path: Path) -> str:
base = self.settings.videos_dir.parent
try:
return str(path.relative_to(base))
except ValueError:
return str(path)

406
video_render/rendering.py Normal file
View File

@@ -0,0 +1,406 @@
from __future__ import annotations
import logging
import math
import re
from dataclasses import dataclass
from typing import Iterable, List, Sequence, Tuple
import numpy as np
# moviepy.editor was removed in MoviePy 2.x; the with_*/resized/subclipped calls below use the 2.x API
from moviepy import (
ColorClip,
CompositeVideoClip,
ImageClip,
TextClip,
VideoFileClip,
)
from PIL import Image, ImageColor, ImageDraw, ImageFont
from .config import Settings
from .transcription import TranscriptionResult, WordTiming
logger = logging.getLogger(__name__)
def clamp_time(value: float, minimum: float = 0.0) -> float:
return max(minimum, float(value))
@dataclass
class CaptionClipSet:
base: ImageClip
highlights: List[ImageClip]
class CaptionBuilder:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self.font_path = settings.rendering.font_path
if not self.font_path.exists():
raise FileNotFoundError(f"Fonte nao encontrada: {self.font_path}")
self.font = ImageFont.truetype(
str(self.font_path), settings.rendering.subtitle_font_size
)
self.base_color = ImageColor.getrgb(settings.rendering.base_color)
self.highlight_color = ImageColor.getrgb(settings.rendering.highlight_color)
self.canvas_width = settings.rendering.frame_width - 160
self.canvas_height = int(settings.rendering.subtitle_font_size * 2.2)
self.min_words = settings.rendering.caption_min_words
self.max_words = settings.rendering.caption_max_words
bbox = self.font.getbbox("Ay")
self.text_height = bbox[3] - bbox[1]
self.baseline = (self.canvas_height - self.text_height) // 2 - bbox[1]
self.space_width = self.font.getbbox(" ")[2] - self.font.getbbox(" ")[0]
def build(self, words: Sequence[WordTiming], clip_start: float) -> List[CaptionClipSet]:
grouped = self._group_words(words)
clip_sets: List[CaptionClipSet] = []
for group in grouped:
group_start = clamp_time(group[0].start, minimum=clip_start)
group_end = clamp_time(group[-1].end, minimum=group_start + 0.05)
duration = max(0.05, group_end - group_start)
start_offset = group_start - clip_start
base_image, highlight_images = self._render_group(group)
base_clip = (
ImageClip(np.array(base_image))
.with_start(start_offset)
.with_duration(duration)
)
highlight_clips: List[ImageClip] = []
for word, image in zip(group, highlight_images):
h_start = clamp_time(word.start, minimum=clip_start) - clip_start
h_end = clamp_time(word.end, minimum=word.start + 0.02) - clip_start
h_duration = max(0.05, h_end - h_start)
highlight_clip = (
ImageClip(np.array(image))
.with_start(h_start)
.with_duration(h_duration)
)
highlight_clips.append(highlight_clip)
clip_sets.append(CaptionClipSet(base=base_clip, highlights=highlight_clips))
return clip_sets
def _render_group(self, group: Sequence[WordTiming]) -> Tuple[Image.Image, List[Image.Image]]:
texts = [self._clean_word(word.word) for word in group]
widths = []
for text in texts:
bbox = self.font.getbbox(text)
widths.append(bbox[2] - bbox[0])
total_width = sum(widths)
if len(widths) > 1:
total_width += self.space_width * (len(widths) - 1)
start_x = max(0, (self.canvas_width - total_width) // 2)
base_image = Image.new("RGBA", (self.canvas_width, self.canvas_height), (0, 0, 0, 0))
base_draw = ImageDraw.Draw(base_image)
highlight_images: List[Image.Image] = []
x = start_x
for text, width in zip(texts, widths):
base_draw.text((x, self.baseline), text, font=self.font, fill=self.base_color)
highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
highlight_draw = ImageDraw.Draw(highlight_image)
highlight_draw.text(
(x, self.baseline), text, font=self.font, fill=self.highlight_color
)
highlight_images.append(highlight_image)
x += width + self.space_width
return base_image, highlight_images
def _group_words(self, words: Sequence[WordTiming]) -> List[List[WordTiming]]:
if not words:
return []
grouped: List[List[WordTiming]] = []
buffer: List[WordTiming] = []
for word in words:
buffer.append(word)
if len(buffer) == self.max_words:
grouped.append(buffer)
buffer = []
if buffer:
if len(buffer) == 1 and grouped:
grouped[-1].extend(buffer)
else:
grouped.append(buffer)
# Rebalance groups to respect minimum size when possible
for idx, group in enumerate(grouped[:-1]):
if len(group) < self.min_words and len(grouped[idx + 1]) > self.min_words:
deficit = self.min_words - len(group)
transfer = grouped[idx + 1][:deficit]
grouped[idx] = group + transfer
grouped[idx + 1] = grouped[idx + 1][deficit:]
grouped = [grp for grp in grouped if grp]
return grouped
@staticmethod
def _clean_word(text: str) -> str:
text = text.strip()
text = re.sub(r"\s+", " ", text)
return text or "..."
class VideoRenderer:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self.captions = CaptionBuilder(settings)
def render(
self,
workspace_path: str,
highlight_windows: Sequence,
transcription: TranscriptionResult,
titles: Sequence[str],
output_dir,
) -> List[Tuple[str, float, float, str, str, int]]:
results: List[Tuple[str, float, float, str, str, int]] = []
with VideoFileClip(workspace_path) as base_clip:
video_duration = base_clip.duration or 0
for index, window in enumerate(highlight_windows, start=1):
start = clamp_time(window.start)
end = clamp_time(window.end)
start = min(start, video_duration)
end = min(end, video_duration)
if end <= start:
logger.info("Janela ignorada por intervalo invalido: %s", window)
continue
subclip = base_clip.subclipped(start, end)
try:
rendered_path = self._render_single_clip(
subclip=subclip,
start=start,
end=end,
title=titles[index - 1] if index - 1 < len(titles) else window.summary,
summary=window.summary,
index=index,
transcription=transcription,
output_dir=output_dir,
)
finally:
subclip.close()
results.append(
(
rendered_path,
float(start),
float(end),
titles[index - 1] if index - 1 < len(titles) else window.summary,
window.summary,
index,
)
)
return results
def _render_single_clip(
self,
subclip: VideoFileClip,
start: float,
end: float,
title: str,
summary: str,
index: int,
transcription: TranscriptionResult,
output_dir,
) -> str:
duration = end - start
frame_w = self.settings.rendering.frame_width
frame_h = self.settings.rendering.frame_height
top_h = int(frame_h * 0.18)
bottom_h = int(frame_h * 0.20)
video_area_h = frame_h - top_h - bottom_h
scale_factor = min(
frame_w / subclip.w,
video_area_h / subclip.h,
)
resized_clip = subclip.resized(scale_factor)
video_y = top_h + (video_area_h - resized_clip.h) // 2
video_clip = resized_clip.with_position(
((frame_w - resized_clip.w) // 2, video_y)
)
background = ColorClip(size=(frame_w, frame_h), color=(0, 0, 0)).with_duration(duration)
top_panel = (
ColorClip(size=(frame_w, top_h), color=(12, 12, 12))
.with_duration(duration)
.with_opacity(0.85)
)
bottom_panel = (
ColorClip(size=(frame_w, bottom_h), color=(12, 12, 12))
.with_position((0, frame_h - bottom_h))
.with_duration(duration)
.with_opacity(0.85)
)
title_text = title or summary
wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160)
title_clip = (
TextClip(
text=wrapped_title,
font=str(self.settings.rendering.font_path),
font_size=self.settings.rendering.title_font_size,
color=self.settings.rendering.base_color,
method="caption",
size=(frame_w - 160, top_h - 40),
)
.with_duration(duration)
)
title_clip = title_clip.with_position(
((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2)
)
words = self._collect_words(transcription, start, end)
caption_sets = self.captions.build(words, clip_start=start)
caption_clips = []
caption_resources: List[ImageClip] = []
caption_y = frame_h - bottom_h + (bottom_h - self.captions.canvas_height) // 2
for clip_set in caption_sets:
base_positioned = clip_set.base.with_position(("center", caption_y))
caption_clips.append(base_positioned)
caption_resources.append(clip_set.base)
for highlight in clip_set.highlights:
positioned = highlight.with_position(("center", caption_y))
caption_clips.append(positioned)
caption_resources.append(highlight)
if not caption_clips:
fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160)
caption_clips.append(
TextClip(
text=fallback_text,
font=str(self.settings.rendering.font_path),
font_size=self.settings.rendering.subtitle_font_size,
color=self.settings.rendering.base_color,
method="caption",
size=(frame_w - 160, bottom_h - 40),
)
.with_duration(duration)
.with_position(("center", caption_y))
)
composite = CompositeVideoClip(
[background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
size=(frame_w, frame_h),
)
output_path = output_dir / f"clip_{index:02d}.mp4"
composite.write_videofile(
str(output_path),
codec=self.settings.rendering.video_codec,
audio_codec=self.settings.rendering.audio_codec,
fps=self.settings.rendering.fps,
bitrate=self.settings.rendering.bitrate,
ffmpeg_params=[
"-preset",
self.settings.rendering.preset,
"-pix_fmt",
"yuv420p",
],
temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"),
remove_temp=True,
threads=4,
)
composite.close()
resized_clip.close()
video_clip.close()
title_clip.close()
background.close()
top_panel.close()
bottom_panel.close()
for clip in caption_clips:
clip.close()
for clip in caption_resources:
clip.close()
return str(output_path)
def _collect_words(
self, transcription: TranscriptionResult, start: float, end: float
) -> List[WordTiming]:
collected: List[WordTiming] = []
for segment in transcription.segments:
if segment.end < start or segment.start > end:
continue
if segment.words:
for word in segment.words:
if word.end < start or word.start > end:
continue
collected.append(
WordTiming(
start=max(start, word.start),
end=min(end, word.end),
word=word.word,
)
)
else:
collected.extend(self._fallback_words(segment.text, segment.start, segment.end, start, end))
collected.sort(key=lambda w: w.start)
return collected
def _fallback_words(
self,
text: str,
segment_start: float,
segment_end: float,
window_start: float,
window_end: float,
) -> Iterable[WordTiming]:
words = [w for w in re.split(r"\s+", text.strip()) if w]
if not words:
return []
seg_start = max(segment_start, window_start)
seg_end = min(segment_end, window_end)
duration = max(0.01, seg_end - seg_start)
step = duration / len(words)
timings: List[WordTiming] = []
for idx, word in enumerate(words):
w_start = seg_start + idx * step
w_end = min(seg_end, w_start + step)
timings.append(WordTiming(start=w_start, end=w_end, word=word))
return timings
@staticmethod
def _wrap_text(text: str, max_width: int) -> str:
text = text.strip()
if not text:
return ""
words = text.split()
lines: List[str] = []
current: List[str] = []
for word in words:
current.append(word)
if len(" ".join(current)) > max_width // 18:
lines.append(" ".join(current[:-1]))
current = [current[-1]]
if current:
lines.append(" ".join(current))
return "\n".join(lines)

122
video_render/transcription.py Normal file
View File

@@ -0,0 +1,122 @@
from __future__ import annotations
import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional
from faster_whisper import WhisperModel
from .config import Settings
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class WordTiming:
start: float
end: float
word: str
@dataclass(frozen=True)
class TranscriptSegment:
id: int
start: float
end: float
text: str
words: List[WordTiming]
@dataclass(frozen=True)
class TranscriptionResult:
segments: List[TranscriptSegment]
full_text: str
class TranscriptionService:
def __init__(self, settings: Settings) -> None:
self.settings = settings
self._model: Optional[WhisperModel] = None
def _load_model(self) -> WhisperModel:
if self._model is None:
logger.info(
"Carregando modelo Faster-Whisper '%s' (device=%s, compute_type=%s)",
self.settings.whisper.model_size,
self.settings.whisper.device or "auto",
self.settings.whisper.compute_type or "default",
)
self._model = WhisperModel(
self.settings.whisper.model_size,
device=self.settings.whisper.device or "auto",
compute_type=self.settings.whisper.compute_type or "default",
download_root=str(self.settings.whisper.download_root),
)
return self._model
def transcribe(self, audio_path: Path) -> TranscriptionResult:
model = self._load_model()
segments, _ = model.transcribe(
str(audio_path),
beam_size=5,
word_timestamps=True,
)
parsed_segments: List[TranscriptSegment] = []
full_text_parts: List[str] = []
for idx, segment in enumerate(segments):
words = [
WordTiming(start=w.start, end=w.end, word=w.word.strip())
for w in segment.words or []
if w.word.strip()
]
text = segment.text.strip()
full_text_parts.append(text)
parsed_segments.append(
TranscriptSegment(
id=idx,
start=segment.start,
end=segment.end,
text=text,
words=words,
)
)
return TranscriptionResult(
segments=parsed_segments,
full_text=" ".join(full_text_parts).strip(),
)
@staticmethod
def persist(result: TranscriptionResult, destination: Path) -> None:
json_path = destination / "transcription.json"
text_path = destination / "transcription.txt"
payload = {
"segments": [
{
"id": segment.id,
"start": segment.start,
"end": segment.end,
"text": segment.text,
"words": [
{"start": word.start, "end": word.end, "text": word.word}
for word in segment.words
],
}
for segment in result.segments
],
"full_text": result.full_text,
}
with json_path.open("w", encoding="utf-8") as fp:
json.dump(payload, fp, ensure_ascii=False, indent=2)
with text_path.open("w", encoding="utf-8") as fp:
fp.write(result.full_text)
logger.info("Transcrição salva em %s", destination)

38
video_render/utils.py Normal file
View File

@@ -0,0 +1,38 @@
from __future__ import annotations
import re
import unicodedata
from pathlib import Path
from typing import Iterable
def sanitize_filename(name: str) -> str:
normalized = unicodedata.normalize("NFKD", name)
ascii_text = normalized.encode("ASCII", "ignore").decode()
ascii_text = ascii_text.lower()
ascii_text = ascii_text.replace(" ", "_")
ascii_text = re.sub(r"[^a-z0-9_\-\.]", "", ascii_text)
ascii_text = re.sub(r"_+", "_", ascii_text)
return ascii_text.strip("_") or "video"
def ensure_workspace(root: Path, folder_name: str) -> Path:
workspace = root / folder_name
workspace.mkdir(parents=True, exist_ok=True)
return workspace
def remove_paths(paths: Iterable[Path]) -> None:
for path in paths:
if not path.exists():
continue
if path.is_file() or path.is_symlink():
path.unlink(missing_ok=True)
else:
# Reverse-sorted rglob yields deeper paths first, so children are removed before their parent dirs
for child in sorted(path.rglob("*"), reverse=True):
if child.is_file() or child.is_symlink():
child.unlink(missing_ok=True)
elif child.is_dir():
child.rmdir()
path.rmdir()
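
For illustration, the expected behavior of sanitize_filename (inputs are my own examples, not tests from the repo):

from video_render.utils import sanitize_filename

assert sanitize_filename("Meu Vídeo Épico.mp4") == "meu_video_epico.mp4"  # accents dropped, spaces become "_"
assert sanitize_filename("***") == "video"  # falls back to "video" when nothing survives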