Cria novos components

2025-10-20 17:56:36 -03:00
parent 2b99d2ad78
commit b090f7c2cb
38 changed files with 1391 additions and 1024 deletions
--- a/video_render/pipeline.py
+++ b/video_render/pipeline.py
@@ -0,0 +1,236 @@
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from .config import Settings
+from .llm import GeminiHighlighter, OpenRouterCopywriter
+from .media import MediaPreparer, VideoWorkspace
+from .transcription import TranscriptionResult, TranscriptionService
+from .utils import remove_paths, sanitize_filename
+from .rendering import VideoRenderer
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class JobMessage:
+    filename: str
+    url: Optional[str]
+    video_id: Optional[str]
+    extras: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class HighlightWindow:
+    start: float
+    end: float
+    summary: str
+    title: Optional[str] = None
+
+
+@dataclass
+class RenderedClip:
+    path: Path
+    start: float
+    end: float
+    title: str
+    summary: str
+    index: int
+
+
+@dataclass
+class PipelineContext:
+    job: JobMessage
+    workspace: Optional[VideoWorkspace] = None
+    transcription: Optional[TranscriptionResult] = None
+    highlight_windows: List[HighlightWindow] = field(default_factory=list)
+    rendered_clips: List[RenderedClip] = field(default_factory=list)
+
+
+class VideoPipeline:
+    def __init__(self, settings: Settings) -> None:
+        self.settings = settings
+        self.media_preparer = MediaPreparer(settings)
+        self.transcriber = TranscriptionService(settings)
+        self.highlighter = GeminiHighlighter(settings)
+        self.copywriter = OpenRouterCopywriter(settings)
+        self.renderer = VideoRenderer(settings)
+
+    def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
+        context = PipelineContext(job=self._parse_job(message))
+        try:
+            self._prepare_workspace(context)
+            self._generate_transcription(context)
+            self._determine_highlights(context)
+            self._generate_titles(context)
+            self._render_clips(context)
+            return self._build_success_payload(context)
+        except Exception as exc:
+            logger.exception("Falha ao processar vídeo %s", context.job.filename)
+            return self._handle_failure(context, exc)
+
+    def _parse_job(self, message: Dict[str, Any]) -> JobMessage:
+        filename = message.get("filename")
+        if not filename:
+            raise ValueError("Mensagem inválida: 'filename' é obrigatório")
+
+        url = message.get("url")
+        video_id = message.get("videoId") or message.get("video_id")
+        extras = {
+            key: value
+            for key, value in message.items()
+            if key not in {"filename", "url", "videoId", "video_id"}
+        }
+        return JobMessage(filename=filename, url=url, video_id=video_id, extras=extras)
+
+    def _prepare_workspace(self, context: PipelineContext) -> None:
+        context.workspace = self.media_preparer.prepare(context.job.filename)
+
+    def _generate_transcription(self, context: PipelineContext) -> None:
+        if not context.workspace:
+            raise RuntimeError("Workspace não preparado")
+        transcription = self.transcriber.transcribe(context.workspace.audio_path)
+        TranscriptionService.persist(transcription, context.workspace.workspace_dir)
+        context.transcription = transcription
+
+    def _determine_highlights(self, context: PipelineContext) -> None:
+        if not context.transcription:
+            raise RuntimeError("Transcricao nao disponivel")
+
+        highlights_raw = self.highlighter.generate_highlights(context.transcription)
+        windows: List[HighlightWindow] = []
+
+        for item in highlights_raw:
+            try:
+                start = float(item.get("start", 0))  # type: ignore[arg-type]
+                end = float(item.get("end", start))  # type: ignore[arg-type]
+            except (TypeError, ValueError):
+                logger.warning("Highlight invalido ignorado: %s", item)
+                continue
+
+            summary = str(item.get("summary", "")).strip()
+            if end <= start:
+                logger.debug("Highlight com intervalo invalido ignorado: %s", item)
+                continue
+
+            windows.append(HighlightWindow(start=start, end=end, summary=summary))
+
+        if not windows:
+            last_end = (
+                context.transcription.segments[-1].end
+                if context.transcription.segments
+                else 0
+            )
+            windows.append(
+                HighlightWindow(
+                    start=0.0,
+                    end=max(last_end, 10.0),
+                    summary="Sem destaque identificado; fallback automatico.",
+                )
+            )
+
+        context.highlight_windows = windows
+
+    def _generate_titles(self, context: PipelineContext) -> None:
+        if not context.highlight_windows:
+            return
+
+        highlight_dicts = [
+            {"start": window.start, "end": window.end, "summary": window.summary}
+            for window in context.highlight_windows
+        ]
+        titles = self.copywriter.generate_titles(highlight_dicts)
+
+        for window, title in zip(context.highlight_windows, titles):
+            window.title = title.strip()
+
+
+    def _render_clips(self, context: PipelineContext) -> None:
+        if not context.workspace or not context.highlight_windows or not context.transcription:
+            return
+
+        titles = [
+            window.title or window.summary for window in context.highlight_windows
+        ]
+
+        render_results = self.renderer.render(
+            workspace_path=str(context.workspace.working_video_path),
+            highlight_windows=context.highlight_windows,
+            transcription=context.transcription,
+            titles=titles,
+            output_dir=context.workspace.output_dir,
+        )
+
+        context.rendered_clips = [
+            RenderedClip(
+                path=Path(path),
+                start=start,
+                end=end,
+                title=title,
+                summary=summary,
+                index=index,
+            )
+            for path, start, end, title, summary, index in render_results
+        ]
+
+    def _build_success_payload(self, context: PipelineContext) -> Dict[str, Any]:
+        return {
+            "hasError": False,
+            "videosProcessedQuantity": len(context.rendered_clips),
+            "filename": context.job.filename,
+            "videoId": context.job.video_id,
+            "url": context.job.url,
+            "workspaceFolder": context.workspace.sanitized_name if context.workspace else None,
+            "outputDirectory": self._relative_path(context.workspace.output_dir) if context.workspace else None,
+            "processedFiles": [
+                {
+                    "path": self._relative_path(clip.path),
+                    "start": clip.start,
+                    "end": clip.end,
+                    "title": clip.title,
+                    "summary": clip.summary,
+                    "clipIndex": clip.index,
+                }
+                for clip in context.rendered_clips
+            ],
+        }
+
+    def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]:
+        logger.error("Erro no pipeline: %s", exc)
+        cleanup_targets: List[Path] = []
+
+        if context.workspace:
+            cleanup_targets.append(context.workspace.workspace_dir)
+            cleanup_targets.append(context.workspace.output_dir)
+            original_path = context.workspace.source_path
+            if original_path.exists():
+                cleanup_targets.append(original_path)
+        else:
+            sanitized = sanitize_filename(Path(context.job.filename).stem)
+            job_output_dir = self.settings.outputs_dir / sanitized
+            if job_output_dir.exists():
+                cleanup_targets.append(job_output_dir)
+            original_path = self.settings.videos_dir / context.job.filename
+            if original_path.exists():
+                cleanup_targets.append(original_path)
+
+        remove_paths(cleanup_targets)
+
+        return {
+            "hasError": True,
+            "error": str(exc),
+            "filename": context.job.filename,
+            "videoId": context.job.video_id,
+            "url": context.job.url,
+            "processedFiles": [],
+        }
+
+    def _relative_path(self, path: Path) -> str:
+        base = self.settings.videos_dir.parent
+        try:
+            return str(path.relative_to(base))
+        except ValueError:
+            return str(path)