diff --git a/.gitignore b/.gitignore
index 64e5617..7a2b6cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@
 *.pyc
 *.pyo
 *.pyd
-__pycache__/
+/__pycache__/
 *.egg-info/
 .eggs/
 dist/
@@ -10,7 +10,7 @@ build/
 doc/
 videos/
 outputs/
-
+.DS_Store
 # Ignore virtual envs
 venv/
 env/
diff --git a/docker-compose.yml b/docker-compose.yml
index 9fe75ac..338e355 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,6 @@ services:
   video-render:
     restart: unless-stopped
     build: .
-    container_name: video-render
     environment:
       # - RABBITMQ_PASS=${RABBITMQ_PASS}
       - RABBITMQ_PASS=L@l321321321
@@ -10,7 +9,7 @@
       - RABBITMQ_PORT=32790
       # - GEMINI_API_KEY=${GEMINI_API_KEY}
       - GEMINI_API_KEY=AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw
-      - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro}
+      - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-flash}
       # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
       - OPENROUTER_API_KEY=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
       - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free}
diff --git a/requirements.txt b/requirements.txt
index 1593182..f38966b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ pillow==9.5.0
 numpy>=1.26.0
 requests
 pika
-faster-whisper==1.2.0
\ No newline at end of file
+faster-whisper==1.2.0
+google-genai
diff --git a/video_render/__pycache__/llm.cpython-39.pyc b/video_render/__pycache__/llm.cpython-39.pyc
index 72379c3..2199266 100644
Binary files a/video_render/__pycache__/llm.cpython-39.pyc and b/video_render/__pycache__/llm.cpython-39.pyc differ
diff --git a/video_render/__pycache__/rendering.cpython-39.pyc b/video_render/__pycache__/rendering.cpython-39.pyc
index 18da5d3..6577a62 100644
Binary files a/video_render/__pycache__/rendering.cpython-39.pyc and b/video_render/__pycache__/rendering.cpython-39.pyc differ
diff --git a/video_render/llm.py b/video_render/llm.py
index de6c4ae..84d2d4f 100644
--- a/video_render/llm.py
+++ b/video_render/llm.py
@@ -3,8 +3,10 @@ from __future__ import annotations
 import json
 import logging
 from pathlib import Path
-from typing import Dict, List
+from typing import Any, Dict, List, Optional
 
+from google import genai
+from google.genai import types as genai_types
 import requests
 
 from video_render.config import BASE_DIR, Settings
@@ -12,7 +14,6 @@ from video_render.transcription import TranscriptionResult
 
 logger = logging.getLogger(__name__)
 
-GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
 OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"
 
 
@@ -31,6 +32,7 @@ class GeminiHighlighter:
 
         self.prompt_template = prompt_path.read_text(encoding="utf-8")
         self.settings = settings
+        self.client = genai.Client(api_key=self.settings.gemini.api_key)
 
     def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
         payload = {
@@ -45,45 +47,13 @@ class GeminiHighlighter:
             ],
         }
 
-        body = {
-            "contents": [
-                {
-                    "role": "user",
-                    "parts": [
-                        {"text": self.prompt_template},
-                        {"text": json.dumps(payload, ensure_ascii=False)},
-                    ],
-                }
-            ]
-        }
+        try:
+            response = self._call_gemini(payload)
+        except Exception as exc:
+            logger.error("Gemini API request falhou: %s", exc)
+            raise RuntimeError("Gemini API request falhou") from exc
 
-        if self.settings.gemini.temperature is not None:
-            body["generationConfig"] = {
-                "temperature": self.settings.gemini.temperature,
-            }
-        if self.settings.gemini.top_p is not None:
-            body["generationConfig"]["topP"] = self.settings.gemini.top_p
-        if self.settings.gemini.top_k is not None:
body["generationConfig"]["topK"] = self.settings.gemini.top_k - - url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model) - params = {"key": self.settings.gemini.api_key} - - response = requests.post(url, params=params, json=body, timeout=120) - response.raise_for_status() - data = response.json() - - candidates = data.get("candidates") or [] - if not candidates: - raise RuntimeError("Gemini nao retornou candidatos") - - text_parts = candidates[0].get("content", {}).get("parts", []) - if not text_parts: - raise RuntimeError("Resposta do Gemini sem conteudo") - - raw_text = text_parts[0].get("text") - if not raw_text: - raise RuntimeError("Resposta do Gemini sem texto") + raw_text = self._extract_response_text(response) parsed = self._extract_json(raw_text) highlights = parsed.get("highlights") @@ -91,6 +61,61 @@ class GeminiHighlighter: raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente") return highlights + def _call_gemini(self, payload: Dict[str, Any]) -> Any: + contents = [ + { + "role": "user", + "parts": [ + {"text": self.prompt_template}, + {"text": json.dumps(payload, ensure_ascii=False)}, + ], + } + ] + + request_kwargs: Dict[str, Any] = { + "model": self.settings.gemini.model, + "contents": contents, + } + + config = self._build_generation_config() + if config is not None: + request_kwargs["config"] = config + + return self.client.models.generate_content(**request_kwargs) + + def _build_generation_config(self) -> Optional[genai_types.GenerateContentConfig]: + config_kwargs: Dict[str, Any] = {} + if self.settings.gemini.temperature is not None: + config_kwargs["temperature"] = self.settings.gemini.temperature + if self.settings.gemini.top_p is not None: + config_kwargs["top_p"] = self.settings.gemini.top_p + if self.settings.gemini.top_k is not None: + config_kwargs["top_k"] = self.settings.gemini.top_k + + if not config_kwargs: + return None + + return genai_types.GenerateContentConfig(**config_kwargs) + + @staticmethod + def _extract_response_text(response: Any) -> str: + text = getattr(response, "text", None) + if text: + return str(text).strip() + + candidates = getattr(response, "candidates", None) or [] + for candidate in candidates: + content = getattr(candidate, "content", None) + if not content: + continue + parts = getattr(content, "parts", None) or [] + for part in parts: + part_text = getattr(part, "text", None) + if part_text: + return str(part_text).strip() + + raise RuntimeError("Resposta do Gemini sem texto") + @staticmethod def _extract_json(response_text: str) -> Dict: try: @@ -160,10 +185,6 @@ class OpenRouterCopywriter: response.raise_for_status() data = response.json() choices = data.get("choices") or [] - print("Data:") - print(data) - print("Choices:") - print(choices) if not choices: raise RuntimeError("OpenRouter nao retornou escolhas") diff --git a/video_render/media.py b/video_render/media.py index 7fb878e..a79dd4f 100644 --- a/video_render/media.py +++ b/video_render/media.py @@ -38,7 +38,7 @@ class MediaPreparer: existing_children = list(workspace_dir.iterdir()) if existing_children: logger.info("Limpando workspace existente para %s", sanitized_name) - remove_paths(existing_children) + # remove_paths(existing_children) destination_name = f"{sanitized_name}{source_path.suffix.lower()}" working_video_path = workspace_dir / destination_name @@ -46,9 +46,9 @@ class MediaPreparer: logger.info("Cópia do vídeo criada em %s", working_video_path) output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name) - 
-        existing_outputs = list(output_dir.iterdir())
-        if existing_outputs:
-            remove_paths(existing_outputs)
+        # existing_outputs = list(output_dir.iterdir())
+        # if existing_outputs:
+        #     remove_paths(existing_outputs)
 
         audio_path = workspace_dir / "audio.wav"
         extract_audio_to_wav(working_video_path, audio_path)
diff --git a/video_render/messaging.py b/video_render/messaging.py
index c37058d..b61599c 100644
--- a/video_render/messaging.py
+++ b/video_render/messaging.py
@@ -15,7 +15,6 @@ MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]
 
 class RabbitMQWorker:
     def __init__(self, settings: Settings) -> None:
-        print(settings)
         self.settings = settings
         self._params = pika.ConnectionParameters(
             host=settings.rabbitmq.host,
diff --git a/video_render/pipeline.py b/video_render/pipeline.py
index 6bd6689..c8e309e 100644
--- a/video_render/pipeline.py
+++ b/video_render/pipeline.py
@@ -101,7 +101,15 @@ class VideoPipeline:
         if not context.transcription:
             raise RuntimeError("Transcricao nao disponivel")
 
-        highlights_raw = self.highlighter.generate_highlights(context.transcription)
+        try:
+            highlights_raw = self.highlighter.generate_highlights(context.transcription)
+        except Exception:
+            logger.exception(
+                "Falha ao gerar destaques com Gemini; aplicando fallback padrao."
+            )
+            context.highlight_windows = [self._build_fallback_highlight(context)]
+            return
+
 
         windows: List[HighlightWindow] = []
         for item in highlights_raw:
@@ -120,18 +128,7 @@ class VideoPipeline:
             windows.append(HighlightWindow(start=start, end=end, summary=summary))
 
         if not windows:
-            last_end = (
-                context.transcription.segments[-1].end
-                if context.transcription.segments
-                else 0
-            )
-            windows.append(
-                HighlightWindow(
-                    start=0.0,
-                    end=max(last_end, 10.0),
-                    summary="Sem destaque identificado; fallback automatico.",
-                )
-            )
+            windows.append(self._build_fallback_highlight(context))
 
         context.highlight_windows = windows
 
@@ -148,6 +145,20 @@ class VideoPipeline:
         for window, title in zip(context.highlight_windows, titles):
             window.title = title.strip()
 
+    def _build_fallback_highlight(self, context: PipelineContext) -> HighlightWindow:
+        if not context.transcription:
+            raise RuntimeError("Transcricao nao disponivel para criar fallback")
+
+        last_end = (
+            context.transcription.segments[-1].end
+            if context.transcription.segments
+            else 0.0
+        )
+        return HighlightWindow(
+            start=0.0,
+            end=max(last_end, 10.0),
+            summary="Sem destaque identificado; fallback automatico.",
+        )
 
     def _render_clips(self, context: PipelineContext) -> None:
         if not context.workspace or not context.highlight_windows or not context.transcription:
diff --git a/video_render/rendering.py b/video_render/rendering.py
index f09ab87..723f17d 100644
--- a/video_render/rendering.py
+++ b/video_render/rendering.py
@@ -267,6 +267,7 @@ class VideoRenderer:
                 color=self.settings.rendering.base_color,
                 method="caption",
                 size=(frame_w - 160, top_h - 40),
+                text_align="center",
             )
             .with_duration(duration)
         )
@@ -279,8 +280,18 @@ class VideoRenderer:
 
         caption_clips = []
         caption_resources: List[ImageClip] = []
-        margin = 20
-        caption_y = max(0, video_y - self.captions.canvas_height - margin)
+        caption_area_top = frame_h - bottom_h
+        caption_area_height = bottom_h
+        caption_margin = 20
+        raw_caption_y = caption_area_top + (caption_area_height - self.captions.canvas_height) // 2
+        min_caption_y = caption_area_top + caption_margin
+        max_caption_y = (
+            caption_area_top + caption_area_height - self.captions.canvas_height - caption_margin
+        )
+        if max_caption_y < min_caption_y:
+            caption_y = min_caption_y
+        else:
+            caption_y = min(max(raw_caption_y, min_caption_y), max_caption_y)
 
         for clip_set in caption_sets:
             base_positioned = clip_set.base.with_position(("center", caption_y))
@@ -300,6 +311,7 @@ class VideoRenderer:
                 font_size=self.settings.rendering.subtitle_font_size,
                 color=self.settings.rendering.base_color,
                 method="caption",
+                text_align="center",
                 size=(frame_w - 160, max(40, self.captions.canvas_height)),
             )
             .with_duration(duration)
@@ -310,6 +322,9 @@ class VideoRenderer:
             [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
             size=(frame_w, frame_h),
         )
+        video_audio = video_clip.audio or resized_clip.audio or subclip.audio
+        if video_audio is not None:
+            composite = composite.with_audio(video_audio)
 
         output_path = output_dir / f"clip_{index:02d}.mp4"
         composite.write_videofile(