Ajustes do Gemini

This commit is contained in:
LeoMortari
2025-10-27 14:08:10 -03:00
parent 2692cc4dfd
commit b5a27fa938
10 changed files with 115 additions and 69 deletions

4
.gitignore vendored
View File

@@ -2,7 +2,7 @@
*.pyc *.pyc
*.pyo *.pyo
*.pyd *.pyd
__pycache__/ /__pycache__/
*.egg-info/ *.egg-info/
.eggs/ .eggs/
dist/ dist/
@@ -10,7 +10,7 @@ build/
doc/ doc/
videos/ videos/
outputs/ outputs/
.DS_Store
# Ignore virtual envs # Ignore virtual envs
venv/ venv/
env/ env/

View File

@@ -2,7 +2,6 @@ services:
video-render: video-render:
restart: unless-stopped restart: unless-stopped
build: . build: .
container_name: video-render
environment: environment:
# - RABBITMQ_PASS=${RABBITMQ_PASS} # - RABBITMQ_PASS=${RABBITMQ_PASS}
- RABBITMQ_PASS=L@l321321321 - RABBITMQ_PASS=L@l321321321
@@ -10,7 +9,7 @@ services:
- RABBITMQ_PORT=32790 - RABBITMQ_PORT=32790
# - GEMINI_API_KEY=${GEMINI_API_KEY} # - GEMINI_API_KEY=${GEMINI_API_KEY}
- GEMINI_API_KEY=AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw - GEMINI_API_KEY=AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw
- GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro} - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-flash}
# - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
- OPENROUTER_API_KEY=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 - OPENROUTER_API_KEY=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
- OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free} - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free}

View File

@@ -4,3 +4,4 @@ numpy>=1.26.0
requests requests
pika pika
faster-whisper==1.2.0 faster-whisper==1.2.0
google-genai

View File

@@ -3,8 +3,10 @@ from __future__ import annotations
import json import json
import logging import logging
from pathlib import Path from pathlib import Path
from typing import Dict, List from typing import Any, Dict, List, Optional
from google import genai
from google.genai import types as genai_types
import requests import requests
from video_render.config import BASE_DIR, Settings from video_render.config import BASE_DIR, Settings
@@ -12,7 +14,6 @@ from video_render.transcription import TranscriptionResult
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions" OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"
@@ -31,6 +32,7 @@ class GeminiHighlighter:
self.prompt_template = prompt_path.read_text(encoding="utf-8") self.prompt_template = prompt_path.read_text(encoding="utf-8")
self.settings = settings self.settings = settings
self.client = genai.Client()
def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]: def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
payload = { payload = {
@@ -45,45 +47,13 @@ class GeminiHighlighter:
], ],
} }
body = { try:
"contents": [ response = self._call_gemini(payload)
{ except Exception as exc:
"role": "user", logger.error("Gemini API request falhou: %s", exc)
"parts": [ raise RuntimeError("Gemini API request falhou") from exc
{"text": self.prompt_template},
{"text": json.dumps(payload, ensure_ascii=False)},
],
}
]
}
if self.settings.gemini.temperature is not None: raw_text = self._extract_response_text(response)
body["generationConfig"] = {
"temperature": self.settings.gemini.temperature,
}
if self.settings.gemini.top_p is not None:
body["generationConfig"]["topP"] = self.settings.gemini.top_p
if self.settings.gemini.top_k is not None:
body["generationConfig"]["topK"] = self.settings.gemini.top_k
url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model)
params = {"key": self.settings.gemini.api_key}
response = requests.post(url, params=params, json=body, timeout=120)
response.raise_for_status()
data = response.json()
candidates = data.get("candidates") or []
if not candidates:
raise RuntimeError("Gemini nao retornou candidatos")
text_parts = candidates[0].get("content", {}).get("parts", [])
if not text_parts:
raise RuntimeError("Resposta do Gemini sem conteudo")
raw_text = text_parts[0].get("text")
if not raw_text:
raise RuntimeError("Resposta do Gemini sem texto")
parsed = self._extract_json(raw_text) parsed = self._extract_json(raw_text)
highlights = parsed.get("highlights") highlights = parsed.get("highlights")
@@ -91,6 +61,61 @@ class GeminiHighlighter:
raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente") raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente")
return highlights return highlights
def _call_gemini(self, payload: Dict[str, Any]) -> Any:
contents = [
{
"role": "user",
"parts": [
{"text": self.prompt_template},
{"text": json.dumps(payload, ensure_ascii=False)},
],
}
]
request_kwargs: Dict[str, Any] = {
"model": self.settings.gemini.model,
"contents": contents,
}
config = self._build_generation_config()
if config is not None:
request_kwargs["config"] = config
return self.client.models.generate_content(**request_kwargs)
def _build_generation_config(self) -> Optional[genai_types.GenerateContentConfig]:
config_kwargs: Dict[str, Any] = {}
if self.settings.gemini.temperature is not None:
config_kwargs["temperature"] = self.settings.gemini.temperature
if self.settings.gemini.top_p is not None:
config_kwargs["top_p"] = self.settings.gemini.top_p
if self.settings.gemini.top_k is not None:
config_kwargs["top_k"] = self.settings.gemini.top_k
if not config_kwargs:
return None
return genai_types.GenerateContentConfig(**config_kwargs)
@staticmethod
def _extract_response_text(response: Any) -> str:
text = getattr(response, "text", None)
if text:
return str(text).strip()
candidates = getattr(response, "candidates", None) or []
for candidate in candidates:
content = getattr(candidate, "content", None)
if not content:
continue
parts = getattr(content, "parts", None) or []
for part in parts:
part_text = getattr(part, "text", None)
if part_text:
return str(part_text).strip()
raise RuntimeError("Resposta do Gemini sem texto")
@staticmethod @staticmethod
def _extract_json(response_text: str) -> Dict: def _extract_json(response_text: str) -> Dict:
try: try:
@@ -160,10 +185,6 @@ class OpenRouterCopywriter:
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
choices = data.get("choices") or [] choices = data.get("choices") or []
print("Data:")
print(data)
print("Choices:")
print(choices)
if not choices: if not choices:
raise RuntimeError("OpenRouter nao retornou escolhas") raise RuntimeError("OpenRouter nao retornou escolhas")

View File

@@ -38,7 +38,7 @@ class MediaPreparer:
existing_children = list(workspace_dir.iterdir()) existing_children = list(workspace_dir.iterdir())
if existing_children: if existing_children:
logger.info("Limpando workspace existente para %s", sanitized_name) logger.info("Limpando workspace existente para %s", sanitized_name)
remove_paths(existing_children) # remove_paths(existing_children)
destination_name = f"{sanitized_name}{source_path.suffix.lower()}" destination_name = f"{sanitized_name}{source_path.suffix.lower()}"
working_video_path = workspace_dir / destination_name working_video_path = workspace_dir / destination_name
@@ -46,9 +46,9 @@ class MediaPreparer:
logger.info("Cópia do vídeo criada em %s", working_video_path) logger.info("Cópia do vídeo criada em %s", working_video_path)
output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name) output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name)
existing_outputs = list(output_dir.iterdir()) # existing_outputs = list(output_dir.iterdir())
if existing_outputs: # if existing_outputs:
remove_paths(existing_outputs) # remove_paths(existing_outputs)
audio_path = workspace_dir / "audio.wav" audio_path = workspace_dir / "audio.wav"
extract_audio_to_wav(working_video_path, audio_path) extract_audio_to_wav(working_video_path, audio_path)

View File

@@ -15,7 +15,6 @@ MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]
class RabbitMQWorker: class RabbitMQWorker:
def __init__(self, settings: Settings) -> None: def __init__(self, settings: Settings) -> None:
print(settings)
self.settings = settings self.settings = settings
self._params = pika.ConnectionParameters( self._params = pika.ConnectionParameters(
host=settings.rabbitmq.host, host=settings.rabbitmq.host,

View File

@@ -101,7 +101,15 @@ class VideoPipeline:
if not context.transcription: if not context.transcription:
raise RuntimeError("Transcricao nao disponivel") raise RuntimeError("Transcricao nao disponivel")
highlights_raw = self.highlighter.generate_highlights(context.transcription) try:
highlights_raw = self.highlighter.generate_highlights(context.transcription)
except Exception:
logger.exception(
"Falha ao gerar destaques com Gemini; aplicando fallback padrao."
)
context.highlight_windows = [self._build_fallback_highlight(context)]
return
windows: List[HighlightWindow] = [] windows: List[HighlightWindow] = []
for item in highlights_raw: for item in highlights_raw:
@@ -120,18 +128,7 @@ class VideoPipeline:
windows.append(HighlightWindow(start=start, end=end, summary=summary)) windows.append(HighlightWindow(start=start, end=end, summary=summary))
if not windows: if not windows:
last_end = ( windows.append(self._build_fallback_highlight(context))
context.transcription.segments[-1].end
if context.transcription.segments
else 0
)
windows.append(
HighlightWindow(
start=0.0,
end=max(last_end, 10.0),
summary="Sem destaque identificado; fallback automatico.",
)
)
context.highlight_windows = windows context.highlight_windows = windows
@@ -148,6 +145,20 @@ class VideoPipeline:
for window, title in zip(context.highlight_windows, titles): for window, title in zip(context.highlight_windows, titles):
window.title = title.strip() window.title = title.strip()
def _build_fallback_highlight(self, context: PipelineContext) -> HighlightWindow:
if not context.transcription:
raise RuntimeError("Transcricao nao disponivel para criar fallback")
last_end = (
context.transcription.segments[-1].end
if context.transcription.segments
else 0.0
)
return HighlightWindow(
start=0.0,
end=max(last_end, 10.0),
summary="Sem destaque identificado; fallback automatico.",
)
def _render_clips(self, context: PipelineContext) -> None: def _render_clips(self, context: PipelineContext) -> None:
if not context.workspace or not context.highlight_windows or not context.transcription: if not context.workspace or not context.highlight_windows or not context.transcription:

View File

@@ -267,6 +267,7 @@ class VideoRenderer:
color=self.settings.rendering.base_color, color=self.settings.rendering.base_color,
method="caption", method="caption",
size=(frame_w - 160, top_h - 40), size=(frame_w - 160, top_h - 40),
align="center",
) )
.with_duration(duration) .with_duration(duration)
) )
@@ -279,8 +280,18 @@ class VideoRenderer:
caption_clips = [] caption_clips = []
caption_resources: List[ImageClip] = [] caption_resources: List[ImageClip] = []
margin = 20 caption_area_top = frame_h - bottom_h
caption_y = max(0, video_y - self.captions.canvas_height - margin) caption_area_height = bottom_h
caption_margin = 20
raw_caption_y = caption_area_top + (caption_area_height - self.captions.canvas_height) // 2
min_caption_y = caption_area_top + caption_margin
max_caption_y = (
caption_area_top + caption_area_height - self.captions.canvas_height - caption_margin
)
if max_caption_y < min_caption_y:
caption_y = min_caption_y
else:
caption_y = min(max(raw_caption_y, min_caption_y), max_caption_y)
for clip_set in caption_sets: for clip_set in caption_sets:
base_positioned = clip_set.base.with_position(("center", caption_y)) base_positioned = clip_set.base.with_position(("center", caption_y))
@@ -300,6 +311,7 @@ class VideoRenderer:
font_size=self.settings.rendering.subtitle_font_size, font_size=self.settings.rendering.subtitle_font_size,
color=self.settings.rendering.base_color, color=self.settings.rendering.base_color,
method="caption", method="caption",
align="center",
size=(frame_w - 160, max(40, self.captions.canvas_height)), size=(frame_w - 160, max(40, self.captions.canvas_height)),
) )
.with_duration(duration) .with_duration(duration)
@@ -310,6 +322,9 @@ class VideoRenderer:
[background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips], [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
size=(frame_w, frame_h), size=(frame_w, frame_h),
) )
video_audio = video_clip.audio or resized_clip.audio or subclip.audio
if video_audio is not None:
composite = composite.set_audio(video_audio)
output_path = output_dir / f"clip_{index:02d}.mp4" output_path = output_dir / f"clip_{index:02d}.mp4"
composite.write_videofile( composite.write_videofile(