Ajustes do Gemini
.gitignore (vendored)
@@ -2,7 +2,7 @@
 *.pyc
 *.pyo
 *.pyd
-__pycache__/
+/__pycache__/
 *.egg-info/
 .eggs/
 dist/
@@ -10,7 +10,7 @@ build/
 doc/
 videos/
 outputs/
-
+.DS_STORE
 # Ignore virtual envs
 venv/
 env/

@@ -2,7 +2,6 @@ services:
   video-render:
     restart: unless-stopped
     build: .
-    container_name: video-render
     environment:
       # - RABBITMQ_PASS=${RABBITMQ_PASS}
       - RABBITMQ_PASS=L@l321321321
@@ -10,7 +9,7 @@ services:
       - RABBITMQ_PORT=32790
       # - GEMINI_API_KEY=${GEMINI_API_KEY}
       - GEMINI_API_KEY=AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw
-      - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro}
+      - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-flash}
       # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
       - OPENROUTER_API_KEY=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
       - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free}
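
The `${VAR:-default}` values above are compose-style interpolation: the environment variable wins when set, otherwise the literal after `:-` is used. A minimal Python sketch of the same defaulting on the application side (the real lookup lives in video_render.config, which this page does not show):

    import os

    # Compose-style "${GEMINI_MODEL:-gemini-2.5-flash}": env var when set,
    # otherwise the fallback after ":-".
    gemini_model = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")
    openrouter_model = os.environ.get("OPENROUTER_MODEL", "openai/gpt-oss-20b:free")
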
@@ -3,4 +3,5 @@ pillow==9.5.0
 numpy>=1.26.0
 requests
 pika
-faster-whisper==1.2.0
+faster-whisper==1.2.0
+google-genai

Binary file not shown.
Binary file not shown.
@@ -3,8 +3,10 @@ from __future__ import annotations
 import json
 import logging
 from pathlib import Path
-from typing import Dict, List
+from typing import Any, Dict, List, Optional
 
+from google import genai
+from google.genai import types as genai_types
 import requests
 
 from video_render.config import BASE_DIR, Settings
@@ -12,7 +14,6 @@ from video_render.transcription import TranscriptionResult
 
 logger = logging.getLogger(__name__)
 
-GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
 OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"
 
 
@@ -31,6 +32,7 @@ class GeminiHighlighter:
 
         self.prompt_template = prompt_path.read_text(encoding="utf-8")
         self.settings = settings
+        self.client = genai.Client()
 
     def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
         payload = {
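
A note on the added `genai.Client()`: called with no arguments, the google-genai client resolves its API key from the environment (GEMINI_API_KEY or GOOGLE_API_KEY in current releases), which is what the compose file above exports. A minimal explicit equivalent, as a sketch:

    import os
    from google import genai

    # Same effect as genai.Client() when GEMINI_API_KEY is exported.
    client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
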
@@ -45,45 +47,13 @@ class GeminiHighlighter:
             ],
         }
 
-        body = {
-            "contents": [
-                {
-                    "role": "user",
-                    "parts": [
-                        {"text": self.prompt_template},
-                        {"text": json.dumps(payload, ensure_ascii=False)},
-                    ],
-                }
-            ]
-        }
-
-        if self.settings.gemini.temperature is not None:
-            body["generationConfig"] = {
-                "temperature": self.settings.gemini.temperature,
-            }
-        if self.settings.gemini.top_p is not None:
-            body["generationConfig"]["topP"] = self.settings.gemini.top_p
-        if self.settings.gemini.top_k is not None:
-            body["generationConfig"]["topK"] = self.settings.gemini.top_k
-
-        url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model)
-        params = {"key": self.settings.gemini.api_key}
-
-        response = requests.post(url, params=params, json=body, timeout=120)
-        response.raise_for_status()
-        data = response.json()
-
-        candidates = data.get("candidates") or []
-        if not candidates:
-            raise RuntimeError("Gemini nao retornou candidatos")
-
-        text_parts = candidates[0].get("content", {}).get("parts", [])
-        if not text_parts:
-            raise RuntimeError("Resposta do Gemini sem conteudo")
-
-        raw_text = text_parts[0].get("text")
-        if not raw_text:
-            raise RuntimeError("Resposta do Gemini sem texto")
+        try:
+            response = self._call_gemini(payload)
+        except Exception as exc:
+            logger.error("Gemini API request falhou: %s", exc)
+            raise RuntimeError("Gemini API request falhou") from exc
+
+        raw_text = self._extract_response_text(response)
 
         parsed = self._extract_json(raw_text)
         highlights = parsed.get("highlights")
@@ -91,6 +61,61 @@ class GeminiHighlighter:
             raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente")
         return highlights
 
+    def _call_gemini(self, payload: Dict[str, Any]) -> Any:
+        contents = [
+            {
+                "role": "user",
+                "parts": [
+                    {"text": self.prompt_template},
+                    {"text": json.dumps(payload, ensure_ascii=False)},
+                ],
+            }
+        ]
+
+        request_kwargs: Dict[str, Any] = {
+            "model": self.settings.gemini.model,
+            "contents": contents,
+        }
+
+        config = self._build_generation_config()
+        if config is not None:
+            request_kwargs["config"] = config
+
+        return self.client.models.generate_content(**request_kwargs)
+
+    def _build_generation_config(self) -> Optional[genai_types.GenerateContentConfig]:
+        config_kwargs: Dict[str, Any] = {}
+        if self.settings.gemini.temperature is not None:
+            config_kwargs["temperature"] = self.settings.gemini.temperature
+        if self.settings.gemini.top_p is not None:
+            config_kwargs["top_p"] = self.settings.gemini.top_p
+        if self.settings.gemini.top_k is not None:
+            config_kwargs["top_k"] = self.settings.gemini.top_k
+
+        if not config_kwargs:
+            return None
+
+        return genai_types.GenerateContentConfig(**config_kwargs)
+
+    @staticmethod
+    def _extract_response_text(response: Any) -> str:
+        text = getattr(response, "text", None)
+        if text:
+            return str(text).strip()
+
+        candidates = getattr(response, "candidates", None) or []
+        for candidate in candidates:
+            content = getattr(candidate, "content", None)
+            if not content:
+                continue
+            parts = getattr(content, "parts", None) or []
+            for part in parts:
+                part_text = getattr(part, "text", None)
+                if part_text:
+                    return str(part_text).strip()
+
+        raise RuntimeError("Resposta do Gemini sem texto")
+
     @staticmethod
     def _extract_json(response_text: str) -> Dict:
         try:
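
Taken together, `_call_gemini` and `_build_generation_config` wrap the call path sketched below. A standalone sketch of that usage (the model name and prompt are placeholders; `generate_content`, `GenerateContentConfig`, and `response.text` are the library's public API):

    from google import genai
    from google.genai import types as genai_types

    client = genai.Client()  # API key from the environment, as in __init__ above
    response = client.models.generate_content(
        model="gemini-2.5-flash",  # placeholder; the pipeline reads this from Settings
        contents="Liste os melhores trechos deste episodio.",
        config=genai_types.GenerateContentConfig(temperature=0.4, top_p=0.9, top_k=40),
    )
    print(response.text)  # may be None; _extract_response_text walks candidates instead
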
@@ -160,10 +185,6 @@ class OpenRouterCopywriter:
         response.raise_for_status()
         data = response.json()
         choices = data.get("choices") or []
-        print("Data:")
-        print(data)
-        print("Choices:")
-        print(choices)
 
         if not choices:
             raise RuntimeError("OpenRouter nao retornou escolhas")

@@ -38,7 +38,7 @@ class MediaPreparer:
         existing_children = list(workspace_dir.iterdir())
         if existing_children:
             logger.info("Limpando workspace existente para %s", sanitized_name)
-            remove_paths(existing_children)
+            # remove_paths(existing_children)
 
         destination_name = f"{sanitized_name}{source_path.suffix.lower()}"
         working_video_path = workspace_dir / destination_name
@@ -46,9 +46,9 @@ class MediaPreparer:
         logger.info("Cópia do vídeo criada em %s", working_video_path)
 
         output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name)
-        existing_outputs = list(output_dir.iterdir())
-        if existing_outputs:
-            remove_paths(existing_outputs)
+        # existing_outputs = list(output_dir.iterdir())
+        # if existing_outputs:
+        #     remove_paths(existing_outputs)
 
         audio_path = workspace_dir / "audio.wav"
         extract_audio_to_wav(working_video_path, audio_path)

@@ -15,7 +15,6 @@ MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]
 
 class RabbitMQWorker:
     def __init__(self, settings: Settings) -> None:
-        print(settings)
         self.settings = settings
         self._params = pika.ConnectionParameters(
             host=settings.rabbitmq.host,
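
Dropping `print(settings)` avoids dumping the full settings object, including RABBITMQ_PASS, to stdout on every start. If that visibility is still wanted, a guarded debug log is the usual replacement; a sketch, with the port field assumed from the RABBITMQ_PORT variable in the compose file above:

    import logging

    logger = logging.getLogger(__name__)

    def log_rabbitmq_target(settings) -> None:
        # Log only non-secret fields, and only when debug logging is enabled.
        # settings.rabbitmq.port is an assumption mirroring RABBITMQ_PORT.
        logger.debug("RabbitMQ host=%s port=%s", settings.rabbitmq.host, settings.rabbitmq.port)
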
@@ -101,7 +101,15 @@ class VideoPipeline:
         if not context.transcription:
             raise RuntimeError("Transcricao nao disponivel")
 
-        highlights_raw = self.highlighter.generate_highlights(context.transcription)
+        try:
+            highlights_raw = self.highlighter.generate_highlights(context.transcription)
+        except Exception:
+            logger.exception(
+                "Falha ao gerar destaques com Gemini; aplicando fallback padrao."
+            )
+            context.highlight_windows = [self._build_fallback_highlight(context)]
+            return
+
         windows: List[HighlightWindow] = []
 
         for item in highlights_raw:
@@ -120,18 +128,7 @@ class VideoPipeline:
             windows.append(HighlightWindow(start=start, end=end, summary=summary))
 
         if not windows:
-            last_end = (
-                context.transcription.segments[-1].end
-                if context.transcription.segments
-                else 0
-            )
-            windows.append(
-                HighlightWindow(
-                    start=0.0,
-                    end=max(last_end, 10.0),
-                    summary="Sem destaque identificado; fallback automatico.",
-                )
-            )
+            windows.append(self._build_fallback_highlight(context))
 
         context.highlight_windows = windows
 
@@ -148,6 +145,20 @@ class VideoPipeline:
         for window, title in zip(context.highlight_windows, titles):
             window.title = title.strip()
 
+    def _build_fallback_highlight(self, context: PipelineContext) -> HighlightWindow:
+        if not context.transcription:
+            raise RuntimeError("Transcricao nao disponivel para criar fallback")
+
+        last_end = (
+            context.transcription.segments[-1].end
+            if context.transcription.segments
+            else 0.0
+        )
+        return HighlightWindow(
+            start=0.0,
+            end=max(last_end, 10.0),
+            summary="Sem destaque identificado; fallback automatico.",
+        )
+
     def _render_clips(self, context: PipelineContext) -> None:
         if not context.workspace or not context.highlight_windows or not context.transcription:
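
The extracted helper keeps the previous inline behavior: the fallback window runs from 0.0 to the end of the last transcribed segment, but never less than 10 seconds. A small worked example of that bound (times are illustrative):

    def fallback_end(last_segment_end: float) -> float:
        # Mirrors max(last_end, 10.0) above: never emit a window shorter than 10 s.
        return max(last_segment_end, 10.0)

    assert fallback_end(42.7) == 42.7  # transcription ends at 42.7 s -> window 0.0-42.7
    assert fallback_end(3.2) == 10.0   # very short or empty audio -> minimum 10 s window
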
@@ -267,6 +267,7 @@ class VideoRenderer:
                 color=self.settings.rendering.base_color,
                 method="caption",
                 size=(frame_w - 160, top_h - 40),
+                align="center",
             )
             .with_duration(duration)
         )
@@ -279,8 +280,18 @@ class VideoRenderer:
 
         caption_clips = []
         caption_resources: List[ImageClip] = []
-        margin = 20
-        caption_y = max(0, video_y - self.captions.canvas_height - margin)
+        caption_area_top = frame_h - bottom_h
+        caption_area_height = bottom_h
+        caption_margin = 20
+        raw_caption_y = caption_area_top + (caption_area_height - self.captions.canvas_height) // 2
+        min_caption_y = caption_area_top + caption_margin
+        max_caption_y = (
+            caption_area_top + caption_area_height - self.captions.canvas_height - caption_margin
+        )
+        if max_caption_y < min_caption_y:
+            caption_y = min_caption_y
+        else:
+            caption_y = min(max(raw_caption_y, min_caption_y), max_caption_y)
 
         for clip_set in caption_sets:
             base_positioned = clip_set.base.with_position(("center", caption_y))
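
The replacement logic centers the caption canvas inside the bottom panel, then clamps it so it stays at least `caption_margin` from the panel edges, pinning to the top margin when the canvas is too tall to fit. A self-contained sketch of the same arithmetic, with illustrative dimensions:

    def clamp_caption_y(frame_h: int, bottom_h: int, canvas_h: int, margin: int = 20) -> int:
        area_top = frame_h - bottom_h                   # top of the bottom panel
        raw_y = area_top + (bottom_h - canvas_h) // 2   # vertically centered in the panel
        min_y = area_top + margin
        max_y = area_top + bottom_h - canvas_h - margin
        if max_y < min_y:  # canvas taller than the panel minus both margins
            return min_y
        return min(max(raw_y, min_y), max_y)

    assert clamp_caption_y(1920, 400, 120) == 1660  # fits: stays centered
    assert clamp_caption_y(1920, 400, 390) == 1540  # too tall: pinned to the top margin
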
@@ -300,6 +311,7 @@ class VideoRenderer:
                 font_size=self.settings.rendering.subtitle_font_size,
                 color=self.settings.rendering.base_color,
                 method="caption",
+                align="center",
                 size=(frame_w - 160, max(40, self.captions.canvas_height)),
             )
             .with_duration(duration)
@@ -310,6 +322,9 @@ class VideoRenderer:
             [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
             size=(frame_w, frame_h),
         )
+        video_audio = video_clip.audio or resized_clip.audio or subclip.audio
+        if video_audio is not None:
+            composite = composite.set_audio(video_audio)
 
         output_path = output_dir / f"clip_{index:02d}.mp4"
         composite.write_videofile(
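
The new lines reattach audio that CompositeVideoClip can otherwise drop, falling back from the placed clip to the resized clip to the raw subclip. One caveat worth verifying: `with_duration`/`with_position` elsewhere in this file are MoviePy 2.x names, while `set_audio` is the 1.x spelling (2.x renamed it `with_audio`). A minimal sketch of the same fallback chain, assuming MoviePy 2.x and a hypothetical input path:

    from moviepy import CompositeVideoClip, VideoFileClip

    clip = VideoFileClip("input.mp4")  # hypothetical input
    composite = CompositeVideoClip([clip], size=clip.size)

    # First source with audio wins; the pipeline tries three sources in order.
    audio = clip.audio
    if audio is not None:
        composite = composite.with_audio(audio)  # 2.x name; 1.x uses set_audio
    composite.write_videofile("out.mp4", codec="libx264", audio_codec="aac")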