#v2 - Inicia testes da v2

- Adiciona rastreamento de objetos
- Facial detection
- Legenda interativa
- Cortes mais precisos
- Refinamento do Prompt
This commit is contained in:
LeoMortari
2025-11-12 11:38:09 -03:00
parent 87c6a5e27c
commit c5d3e83a5f
15 changed files with 1739 additions and 313 deletions

View File

@@ -2,11 +2,11 @@ from __future__ import annotations
import json
import logging
import time
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Dict, List
from google import genai
from google.genai import types as genai_types
import requests
from video_render.config import BASE_DIR, Settings
@@ -14,27 +14,24 @@ from video_render.transcription import TranscriptionResult
logger = logging.getLogger(__name__)
OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"
OPENROUTER_ENDPOINT = os.environ.get("OPENROUTER_API_URL", "https://openrouter.ai/api/v1/chat/completions")
class GeminiHighlighter:
class OpenRouterCopywriter:
def __init__(self, settings: Settings) -> None:
if not settings.gemini.api_key:
raise RuntimeError("GEMINI_API_KEY nao foi definido")
prompt_path = Path(settings.gemini.prompt_path)
if not settings.openrouter.api_key:
raise RuntimeError("OPENROUTER_API_KEY nao foi definido")
self.settings = settings
prompt_path = Path(settings.openrouter.prompt_path)
if not prompt_path.is_absolute():
prompt_path = BASE_DIR / prompt_path
if not prompt_path.exists():
raise FileNotFoundError(f"Prompt do Gemini nao encontrado: {prompt_path}")
self.prompt_template = prompt_path.read_text(encoding="utf-8")
self.settings = settings
self.client = genai.Client()
raise FileNotFoundError(f"Prompt nao encontrado: {prompt_path}")
self.highlights_prompt_template = prompt_path.read_text(encoding="utf-8")
def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
"""Generate video highlights using OpenRouter GPT-OSS with retry logic."""
payload = {
"transcript": transcription.full_text,
"segments": [
@@ -47,93 +44,139 @@ class GeminiHighlighter:
],
}
try:
response = self._call_gemini(payload)
except Exception as exc:
logger.error("Gemini API request falhou: %s", exc)
raise RuntimeError("Gemini API request falhou") from exc
raw_text = self._extract_response_text(response)
parsed = self._extract_json(raw_text)
highlights = parsed.get("highlights")
if not isinstance(highlights, list):
raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente")
return highlights
def _call_gemini(self, payload: Dict[str, Any]) -> Any:
contents = [
{
"role": "user",
"parts": [
{"text": self.prompt_template},
{"text": json.dumps(payload, ensure_ascii=False)},
],
}
]
request_kwargs: Dict[str, Any] = {
"model": self.settings.gemini.model,
"contents": contents,
body = {
"model": self.settings.openrouter.model,
"temperature": self.settings.openrouter.temperature,
"messages": [
{"role": "system", "content": self.highlights_prompt_template},
{
"role": "user",
"content": json.dumps(payload, ensure_ascii=False),
},
],
}
config = self._build_generation_config()
if config is not None:
request_kwargs["config"] = config
headers = {
"Authorization": f"Bearer {self.settings.openrouter.api_key}",
"Content-Type": "application/json",
"X-Title": "Video Render - Highlights Detection"
}
return self.client.models.generate_content(**request_kwargs)
logger.info(f"Calling OpenRouter with model: {self.settings.openrouter.model}")
logger.debug(f"Request payload keys: transcript_length={len(payload['transcript'])}, segments_count={len(payload['segments'])}")
def _build_generation_config(self) -> Optional[genai_types.GenerateContentConfig]:
config_kwargs: Dict[str, Any] = {}
if self.settings.gemini.temperature is not None:
config_kwargs["temperature"] = self.settings.gemini.temperature
if self.settings.gemini.top_p is not None:
config_kwargs["top_p"] = self.settings.gemini.top_p
if self.settings.gemini.top_k is not None:
config_kwargs["top_k"] = self.settings.gemini.top_k
# Retry configuration for rate limits (especially free tier)
max_retries = 5
base_delay = 5 # Start with 5s delay
if not config_kwargs:
return None
for attempt in range(max_retries):
try:
response = requests.post(
url=OPENROUTER_ENDPOINT,
data=json.dumps(body),
headers=headers,
timeout=120,
)
response.raise_for_status()
data = response.json()
break
return genai_types.GenerateContentConfig(**config_kwargs)
except requests.exceptions.HTTPError as exc:
if exc.response.status_code == 429:
if attempt < max_retries - 1:
# Exponential backoff: 5s, 10s, 20s, 40s (the final attempt raises instead of waiting)
delay = base_delay * (2 ** attempt)
logger.warning(f"Rate limit atingido (429). Aguardando {delay}s antes de tentar novamente (tentativa {attempt + 1}/{max_retries})")
time.sleep(delay)
continue
else:
logger.error("Rate limit atingido apos todas as tentativas")
logger.error("Solucao: Use um modelo pago ou adicione creditos na OpenRouter")
raise RuntimeError("OpenRouter rate limit excedido") from exc
else:
logger.error(f"OpenRouter API request falhou com status {exc.response.status_code}: {exc}")
raise RuntimeError("OpenRouter API request falhou") from exc
@staticmethod
def _extract_response_text(response: Any) -> str:
text = getattr(response, "text", None)
if text:
return str(text).strip()
except Exception as exc:
logger.error("OpenRouter API request falhou: %s", exc)
raise RuntimeError("OpenRouter API request falhou") from exc
candidates = getattr(response, "candidates", None) or []
for candidate in candidates:
content = getattr(candidate, "content", None)
if not content:
# Debug: log response structure
logger.info(f"OpenRouter response keys: {list(data.keys())}")
if "error" in data:
logger.error(f"OpenRouter API error: {data.get('error')}")
raise RuntimeError(f"OpenRouter API error: {data.get('error')}")
choices = data.get("choices") or []
if not choices:
logger.error(f"OpenRouter response completa: {json.dumps(data, indent=2)}")
raise RuntimeError("OpenRouter nao retornou escolhas")
message = choices[0].get("message", {}).get("content")
if not message:
raise RuntimeError("Resposta do OpenRouter sem conteudo")
parsed = self._extract_json(message)
highlights = parsed.get("highlights")
if not isinstance(highlights, list):
raise ValueError("Resposta do OpenRouter invalida: campo 'highlights' ausente")
valid_highlights = []
for highlight in highlights:
try:
start = float(highlight.get("start", 0))
end = float(highlight.get("end", 0))
summary = str(highlight.get("summary", "")).strip()
if start < 0 or end < 0:
logger.warning(f"Highlight ignorado: timestamps negativos (start={start}, end={end})")
continue
if end <= start:
logger.warning(f"Highlight ignorado: end <= start (start={start}, end={end})")
continue
duration = end - start
if duration < 45:
logger.warning(f"Highlight ignorado: muito curto ({duration}s, minimo 45s)")
continue
if duration > 120:
logger.warning(f"Highlight ignorado: muito longo ({duration}s, maximo 120s)")
continue
if not summary:
logger.warning(f"Highlight ignorado: summary vazio")
continue
valid_highlights.append({
"start": start,
"end": end,
"summary": summary
})
except (TypeError, ValueError) as e:
logger.warning(f"Highlight invalido ignorado: {highlight} - {e}")
continue
parts = getattr(content, "parts", None) or []
for part in parts:
part_text = getattr(part, "text", None)
if part_text:
return str(part_text).strip()
raise RuntimeError("Resposta do Gemini sem texto")
if not valid_highlights:
logger.warning("Nenhum highlight valido retornado pelo OpenRouter")
total_duration = 75.0
if transcription.segments:
total_duration = max(seg.end for seg in transcription.segments)
@staticmethod
def _extract_json(response_text: str) -> Dict:
    """Parse a model reply into a JSON object.

    The text is first parsed as-is. If that fails, the span from the first
    ``{`` to the last ``}`` is parsed instead, so a JSON object surrounded
    by extra text is still accepted.

    Args:
        response_text: Raw text expected to contain a JSON object.

    Returns:
        The decoded JSON object.

    Raises:
        json.JSONDecodeError: If no parseable object is found, or if the
            decoded value is not a JSON object.
    """
    try:
        parsed = json.loads(response_text)
    except json.JSONDecodeError:
        start = response_text.find("{")
        end = response_text.rfind("}")
        # `end < start` also covers a missing "}" (-1) and a "}" that only
        # appears before the first "{"; re-raise the original error then.
        if start == -1 or end < start:
            raise
        parsed = json.loads(response_text[start : end + 1])

    # Callers use `.get(...)` on the result, so reject arrays/scalars here
    # with the same exception family instead of failing later.
    if not isinstance(parsed, dict):
        raise json.JSONDecodeError(
            "Expected a JSON object at the top level", response_text, 0
        )
    return parsed
fallback_end = min(75.0, total_duration)
if fallback_end < 60.0:
fallback_end = min(60.0, total_duration)
return [{
"start": 0.0,
"end": fallback_end,
"summary": "Trecho inicial do video (fallback automatico)"
}]
class OpenRouterCopywriter:
def __init__(self, settings: Settings) -> None:
if not settings.openrouter.api_key:
raise RuntimeError("OPENROUTER_API_KEY nao foi definido")
self.settings = settings
logger.info(f"OpenRouter retornou {len(valid_highlights)} highlights validos")
return valid_highlights
def generate_titles(self, highlights: List[Dict]) -> List[str]:
if not highlights: