Create new components
video_render/__init__.py (4 lines, Normal file)
@@ -0,0 +1,4 @@
"""
Core package for the revamped video rendering pipeline.
"""
BIN video_render/__pycache__/__init__.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/config.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/ffmpeg.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/llm.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/logging_utils.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/media.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/messaging.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/pipeline.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/rendering.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/transcription.cpython-39.pyc (Normal file, binary file not shown)
BIN video_render/__pycache__/utils.cpython-39.pyc (Normal file, binary file not shown)
video_render/config.py (103 lines, Normal file)
@@ -0,0 +1,103 @@
from __future__ import annotations

import os
from dataclasses import dataclass
from pathlib import Path


BASE_DIR = Path(__file__).resolve().parent.parent
VIDEOS_ROOT = BASE_DIR / "videos"
OUTPUTS_ROOT = BASE_DIR / "outputs"
TEMP_ROOT = BASE_DIR / "temp"


@dataclass(frozen=True)
class RabbitMQSettings:
    host: str = os.environ.get("RABBITMQ_HOST", "rabbitmq")
    port: int = int(os.environ.get("RABBITMQ_PORT", 5672))
    user: str = os.environ.get("RABBITMQ_USER", "admin")
    password: str = os.environ.get("RABBITMQ_PASS", "")
    consume_queue: str = os.environ.get("RABBITMQ_QUEUE", "to-render")
    publish_queue: str = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload")
    prefetch_count: int = int(os.environ.get("RABBITMQ_PREFETCH", 1))
    heartbeat: int = int(os.environ.get("RABBITMQ_HEARTBEAT", 60))
    blocked_timeout: int = int(os.environ.get("RABBITMQ_BLOCKED_TIMEOUT", 300))


@dataclass(frozen=True)
class GeminiSettings:
    api_key: str = os.environ.get("GEMINI_API_KEY", "")
    model: str = os.environ.get("GEMINI_MODEL", "gemini-1.5-pro-latest")
    safety_settings: str | None = os.environ.get("GEMINI_SAFETY_SETTINGS")
    temperature: float = float(os.environ.get("GEMINI_TEMPERATURE", 0.2))
    top_k: int | None = (
        int(os.environ["GEMINI_TOP_K"]) if os.environ.get("GEMINI_TOP_K") else None
    )
    top_p: float | None = (
        float(os.environ["GEMINI_TOP_P"]) if os.environ.get("GEMINI_TOP_P") else None
    )
    prompt_path: str = os.environ.get("GEMINI_PROMPT_PATH", "prompts/generate.txt")


@dataclass(frozen=True)
class OpenRouterSettings:
    api_key: str = os.environ.get("OPENROUTER_API_KEY", "")
    model: str = os.environ.get(
        "OPENROUTER_MODEL", "anthropic/claude-3-haiku:beta"
    )
    temperature: float = float(os.environ.get("OPENROUTER_TEMPERATURE", 0.6))
    max_output_tokens: int = int(os.environ.get("OPENROUTER_MAX_OUTPUT_TOKENS", 256))


@dataclass(frozen=True)
class WhisperSettings:
    model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "medium")
    device: str | None = os.environ.get("FASTER_WHISPER_DEVICE")
    compute_type: str | None = os.environ.get("FASTER_WHISPER_COMPUTE_TYPE")
    download_root: Path = Path(
        os.environ.get("FASTER_WHISPER_DOWNLOAD_ROOT", str(BASE_DIR / ".whisper"))
    )


@dataclass(frozen=True)
class RenderingSettings:
    frame_width: int = int(os.environ.get("RENDER_WIDTH", 1080))
    frame_height: int = int(os.environ.get("RENDER_HEIGHT", 1920))
    fps: int = int(os.environ.get("RENDER_FPS", 30))
    video_codec: str = os.environ.get("RENDER_CODEC", "libx264")
    audio_codec: str = os.environ.get("RENDER_AUDIO_CODEC", "aac")
    bitrate: str = os.environ.get("RENDER_BITRATE", "5000k")
    preset: str = os.environ.get("RENDER_PRESET", "faster")
    highlight_color: str = os.environ.get("SUBTITLE_HIGHLIGHT_COLOR", "#FFD200")
    base_color: str = os.environ.get("SUBTITLE_BASE_COLOR", "#FFFFFF")
    font_path: Path = Path(os.environ.get("RENDER_FONT_PATH", "./Montserrat.ttf"))
    title_font_size: int = int(os.environ.get("RENDER_TITLE_FONT_SIZE", 110))
    subtitle_font_size: int = int(os.environ.get("RENDER_SUBTITLE_FONT_SIZE", 64))
    caption_min_words: int = int(os.environ.get("CAPTION_MIN_WORDS", 3))
    caption_max_words: int = int(os.environ.get("CAPTION_MAX_WORDS", 4))


@dataclass(frozen=True)
class Settings:
    rabbitmq: RabbitMQSettings = RabbitMQSettings()
    gemini: GeminiSettings = GeminiSettings()
    openrouter: OpenRouterSettings = OpenRouterSettings()
    whisper: WhisperSettings = WhisperSettings()
    rendering: RenderingSettings = RenderingSettings()

    videos_dir: Path = VIDEOS_ROOT
    outputs_dir: Path = OUTPUTS_ROOT
    temp_dir: Path = TEMP_ROOT


def load_settings() -> Settings:
    settings = Settings()

    if not settings.rabbitmq.password:
        raise RuntimeError("RABBITMQ_PASS must be provided")

    settings.videos_dir.mkdir(parents=True, exist_ok=True)
    settings.outputs_dir.mkdir(parents=True, exist_ok=True)
    settings.temp_dir.mkdir(parents=True, exist_ok=True)

    return settings
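Note that every field default in config.py calls os.environ.get at class-definition time, so overrides must be in place before the module is first imported. A minimal usage sketch (the override values are illustrative):

import os

# Defaults are captured when video_render.config is imported, so set
# overrides first. RABBITMQ_PASS is mandatory: load_settings() raises
# RuntimeError when it is empty.
os.environ.setdefault("RABBITMQ_PASS", "s3cret")
os.environ.setdefault("RENDER_FPS", "24")

from video_render.config import load_settings

settings = load_settings()          # also creates videos/, outputs/, temp/
print(settings.rabbitmq.host)       # "rabbitmq" unless RABBITMQ_HOST is set
print(settings.rendering.fps)       # 24, parsed from the override above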
video_render/ffmpeg.py (54 lines, Normal file)
@@ -0,0 +1,54 @@
from __future__ import annotations

import logging
import shlex
import subprocess
from pathlib import Path
from typing import Sequence

logger = logging.getLogger(__name__)


def _run_ffmpeg(args: Sequence[str]) -> None:
    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", *args]
    logger.debug("Executando ffmpeg: %s", " ".join(shlex.quote(part) for part in cmd))
    completed = subprocess.run(cmd, check=False)
    if completed.returncode != 0:
        raise RuntimeError(f"ffmpeg falhou com exit code {completed.returncode}")


def extract_audio_to_wav(input_video: Path, output_wav: Path) -> Path:
    _run_ffmpeg(
        [
            "-y",
            "-i",
            str(input_video),
            "-ac",
            "1",
            "-ar",
            "16000",
            "-vn",
            str(output_wav),
        ]
    )
    return output_wav


def create_video_segment(input_video: Path, start: float, end: float, output_path: Path) -> Path:
    duration = max(0.01, end - start)
    _run_ffmpeg(
        [
            "-y",
            "-i",
            str(input_video),
            "-ss",
            f"{start:.3f}",
            "-t",
            f"{duration:.3f}",
            "-c",
            "copy",
            str(output_path),
        ]
    )
    return output_path
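Both helpers shell out to a system ffmpeg binary, so it must be on PATH. A sketch of how the rest of the pipeline would call them (the file paths are illustrative):

from pathlib import Path

from video_render.ffmpeg import create_video_segment, extract_audio_to_wav

# Mono 16 kHz WAV, the input format faster-whisper works best with.
extract_audio_to_wav(Path("videos/talk.mp4"), Path("temp/talk.wav"))

# Stream copy (-c copy) avoids re-encoding, but since no decoding happens
# the cut can only land on a keyframe boundary, so start/end are approximate.
create_video_segment(Path("videos/talk.mp4"), start=12.0, end=42.5,
                     output_path=Path("temp/segment.mp4"))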
video_render/llm.py (187 lines, Normal file)
@@ -0,0 +1,187 @@
from __future__ import annotations

import json
import logging
from pathlib import Path
from typing import Dict, List

import requests

from .config import BASE_DIR, Settings
from .transcription import TranscriptionResult

logger = logging.getLogger(__name__)

GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions"


class GeminiHighlighter:
    def __init__(self, settings: Settings) -> None:
        if not settings.gemini.api_key:
            raise RuntimeError("GEMINI_API_KEY nao foi definido")

        prompt_path = Path(settings.gemini.prompt_path)

        if not prompt_path.is_absolute():
            prompt_path = BASE_DIR / prompt_path

        if not prompt_path.exists():
            raise FileNotFoundError(f"Prompt do Gemini nao encontrado: {prompt_path}")

        self.prompt_template = prompt_path.read_text(encoding="utf-8")
        self.settings = settings

    def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]:
        payload = {
            "transcript": transcription.full_text,
            "segments": [
                {
                    "start": segment.start,
                    "end": segment.end,
                    "text": segment.text,
                }
                for segment in transcription.segments
            ],
        }

        body = {
            "contents": [
                {
                    "role": "user",
                    "parts": [
                        {"text": self.prompt_template},
                        {"text": json.dumps(payload, ensure_ascii=False)},
                    ],
                }
            ]
        }

        if self.settings.gemini.temperature is not None:
            body["generationConfig"] = {
                "temperature": self.settings.gemini.temperature,
            }
        if self.settings.gemini.top_p is not None:
            body["generationConfig"]["topP"] = self.settings.gemini.top_p
        if self.settings.gemini.top_k is not None:
            body["generationConfig"]["topK"] = self.settings.gemini.top_k

        url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model)
        params = {"key": self.settings.gemini.api_key}

        response = requests.post(url, params=params, json=body, timeout=120)
        response.raise_for_status()
        data = response.json()

        candidates = data.get("candidates") or []
        if not candidates:
            raise RuntimeError("Gemini nao retornou candidatos")

        text_parts = candidates[0].get("content", {}).get("parts", [])
        if not text_parts:
            raise RuntimeError("Resposta do Gemini sem conteudo")

        raw_text = text_parts[0].get("text")
        if not raw_text:
            raise RuntimeError("Resposta do Gemini sem texto")

        parsed = self._extract_json(raw_text)
        highlights = parsed.get("highlights")
        if not isinstance(highlights, list):
            raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente")
        return highlights

    @staticmethod
    def _extract_json(response_text: str) -> Dict:
        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            start = response_text.find("{")
            end = response_text.rfind("}")
            if start == -1 or end == -1:
                raise
            subset = response_text[start : end + 1]
            return json.loads(subset)


class OpenRouterCopywriter:
    def __init__(self, settings: Settings) -> None:
        if not settings.openrouter.api_key:
            raise RuntimeError("OPENROUTER_API_KEY nao foi definido")
        self.settings = settings

    def generate_titles(self, highlights: List[Dict]) -> List[str]:
        if not highlights:
            return []

        prompt = (
            "Voce e um copywriter especializado em titulos curtos e virais para reels.\n"
            "Recebera uma lista de trechos destacados de um video com resumo e tempo.\n"
            "Produza um titulo envolvente (ate 60 caracteres) para cada item.\n"
            "Responda apenas em JSON com a seguinte estrutura:\n"
            '{"titles": ["titulo 1", "titulo 2"]}\n'
            "Titulos devem ser em portugues, usar verbos fortes e refletir o resumo."
        )

        user_payload = {
            "highlights": [
                {
                    "start": item.get("start"),
                    "end": item.get("end"),
                    "summary": item.get("summary"),
                }
                for item in highlights
            ]
        }

        body = {
            "model": self.settings.openrouter.model,
            "temperature": self.settings.openrouter.temperature,
            "max_tokens": self.settings.openrouter.max_output_tokens,
            "messages": [
                {"role": "system", "content": prompt},
                {
                    "role": "user",
                    "content": json.dumps(user_payload, ensure_ascii=False),
                },
            ],
        }

        headers = {
            "Authorization": f"Bearer {self.settings.openrouter.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://localhost",
            "X-Title": "video-render-pipeline",
        }

        response = requests.post(
            OPENROUTER_ENDPOINT, json=body, headers=headers, timeout=120
        )
        response.raise_for_status()
        data = response.json()

        choices = data.get("choices") or []
        if not choices:
            raise RuntimeError("OpenRouter nao retornou escolhas")

        message = choices[0].get("message", {}).get("content")
        if not message:
            raise RuntimeError("Resposta do OpenRouter sem conteudo")

        parsed = self._extract_json(message)
        titles = parsed.get("titles")
        if not isinstance(titles, list):
            raise ValueError("Resposta do OpenRouter invalida: campo 'titles'")
        return [str(title) for title in titles]

    @staticmethod
    def _extract_json(response_text: str) -> Dict:
        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            start = response_text.find("{")
            end = response_text.rfind("}")
            if start == -1 or end == -1:
                raise
            subset = response_text[start : end + 1]
            return json.loads(subset)
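Both clients share the same _extract_json fallback: when the model wraps its JSON in prose, the slice from the first "{" to the last "}" is parsed instead. A small illustration (no API key needed, since _extract_json is a staticmethod; the response text is made up):

from video_render.llm import GeminiHighlighter

raw = 'Sure! Here are the highlights:\n{"highlights": [{"start": 3.2, "end": 21.0, "summary": "hook"}]}'
parsed = GeminiHighlighter._extract_json(raw)
print(parsed["highlights"][0]["summary"])  # "hook"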
video_render/logging_utils.py (13 lines, Normal file)
@@ -0,0 +1,13 @@
from __future__ import annotations

import logging
import os


def setup_logging() -> None:
    log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )
video_render/media.py (64 lines, Normal file)
@@ -0,0 +1,64 @@
from __future__ import annotations

import logging
import shutil
from dataclasses import dataclass
from pathlib import Path

from .config import Settings
from .ffmpeg import extract_audio_to_wav
from .utils import ensure_workspace, remove_paths, sanitize_filename

logger = logging.getLogger(__name__)


@dataclass
class VideoWorkspace:
    original_filename: str
    sanitized_name: str
    workspace_dir: Path
    output_dir: Path
    source_path: Path
    working_video_path: Path
    audio_path: Path


class MediaPreparer:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings

    def prepare(self, filename: str) -> VideoWorkspace:
        source_path = self.settings.videos_dir / filename
        if not source_path.exists():
            raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {source_path}")

        sanitized_name = sanitize_filename(Path(filename).stem)
        workspace_dir = ensure_workspace(self.settings.videos_dir, sanitized_name)

        existing_children = list(workspace_dir.iterdir())
        if existing_children:
            logger.info("Limpando workspace existente para %s", sanitized_name)
            remove_paths(existing_children)

        destination_name = f"{sanitized_name}{source_path.suffix.lower()}"
        working_video_path = workspace_dir / destination_name
        shutil.copy2(source_path, working_video_path)
        logger.info("Cópia do vídeo criada em %s", working_video_path)

        output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name)
        existing_outputs = list(output_dir.iterdir())
        if existing_outputs:
            remove_paths(existing_outputs)

        audio_path = workspace_dir / "audio.wav"
        extract_audio_to_wav(working_video_path, audio_path)

        return VideoWorkspace(
            original_filename=filename,
            sanitized_name=sanitized_name,
            workspace_dir=workspace_dir,
            output_dir=output_dir,
            source_path=source_path,
            working_video_path=working_video_path,
            audio_path=audio_path,
        )
video_render/messaging.py (85 lines, Normal file)
@@ -0,0 +1,85 @@
from __future__ import annotations

import json
import logging
from typing import Any, Callable, Dict

import pika

from .config import Settings

logger = logging.getLogger(__name__)

MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]


class RabbitMQWorker:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self._params = pika.ConnectionParameters(
            host=settings.rabbitmq.host,
            port=settings.rabbitmq.port,
            credentials=pika.PlainCredentials(
                settings.rabbitmq.user, settings.rabbitmq.password
            ),
            heartbeat=settings.rabbitmq.heartbeat,
            blocked_connection_timeout=settings.rabbitmq.blocked_timeout,
        )

    def consume_forever(self, handler: MessageHandler) -> None:
        while True:
            try:
                with pika.BlockingConnection(self._params) as connection:
                    channel = connection.channel()
                    channel.queue_declare(queue=self.settings.rabbitmq.consume_queue, durable=True)
                    channel.queue_declare(queue=self.settings.rabbitmq.publish_queue, durable=True)
                    channel.basic_qos(prefetch_count=self.settings.rabbitmq.prefetch_count)

                    def _on_message(ch: pika.adapters.blocking_connection.BlockingChannel, method, properties, body):
                        try:
                            message = json.loads(body)
                        except json.JSONDecodeError:
                            logger.error("Mensagem inválida recebida: %s", body)
                            ch.basic_ack(delivery_tag=method.delivery_tag)
                            return

                        logger.info("Mensagem recebida: %s", message.get("filename", "<sem_nome>"))
                        try:
                            response = handler(message)
                        except Exception:
                            logger.exception("Erro não tratado durante o processamento")
                            response = {
                                "hasError": True,
                                "error": "Erro não tratado no pipeline",
                                "filename": message.get("filename"),
                                "videoId": message.get("videoId"),
                                "url": message.get("url"),
                                "processedFiles": [],
                            }

                        try:
                            payload = json.dumps(response)
                            ch.basic_publish(
                                exchange="",
                                routing_key=self.settings.rabbitmq.publish_queue,
                                body=payload,
                                properties=pika.BasicProperties(delivery_mode=2),
                            )
                            logger.info("Resposta publicada para '%s'", self.settings.rabbitmq.publish_queue)
                        except Exception:
                            logger.exception("Falha ao publicar a resposta na fila de upload")
                        finally:
                            ch.basic_ack(delivery_tag=method.delivery_tag)

                    channel.basic_consume(
                        queue=self.settings.rabbitmq.consume_queue,
                        on_message_callback=_on_message,
                        auto_ack=False,
                    )
                    logger.info("Consumidor iniciado. Aguardando mensagens...")
                    channel.start_consuming()
            except pika.exceptions.AMQPConnectionError:
                logger.exception("Conexão com RabbitMQ perdida. Tentando reconectar...")
            except KeyboardInterrupt:
                logger.info("Encerrando consumidor por interrupção do usuário.")
                break
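The worker acks each delivery exactly once (in the finally block) and always publishes a reply, so the upload queue sees one message per job. The JSON bodies it consumes look roughly like this (field names taken from _parse_job in pipeline.py below; the values are made up):

job = {
    "filename": "interview.mp4",                 # required; resolved under videos/
    "videoId": "abc123",                         # optional, echoed back in the reply
    "url": "https://example.com/interview.mp4",  # optional, echoed back
}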
video_render/pipeline.py (236 lines, Normal file)
@@ -0,0 +1,236 @@
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional

from .config import Settings
from .llm import GeminiHighlighter, OpenRouterCopywriter
from .media import MediaPreparer, VideoWorkspace
from .transcription import TranscriptionResult, TranscriptionService
from .utils import remove_paths, sanitize_filename
from .rendering import VideoRenderer

logger = logging.getLogger(__name__)


@dataclass
class JobMessage:
    filename: str
    url: Optional[str]
    video_id: Optional[str]
    extras: Dict[str, Any] = field(default_factory=dict)


@dataclass
class HighlightWindow:
    start: float
    end: float
    summary: str
    title: Optional[str] = None


@dataclass
class RenderedClip:
    path: Path
    start: float
    end: float
    title: str
    summary: str
    index: int


@dataclass
class PipelineContext:
    job: JobMessage
    workspace: Optional[VideoWorkspace] = None
    transcription: Optional[TranscriptionResult] = None
    highlight_windows: List[HighlightWindow] = field(default_factory=list)
    rendered_clips: List[RenderedClip] = field(default_factory=list)


class VideoPipeline:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self.media_preparer = MediaPreparer(settings)
        self.transcriber = TranscriptionService(settings)
        self.highlighter = GeminiHighlighter(settings)
        self.copywriter = OpenRouterCopywriter(settings)
        self.renderer = VideoRenderer(settings)

    def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        context = PipelineContext(job=self._parse_job(message))
        try:
            self._prepare_workspace(context)
            self._generate_transcription(context)
            self._determine_highlights(context)
            self._generate_titles(context)
            self._render_clips(context)
            return self._build_success_payload(context)
        except Exception as exc:
            logger.exception("Falha ao processar vídeo %s", context.job.filename)
            return self._handle_failure(context, exc)

    def _parse_job(self, message: Dict[str, Any]) -> JobMessage:
        filename = message.get("filename")
        if not filename:
            raise ValueError("Mensagem inválida: 'filename' é obrigatório")

        url = message.get("url")
        video_id = message.get("videoId") or message.get("video_id")
        extras = {
            key: value
            for key, value in message.items()
            if key not in {"filename", "url", "videoId", "video_id"}
        }
        return JobMessage(filename=filename, url=url, video_id=video_id, extras=extras)

    def _prepare_workspace(self, context: PipelineContext) -> None:
        context.workspace = self.media_preparer.prepare(context.job.filename)

    def _generate_transcription(self, context: PipelineContext) -> None:
        if not context.workspace:
            raise RuntimeError("Workspace não preparado")
        transcription = self.transcriber.transcribe(context.workspace.audio_path)
        TranscriptionService.persist(transcription, context.workspace.workspace_dir)
        context.transcription = transcription

    def _determine_highlights(self, context: PipelineContext) -> None:
        if not context.transcription:
            raise RuntimeError("Transcricao nao disponivel")

        highlights_raw = self.highlighter.generate_highlights(context.transcription)
        windows: List[HighlightWindow] = []

        for item in highlights_raw:
            try:
                start = float(item.get("start", 0))  # type: ignore[arg-type]
                end = float(item.get("end", start))  # type: ignore[arg-type]
            except (TypeError, ValueError):
                logger.warning("Highlight invalido ignorado: %s", item)
                continue

            summary = str(item.get("summary", "")).strip()
            if end <= start:
                logger.debug("Highlight com intervalo invalido ignorado: %s", item)
                continue

            windows.append(HighlightWindow(start=start, end=end, summary=summary))

        if not windows:
            last_end = (
                context.transcription.segments[-1].end
                if context.transcription.segments
                else 0
            )
            windows.append(
                HighlightWindow(
                    start=0.0,
                    end=max(last_end, 10.0),
                    summary="Sem destaque identificado; fallback automatico.",
                )
            )

        context.highlight_windows = windows

    def _generate_titles(self, context: PipelineContext) -> None:
        if not context.highlight_windows:
            return

        highlight_dicts = [
            {"start": window.start, "end": window.end, "summary": window.summary}
            for window in context.highlight_windows
        ]
        titles = self.copywriter.generate_titles(highlight_dicts)

        for window, title in zip(context.highlight_windows, titles):
            window.title = title.strip()

    def _render_clips(self, context: PipelineContext) -> None:
        if not context.workspace or not context.highlight_windows or not context.transcription:
            return

        titles = [
            window.title or window.summary for window in context.highlight_windows
        ]

        render_results = self.renderer.render(
            workspace_path=str(context.workspace.working_video_path),
            highlight_windows=context.highlight_windows,
            transcription=context.transcription,
            titles=titles,
            output_dir=context.workspace.output_dir,
        )

        context.rendered_clips = [
            RenderedClip(
                path=Path(path),
                start=start,
                end=end,
                title=title,
                summary=summary,
                index=index,
            )
            for path, start, end, title, summary, index in render_results
        ]

    def _build_success_payload(self, context: PipelineContext) -> Dict[str, Any]:
        return {
            "hasError": False,
            "videosProcessedQuantity": len(context.rendered_clips),
            "filename": context.job.filename,
            "videoId": context.job.video_id,
            "url": context.job.url,
            "workspaceFolder": context.workspace.sanitized_name if context.workspace else None,
            "outputDirectory": self._relative_path(context.workspace.output_dir) if context.workspace else None,
            "processedFiles": [
                {
                    "path": self._relative_path(clip.path),
                    "start": clip.start,
                    "end": clip.end,
                    "title": clip.title,
                    "summary": clip.summary,
                    "clipIndex": clip.index,
                }
                for clip in context.rendered_clips
            ],
        }

    def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]:
        logger.error("Erro no pipeline: %s", exc)
        cleanup_targets: List[Path] = []

        if context.workspace:
            cleanup_targets.append(context.workspace.workspace_dir)
            cleanup_targets.append(context.workspace.output_dir)
            original_path = context.workspace.source_path
            if original_path.exists():
                cleanup_targets.append(original_path)
        else:
            sanitized = sanitize_filename(Path(context.job.filename).stem)
            job_output_dir = self.settings.outputs_dir / sanitized
            if job_output_dir.exists():
                cleanup_targets.append(job_output_dir)
            original_path = self.settings.videos_dir / context.job.filename
            if original_path.exists():
                cleanup_targets.append(original_path)

        remove_paths(cleanup_targets)

        return {
            "hasError": True,
            "error": str(exc),
            "filename": context.job.filename,
            "videoId": context.job.video_id,
            "url": context.job.url,
            "processedFiles": [],
        }

    def _relative_path(self, path: Path) -> str:
        base = self.settings.videos_dir.parent
        try:
            return str(path.relative_to(base))
        except ValueError:
            return str(path)
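The commit ships no entrypoint, but wiring the pieces together would presumably look like the sketch below; the module and main() function are hypothetical, not part of this commit:

from video_render.config import load_settings
from video_render.logging_utils import setup_logging
from video_render.messaging import RabbitMQWorker
from video_render.pipeline import VideoPipeline


def main() -> None:
    setup_logging()
    settings = load_settings()
    pipeline = VideoPipeline(settings)
    worker = RabbitMQWorker(settings)
    # process_message matches the MessageHandler signature: dict in, dict out.
    worker.consume_forever(pipeline.process_message)


if __name__ == "__main__":
    main()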
video_render/rendering.py (406 lines, Normal file)
@@ -0,0 +1,406 @@
from __future__ import annotations

import logging
import re
from dataclasses import dataclass
from typing import Iterable, List, Sequence, Tuple

import numpy as np
from moviepy import (
    ColorClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    VideoFileClip,
)
from PIL import Image, ImageColor, ImageDraw, ImageFont

from .config import Settings
from .transcription import TranscriptionResult, WordTiming

logger = logging.getLogger(__name__)


def clamp_time(value: float, minimum: float = 0.0) -> float:
    return max(minimum, float(value))


@dataclass
class CaptionClipSet:
    base: ImageClip
    highlights: List[ImageClip]


class CaptionBuilder:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self.font_path = settings.rendering.font_path
        if not self.font_path.exists():
            raise FileNotFoundError(f"Fonte nao encontrada: {self.font_path}")

        self.font = ImageFont.truetype(
            str(self.font_path), settings.rendering.subtitle_font_size
        )
        self.base_color = ImageColor.getrgb(settings.rendering.base_color)
        self.highlight_color = ImageColor.getrgb(settings.rendering.highlight_color)
        self.canvas_width = settings.rendering.frame_width - 160
        self.canvas_height = int(settings.rendering.subtitle_font_size * 2.2)
        self.min_words = settings.rendering.caption_min_words
        self.max_words = settings.rendering.caption_max_words

        bbox = self.font.getbbox("Ay")
        self.text_height = bbox[3] - bbox[1]
        self.baseline = (self.canvas_height - self.text_height) // 2 - bbox[1]
        self.space_width = self.font.getbbox(" ")[2] - self.font.getbbox(" ")[0]

    def build(self, words: Sequence[WordTiming], clip_start: float) -> List[CaptionClipSet]:
        grouped = self._group_words(words)
        clip_sets: List[CaptionClipSet] = []

        for group in grouped:
            group_start = clamp_time(group[0].start, minimum=clip_start)
            group_end = clamp_time(group[-1].end, minimum=group_start + 0.05)
            duration = max(0.05, group_end - group_start)
            start_offset = group_start - clip_start

            base_image, highlight_images = self._render_group(group)

            base_clip = (
                ImageClip(np.array(base_image))
                .with_start(start_offset)
                .with_duration(duration)
            )

            highlight_clips: List[ImageClip] = []
            for word, image in zip(group, highlight_images):
                h_start = clamp_time(word.start, minimum=clip_start) - clip_start
                h_end = clamp_time(word.end, minimum=word.start + 0.02) - clip_start
                h_duration = max(0.05, h_end - h_start)
                highlight_clip = (
                    ImageClip(np.array(image))
                    .with_start(h_start)
                    .with_duration(h_duration)
                )
                highlight_clips.append(highlight_clip)

            clip_sets.append(CaptionClipSet(base=base_clip, highlights=highlight_clips))

        return clip_sets

    def _render_group(self, group: Sequence[WordTiming]) -> Tuple[Image.Image, List[Image.Image]]:
        texts = [self._clean_word(word.word) for word in group]

        widths = []
        for text in texts:
            bbox = self.font.getbbox(text)
            widths.append(bbox[2] - bbox[0])

        total_width = sum(widths)
        if len(widths) > 1:
            total_width += self.space_width * (len(widths) - 1)

        start_x = max(0, (self.canvas_width - total_width) // 2)

        base_image = Image.new("RGBA", (self.canvas_width, self.canvas_height), (0, 0, 0, 0))
        base_draw = ImageDraw.Draw(base_image)
        highlight_images: List[Image.Image] = []

        x = start_x
        for text, width in zip(texts, widths):
            base_draw.text((x, self.baseline), text, font=self.font, fill=self.base_color)

            highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0))
            highlight_draw = ImageDraw.Draw(highlight_image)
            highlight_draw.text(
                (x, self.baseline), text, font=self.font, fill=self.highlight_color
            )
            highlight_images.append(highlight_image)

            x += width + self.space_width

        return base_image, highlight_images

    def _group_words(self, words: Sequence[WordTiming]) -> List[List[WordTiming]]:
        if not words:
            return []

        grouped: List[List[WordTiming]] = []
        buffer: List[WordTiming] = []

        for word in words:
            buffer.append(word)
            if len(buffer) == self.max_words:
                grouped.append(buffer)
                buffer = []

        if buffer:
            if len(buffer) == 1 and grouped:
                grouped[-1].extend(buffer)
            else:
                grouped.append(buffer)

        # Rebalance groups to respect minimum size when possible
        for idx, group in enumerate(grouped[:-1]):
            if len(group) < self.min_words and len(grouped[idx + 1]) > self.min_words:
                deficit = self.min_words - len(group)
                transfer = grouped[idx + 1][:deficit]
                grouped[idx] = group + transfer
                grouped[idx + 1] = grouped[idx + 1][deficit:]

        grouped = [grp for grp in grouped if grp]
        return grouped

    @staticmethod
    def _clean_word(text: str) -> str:
        text = text.strip()
        text = re.sub(r"\s+", " ", text)
        return text or "..."


class VideoRenderer:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self.captions = CaptionBuilder(settings)

    def render(
        self,
        workspace_path: str,
        highlight_windows: Sequence,
        transcription: TranscriptionResult,
        titles: Sequence[str],
        output_dir,
    ) -> List[Tuple[str, float, float, str, str, int]]:
        results: List[Tuple[str, float, float, str, str, int]] = []

        with VideoFileClip(workspace_path) as base_clip:
            video_duration = base_clip.duration or 0
            for index, window in enumerate(highlight_windows, start=1):
                start = clamp_time(window.start)
                end = clamp_time(window.end)
                start = min(start, video_duration)
                end = min(end, video_duration)
                if end <= start:
                    logger.info("Janela ignorada por intervalo invalido: %s", window)
                    continue

                subclip = base_clip.subclipped(start, end)
                try:
                    rendered_path = self._render_single_clip(
                        subclip=subclip,
                        start=start,
                        end=end,
                        title=titles[index - 1] if index - 1 < len(titles) else window.summary,
                        summary=window.summary,
                        index=index,
                        transcription=transcription,
                        output_dir=output_dir,
                    )
                finally:
                    subclip.close()

                results.append(
                    (
                        rendered_path,
                        float(start),
                        float(end),
                        titles[index - 1] if index - 1 < len(titles) else window.summary,
                        window.summary,
                        index,
                    )
                )

        return results

    def _render_single_clip(
        self,
        subclip: VideoFileClip,
        start: float,
        end: float,
        title: str,
        summary: str,
        index: int,
        transcription: TranscriptionResult,
        output_dir,
    ) -> str:
        duration = end - start
        frame_w = self.settings.rendering.frame_width
        frame_h = self.settings.rendering.frame_height
        top_h = int(frame_h * 0.18)
        bottom_h = int(frame_h * 0.20)
        video_area_h = frame_h - top_h - bottom_h

        scale_factor = min(
            frame_w / subclip.w,
            video_area_h / subclip.h,
        )
        resized_clip = subclip.resized(scale_factor)
        video_y = top_h + (video_area_h - resized_clip.h) // 2

        video_clip = resized_clip.with_position(
            ((frame_w - resized_clip.w) // 2, video_y)
        )

        background = ColorClip(size=(frame_w, frame_h), color=(0, 0, 0)).with_duration(duration)
        top_panel = (
            ColorClip(size=(frame_w, top_h), color=(12, 12, 12))
            .with_duration(duration)
            .with_opacity(0.85)
        )
        bottom_panel = (
            ColorClip(size=(frame_w, bottom_h), color=(12, 12, 12))
            .with_position((0, frame_h - bottom_h))
            .with_duration(duration)
            .with_opacity(0.85)
        )

        title_text = title or summary
        wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160)
        title_clip = (
            TextClip(
                text=wrapped_title,
                font=str(self.settings.rendering.font_path),
                font_size=self.settings.rendering.title_font_size,
                color=self.settings.rendering.base_color,
                method="caption",
                size=(frame_w - 160, top_h - 40),
            )
            .with_duration(duration)
        )
        title_clip = title_clip.with_position(
            ((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2)
        )

        words = self._collect_words(transcription, start, end)
        caption_sets = self.captions.build(words, clip_start=start)

        caption_clips = []
        caption_resources: List[ImageClip] = []
        caption_y = frame_h - bottom_h + (bottom_h - self.captions.canvas_height) // 2
        for clip_set in caption_sets:
            base_positioned = clip_set.base.with_position(("center", caption_y))
            caption_clips.append(base_positioned)
            caption_resources.append(clip_set.base)
            for highlight in clip_set.highlights:
                positioned = highlight.with_position(("center", caption_y))
                caption_clips.append(positioned)
                caption_resources.append(highlight)

        if not caption_clips:
            fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160)
            caption_clips.append(
                TextClip(
                    text=fallback_text,
                    font=str(self.settings.rendering.font_path),
                    font_size=self.settings.rendering.subtitle_font_size,
                    color=self.settings.rendering.base_color,
                    method="caption",
                    size=(frame_w - 160, bottom_h - 40),
                )
                .with_duration(duration)
                .with_position(("center", caption_y))
            )

        composite = CompositeVideoClip(
            [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
            size=(frame_w, frame_h),
        )

        output_path = output_dir / f"clip_{index:02d}.mp4"
        composite.write_videofile(
            str(output_path),
            codec=self.settings.rendering.video_codec,
            audio_codec=self.settings.rendering.audio_codec,
            fps=self.settings.rendering.fps,
            bitrate=self.settings.rendering.bitrate,
            ffmpeg_params=[
                "-preset",
                self.settings.rendering.preset,
                "-pix_fmt",
                "yuv420p",
            ],
            temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"),
            remove_temp=True,
            threads=4,
        )

        composite.close()
        resized_clip.close()
        video_clip.close()
        title_clip.close()
        background.close()
        top_panel.close()
        bottom_panel.close()
        for clip in caption_clips:
            clip.close()
        for clip in caption_resources:
            clip.close()

        return str(output_path)

    def _collect_words(
        self, transcription: TranscriptionResult, start: float, end: float
    ) -> List[WordTiming]:
        collected: List[WordTiming] = []
        for segment in transcription.segments:
            if segment.end < start or segment.start > end:
                continue

            if segment.words:
                for word in segment.words:
                    if word.end < start or word.start > end:
                        continue
                    collected.append(
                        WordTiming(
                            start=max(start, word.start),
                            end=min(end, word.end),
                            word=word.word,
                        )
                    )
            else:
                collected.extend(self._fallback_words(segment.text, segment.start, segment.end, start, end))

        collected.sort(key=lambda w: w.start)
        return collected

    def _fallback_words(
        self,
        text: str,
        segment_start: float,
        segment_end: float,
        window_start: float,
        window_end: float,
    ) -> Iterable[WordTiming]:
        words = [w for w in re.split(r"\s+", text.strip()) if w]
        if not words:
            return []

        seg_start = max(segment_start, window_start)
        seg_end = min(segment_end, window_end)
        duration = max(0.01, seg_end - seg_start)
        step = duration / len(words)

        timings: List[WordTiming] = []
        for idx, word in enumerate(words):
            w_start = seg_start + idx * step
            w_end = min(seg_end, w_start + step)
            timings.append(WordTiming(start=w_start, end=w_end, word=word))
        return timings

    @staticmethod
    def _wrap_text(text: str, max_width: int) -> str:
        text = text.strip()
        if not text:
            return ""

        words = text.split()
        lines: List[str] = []
        current: List[str] = []
        for word in words:
            current.append(word)
            if len(" ".join(current)) > max_width // 18:
                lines.append(" ".join(current[:-1]))
                current = [current[-1]]
        if current:
            lines.append(" ".join(current))
        return "\n".join(lines)
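Note that _wrap_text estimates line length as max_width // 18 characters rather than measuring the rendered font, so long titles wrap only approximately. A quick check of the behavior (the title string is illustrative):

from video_render.rendering import VideoRenderer

wrapped = VideoRenderer._wrap_text(
    "Como transformar um video longo em cortes virais sem contratar editor",
    max_width=920,  # frame_w - 160 for the default 1080-wide frame
)
print(wrapped)  # breaks after roughly 920 // 18 = 51 characters per line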
video_render/transcription.py (122 lines, Normal file)
@@ -0,0 +1,122 @@
from __future__ import annotations

import json
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional

from faster_whisper import WhisperModel

from .config import Settings

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class WordTiming:
    start: float
    end: float
    word: str


@dataclass(frozen=True)
class TranscriptSegment:
    id: int
    start: float
    end: float
    text: str
    words: List[WordTiming]


@dataclass(frozen=True)
class TranscriptionResult:
    segments: List[TranscriptSegment]
    full_text: str


class TranscriptionService:
    def __init__(self, settings: Settings) -> None:
        self.settings = settings
        self._model: Optional[WhisperModel] = None

    def _load_model(self) -> WhisperModel:
        if self._model is None:
            logger.info(
                "Carregando modelo Faster-Whisper '%s' (device=%s, compute_type=%s)",
                self.settings.whisper.model_size,
                self.settings.whisper.device or "auto",
                self.settings.whisper.compute_type or "default",
            )
            self._model = WhisperModel(
                self.settings.whisper.model_size,
                device=self.settings.whisper.device or "auto",
                compute_type=self.settings.whisper.compute_type or "default",
                download_root=str(self.settings.whisper.download_root),
            )
        return self._model

    def transcribe(self, audio_path: Path) -> TranscriptionResult:
        model = self._load_model()
        segments, _ = model.transcribe(
            str(audio_path),
            beam_size=5,
            word_timestamps=True,
        )

        parsed_segments: List[TranscriptSegment] = []
        full_text_parts: List[str] = []

        for idx, segment in enumerate(segments):
            words = [
                WordTiming(start=w.start, end=w.end, word=w.word.strip())
                for w in segment.words or []
                if w.word.strip()
            ]
            text = segment.text.strip()
            full_text_parts.append(text)
            parsed_segments.append(
                TranscriptSegment(
                    id=idx,
                    start=segment.start,
                    end=segment.end,
                    text=text,
                    words=words,
                )
            )

        return TranscriptionResult(
            segments=parsed_segments,
            full_text=" ".join(full_text_parts).strip(),
        )

    @staticmethod
    def persist(result: TranscriptionResult, destination: Path) -> None:
        json_path = destination / "transcription.json"
        text_path = destination / "transcription.txt"

        payload = {
            "segments": [
                {
                    "id": segment.id,
                    "start": segment.start,
                    "end": segment.end,
                    "text": segment.text,
                    "words": [
                        {"start": word.start, "end": word.end, "text": word.word}
                        for word in segment.words
                    ],
                }
                for segment in result.segments
            ],
            "full_text": result.full_text,
        }

        with json_path.open("w", encoding="utf-8") as fp:
            json.dump(payload, fp, ensure_ascii=False, indent=2)

        with text_path.open("w", encoding="utf-8") as fp:
            fp.write(result.full_text)

        logger.info("Transcrição salva em %s", destination)
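persist writes transcription.json and transcription.txt side by side in the workspace. The JSON shape follows directly from the payload built above; the text and timing values here are illustrative:

{
  "segments": [
    {
      "id": 0,
      "start": 0.0,
      "end": 2.1,
      "text": "Bem-vindos ao canal",
      "words": [
        {"start": 0.0, "end": 0.7, "text": "Bem-vindos"},
        {"start": 0.7, "end": 1.1, "text": "ao"},
        {"start": 1.1, "end": 2.1, "text": "canal"}
      ]
    }
  ],
  "full_text": "Bem-vindos ao canal"
}

transcription.txt contains only the full_text string.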
video_render/utils.py (38 lines, Normal file)
@@ -0,0 +1,38 @@
from __future__ import annotations

import re
import unicodedata
from pathlib import Path
from typing import Iterable


def sanitize_filename(name: str) -> str:
    normalized = unicodedata.normalize("NFKD", name)
    ascii_text = normalized.encode("ASCII", "ignore").decode()
    ascii_text = ascii_text.lower()
    ascii_text = ascii_text.replace(" ", "_")
    ascii_text = re.sub(r"[^a-z0-9_\-\.]", "", ascii_text)
    ascii_text = re.sub(r"_+", "_", ascii_text)
    return ascii_text.strip("_") or "video"


def ensure_workspace(root: Path, folder_name: str) -> Path:
    workspace = root / folder_name
    workspace.mkdir(parents=True, exist_ok=True)
    return workspace


def remove_paths(paths: Iterable[Path]) -> None:
    for path in paths:
        if not path.exists():
            continue
        if path.is_file() or path.is_symlink():
            path.unlink(missing_ok=True)
        else:
            for child in sorted(path.rglob("*"), reverse=True):
                if child.is_file() or child.is_symlink():
                    child.unlink(missing_ok=True)
                elif child.is_dir():
                    child.rmdir()
            path.rmdir()
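sanitize_filename is what keeps workspace folder names portable: accents are stripped via NFKD, spaces become underscores, and anything outside [a-z0-9_\-.] is dropped. For example (illustrative inputs):

from video_render.utils import sanitize_filename

print(sanitize_filename("Entrevista Épica #1"))  # -> "entrevista_epica_1"
print(sanitize_filename("???"))                  # -> "video" (fallback)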