Finish the adjustments for video rendering

This commit is contained in:
LeoMortari
2025-10-28 17:34:13 -03:00
parent b5a27fa938
commit c18884e778
4 changed files with 420 additions and 63 deletions

View File

@@ -13,6 +13,22 @@ logger = logging.getLogger(__name__)
 MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]]
 
+
+def _safe_ack(
+    channel: pika.adapters.blocking_connection.BlockingChannel, delivery_tag
+) -> bool:
+    if not channel.is_open:
+        logger.warning(
+            "Canal fechado antes do ACK; mensagem sera reprocessada apos reconexao"
+        )
+        return False
+    try:
+        channel.basic_ack(delivery_tag=delivery_tag)
+        return True
+    except Exception:
+        logger.exception("Falha ao confirmar mensagem")
+        return False
+
+
 class RabbitMQWorker:
     def __init__(self, settings: Settings) -> None:
         self.settings = settings
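
Note: _safe_ack exists because basic_ack raises if the channel has died in the meantime; the helper turns that failure into a boolean the caller can act on. A minimal sketch of both outcomes, assuming a local broker and a throwaway queue name "demo" (neither is part of this commit):

# Sketch only; assumes the _safe_ack defined above is importable and a
# RabbitMQ broker is running on localhost.
import pika

connection = pika.BlockingConnection(pika.ConnectionParameters("localhost"))
channel = connection.channel()
channel.queue_declare(queue="demo", durable=True)
channel.basic_publish(exchange="", routing_key="demo", body=b"{}")

method, properties, body = channel.basic_get(queue="demo")
if method is not None:
    assert _safe_ack(channel, method.delivery_tag)  # open channel: ACK succeeds

connection.close()
assert not _safe_ack(channel, 0)  # closed channel: returns False instead of raising
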
@@ -27,50 +43,59 @@ class RabbitMQWorker:
         )
 
     def consume_forever(self, handler: MessageHandler) -> None:
         while True:
             try:
                 with pika.BlockingConnection(self._params) as connection:
                     channel = connection.channel()
-                    channel.queue_declare(queue=self.settings.rabbitmq.consume_queue, durable=True)
-                    channel.queue_declare(queue=self.settings.rabbitmq.publish_queue, durable=True)
-                    channel.basic_qos(prefetch_count=self.settings.rabbitmq.prefetch_count)
+                    channel.queue_declare(
+                        queue=self.settings.rabbitmq.consume_queue, durable=True
+                    )
+                    channel.queue_declare(
+                        queue=self.settings.rabbitmq.publish_queue, durable=True
+                    )
+                    channel.basic_qos(
+                        prefetch_count=self.settings.rabbitmq.prefetch_count
+                    )
 
-                    def _on_message(ch: pika.adapters.blocking_connection.BlockingChannel, method, properties, body):
+                    def _on_message(
+                        ch: pika.adapters.blocking_connection.BlockingChannel,
+                        method,
+                        properties,
+                        body,
+                    ) -> None:
+                        """Consume message, ACK immediately, then process."""
                         try:
                             message = json.loads(body)
                         except json.JSONDecodeError:
-                            logger.error("Mensagem inválida recebida: %s", body)
-                            ch.basic_ack(delivery_tag=method.delivery_tag)
+                            logger.error("Mensagem invalida recebida: %s", body)
+                            _safe_ack(ch, method.delivery_tag)
                             return
 
-                        logger.info("Mensagem recebida: %s", message.get("filename", "<sem_nome>"))
+                        if not _safe_ack(ch, method.delivery_tag):
+                            logger.warning(
+                                "Nao foi possivel confirmar mensagem; abortando processamento"
+                            )
+                            return
+
+                        logger.info(
+                            "Mensagem recebida: %s",
+                            message.get("filename", "<sem_nome>"),
+                        )
 
                         try:
                             response = handler(message)
                         except Exception:
-                            logger.exception("Erro não tratado durante o processamento")
+                            logger.exception("Erro nao tratado durante o processamento")
                             response = {
                                 "hasError": True,
-                                "error": "Erro não tratado no pipeline",
+                                "error": "Erro nao tratado no pipeline",
                                 "filename": message.get("filename"),
                                 "videoId": message.get("videoId"),
                                 "url": message.get("url"),
                                 "processedFiles": [],
                             }
 
-                        try:
-                            payload = json.dumps(response)
-                            ch.basic_publish(
-                                exchange="",
-                                routing_key=self.settings.rabbitmq.publish_queue,
-                                body=payload,
-                                properties=pika.BasicProperties(delivery_mode=2),
-                            )
-                            logger.info("Resposta publicada para '%s'", self.settings.rabbitmq.publish_queue)
-                        except Exception:
-                            logger.exception("Falha ao publicar a resposta na fila de upload")
-                        finally:
-                            ch.basic_ack(delivery_tag=method.delivery_tag)
+                        self._publish_response(response)
 
                     channel.basic_consume(
                         queue=self.settings.rabbitmq.consume_queue,
@@ -80,7 +105,32 @@ class RabbitMQWorker:
logger.info("Consumidor iniciado. Aguardando mensagens...") logger.info("Consumidor iniciado. Aguardando mensagens...")
channel.start_consuming() channel.start_consuming()
except pika.exceptions.AMQPConnectionError: except pika.exceptions.AMQPConnectionError:
logger.exception("Conexão com RabbitMQ perdida. Tentando reconectar...") logger.exception(
"Conexao com RabbitMQ perdida. Tentando reconectar..."
)
except pika.exceptions.AMQPError:
logger.exception("Erro AMQP inesperado. Reiniciando consumo...")
except KeyboardInterrupt: except KeyboardInterrupt:
logger.info("Encerrando consumidor por interrupção do usuário.") logger.info("Encerrando consumidor por interrupcao do usuario.")
break break
def _publish_response(self, response: Dict[str, Any]) -> None:
payload = json.dumps(response)
try:
with pika.BlockingConnection(self._params) as publish_connection:
publish_channel = publish_connection.channel()
publish_channel.queue_declare(
queue=self.settings.rabbitmq.publish_queue, durable=True
)
publish_channel.basic_publish(
exchange="",
routing_key=self.settings.rabbitmq.publish_queue,
body=payload,
properties=pika.BasicProperties(delivery_mode=2),
)
logger.info(
"Resposta publicada para '%s'",
self.settings.rabbitmq.publish_queue,
)
except Exception:
logger.exception("Falha ao publicar a resposta na fila de upload apos ACK")

View File

@@ -93,6 +93,15 @@ class VideoPipeline:
     def _generate_transcription(self, context: PipelineContext) -> None:
         if not context.workspace:
             raise RuntimeError("Workspace não preparado")
+
+        existing = TranscriptionService.load(context.workspace.workspace_dir)
+        if existing:
+            logger.info(
+                "Transcricao existente encontrada em %s; reutilizando resultado",
+                context.workspace.workspace_dir,
+            )
+            context.transcription = existing
+            return
+
         transcription = self.transcriber.transcribe(context.workspace.audio_path)
         TranscriptionService.persist(transcription, context.workspace.workspace_dir)
         context.transcription = transcription
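
Note: this guard makes the transcription step idempotent per workspace; a re-run after a downstream failure reuses transcription.json instead of re-invoking the transcriber. The same load-or-compute shape in miniature (names illustrative, not the project's API):

# Sketch of the caching pattern: compute once, persist to the workspace,
# short-circuit on later runs.
import json
from pathlib import Path

def load_or_compute(workspace: Path, compute) -> dict:
    cache = workspace / "transcription.json"
    if cache.exists():
        return json.loads(cache.read_text(encoding="utf-8"))  # reuse prior result
    result = compute()
    cache.write_text(json.dumps(result), encoding="utf-8")
    return result
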

View File

@@ -3,9 +3,11 @@ from __future__ import annotations
 import logging
 import re
 from dataclasses import dataclass
-from typing import Iterable, List, Sequence, Tuple
+from typing import Dict, Iterable, List, Sequence, Tuple, Optional
 
 import numpy as np
+from moviepy.audio.AudioClip import AudioArrayClip, AudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
 from moviepy.video.VideoClip import ColorClip, ImageClip, TextClip
 from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
 from moviepy.video.io.VideoFileClip import VideoFileClip
@@ -199,6 +201,7 @@ class VideoRenderer:
                     index=index,
                     transcription=transcription,
                     output_dir=output_dir,
+                    source_path=workspace_path,
                 )
             finally:
                 subclip.close()
@@ -226,13 +229,14 @@ class VideoRenderer:
         index: int,
         transcription: TranscriptionResult,
         output_dir,
+        source_path: str,
     ) -> str:
         duration = end - start
         frame_w = self.settings.rendering.frame_width
         frame_h = self.settings.rendering.frame_height
         top_h = int(frame_h * 0.18)
         bottom_h = int(frame_h * 0.20)
-        video_area_h = frame_h - top_h - bottom_h
+        video_area_h = max(1, frame_h - top_h - bottom_h)
 
         scale_factor = min(
             frame_w / subclip.w,
@@ -257,19 +261,12 @@ class VideoRenderer:
             .with_opacity(0.85)
         )
 
-        title_text = title or summary
-        wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160)
-        title_clip = (
-            TextClip(
-                text=wrapped_title,
-                font=str(self.settings.rendering.font_path),
-                font_size=self.settings.rendering.title_font_size,
-                color=self.settings.rendering.base_color,
-                method="caption",
-                size=(frame_w - 160, top_h - 40),
-                align="center",
-            )
-            .with_duration(duration)
-        )
+        title_clip = self._build_title_clip(
+            title=title,
+            summary=summary,
+            duration=duration,
+            frame_width=frame_w,
+            top_panel_height=top_h,
+        )
         title_clip = title_clip.with_position(
             ((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2)
@@ -305,43 +302,38 @@ class VideoRenderer:
         if not caption_clips:
             fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160)
             caption_clips.append(
-                TextClip(
+                self._make_textclip(
                     text=fallback_text,
-                    font=str(self.settings.rendering.font_path),
+                    font_path=self.settings.rendering.font_path,
                     font_size=self.settings.rendering.subtitle_font_size,
                     color=self.settings.rendering.base_color,
-                    method="caption",
-                    align="center",
                     size=(frame_w - 160, max(40, self.captions.canvas_height)),
                 )
                 .with_duration(duration)
                 .with_position(("center", caption_y))
             )
 
+        audio_clip, audio_needs_close = self._materialize_audio(
+            source_path=source_path,
+            start=start,
+            end=end,
+            duration=duration,
+            fallback_audio=video_clip.audio or resized_clip.audio or subclip.audio,
+        )
+
         composite = CompositeVideoClip(
             [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
             size=(frame_w, frame_h),
         )
-        video_audio = video_clip.audio or resized_clip.audio or subclip.audio
-        if video_audio is not None:
-            composite = composite.set_audio(video_audio)
+        if audio_clip is not None:
+            composite = self._with_audio(composite, audio_clip)
 
         output_path = output_dir / f"clip_{index:02d}.mp4"
-        composite.write_videofile(
-            str(output_path),
-            codec=self.settings.rendering.video_codec,
-            audio_codec=self.settings.rendering.audio_codec,
-            fps=self.settings.rendering.fps,
-            bitrate=self.settings.rendering.bitrate,
-            ffmpeg_params=[
-                "-preset",
-                self.settings.rendering.preset,
-                "-pix_fmt",
-                "yuv420p",
-            ],
-            temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"),
-            remove_temp=True,
-            threads=4,
-        )
+        self._write_with_fallback(
+            composite=composite,
+            output_path=output_path,
+            index=index,
+            output_dir=output_dir,
+        )
         composite.close()
@@ -355,9 +347,128 @@ class VideoRenderer:
             clip.close()
         for clip in caption_resources:
             clip.close()
+        if audio_clip is not None and audio_needs_close:
+            audio_clip.close()
 
         return str(output_path)
 
+    def _build_title_clip(
+        self,
+        *,
+        title: str,
+        summary: str,
+        duration: float,
+        frame_width: int,
+        top_panel_height: int,
+    ) -> ImageClip:
+        text = (title or summary or "").strip()
+        if not text:
+            text = summary or ""
+        max_width = max(200, frame_width - 160)
+        font_size = self.settings.rendering.title_font_size
+        min_font_size = max(28, int(font_size * 0.6))
+        target_height = max(80, top_panel_height - 40)
+        title_color = ImageColor.getrgb(self.settings.rendering.base_color)
+        font_path = self.settings.rendering.font_path
+
+        while True:
+            font = ImageFont.truetype(str(font_path), font_size)
+            lines = self._split_title_lines(text, font, max_width)
+            line_height = font.getbbox("Ay")[3] - font.getbbox("Ay")[1]
+            spacing = max(4, int(line_height * 0.25))
+            text_height = self._measure_text_height(len(lines), line_height, spacing)
+            if text_height <= target_height or font_size <= min_font_size:
+                break
+            font_size = max(min_font_size, font_size - 6)
+
+        # Recompute dimensions with final font size to ensure consistency
+        font = ImageFont.truetype(str(font_path), font_size)
+        lines = self._split_title_lines(text, font, max_width)
+        line_height = font.getbbox("Ay")[3] - font.getbbox("Ay")[1]
+        spacing = max(4, int(line_height * 0.25))
+        text_height = self._measure_text_height(len(lines), line_height, spacing)
+        canvas_height = max(1, text_height)
+
+        image = Image.new("RGBA", (max_width, canvas_height), (0, 0, 0, 0))
+        draw = ImageDraw.Draw(image)
+        y = 0
+        for idx, line in enumerate(lines):
+            bbox = font.getbbox(line)
+            line_width = bbox[2] - bbox[0]
+            x = max(0, (max_width - line_width) // 2)
+            draw.text((x, y - bbox[1]), line, font=font, fill=title_color)
+            y += line_height
+            if idx < len(lines) - 1:
+                y += spacing
+
+        return ImageClip(np.array(image)).with_duration(duration)
+
+    @staticmethod
+    def _measure_text_height(line_count: int, line_height: int, spacing: int) -> int:
+        if line_count <= 0:
+            return line_height
+        return line_count * line_height + max(0, line_count - 1) * spacing
+
+    @staticmethod
+    def _split_title_lines(
+        text: str, font: ImageFont.FreeTypeFont, max_width: int
+    ) -> List[str]:
+        words = text.split()
+        if not words:
+            return [""]
+        lines: List[str] = []
+        current: List[str] = []
+        for word in words:
+            test_line = " ".join(current + [word]) if current else word
+            bbox = font.getbbox(test_line)
+            line_width = bbox[2] - bbox[0]
+            if line_width <= max_width or not current:
+                current.append(word)
+                if line_width > max_width and not current[:-1]:
+                    lines.append(" ".join(current))
+                    current = []
+                continue
+            lines.append(" ".join(current))
+            current = [word]
+        if current:
+            lines.append(" ".join(current))
+        return lines
+
+    def _materialize_audio(
+        self,
+        *,
+        source_path: str,
+        start: float,
+        end: float,
+        duration: float,
+        fallback_audio,
+    ) -> Tuple[Optional[AudioClip], bool]:
+        try:
+            with AudioFileClip(source_path) as audio_file:
+                segment = audio_file.subclipped(start, end)
+                fps = (
+                    getattr(segment, "fps", None)
+                    or getattr(audio_file, "fps", None)
+                    or 44100
+                )
+                samples = segment.to_soundarray(fps=fps)
+        except Exception:
+            logger.warning(
+                "Falha ao carregar audio independente; utilizando fluxo original",
+                exc_info=True,
+            )
+            return fallback_audio, False
+
+        audio_clip = AudioArrayClip(samples, fps=fps).with_duration(duration)
+        return audio_clip, True
+
     def _collect_words(
         self, transcription: TranscriptionResult, start: float, end: float
     ) -> List[WordTiming]:
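
Note: _build_title_clip replaces the old fixed-size TextClip with a shrink-to-fit loop: measure with Pillow, step the font size down by 6 until the wrapped block fits the top panel or hits the floor. A standalone sketch of that measurement loop, simplified to a single line of text and assuming a DejaVuSans.ttf font is resolvable on the system (an assumption, not part of the commit):

# Sketch of the shrink-to-fit idea; the real method wraps text into lines
# first, this version only measures a single line.
from PIL import ImageFont

def fit_font_size(text: str, max_width: int, max_height: int,
                  font_path: str = "DejaVuSans.ttf",
                  start_size: int = 72, min_size: int = 28) -> int:
    size = start_size
    while True:
        font = ImageFont.truetype(font_path, size)
        bbox = font.getbbox(text)
        fits = (bbox[2] - bbox[0] <= max_width
                and bbox[3] - bbox[1] <= max_height)
        if fits or size <= min_size:
            return size
        size = max(min_size, size - 6)  # same step the renderer uses

print(fit_font_size("Finish the adjustments for video rendering", 920, 150))
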
@@ -424,3 +535,120 @@ class VideoRenderer:
         if current:
             lines.append(" ".join(current))
         return "\n".join(lines)
+
+    def _write_with_fallback(
+        self,
+        *,
+        composite: CompositeVideoClip,
+        output_path,
+        index: int,
+        output_dir,
+    ) -> None:
+        attempts = self._encoding_attempts()
+        temp_audio_path = output_dir / f"temp_audio_{index:02d}.m4a"
+        last_error: Exception | None = None
+        for attempt in attempts:
+            codec = attempt["codec"]
+            bitrate = attempt["bitrate"]
+            preset = attempt["preset"]
+            ffmpeg_params = ["-pix_fmt", "yuv420p"]
+            if preset:
+                ffmpeg_params = ["-preset", preset, "-pix_fmt", "yuv420p"]
+            try:
+                logger.info(
+                    "Renderizando clip %02d com codec %s (bitrate=%s, preset=%s)",
+                    index,
+                    codec,
+                    bitrate,
+                    preset or "default",
+                )
+                composite.write_videofile(
+                    str(output_path),
+                    codec=codec,
+                    audio_codec=self.settings.rendering.audio_codec,
+                    fps=self.settings.rendering.fps,
+                    bitrate=bitrate,
+                    ffmpeg_params=ffmpeg_params,
+                    temp_audiofile=str(temp_audio_path),
+                    remove_temp=True,
+                    threads=4,
+                )
+                return
+            except Exception as exc:  # noqa: BLE001 - propagate after fallbacks
+                last_error = exc
+                logger.warning(
+                    "Falha ao renderizar com codec %s: %s", codec, exc, exc_info=True
+                )
+                if output_path.exists():
+                    output_path.unlink(missing_ok=True)
+                if temp_audio_path.exists():
+                    temp_audio_path.unlink(missing_ok=True)
+        raise RuntimeError("Todas as tentativas de renderizacao falharam") from last_error
+
+    def _encoding_attempts(self) -> List[Dict[str, str | None]]:
+        settings = self.settings.rendering
+        attempts: List[Dict[str, str | None]] = []
+        attempts.append(
+            {
+                "codec": settings.video_codec,
+                "bitrate": settings.bitrate,
+                "preset": settings.preset,
+            }
+        )
+        deduped: List[Dict[str, str | None]] = []
+        seen = set()
+        for attempt in attempts:
+            key = (attempt["codec"], attempt["bitrate"], attempt["preset"])
+            if key in seen:
+                continue
+            seen.add(key)
+            deduped.append(attempt)
+        return deduped
+
+    @staticmethod
+    def _with_audio(
+        composite: CompositeVideoClip,
+        audio_clip,
+    ) -> CompositeVideoClip:
+        """Attach audio to a composite clip across MoviePy versions."""
+        if hasattr(composite, "with_audio"):
+            return composite.with_audio(audio_clip)
+        if hasattr(composite, "set_audio"):
+            return composite.set_audio(audio_clip)
+        raise AttributeError("CompositeVideoClip does not support audio assignment")
+
+    @staticmethod
+    def _make_textclip(
+        *,
+        text: str,
+        font_path,
+        font_size: int,
+        color: str,
+        size: Tuple[int, int],
+    ) -> TextClip:
+        """Create a TextClip compatible with MoviePy 1.x and 2.x.
+
+        MoviePy 2.x removed the 'align' keyword from TextClip. We try with
+        'align' for older versions and fall back to a call without it when
+        unsupported.
+        """
+        kwargs = dict(
+            text=text,
+            font=str(font_path),
+            font_size=font_size,
+            color=color,
+            method="caption",
+            size=size,
+        )
+        try:
+            return TextClip(**kwargs, align="center")  # MoviePy 1.x style
+        except TypeError:
+            logger.debug("TextClip 'align' not supported; falling back without it")
+            return TextClip(**kwargs)  # MoviePy 2.x style
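
Note: _with_audio and _make_textclip are two instances of the same compatibility idiom: probe for the new API and fall back to the old one instead of pinning a MoviePy version. The generic form, as a sketch:

# Generic feature-detection dispatch; names here are illustrative.
def call_first_available(obj, candidates, *args, **kwargs):
    for name in candidates:
        method = getattr(obj, name, None)
        if callable(method):
            return method(*args, **kwargs)
    raise AttributeError(f"none of {candidates!r} available on {type(obj).__name__}")

# e.g. call_first_available(composite, ("with_audio", "set_audio"), audio_clip)
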

View File

@@ -118,5 +118,75 @@ class TranscriptionService:
         with text_path.open("w", encoding="utf-8") as fp:
             fp.write(result.full_text)
 
-        logger.info("Transcrição salva em %s", destination)
+        logger.info("Transcricao salva em %s", destination)
+
+    @staticmethod
+    def load(source: Path) -> Optional[TranscriptionResult]:
+        json_path = source / "transcription.json"
+        if not json_path.exists():
+            return None
+
+        try:
+            with json_path.open("r", encoding="utf-8") as fp:
+                payload = json.load(fp)
+        except (OSError, json.JSONDecodeError) as exc:
+            logger.warning(
+                "Falha ao carregar transcricao existente de %s: %s", json_path, exc
+            )
+            return None
+
+        segments_payload = payload.get("segments", [])
+        if not isinstance(segments_payload, list):
+            logger.warning(
+                "Formato inesperado ao carregar transcricao de %s: 'segments' invalido",
+                json_path,
+            )
+            return None
+
+        segments: List[TranscriptSegment] = []
+        for idx, segment_data in enumerate(segments_payload):
+            if not isinstance(segment_data, dict):
+                logger.debug("Segmento invalido ignorado ao carregar: %s", segment_data)
+                continue
+            try:
+                segment_id = int(segment_data.get("id", idx))
+                start = float(segment_data["start"])
+                end = float(segment_data["end"])
+            except (KeyError, TypeError, ValueError):
+                logger.debug("Segmento sem dados obrigatorios ignorado: %s", segment_data)
+                continue
+            text = str(segment_data.get("text", "")).strip()
+            words_payload = segment_data.get("words", [])
+            words: List[WordTiming] = []
+            if isinstance(words_payload, list):
+                for word_data in words_payload:
+                    if not isinstance(word_data, dict):
+                        continue
+                    try:
+                        w_start = float(word_data["start"])
+                        w_end = float(word_data["end"])
+                    except (KeyError, TypeError, ValueError):
+                        logger.debug(
+                            "Palavra sem dados obrigatorios ignorada: %s", word_data
+                        )
+                        continue
+                    word_text = str(word_data.get("text", "")).strip()
+                    if not word_text:
+                        continue
+                    words.append(WordTiming(start=w_start, end=w_end, word=word_text))
+            segments.append(
+                TranscriptSegment(
+                    id=segment_id,
+                    start=start,
+                    end=end,
+                    text=text,
+                    words=words,
+                )
+            )
+
+        full_text = str(payload.get("full_text", "")).strip()
+        return TranscriptionResult(segments=segments, full_text=full_text)
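
Note: load is deliberately lenient; malformed segments and words are skipped with debug logs rather than failing the whole cache read. A hedged round-trip sketch of the JSON shape it accepts, writing the file by hand instead of via persist (the tmp path is illustrative):

# Sketch exercising the schema shown above.
import json
from pathlib import Path

workspace = Path("/tmp/workspace_demo")
workspace.mkdir(parents=True, exist_ok=True)
payload = {
    "full_text": "ola mundo",
    "segments": [
        {
            "id": 0,
            "start": 0.0,
            "end": 1.2,
            "text": "ola mundo",
            "words": [
                {"start": 0.0, "end": 0.5, "text": "ola"},
                {"start": 0.6, "end": 1.2, "text": "mundo"},
            ],
        },
        {"id": 1, "start": "broken"},  # malformed: skipped with a debug log
    ],
}
(workspace / "transcription.json").write_text(json.dumps(payload), encoding="utf-8")

result = TranscriptionService.load(workspace)
assert result is not None
assert len(result.segments) == 1          # the malformed segment was dropped
assert result.segments[0].words[1].word == "mundo"
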