Finaliza os ajustes para render de video

2025-10-28 17:34:13 -03:00
parent b5a27fa938
commit c18884e778
4 changed files with 420 additions and 63 deletions
--- a/video_render/rendering.py
+++ b/video_render/rendering.py
@@ -3,9 +3,11 @@ from __future__ import annotations
 import logging
 import re
 from dataclasses import dataclass
-from typing import Iterable, List, Sequence, Tuple
+from typing import Dict, Iterable, List, Sequence, Tuple, Optional

 import numpy as np
+from moviepy.audio.AudioClip import AudioArrayClip, AudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
 from moviepy.video.VideoClip import ColorClip, ImageClip, TextClip
 from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
 from moviepy.video.io.VideoFileClip import VideoFileClip
@@ -199,6 +201,7 @@ class VideoRenderer:
                        index=index,
                        transcription=transcription,
                        output_dir=output_dir,
+                        source_path=workspace_path,
                    )
                finally:
                    subclip.close()
@@ -226,13 +229,14 @@ class VideoRenderer:
        index: int,
        transcription: TranscriptionResult,
        output_dir,
+        source_path: str,
    ) -> str:
        duration = end - start
        frame_w = self.settings.rendering.frame_width
        frame_h = self.settings.rendering.frame_height
        top_h = int(frame_h * 0.18)
        bottom_h = int(frame_h * 0.20)
-        video_area_h = frame_h - top_h - bottom_h
+        video_area_h = max(1, frame_h - top_h - bottom_h)

        scale_factor = min(
            frame_w / subclip.w,
@@ -257,19 +261,12 @@ class VideoRenderer:
            .with_opacity(0.85)
        )

-        title_text = title or summary
-        wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160)
-        title_clip = (
-            TextClip(
-                text=wrapped_title,
-                font=str(self.settings.rendering.font_path),
-                font_size=self.settings.rendering.title_font_size,
-                color=self.settings.rendering.base_color,
-                method="caption",
-                size=(frame_w - 160, top_h - 40),
-                align="center",
-            )
-            .with_duration(duration)
+        title_clip = self._build_title_clip(
+            title=title,
+            summary=summary,
+            duration=duration,
+            frame_width=frame_w,
+            top_panel_height=top_h,
        )
        title_clip = title_clip.with_position(
            ((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2)
@@ -305,43 +302,38 @@ class VideoRenderer:
        if not caption_clips:
            fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160)
            caption_clips.append(
-                TextClip(
+                self._make_textclip(
                    text=fallback_text,
-                    font=str(self.settings.rendering.font_path),
+                    font_path=self.settings.rendering.font_path,
                    font_size=self.settings.rendering.subtitle_font_size,
                    color=self.settings.rendering.base_color,
-                    method="caption",
-                    align="center",
                    size=(frame_w - 160, max(40, self.captions.canvas_height)),
                )
                .with_duration(duration)
                .with_position(("center", caption_y))
            )

+        audio_clip, audio_needs_close = self._materialize_audio(
+            source_path=source_path,
+            start=start,
+            end=end,
+            duration=duration,
+            fallback_audio=video_clip.audio or resized_clip.audio or subclip.audio,
+        )
+
        composite = CompositeVideoClip(
            [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips],
            size=(frame_w, frame_h),
        )
-        video_audio = video_clip.audio or resized_clip.audio or subclip.audio
-        if video_audio is not None:
-            composite = composite.set_audio(video_audio)
+        if audio_clip is not None:
+            composite = self._with_audio(composite, audio_clip)

        output_path = output_dir / f"clip_{index:02d}.mp4"
-        composite.write_videofile(
-            str(output_path),
-            codec=self.settings.rendering.video_codec,
-            audio_codec=self.settings.rendering.audio_codec,
-            fps=self.settings.rendering.fps,
-            bitrate=self.settings.rendering.bitrate,
-            ffmpeg_params=[
-                "-preset",
-                self.settings.rendering.preset,
-                "-pix_fmt",
-                "yuv420p",
-            ],
-            temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"),
-            remove_temp=True,
-            threads=4,
+        self._write_with_fallback(
+            composite=composite,
+            output_path=output_path,
+            index=index,
+            output_dir=output_dir,
        )

        composite.close()
@@ -355,9 +347,128 @@ class VideoRenderer:
            clip.close()
        for clip in caption_resources:
            clip.close()
+        if audio_clip is not None and audio_needs_close:
+            audio_clip.close()

        return str(output_path)

+    def _build_title_clip(
+        self,
+        *,
+        title: str,
+        summary: str,
+        duration: float,
+        frame_width: int,
+        top_panel_height: int,
+    ) -> ImageClip:
+        """Rasterize the clip title onto a transparent canvas and wrap it in an ``ImageClip``.
+
+        The text is wrapped to the canvas width with ``_split_title_lines``.
+        While the wrapped block is taller than the target height, the font
+        size is lowered in steps of 6 until it fits or the minimum size is
+        reached.
+
+        Args:
+            title: Preferred text; ``summary`` is used when it is empty.
+            summary: Fallback text.
+            duration: Duration set on the returned clip.
+            frame_width: Output frame width; the canvas is
+                ``max(200, frame_width - 160)`` wide.
+            top_panel_height: Top panel height; the text block targets
+                ``max(80, top_panel_height - 40)``.
+
+        Returns:
+            An ``ImageClip`` built from the RGBA canvas with ``duration`` applied.
+        """
+        text = (title or summary or "").strip()
+        if not text:
+            # A whitespace-only title strips to nothing; retry with the summary.
+            text = summary or ""
+
+        max_width = max(200, frame_width - 160)
+        font_size = self.settings.rendering.title_font_size
+        min_font_size = max(28, int(font_size * 0.6))
+        target_height = max(80, top_panel_height - 40)
+        title_color = ImageColor.getrgb(self.settings.rendering.base_color)
+        font_path = self.settings.rendering.font_path
+
+        # The loop only changes ``font_size`` *after* the fit check, so on
+        # ``break`` every layout value below already matches the final size;
+        # no second font load or re-wrap is needed afterwards.
+        while True:
+            font = ImageFont.truetype(str(font_path), font_size)
+            lines = self._split_title_lines(text, font, max_width)
+            reference_box = font.getbbox("Ay")
+            line_height = reference_box[3] - reference_box[1]
+            spacing = max(4, int(line_height * 0.25))
+            text_height = self._measure_text_height(len(lines), line_height, spacing)
+
+            if text_height <= target_height or font_size <= min_font_size:
+                break
+
+            font_size = max(min_font_size, font_size - 6)
+
+        canvas_height = max(1, text_height)
+        image = Image.new("RGBA", (max_width, canvas_height), (0, 0, 0, 0))
+        draw = ImageDraw.Draw(image)
+
+        y = 0
+        last_index = len(lines) - 1
+        for idx, line in enumerate(lines):
+            bbox = font.getbbox(line)
+            line_width = bbox[2] - bbox[0]
+            # Center each line horizontally inside the canvas.
+            x = max(0, (max_width - line_width) // 2)
+            # The bbox top offset is subtracted from y, presumably so the
+            # glyphs' top edge lands on y (confirm against Pillow's getbbox).
+            draw.text((x, y - bbox[1]), line, font=font, fill=title_color)
+            y += line_height
+            if idx < last_index:
+                y += spacing
+
+        return ImageClip(np.array(image)).with_duration(duration)
+
+    @staticmethod
+    def _measure_text_height(line_count: int, line_height: int, spacing: int) -> int:
+        """Return the height of ``line_count`` stacked lines separated by ``spacing``.
+
+        A non-positive ``line_count`` is measured as a single line.
+        """
+        rows = max(1, line_count)
+        return rows * line_height + (rows - 1) * spacing
+
+    @staticmethod
+    def _split_title_lines(
+        text: str, font: ImageFont.FreeTypeFont, max_width: int
+    ) -> List[str]:
+        """Greedily wrap ``text`` into lines whose measured width fits ``max_width``.
+
+        Widths come from ``font.getbbox``. A single word that is wider than
+        ``max_width`` is emitted on a line of its own. Text that contains no
+        words yields ``[""]``.
+        """
+        tokens = text.split()
+        if not tokens:
+            return [""]
+
+        wrapped: List[str] = []
+        pending: List[str] = []
+        for token in tokens:
+            candidate = " ".join([*pending, token])
+            box = font.getbbox(candidate)
+            fits = box[2] - box[0] <= max_width
+
+            if fits:
+                pending.append(token)
+            elif pending:
+                # Adding the word overflows: close the current line first.
+                wrapped.append(" ".join(pending))
+                pending = [token]
+            else:
+                # Lone oversized word: it cannot be split, so it stands alone.
+                wrapped.append(token)
+
+        if pending:
+            wrapped.append(" ".join(pending))
+
+        return wrapped
+
+    def _materialize_audio(
+        self,
+        *,
+        source_path: str,
+        start: float,
+        end: float,
+        duration: float,
+        fallback_audio,
+    ) -> Tuple[Optional[AudioClip], bool]:
+        """Decode the ``start``-``end`` audio of ``source_path`` into an in-memory clip.
+
+        Returns:
+            ``(clip, needs_close)``. On success ``clip`` is an
+            ``AudioArrayClip`` built from the decoded samples, with
+            ``duration`` applied, and the flag is ``True``. If reading the
+            file raises, a warning is logged and ``(fallback_audio, False)``
+            is returned instead.
+        """
+        try:
+            with AudioFileClip(source_path) as source_audio:
+                window = source_audio.subclipped(start, end)
+                # Prefer the segment's rate, then the file's, then 44100.
+                fps = getattr(window, "fps", None)
+                if not fps:
+                    fps = getattr(source_audio, "fps", None) or 44100
+                samples = window.to_soundarray(fps=fps)
+        except Exception:
+            logger.warning(
+                "Falha ao carregar audio independente; utilizando fluxo original",
+                exc_info=True,
+            )
+            return fallback_audio, False
+        else:
+            in_memory = AudioArrayClip(samples, fps=fps)
+            return in_memory.with_duration(duration), True
+
    def _collect_words(
        self, transcription: TranscriptionResult, start: float, end: float
    ) -> List[WordTiming]:
@@ -424,3 +535,120 @@ class VideoRenderer:
        if current:
            lines.append(" ".join(current))
        return "\n".join(lines)
+
+    def _write_with_fallback(
+        self,
+        *,
+        composite: CompositeVideoClip,
+        output_path,
+        index: int,
+        output_dir,
+    ) -> None:
+        """Encode ``composite`` to ``output_path``, trying each encoding attempt in order.
+
+        The attempts come from ``_encoding_attempts``. The first attempt that
+        writes the file without raising ends the method. After a failed
+        attempt the output file and the temporary audio file are removed
+        before the next attempt.
+
+        Args:
+            composite: Clip to encode.
+            output_path: Destination path object (``unlink`` is called on it).
+            index: Clip index, used in log messages and the temp audio name.
+            output_dir: Directory that receives the temporary audio file.
+
+        Raises:
+            RuntimeError: When no attempt succeeds; chained from the last error.
+        """
+        temp_audio_path = output_dir / f"temp_audio_{index:02d}.m4a"
+        last_error: Exception | None = None
+
+        for attempt in self._encoding_attempts():
+            codec = attempt["codec"]
+            bitrate = attempt["bitrate"]
+            preset = attempt["preset"]
+
+            # ``-preset`` is only passed when one is configured.
+            ffmpeg_params = ["-preset", preset] if preset else []
+            ffmpeg_params += ["-pix_fmt", "yuv420p"]
+
+            try:
+                logger.info(
+                    "Renderizando clip %02d com codec %s (bitrate=%s, preset=%s)",
+                    index,
+                    codec,
+                    bitrate,
+                    preset or "default",
+                )
+                composite.write_videofile(
+                    str(output_path),
+                    codec=codec,
+                    audio_codec=self.settings.rendering.audio_codec,
+                    fps=self.settings.rendering.fps,
+                    bitrate=bitrate,
+                    ffmpeg_params=ffmpeg_params,
+                    temp_audiofile=str(temp_audio_path),
+                    remove_temp=True,
+                    threads=4,
+                )
+                return
+            except Exception as exc:  # noqa: BLE001 - propagate after fallbacks
+                last_error = exc
+                logger.warning(
+                    "Falha ao renderizar com codec %s: %s", codec, exc, exc_info=True
+                )
+                # ``missing_ok=True`` already tolerates an absent file, so no
+                # separate ``exists()`` probe is needed before unlinking.
+                output_path.unlink(missing_ok=True)
+                temp_audio_path.unlink(missing_ok=True)
+
+        raise RuntimeError("Todas as tentativas de renderizacao falharam") from last_error
+
+    def _encoding_attempts(self) -> List[Dict[str, str | None]]:
+        """Return the ordered, de-duplicated encoding configurations to try.
+
+        Each entry has the keys ``"codec"``, ``"bitrate"`` and ``"preset"``.
+        Currently the only candidate is the one taken from
+        ``self.settings.rendering``; entries with an identical
+        (codec, bitrate, preset) triple are kept once, first occurrence wins.
+        """
+        rendering = self.settings.rendering
+        candidates: List[Dict[str, str | None]] = [
+            {
+                "codec": rendering.video_codec,
+                "bitrate": rendering.bitrate,
+                "preset": rendering.preset,
+            },
+        ]
+
+        # Dicts preserve insertion order, so ``setdefault`` keeps the first
+        # candidate seen for each signature.
+        unique: Dict[tuple, Dict[str, str | None]] = {}
+        for candidate in candidates:
+            signature = (candidate["codec"], candidate["bitrate"], candidate["preset"])
+            unique.setdefault(signature, candidate)
+
+        return list(unique.values())
+
+    @staticmethod
+    def _with_audio(
+        composite: CompositeVideoClip,
+        audio_clip,
+    ) -> CompositeVideoClip:
+        """Return ``composite`` with ``audio_clip`` attached.
+
+        ``with_audio`` is preferred and ``set_audio`` is the fallback, so the
+        call works with whichever of the two methods the clip object offers.
+
+        Raises:
+            AttributeError: When the clip offers neither method.
+        """
+        for method_name in ("with_audio", "set_audio"):
+            if hasattr(composite, method_name):
+                return getattr(composite, method_name)(audio_clip)
+        raise AttributeError("CompositeVideoClip does not support audio assignment")
+
+    @staticmethod
+    def _make_textclip(
+        *,
+        text: str,
+        font_path,
+        font_size: int,
+        color: str,
+        size: Tuple[int, int],
+    ) -> TextClip:
+        """Create a centered caption ``TextClip`` compatible with MoviePy 1.x and 2.x.
+
+        MoviePy 2.x removed the ``align`` keyword from ``TextClip``; per the
+        MoviePy 2.x ``TextClip`` reference the equivalent option is
+        ``text_align``. Each spelling is tried in turn so the text stays
+        centered on either version, and alignment is dropped only when
+        neither keyword is accepted.
+
+        Args:
+            text: Caption text.
+            font_path: Path to the font file; converted with ``str``.
+            font_size: Font size passed to ``TextClip``.
+            color: Text color passed to ``TextClip``.
+            size: ``(width, height)`` of the caption box.
+
+        Returns:
+            The constructed ``TextClip`` (``method="caption"``).
+        """
+        kwargs = dict(
+            text=text,
+            font=str(font_path),
+            font_size=font_size,
+            color=color,
+            method="caption",
+            size=size,
+        )
+        # "align" is the MoviePy 1.x spelling, "text_align" the 2.x one.
+        for align_keyword in ("align", "text_align"):
+            try:
+                return TextClip(**kwargs, **{align_keyword: "center"})
+            except TypeError:
+                logger.debug(
+                    "TextClip '%s' not supported; trying next option", align_keyword
+                )
+        # Last resort: build the clip with the library's default alignment.
+        return TextClip(**kwargs)