# video-render/utils.py

import re
import unicodedata
from typing import List, Tuple


def sanitize_filename(name: str) -> str:
    """Return a sanitized, lowercase version of a filename.

    The name is processed in this order:

    1. Unicode is decomposed (NFKD) and combining marks are dropped, so
       accented letters collapse to their base letter.
    2. Spaces are replaced with underscores.
    3. Every character that is not an ASCII letter, digit, underscore or
       dot is removed.
    4. The result is lowercased.

    Dots are preserved, so a value consisting only of dots (such as
    ``".."``) passes through unchanged.

    Args:
        name: The raw name to sanitize. ``None`` or an empty string is
            accepted.

    Returns:
        The sanitized name, or ``""`` when ``name`` is falsy.
    """
    if not name:
        return ""
    # Decompose Unicode characters and strip the combining accent marks.
    decomposed = unicodedata.normalize("NFKD", name)
    no_accents = "".join(c for c in decomposed if not unicodedata.combining(c))
    # Replace spaces with underscores.
    no_spaces = no_accents.replace(" ", "_")
    # Drop anything that is not an ASCII letter, digit, dot or underscore.
    kept = re.sub(r"[^A-Za-z0-9_.]+", "", no_spaces)
    # Lowercase last: only ASCII characters remain, so this cannot
    # reintroduce characters the filter above would have removed.
    return kept.lower()


def timestamp_to_seconds(ts: str) -> float:
    """Convert a timestamp in ``HH:MM:SS,mmm`` format to seconds.

    A comma is accepted as the decimal separator (the format used by the
    Gemini and OpenRouter prompts) and is treated like a dot. Shorter
    forms are also accepted: ``MM:SS,mmm`` and a bare seconds value.

    Args:
        ts: The timestamp string. ``None``, an empty string or a
            whitespace-only string yields ``0.0``.

    Returns:
        The time expressed in seconds as a float.

    Raises:
        ValueError: If a field is not a valid number, or if the
            timestamp has more than three ``:``-separated fields.
    """
    if ts is None:
        return 0.0
    ts = ts.strip()
    if not ts:
        return 0.0
    # Replace comma by dot for decimal seconds, then split into fields.
    parts = [float(p) for p in ts.replace(",", ".").split(":")]
    if len(parts) > 3:
        # Previously this silently returned the first field, which is
        # never the right answer for such input.
        raise ValueError(f"invalid timestamp (too many ':' fields): {ts!r}")
    if len(parts) == 3:
        h, m, s = parts
        return h * 3600 + m * 60 + s
    if len(parts) == 2:
        m, s = parts
        return m * 60 + s
    # Only seconds.
    return parts[0]

def seconds_to_timestamp(seconds: float) -> str:
    """Convert a time in seconds to the ``HH:MM:SS,mmm`` format used by SRT.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so rounding carries correctly into seconds, minutes and
    hours (e.g. ``59.9996`` becomes ``00:01:00,000`` rather than the
    invalid ``00:00:60,000``).

    Args:
        seconds: The time in seconds. Negative values are clamped to 0.

    Returns:
        The formatted timestamp, with a comma as the decimal separator
        and exactly three millisecond digits. The hours field is at
        least two digits wide and grows for values of 100 hours or more.
    """
    if seconds < 0:
        seconds = 0
    # Work in integer milliseconds so no field can round up to 60 / 1000.
    total_ms = int(round(seconds * 1000))
    total_s, ms = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"


def wrap_text(text: str, max_chars: int = 80) -> List[str]:
    """Greedy word-wrap of ``text`` into a list of lines.

    The text is split on whitespace and words are packed onto a line,
    joined by single spaces, for as long as the line stays within
    ``max_chars`` characters. Words are never hyphenated or broken: a
    word longer than ``max_chars`` is placed on a line of its own.

    Args:
        text: The text to wrap. A falsy value yields an empty list.
        max_chars: Maximum line length in characters.

    Returns:
        The wrapped lines, without leading or trailing whitespace.
    """
    if not text:
        return []
    wrapped: List[str] = []
    pending = ""  # the line currently being filled
    for token in text.split():
        if not pending:
            # First word of a line is always accepted, whatever its length.
            pending = token
        elif len(pending) + 1 + len(token) <= max_chars:
            pending = f"{pending} {token}"
        else:
            # The word does not fit: emit the line and start a new one.
            wrapped.append(pending)
            pending = token
    if pending:
        wrapped.append(pending)
    return wrapped