import re
import unicodedata
from typing import List, Tuple


def sanitize_filename(name: str) -> str:
    """Return a sanitized, lowercase version of a filename.

    Accents are stripped, spaces become underscores, the result is
    lowercased, and every character other than ``a-z``, ``0-9``, ``_`` and
    ``.`` is removed.

    Args:
        name: The raw file or directory name.

    Returns:
        The sanitized name, or ``""`` when ``name`` is empty or ``None``.
        The result may itself be empty, or consist only of dots, when the
        input contains no other permitted characters.
    """
    if not name:
        return ""

    # NFKD splits accented characters into base character + combining mark,
    # so dropping the combining marks leaves the unaccented base letters.
    decomposed = unicodedata.normalize("NFKD", name)
    without_accents = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )

    # Only the plain space is mapped to "_"; other whitespace (tabs,
    # newlines) is removed by the filter below.
    lowered = without_accents.replace(" ", "_").lower()

    # Drop everything that is not a lowercase ASCII letter, digit, "_" or ".".
    return re.sub(r"[^a-z0-9_.]+", "", lowered)


def timestamp_to_seconds(ts: str) -> float:
    """Convert a timestamp in ``HH:MM:SS,mmm`` format to seconds.

    A comma or a dot is accepted as the decimal separator. Shorter forms
    are accepted as well: ``MM:SS,mmm`` and plain ``SS,mmm``.

    Args:
        ts: The timestamp string. ``None``, empty and whitespace-only
            values are treated as zero.

    Returns:
        The time expressed in seconds.

    Raises:
        ValueError: If any colon-separated component is not a valid number.
    """
    if ts is None:
        return 0.0

    ts = ts.strip()
    if not ts:
        return 0.0

    # Timestamps use a comma as the decimal separator; float() needs a dot.
    parts = [float(p) for p in ts.replace(",", ".").split(":")]

    if len(parts) == 3:
        h, m, s = parts
        return h * 3600 + m * 60 + s
    if len(parts) == 2:
        m, s = parts
        return m * 60 + s
    # A single component is plain seconds.
    # NOTE(review): input with more than three components also lands here,
    # and only its first component is returned -- confirm this is intended.
    return parts[0]


def seconds_to_timestamp(seconds: float) -> str:
    """Convert a time in seconds to the ``HH:MM:SS,mmm`` format used by SRT.

    Args:
        seconds: The time in seconds. Negative values are clamped to zero.

    Returns:
        The formatted timestamp, rounded to the nearest millisecond.
    """
    if seconds < 0:
        seconds = 0

    # Round once to whole milliseconds and split with integer arithmetic so
    # that rounding carries into the next unit: 59.9996 s must become
    # "00:01:00,000", never "00:00:60,000".
    total_ms = round(seconds * 1000)
    total_s, ms = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"


def wrap_text(text: str, max_chars: int = 80) -> List[str]:
    """Word-wrap a string into a list of lines.

    Words are separated on any whitespace and re-joined with single spaces,
    greedily filling each line up to ``max_chars`` characters. Words are
    never hyphenated or split: a word longer than ``max_chars`` occupies a
    line of its own, and that line exceeds the limit.

    Args:
        text: The text to wrap.
        max_chars: The target maximum line length in characters.

    Returns:
        The wrapped lines, without leading or trailing whitespace. An empty
        list is returned when ``text`` is empty.
    """
    if not text:
        return []

    lines: List[str] = []
    current: List[str] = []
    current_len = 0

    for word in text.split():
        # Length of the current line if this word were appended; the extra
        # 1 accounts for the joining space.
        projected = current_len + 1 + len(word) if current else len(word)
        if current and projected > max_chars:
            # The word does not fit: flush the line and start a new one.
            lines.append(" ".join(current))
            current = [word]
            current_len = len(word)
        else:
            current.append(word)
            current_len = projected

    if current:
        lines.append(" ".join(current))
    return lines