Inicia novos recursos

Dentre eles estão a adição do faster-whisper, a geração de legendas e a integração com Gemini e OpenRouter.
2025-10-17 09:27:50 -03:00
commit 0c0a9c3b5c
15 changed files with 997 additions and 0 deletions
--- a/utils.py
+++ b/utils.py
@@ -0,0 +1,93 @@
+import re
+import unicodedata
+from typing import List, Tuple
+
+
+def sanitize_filename(name: str) -> str:
+    """Return a filesystem-friendly version of ``name``.
+
+    Accents are stripped (NFKD decomposition, combining marks dropped),
+    spaces become underscores, every character other than ASCII letters,
+    digits, underscores and dots is removed, and the result is lowercased.
+    A falsy ``name`` (empty string or ``None``) yields an empty string.
+    """
+    if not name:
+        return ""
+    # Decompose so accents become separate combining marks, then drop them
+    decomposed = unicodedata.normalize("NFKD", name)
+    no_accents = "".join(c for c in decomposed if not unicodedata.combining(c))
+    # Whitelist filter: disallowed characters are deleted, not replaced
+    ascii_only = re.sub(r"[^A-Za-z0-9_.]+", "", no_accents.replace(" ", "_"))
+    # Lowercase last, as the contract promises; only ASCII remains here so
+    # the case mapping cannot introduce new characters
+    return ascii_only.lower()
+
+
+def timestamp_to_seconds(ts: str) -> float:
+    """Convert a ``HH:MM:SS,mmm`` timestamp to a number of seconds.
+
+    A comma or a dot is accepted as the decimal separator, and the
+    shorter ``MM:SS`` and ``SS`` forms are accepted too. ``None`` and
+    blank strings yield ``0.0``.
+
+    Raises:
+        ValueError: if a field is not numeric or there are more than
+            three colon-separated fields.
+    """
+    if ts is None:
+        return 0.0
+    ts = ts.strip()
+    if not ts:
+        return 0.0
+    # float() only understands a dot, so normalise the decimal comma first
+    fields = ts.replace(",", ".").split(":")
+    if len(fields) > 3:
+        raise ValueError(f"invalid timestamp: {ts!r}")
+    total = 0.0
+    # Fold left to right: each extra field scales what came before by 60
+    for field in fields:
+        total = total * 60 + float(field)
+    return total
+
+
+def seconds_to_timestamp(seconds: float) -> str:
+    """Convert seconds to an SRT ``HH:MM:SS,mmm`` timestamp (negatives clamp to 0)."""
+    # Round to whole milliseconds *before* splitting into fields; formatting a
+    # float such as 59.9996 afterwards would emit an invalid "60,000" seconds.
+    total_ms = max(0, int(round(seconds * 1000)))
+    total_s, ms = divmod(total_ms, 1000)
+    total_m, s = divmod(total_s, 60)
+    h, m = divmod(total_m, 60)
+    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
+
+
+def wrap_text(text: str, max_chars: int = 80) -> List[str]:
+    """Greedily word-wrap ``text`` into lines of at most ``max_chars``.
+
+    The text is split on whitespace and words are packed onto a line,
+    separated by single spaces, until the next word would push the line
+    past ``max_chars``. Words are never split or hyphenated, so a word
+    longer than ``max_chars`` sits alone on an over-long line. Empty
+    input gives an empty list.
+    """
+    if not text:
+        return []
+    wrapped: List[str] = []
+    line = ""
+    for token in text.split():
+        if not line:
+            # First word of a line is always accepted, whatever its length
+            line = token
+            continue
+        # Tentatively join with a single space and measure the result
+        candidate = f"{line} {token}"
+        if len(candidate) > max_chars:
+            # No room left: emit the finished line and start a new one
+            wrapped.append(line)
+            line = token
+        else:
+            line = candidate
+    # Flush the trailing line; blank when the text was only whitespace
+    if line:
+        wrapped.append(line)
+    return wrapped