Dentre eles estão o recurso de adição do faster-whisper, a geração de legendas e a integração com Gemini e OpenRouter.
93 lines
3.1 KiB
Python
93 lines
3.1 KiB
Python
import re
|
||
import unicodedata
|
||
from typing import List, Tuple
|
||
|
||
|
||
def sanitize_filename(name: str) -> str:
    """Return a filesystem-friendly, lowercase version of ``name``.

    The transformation is applied in this order:

    1. Unicode NFKD decomposition, then removal of combining marks, so
       accented letters collapse to their base letter.
    2. Every space character is replaced by an underscore.
    3. Any character that is not an ASCII letter, digit, underscore or
       dot is dropped.
    4. The result is lowercased.

    Args:
        name: The raw file or directory name. A falsy value (``None`` or
            the empty string) yields ``""``.

    Returns:
        The sanitized name; it may be empty if no character survives.
    """
    if not name:
        return ""
    # Decompose so that accents become separate combining code points,
    # which can then be filtered out individually.
    decomposed = unicodedata.normalize("NFKD", name)
    without_accents = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    underscored = without_accents.replace(" ", "_")
    stripped = re.sub(r"[^A-Za-z0-9_.]+", "", underscored)
    # Lowercase last: at this point only ASCII remains, so the result is
    # predictable and names differing only by case map to the same value.
    return stripped.lower()
|
||
|
||
|
||
def timestamp_to_seconds(ts: str) -> float:
    """Convert a ``HH:MM:SS,mmm`` style timestamp to seconds.

    A comma or a dot is accepted as the decimal separator. The shorter
    forms ``MM:SS`` and plain ``SS`` are accepted as well.

    Args:
        ts: The timestamp text. ``None``, an empty string or a
            whitespace-only string is treated as zero.

    Returns:
        The time expressed in seconds as a float.

    Raises:
        ValueError: If a component is not a valid number, or if the
            timestamp has more than three ``:``-separated components.
    """
    if ts is None:
        return 0.0
    cleaned = ts.strip()
    if not cleaned:
        return 0.0
    # SRT-style timestamps use a comma before the milliseconds; float()
    # only understands a dot.
    values = [float(part) for part in cleaned.replace(",", ".").split(":")]
    if len(values) > 3:
        # Previously such input fell into the "seconds only" branch and
        # silently returned just the first component.
        raise ValueError(
            f"Invalid timestamp {ts!r}: expected at most HH:MM:SS components"
        )
    # Left-pad with zeros so MM:SS and SS share the HH:MM:SS formula.
    hours, minutes, secs = [0.0] * (3 - len(values)) + values
    return hours * 3600 + minutes * 60 + secs
|
||
|
||
|
||
def seconds_to_timestamp(seconds: float) -> str:
    """Convert a time in seconds to the ``HH:MM:SS,mmm`` format used by SRT.

    Args:
        seconds: The time in seconds. Negative values are clamped to zero.

    Returns:
        The timestamp with zero-padded hours, minutes and seconds and a
        three-digit millisecond field after a comma. Hours use more than
        two digits when the value needs them.
    """
    if seconds < 0:
        seconds = 0
    # Round once to whole milliseconds and split with integer arithmetic.
    # Formatting the fractional seconds directly could round up to
    # "60,000" (e.g. for 59.9996) without carrying into the minutes.
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
|
||
|
||
|
||
def wrap_text(text: str, max_chars: int = 80) -> List[str]:
    """Greedy word-wrap of ``text`` into lines of at most ``max_chars``.

    The text is split on whitespace and the words are packed onto lines
    joined by single spaces. Words are never broken or hyphenated, so a
    word longer than ``max_chars`` ends up alone on an over-long line.

    Args:
        text: The text to wrap. A falsy or whitespace-only value yields
            an empty list.
        max_chars: Maximum line length in characters.

    Returns:
        The wrapped lines, without leading or trailing whitespace.
    """
    if not text:
        return []

    wrapped: List[str] = []
    pending = ""  # the line currently being assembled
    for token in text.split():
        if not pending:
            # First word of a line is always accepted, whatever its length.
            pending = token
        elif len(pending) + 1 + len(token) <= max_chars:
            # The word fits after a separating space.
            pending = f"{pending} {token}"
        else:
            # No room left: emit the line and start a new one.
            wrapped.append(pending)
            pending = token

    if pending:
        wrapped.append(pending)
    return wrapped