Inicia novos recursos
Dentre eles estão a adição do faster-whisper, a geração de legendas e a integração com Gemini e OpenRouter.
This commit is contained in:
93
utils.py
Normal file
93
utils.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import List, Tuple
|
||||
|
||||
|
||||
def sanitize_filename(name: str) -> str:
    """Return a sanitized, lowercase version of a filename.

    Accents are stripped, spaces are replaced with underscores, every
    character that is not an ASCII letter, digit, underscore or dot is
    dropped, and the result is lowercased.

    Args:
        name: Raw file or directory name. A falsy value (``None`` or an
            empty string) is accepted.

    Returns:
        The sanitized name, or ``""`` when ``name`` is falsy.
    """
    if not name:
        return ""
    # NFKD splits an accented character into its base letter followed by
    # combining marks; dropping the combining marks removes the accent.
    decomposed = unicodedata.normalize("NFKD", name)
    without_accents = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    with_underscores = without_accents.replace(" ", "_")
    # Filter first, then lowercase: the set of retained characters stays
    # exactly what the ASCII whitelist allows, and only their case changes.
    kept = re.sub(r"[^A-Za-z0-9_.]+", "", with_underscores)
    return kept.lower()
|
||||
|
||||
|
||||
def timestamp_to_seconds(ts: str) -> float:
    """Convert a timestamp in ``HH:MM:SS,mmm`` format to seconds.

    The Gemini and OpenRouter prompts use timestamps formatted with a comma
    as the decimal separator. Shorter forms are accepted as well:
    ``MM:SS,mmm`` and a bare ``SS,mmm``.

    Args:
        ts: Timestamp string. ``None``, an empty string or a
            whitespace-only string yields ``0.0``.

    Returns:
        The timestamp expressed in seconds.

    Raises:
        ValueError: If a field is not a valid number, or if the timestamp
            has more than three ``:``-separated fields.
    """
    if ts is None:
        return 0.0
    ts = ts.strip()
    if not ts:
        return 0.0
    # float() only understands a dot as the decimal separator.
    fields = ts.replace(",", ".").split(":")
    if len(fields) > 3:
        # Previously such input fell into the "only seconds" branch and
        # silently returned the first field.
        raise ValueError(f"invalid timestamp (too many fields): {ts!r}")
    values = [float(field) for field in fields]
    if len(values) == 3:
        hours, minutes, secs = values
        return hours * 3600 + minutes * 60 + secs
    if len(values) == 2:
        minutes, secs = values
        return minutes * 60 + secs
    # A single field: plain seconds.
    return values[0]
|
||||
|
||||
|
||||
def seconds_to_timestamp(seconds: float) -> str:
    """Convert a time in seconds to the ``HH:MM:SS,mmm`` format expected by SRT.

    Args:
        seconds: Time offset in seconds. Negative values are clamped to 0.

    Returns:
        The formatted timestamp, with a comma as the decimal separator and
        exactly three millisecond digits.
    """
    if seconds < 0:
        seconds = 0
    # Round to whole milliseconds BEFORE splitting into fields. Formatting
    # the seconds field on its own lets a value such as 59.9996 round up to
    # "60.000" without carrying into the minutes, giving "00:00:60,000".
    total_ms = int(round(seconds * 1000))
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return f"{hours:02d}:{mins:02d}:{secs:02d},{millis:03d}"
|
||||
|
||||
|
||||
def wrap_text(text: str, max_chars: int = 80) -> List[str]:
    """Greedy word-wrap of ``text`` into lines of at most ``max_chars``.

    Words are separated on any whitespace and re-joined with single
    spaces. Words are never split or hyphenated, so a word longer than
    ``max_chars`` sits alone on a line that exceeds the limit.

    Args:
        text: The text to wrap. A falsy value yields an empty list.
        max_chars: Maximum line length in characters.

    Returns:
        The wrapped lines, without leading or trailing whitespace.
    """
    if not text:
        return []
    wrapped: List[str] = []
    pending = ""  # the line currently being filled
    for token in text.split():
        if not pending:
            pending = token
        elif len(pending) + 1 + len(token) > max_chars:
            # The token (plus its separating space) does not fit: emit the
            # pending line and start a new one with this token.
            wrapped.append(pending)
            pending = token
        else:
            pending = f"{pending} {token}"
    if pending:
        wrapped.append(pending)
    return wrapped
|
||||
Reference in New Issue
Block a user