Files
video-render/utils.py
LeoMortari 0c0a9c3b5c Inicia novos recursos
Dentre eles estão recurso de adicao do faster-whisper, geração de legenda e integracao com Gemini e Open Router
2025-10-17 09:27:50 -03:00

93 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
import unicodedata
from typing import List, Tuple
def sanitize_filename(name: str) -> str:
    """Return a filesystem-friendly, lowercase version of ``name``.

    The transformation is applied in this order:

    1. Unicode is decomposed (NFKD) and combining marks are dropped, which
       strips accents (``"á"`` becomes ``"a"``).
    2. Each space character is replaced by an underscore.
    3. Every character that is not an ASCII letter, digit, underscore or
       dot is removed.
    4. The result is lowercased.

    Args:
        name: The raw file or directory name. ``None`` and the empty
            string are both accepted.

    Returns:
        The sanitized name, or ``""`` when ``name`` is falsy. The result
        may also be empty if ``name`` contained no permitted characters.
    """
    if not name:
        return ""
    # NFKD splits an accented letter into its base letter followed by
    # combining marks; dropping the marks leaves the bare letter.
    decomposed = unicodedata.normalize("NFKD", name)
    without_accents = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    underscored = without_accents.replace(" ", "_")
    # Filter first, lowercase second: only ASCII letters survive the
    # filter, so lowercasing afterwards cannot let new characters through.
    filtered = re.sub(r"[^A-Za-z0-9_.]+", "", underscored)
    return filtered.lower()
def timestamp_to_seconds(ts: str) -> float:
    """Convert a ``HH:MM:SS,mmm`` style timestamp to seconds.

    A comma is accepted as the decimal separator (it is replaced by a dot
    before parsing). Shorter forms are accepted as well: ``MM:SS`` and a
    bare seconds value.

    Args:
        ts: The timestamp text. ``None``, an empty string and a
            whitespace-only string all yield ``0.0``.

    Returns:
        The timestamp expressed in seconds.

    Raises:
        ValueError: If a field is not a valid number, or if the timestamp
            has more than three ``:``-separated fields.
    """
    if ts is None:
        return 0.0
    ts = ts.strip()
    if not ts:
        return 0.0
    # float() only understands a dot as the decimal separator.
    fields = [float(part) for part in ts.replace(",", ".").split(":")]
    if len(fields) > 3:
        # There is no unit above hours; returning only the first field (as
        # a bare "seconds" value) would silently produce a wrong time.
        raise ValueError(f"too many ':' separated fields in timestamp: {ts!r}")
    # Left-pad with zeros so shorter forms share the same formula.
    hours, minutes, secs = [0.0] * (3 - len(fields)) + fields
    return hours * 3600 + minutes * 60 + secs
def seconds_to_timestamp(seconds: float) -> str:
    """Convert a time in seconds to the ``HH:MM:SS,mmm`` format used by SRT.

    Args:
        seconds: The time in seconds. Negative values are clamped to zero.

    Returns:
        The formatted timestamp, with a comma before the milliseconds and
        hours zero-padded to at least two digits.
    """
    if seconds < 0:
        seconds = 0
    # Round to whole milliseconds *before* splitting into fields. Rounding
    # only the seconds field afterwards cannot carry, so a value such as
    # 59.9996 would be rendered as "00:00:60,000".
    total_ms = int(round(seconds * 1000))
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return f"{hours:02d}:{mins:02d}:{secs:02d},{millis:03d}"
def wrap_text(text: str, max_chars: int = 80) -> List[str]:
    """Greedily word-wrap ``text`` into a list of lines.

    The text is split on whitespace and the words are packed, separated by
    single spaces, into lines of at most ``max_chars`` characters. Words
    are never hyphenated or split: a word longer than ``max_chars`` is
    placed on a line of its own. The returned lines carry no leading or
    trailing whitespace.

    Args:
        text: The text to wrap. A falsy value yields an empty list.
        max_chars: Maximum length of each line, in characters.

    Returns:
        The wrapped lines, in order.
    """
    if not text:
        return []
    wrapped: List[str] = []
    pending = ""  # the line being built; empty means no word placed yet
    for token in text.split():
        if not pending:
            # The first word of a line is always accepted, even if it is
            # longer than the limit.
            pending = token
        elif len(pending) + 1 + len(token) <= max_chars:
            # The word still fits, counting the space that joins it.
            pending = f"{pending} {token}"
        else:
            wrapped.append(pending)
            pending = token
    if pending:
        wrapped.append(pending)
    return wrapped