Cria novos components
This commit is contained in:
122
video_render/transcription.py
Normal file
122
video_render/transcription.py
Normal file
@@ -0,0 +1,122 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
from .config import Settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WordTiming:
    """Immutable timing record for a single transcribed word."""

    # Start/end offsets of the word within the audio.
    # NOTE(review): presumably seconds, as produced by the transcriber — confirm.
    start: float
    end: float
    # The word text itself.
    word: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TranscriptSegment:
    """Immutable record for one transcribed segment and its word timings."""

    # Identifier of the segment within a transcription.
    id: int
    # Start/end offsets of the segment within the audio.
    # NOTE(review): presumably seconds, as produced by the transcriber — confirm.
    start: float
    end: float
    # Text of the segment.
    text: str
    # Per-word timings belonging to this segment (may be empty).
    words: List[WordTiming]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TranscriptionResult:
    """Immutable result of a transcription: the segments plus the joined text."""

    # Transcribed segments, in the order they were produced.
    segments: List[TranscriptSegment]
    # Complete transcript as a single string.
    full_text: str
|
||||
|
||||
|
||||
class TranscriptionService:
    """Transcribe audio with a lazily loaded Faster-Whisper model.

    The model is created on the first call that needs it and then reused
    for the lifetime of the service instance.
    """

    def __init__(self, settings: Settings) -> None:
        """Store *settings*; the model is not loaded until it is first needed."""
        self.settings = settings
        self._model: Optional[WhisperModel] = None

    def _load_model(self) -> WhisperModel:
        """Return the cached model, instantiating it on the first call.

        Falls back to ``"auto"`` / ``"default"`` when the configured device or
        compute type is falsy.
        """
        if self._model is None:
            whisper = self.settings.whisper
            # Resolve the fallbacks once so the log line and the constructor
            # call are guaranteed to agree.
            device = whisper.device or "auto"
            compute_type = whisper.compute_type or "default"
            logger.info(
                "Carregando modelo Faster-Whisper '%s' (device=%s, compute_type=%s)",
                whisper.model_size,
                device,
                compute_type,
            )
            # str(None) would yield the literal "None"; forward None untouched
            # so the library uses its own default location instead.
            download_root = whisper.download_root
            self._model = WhisperModel(
                whisper.model_size,
                device=device,
                compute_type=compute_type,
                download_root=None if download_root is None else str(download_root),
            )
        return self._model

    def transcribe(self, audio_path: Path) -> TranscriptionResult:
        """Transcribe *audio_path* and return its segments and full text.

        Args:
            audio_path: Path of the audio file handed to the model.

        Returns:
            A ``TranscriptionResult`` whose segments are numbered from 0 in
            the order the model yields them. Words that are empty after
            stripping are dropped; segments with empty text are kept but do
            not contribute to ``full_text``.
        """
        model = self._load_model()
        segments, _ = model.transcribe(
            str(audio_path),
            beam_size=5,
            word_timestamps=True,
        )

        parsed_segments: List[TranscriptSegment] = []
        full_text_parts: List[str] = []

        for idx, segment in enumerate(segments):
            # segment.words may be None; treat that as "no words".
            words = [
                WordTiming(start=w.start, end=w.end, word=w.word.strip())
                for w in segment.words or []
                if w.word.strip()
            ]
            text = segment.text.strip()
            # Skip empty texts so the joined transcript has no doubled spaces.
            if text:
                full_text_parts.append(text)
            parsed_segments.append(
                TranscriptSegment(
                    id=idx,
                    start=segment.start,
                    end=segment.end,
                    text=text,
                    words=words,
                )
            )

        return TranscriptionResult(
            segments=parsed_segments,
            full_text=" ".join(full_text_parts),
        )

    @staticmethod
    def persist(result: TranscriptionResult, destination: Path) -> None:
        """Write *result* to ``transcription.json`` and ``transcription.txt``.

        Args:
            result: Transcription to serialize.
            destination: Directory that receives both files; it is created
                (including parents) when it does not exist yet.
        """
        # Opening the files below would fail on a missing directory.
        destination.mkdir(parents=True, exist_ok=True)

        json_path = destination / "transcription.json"
        text_path = destination / "transcription.txt"

        payload = {
            "segments": [
                {
                    "id": segment.id,
                    "start": segment.start,
                    "end": segment.end,
                    "text": segment.text,
                    "words": [
                        {"start": word.start, "end": word.end, "text": word.word}
                        for word in segment.words
                    ],
                }
                for segment in result.segments
            ],
            "full_text": result.full_text,
        }

        # ensure_ascii=False keeps accented characters readable in the file.
        with json_path.open("w", encoding="utf-8") as fp:
            json.dump(payload, fp, ensure_ascii=False, indent=2)

        with text_path.open("w", encoding="utf-8") as fp:
            fp.write(result.full_text)

        logger.info("Transcrição salva em %s", destination)
|
||||
|
||||
Reference in New Issue
Block a user