from __future__ import annotations import json import logging from dataclasses import dataclass from pathlib import Path from typing import List, Optional from faster_whisper import WhisperModel from .config import Settings logger = logging.getLogger(__name__) @dataclass(frozen=True) class WordTiming: start: float end: float word: str @dataclass(frozen=True) class TranscriptSegment: id: int start: float end: float text: str words: List[WordTiming] @dataclass(frozen=True) class TranscriptionResult: segments: List[TranscriptSegment] full_text: str class TranscriptionService: def __init__(self, settings: Settings) -> None: self.settings = settings self._model: Optional[WhisperModel] = None def _load_model(self) -> WhisperModel: if self._model is None: logger.info( "Carregando modelo Faster-Whisper '%s' (device=%s, compute_type=%s)", self.settings.whisper.model_size, self.settings.whisper.device or "auto", self.settings.whisper.compute_type or "default", ) self._model = WhisperModel( self.settings.whisper.model_size, device=self.settings.whisper.device or "auto", compute_type=self.settings.whisper.compute_type or "default", download_root=str(self.settings.whisper.download_root), ) return self._model def transcribe(self, audio_path: Path) -> TranscriptionResult: model = self._load_model() segments, _ = model.transcribe( str(audio_path), beam_size=5, word_timestamps=True, ) parsed_segments: List[TranscriptSegment] = [] full_text_parts: List[str] = [] for idx, segment in enumerate(segments): words = [ WordTiming(start=w.start, end=w.end, word=w.word.strip()) for w in segment.words or [] if w.word.strip() ] text = segment.text.strip() full_text_parts.append(text) parsed_segments.append( TranscriptSegment( id=idx, start=segment.start, end=segment.end, text=text, words=words, ) ) return TranscriptionResult( segments=parsed_segments, full_text=" ".join(full_text_parts).strip(), ) @staticmethod def persist(result: TranscriptionResult, destination: Path) -> None: json_path = destination / "transcription.json" text_path = destination / "transcription.txt" payload = { "segments": [ { "id": segment.id, "start": segment.start, "end": segment.end, "text": segment.text, "words": [ {"start": word.start, "end": word.end, "text": word.word} for word in segment.words ], } for segment in result.segments ], "full_text": result.full_text, } with json_path.open("w", encoding="utf-8") as fp: json.dump(payload, fp, ensure_ascii=False, indent=2) with text_path.open("w", encoding="utf-8") as fp: fp.write(result.full_text) logger.info("Transcrição salva em %s", destination)