From ebba8ef8bd1c3a419413de973d5ce19be9072cc0 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Mon, 29 Sep 2025 00:57:16 -0300 Subject: [PATCH] Volta versao do download-video --- main.py | 116 ++++++++++++++++--------------------------------------- utils.py | 16 ++++++++ 2 files changed, 49 insertions(+), 83 deletions(-) create mode 100644 utils.py diff --git a/main.py b/main.py index c849a82..b037b6e 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,5 @@ import os import uuid -import re from typing import Optional from fastapi import FastAPI, HTTPException, Query @@ -8,20 +7,13 @@ from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api.formatters import SRTFormatter from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound from yt_dlp import YoutubeDL -from unidecode import unidecode +from utils import extract_video_id, sanitize_title app = FastAPI( title="YouTube Transcript, Download and Metadata API", version="1.0.0" ) -def extract_video_id(url: str) -> str: - import re - match = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", url) - if not match: - raise ValueError("URL inválida do YouTube") - return match.group(1) - @app.get("/get-transcript") def get_transcript( url: Optional[str] = Query(None, description="URL completa do vídeo"), @@ -146,7 +138,6 @@ def download_video( ): if not url and not videoId: raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'") - if url: target = url try: @@ -157,106 +148,65 @@ def download_video( target = f"https://www.youtube.com/watch?v={videoId}" video_id = videoId - quality_targets = {"low": 480, "medium": 720, "high": 1080} + quality_map = { + "low": "bestvideo[height<=480]+bestaudio/best[height<=480]", + "medium": "bestvideo[height<=720]+bestaudio/best[height<=720]", + "high": "bestvideo+bestaudio/best" + } qualidade = qualidade.lower() - - if qualidade not in quality_targets: + if qualidade not in quality_map: raise HTTPException(status_code=400, 
detail="Qualidade deve ser: low, medium ou high") - target_height = quality_targets[qualidade] - videos_dir = "/app/videos" os.makedirs(videos_dir, exist_ok=True) unique_id = str(uuid.uuid4()) - outtmpl = os.path.join(videos_dir, f"{unique_id}.%(ext)s") - - fmt_expr = ( - # tenta vídeo <= alvo + melhor áudio - f"bv*[height<={target_height}]+ba/" - # cai pra qualquer melhor vídeo <= alvo (progressivo se houver) - f"b[height<={target_height}]/" - # último recurso: qualquer best - f"b" - ) + output_template = os.path.join(videos_dir, f"{unique_id}.%(ext)s") ydl_opts = { - "outtmpl": outtmpl, + "format": quality_map[qualidade], + "outtmpl": output_template, "quiet": True, - "no_warnings": True, - "ignoreerrors": False, "noplaylist": True, "merge_output_format": "mp4", - "force_ipv4": True, - "geo_bypass": True, - "extractor_args": {"youtube": {"player_client": ["android"], "player_skip": ["webpage"]}}, - "http_headers": { - "Accept-Language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7", - "User-Agent": "com.google.android.youtube/19.17.36 (Linux; U; Android 13) gzip", - }, - "hls_prefer_native": True, - "concurrent_fragment_downloads": 1, } try: with YoutubeDL(ydl_opts) as ydl: - info = ydl.extract_info(target, download=False) - if not info: - raise HTTPException(status_code=404, detail="Não foi possível obter informações do vídeo.") - - if info.get("_type") == "playlist": - entries = info.get("entries") or [] - if not entries: - raise HTTPException(status_code=404, detail="Nenhum vídeo encontrado na playlist.") - info = entries[0] - - title = info.get("title") or unique_id - clean_title = unidecode(title) - clean_title = re.sub(r"[^\w\s-]", "", clean_title).strip() - clean_title = re.sub(r"\s+", "_", clean_title) + base = ydl.extract_info(target, download=False) + title = base.get("title", unique_id) + clean_title = sanitize_title(title) filename = f"{clean_title}_{qualidade}.mp4" final_path = os.path.join(videos_dir, filename) + print('Info ok') + if 
os.path.exists(final_path): - return {"videoId": video_id, "filename": filename} - - # Baixa de fato + return { + "videoId": video_id, + "filename": filename + } + + print('Lets download') + result = ydl.extract_info(target, download=True) - # Descobre o arquivo gerado - real_file_path = None - if isinstance(result, dict): - # yt-dlp costuma preencher requested_downloads - reqs = result.get("requested_downloads") or [] - if reqs: - real_file_path = reqs[0].get("filepath") - if not real_file_path: - real_file_path = result.get("filepath") - - if not real_file_path: - # fallback bruto para o template com mp4 - real_file_path = outtmpl.replace("%(ext)s", "mp4") - - if not os.path.exists(real_file_path): - # Ajuda a diagnosticar quando o formato pedido não existe - # (por segurança não expomos toda a lista ao cliente) - raise HTTPException( - status_code=500, - detail="Falha ao localizar o arquivo baixado. O formato selecionado pode não estar disponível para este vídeo." - ) + if "requested_downloads" in result and len(result["requested_downloads"]) > 0: + real_file_path = result["requested_downloads"][0]["filepath"] + elif "filepath" in result: + real_file_path = result["filepath"] + else: + real_file_path = output_template.replace("%(ext)s", "mp4") os.rename(real_file_path, final_path) - except HTTPException: - raise except Exception as e: - # Erros comuns: falta do ffmpeg no container - msg = str(e) - if "ffmpeg" in msg.lower(): - msg += " (verifique se o ffmpeg está instalado no container)" - raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {msg}") + raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {e}") - return {"videoId": video_id, "filename": filename} + return { + "videoId": video_id, + "filename": filename + } @app.get("/search") def search_youtube_yt_dlp( diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..f53054e --- /dev/null +++ b/utils.py @@ -0,0 +1,16 @@ + +import re +from unidecode import unidecode 
def extract_video_id(url: str) -> str:
    """Return the 11-character YouTube video ID embedded in *url*.

    Recognised forms are a ``v=`` query parameter, a ``youtu.be/`` short
    link, and the ``/shorts/``, ``/embed/`` and ``/live/`` path styles.
    Only the first 11 ID characters after the marker are returned.

    Raises:
        ValueError: if *url* is not a string or no video ID can be found.
    """
    if not isinstance(url, str):
        # re.search would raise TypeError here; use the same ValueError as
        # the "no match" case so there is a single failure mode to handle.
        raise ValueError("URL inválida do YouTube")

    match = re.search(
        r"(?:v=|youtu\.be/|/(?:shorts|embed|live)/)([A-Za-z0-9_-]{11})",
        url,
    )
    if match is None:
        raise ValueError("URL inválida do YouTube")
    return match.group(1)


def sanitize_title(s: str) -> str:
    """Convert a video title into a slug suitable for use in a file name.

    Steps, in order:
      1. pass the title (or ``"video"`` when it is empty/None) through
         ``unidecode``;
      2. remove every character that is not a word character, whitespace
         or a hyphen, then trim surrounding whitespace;
      3. replace each run of whitespace with a single underscore;
      4. collapse consecutive underscores into one.

    Returns ``"video"`` when nothing is left after cleaning.
    """
    ascii_title = unidecode(s or "video")
    cleaned = re.sub(r"[^\w\s-]", "", ascii_title).strip()
    underscored = re.sub(r"\s+", "_", cleaned)
    collapsed = re.sub(r"_+", "_", underscored)
    return collapsed or "video"