From ebba8ef8bd1c3a419413de973d5ce19be9072cc0 Mon Sep 17 00:00:00 2001
From: LeoMortari <leo.mortari.forinn@gmail.com>
Date: Mon, 29 Sep 2025 00:57:16 -0300
Subject: [PATCH] Volta versao do download-video

---
 main.py  | 116 ++++++++++++++++---------------------------------------
 utils.py |  16 ++++++++
 2 files changed, 49 insertions(+), 83 deletions(-)
 create mode 100644 utils.py

diff --git a/main.py b/main.py
index c849a82..b037b6e 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,5 @@
 import os
 import uuid
-import re
 
 from typing import Optional
 from fastapi import FastAPI, HTTPException, Query
@@ -8,20 +7,13 @@ from youtube_transcript_api import YouTubeTranscriptApi
 from youtube_transcript_api.formatters import SRTFormatter
 from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
 from yt_dlp import YoutubeDL
-from unidecode import unidecode
+from utils import extract_video_id, sanitize_title
 
 app = FastAPI(
     title="YouTube Transcript, Download and Metadata API",
     version="1.0.0"
 )
 
-def extract_video_id(url: str) -> str:
-    import re
-    match = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", url)
-    if not match:
-        raise ValueError("URL inválida do YouTube")
-    return match.group(1)
-
 @app.get("/get-transcript")
 def get_transcript(
     url: Optional[str] = Query(None, description="URL completa do vídeo"),
@@ -146,7 +138,6 @@ def download_video(
 ):
     if not url and not videoId:
         raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")
-
     if url:
         target = url
         try:
@@ -157,106 +148,65 @@ def download_video(
         target = f"https://www.youtube.com/watch?v={videoId}"
         video_id = videoId
 
-    quality_targets = {"low": 480, "medium": 720, "high": 1080}
+    quality_map = {
+        "low": "bestvideo[height<=480]+bestaudio/best[height<=480]",
+        "medium": "bestvideo[height<=720]+bestaudio/best[height<=720]",
+        "high": "bestvideo+bestaudio/best"
+    }
     qualidade = qualidade.lower()
-
-    if qualidade not in quality_targets:
+    if qualidade not in quality_map:
         raise HTTPException(status_code=400, detail="Qualidade deve ser: low, medium ou high")
 
-    target_height = quality_targets[qualidade]
-
     videos_dir = "/app/videos"
     os.makedirs(videos_dir, exist_ok=True)
 
     unique_id = str(uuid.uuid4())
-    outtmpl = os.path.join(videos_dir, f"{unique_id}.%(ext)s")
-
-    fmt_expr = (
-        # tenta vídeo <= alvo + melhor áudio
-        f"bv*[height<={target_height}]+ba/"
-        # cai pra qualquer melhor vídeo <= alvo (progressivo se houver)
-        f"b[height<={target_height}]/"
-        # último recurso: qualquer best
-        f"b"
-    )
+    output_template = os.path.join(videos_dir, f"{unique_id}.%(ext)s")
 
     ydl_opts = {
-        "outtmpl": outtmpl,
+        "format": quality_map[qualidade],
+        "outtmpl": output_template,
         "quiet": True,
-        "no_warnings": True,
-        "ignoreerrors": False,
         "noplaylist": True,
         "merge_output_format": "mp4",
-        "force_ipv4": True,
-        "geo_bypass": True,
-        "extractor_args": {"youtube": {"player_client": ["android"], "player_skip": ["webpage"]}},
-        "http_headers": {
-            "Accept-Language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7",
-            "User-Agent": "com.google.android.youtube/19.17.36 (Linux; U; Android 13) gzip",
-        },
-        "hls_prefer_native": True,
-        "concurrent_fragment_downloads": 1, 
     }
 
     try:
         with YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(target, download=False)
-            if not info:
-                raise HTTPException(status_code=404, detail="Não foi possível obter informações do vídeo.")
-
-            if info.get("_type") == "playlist":
-                entries = info.get("entries") or []
-                if not entries:
-                    raise HTTPException(status_code=404, detail="Nenhum vídeo encontrado na playlist.")
-                info = entries[0]
-
-            title = info.get("title") or unique_id
-            clean_title = unidecode(title)
-            clean_title = re.sub(r"[^\w\s-]", "", clean_title).strip()
-            clean_title = re.sub(r"\s+", "_", clean_title)
+            base = ydl.extract_info(target, download=False)
+            title = base.get("title", unique_id)
+            clean_title = sanitize_title(title)
             filename = f"{clean_title}_{qualidade}.mp4"
             final_path = os.path.join(videos_dir, filename)
 
+            print('Info ok')
+            
             if os.path.exists(final_path):
-                return {"videoId": video_id, "filename": filename}
-
-            # Baixa de fato
+                return {
+                    "videoId": video_id,
+                    "filename": filename
+                }
+            
+            print('Lets download')
+            
             result = ydl.extract_info(target, download=True)
 
-            # Descobre o arquivo gerado
-            real_file_path = None
-            if isinstance(result, dict):
-                # yt-dlp costuma preencher requested_downloads
-                reqs = result.get("requested_downloads") or []
-                if reqs:
-                    real_file_path = reqs[0].get("filepath")
-                if not real_file_path:
-                    real_file_path = result.get("filepath")
-
-            if not real_file_path:
-                # fallback bruto para o template com mp4
-                real_file_path = outtmpl.replace("%(ext)s", "mp4")
-
-            if not os.path.exists(real_file_path):
-                # Ajuda a diagnosticar quando o formato pedido não existe
-                # (por segurança não expomos toda a lista ao cliente)
-                raise HTTPException(
-                    status_code=500,
-                    detail="Falha ao localizar o arquivo baixado. O formato selecionado pode não estar disponível para este vídeo."
-                )
+            if "requested_downloads" in result and len(result["requested_downloads"]) > 0:
+                real_file_path = result["requested_downloads"][0]["filepath"]
+            elif "filepath" in result:
+                real_file_path = result["filepath"]
+            else:
+                real_file_path = output_template.replace("%(ext)s", "mp4")
 
             os.rename(real_file_path, final_path)
 
-    except HTTPException:
-        raise
     except Exception as e:
-        # Erros comuns: falta do ffmpeg no container
-        msg = str(e)
-        if "ffmpeg" in msg.lower():
-            msg += " (verifique se o ffmpeg está instalado no container)"
-        raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {msg}")
+        raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {e}")
 
-    return {"videoId": video_id, "filename": filename}
+    return {
+        "videoId": video_id,
+        "filename": filename
+    }
     
 @app.get("/search")
 def search_youtube_yt_dlp(
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..f53054e
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,57 @@
+"""Helpers for extracting YouTube video ids and building file-name-safe titles."""
+
+import re
+from unidecode import unidecode
+
+# Accepted URL shapes: ``watch?v=ID``, ``youtu.be/ID`` and the path-based
+# ``/shorts/ID``, ``/embed/ID`` and ``/live/ID`` forms.
+_VIDEO_ID_RE = re.compile(
+    r"(?:v=|youtu\.be/|/(?:shorts|embed|live)/)([A-Za-z0-9_-]{11})"
+)
+
+
+def extract_video_id(url: str) -> str:
+    """Return the 11-character video id embedded in a YouTube URL.
+
+    Args:
+        url: Watch, short-link, Shorts, embed or live URL.
+
+    Returns:
+        The first 11-character id found in ``url``.
+
+    Raises:
+        ValueError: If ``url`` is empty or contains no recognisable id.
+    """
+    # ``url or ""`` turns a missing value into the documented ValueError
+    # instead of a TypeError raised by the regex engine.
+    match = _VIDEO_ID_RE.search(url or "")
+    if not match:
+        raise ValueError("URL inválida do YouTube")
+    return match.group(1)
+
+
+def sanitize_title(s: str, max_length: int = 200) -> str:
+    """Convert a video title into an ASCII token usable in a file name.
+
+    The title is transliterated with ``unidecode``; characters other than
+    word characters, whitespace and ``-`` are dropped; whitespace runs become
+    ``_`` and repeated underscores are collapsed.
+
+    Args:
+        s: Raw title. ``None`` or an empty string is treated as ``"video"``.
+        max_length: Maximum length of the returned token.
+
+    Returns:
+        The sanitised title, or ``"video"`` when nothing usable remains.
+    """
+    s = unidecode(s or "video")
+    s = re.sub(r"[^\w\s-]", "", s).strip()
+    s = re.sub(r"_+", "_", re.sub(r"\s+", "_", s))
+
+    # Transliteration can expand a title considerably (one CJK character
+    # becomes several letters), and an over-long name can make a later file
+    # rename fail, so cap the length. Shorter titles are returned unchanged.
+    if len(s) > max_length:
+        s = s[:max_length].rstrip("_")
+
+    return s or "video"
\ No newline at end of file