"""FastAPI service exposing YouTube transcript, metadata, download and search endpoints."""

import logging
import os
import re
import uuid
from typing import Optional

from fastapi import FastAPI, HTTPException, Query
from unidecode import unidecode
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled
from youtube_transcript_api.formatters import SRTFormatter
from yt_dlp import YoutubeDL

logger = logging.getLogger(__name__)

app = FastAPI(
    title="YouTube Transcript, Download and Metadata API",
    version="1.0.0",
)

# Matches the 11-character video id in watch (?v=), youtu.be, /shorts/ and
# /live/ URLs. Compiled once because every endpoint that takes a URL uses it.
_VIDEO_ID_RE = re.compile(r"(?:v=|youtu\.be/|/shorts/|/live/)([A-Za-z0-9_-]{11})")


def extract_video_id(url: str) -> str:
    """Return the 11-character YouTube video id found in *url*.

    Args:
        url: A YouTube URL (watch, youtu.be, shorts or live form).

    Returns:
        The video id captured from the URL.

    Raises:
        ValueError: If no video id can be found in *url*.
    """
    match = _VIDEO_ID_RE.search(url)
    if not match:
        raise ValueError("URL inválida do YouTube")
    return match.group(1)


@app.get("/get-transcript")
def get_transcript(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
):
    """Return the Portuguese transcript of a video formatted as SRT.

    Args:
        url: Full video URL; takes precedence over *videoId* when both are given.
        videoId: YouTube video id, used when *url* is not provided.

    Returns:
        A dict with ``video_id`` and the SRT-formatted ``transcript``.

    Raises:
        HTTPException: 400 when neither parameter is given or the URL has no
            video id; 404 when transcripts are disabled, missing or empty;
            500 for any other failure while fetching.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
    else:
        video_id = videoId

    try:
        result = YouTubeTranscriptApi().fetch(video_id, languages=['pt'])
    except TranscriptsDisabled as e:
        raise HTTPException(status_code=404, detail="Transcrição desativada para este vídeo") from e
    except NoTranscriptFound as e:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao obter transcrição: {e}") from e

    # An empty transcript is reported as "not found". This check lives outside
    # the try block so the 404 is not swallowed by the generic handler above
    # (NoTranscriptFound cannot be constructed from a bare message string).
    if not result:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada")

    return {
        "video_id": video_id,
        "transcript": SRTFormatter().format_transcript(result),
    }


def _extract_metadata(target: str, video_id: Optional[str]):
    """Extract video info for *target* with yt-dlp, trying progressively simpler strategies.

    Raises an exception when no information could be extracted at all.
    """
    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'skip_download': True,
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'format': 'best[ext=mp4]/best[ext=webm]/best',
        'allow_unplayable_formats': True,
    }

    with YoutubeDL(ydl_opts) as ydl:
        # First attempt: raw extractor output without post-processing.
        info = ydl.extract_info(target, download=False, process=False)

        # Second attempt: full processing.
        if not info or 'title' not in info:
            info = ydl.extract_info(target, download=False)

    # Third attempt: minimal option set.
    if not info or 'title' not in info:
        simple_ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'extract_flat': True,
            'force_generic_extractor': True,
        }
        with YoutubeDL(simple_ydl_opts) as simple_ydl:
            info = simple_ydl.extract_info(target, download=False)

    if not info:
        raise RuntimeError("Não foi possível extrair as informações do vídeo")

    if isinstance(info, dict):
        # Some results only point at another URL; follow it once.
        if 'title' not in info and 'url' in info:
            with YoutubeDL(ydl_opts) as ydl_redirect:
                info = ydl_redirect.extract_info(info['url'], download=False)
            # With 'ignoreerrors' the redirect extraction may yield None.
            if not info:
                raise RuntimeError("Não foi possível extrair as informações do vídeo")
        if 'title' not in info:
            info['title'] = f"Vídeo {video_id or 'desconhecido'}"

    return info


def _scrape_basic_metadata(url: Optional[str], video_id: Optional[str], error_msg: str):
    """Best-effort fallback: read the title from the watch page HTML.

    Returns a reduced metadata dict, or ``None`` when the id cannot be
    determined or the page yields no usable title.
    """
    # Imported lazily: these packages are needed only on this fallback path.
    import requests
    from bs4 import BeautifulSoup

    if not video_id and url:
        try:
            video_id = extract_video_id(url)
        except ValueError:
            video_id = ''
    if not video_id:
        return None

    watch_url = f'https://www.youtube.com/watch?v={video_id}'
    response = requests.get(watch_url, timeout=10)
    title_tag = BeautifulSoup(response.text, 'html.parser').find('title')
    if title_tag is None:
        return None

    title = title_tag.text.replace(' - YouTube', '').strip()
    if not title or title == 'YouTube':
        return None

    return {
        'id': video_id,
        'title': title,
        'url': watch_url,
        'thumbnail': f'https://img.youtube.com/vi/{video_id}/hqdefault.jpg',
        'error': f'Informações limitadas: {error_msg}',
    }


@app.get("/get-video-metadata")
def get_video_metadata(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
):
    """Return metadata for a video.

    The yt-dlp info is returned when extraction succeeds. If it fails, a
    reduced dict (id, title, url, thumbnail, error) scraped from the watch
    page is returned instead when possible.

    Args:
        url: Full video URL; takes precedence over *videoId* when both are given.
        videoId: YouTube video id, used when *url* is not provided.

    Raises:
        HTTPException: 400 when neither parameter is given; 500 when both the
            yt-dlp extraction and the HTML fallback fail.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    target = url if url else f"https://www.youtube.com/watch?v={videoId}"

    try:
        return _extract_metadata(target, videoId)
    except Exception as e:
        error_msg = str(e).replace('\n', ' ').strip()

        try:
            limited = _scrape_basic_metadata(url, videoId, error_msg)
        except Exception as fallback_error:
            # The fallback is deliberately best-effort: record why it failed
            # and report the original extraction error below.
            logger.warning("Metadata fallback failed: %s", fallback_error)
            limited = None

        if limited is not None:
            return limited

        raise HTTPException(
            status_code=500,
            detail=f"Erro ao processar o vídeo: {error_msg}",
        ) from e


@app.get("/download-video")
def download_video(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
    qualidade: str = Query("high", description="Qualidade do vídeo: low, medium, high"),
):
    """Download a video into ``/app/videos`` and return the resulting file name.

    The file is named after the sanitized video title plus the quality label.
    If a file with that name already exists, nothing is downloaded.

    Args:
        url: Full video URL; takes precedence over *videoId* when both are given.
        videoId: YouTube video id, used when *url* is not provided.
        qualidade: One of ``low``, ``medium`` or ``high`` (case-insensitive).

    Returns:
        A dict with ``videoId`` and ``filename``.

    Raises:
        HTTPException: 400 for missing/invalid input; 500 when the extraction,
            download or rename fails.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        target = url
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
    else:
        target = f"https://www.youtube.com/watch?v={videoId}"
        video_id = videoId

    quality_map = {
        "low": "bestvideo[height<=480]+bestaudio/best[height<=480]",
        "medium": "bestvideo[height<=720]+bestaudio/best[height<=720]",
        "high": "bestvideo+bestaudio/best",
    }

    qualidade = qualidade.lower()
    if qualidade not in quality_map:
        raise HTTPException(status_code=400, detail="Qualidade deve ser: low, medium ou high")

    videos_dir = "/app/videos"
    os.makedirs(videos_dir, exist_ok=True)

    # Download under a unique temporary name, then rename to the final name.
    unique_id = str(uuid.uuid4())
    output_template = os.path.join(videos_dir, f"{unique_id}.%(ext)s")

    # 'skip_download' must NOT be set here: with it yt-dlp never writes the
    # file and the rename below always fails.
    ydl_opts = {
        "format": quality_map[qualidade],
        "outtmpl": output_template,
        'quiet': True,
        'no_warnings': True,
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'allow_unplayable_formats': True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            base = ydl.extract_info(target, download=False)
            # With 'ignoreerrors' a failed extraction returns None instead of raising.
            if not base:
                raise RuntimeError("Não foi possível extrair as informações do vídeo")

            title = base.get("title") or unique_id

            # Build an ASCII, filesystem-safe name from the title.
            clean_title = unidecode(title)
            clean_title = re.sub(r"[^\w\s-]", "", clean_title)
            clean_title = clean_title.replace(" ", "_")
            clean_title = re.sub(r"_+", "_", clean_title)
            clean_title = clean_title.strip("_") or unique_id

            # NOTE(review): the final name always ends in .mp4, whatever
            # container yt-dlp actually produced — confirm this is intended.
            filename = f"{clean_title}_{qualidade}.mp4"
            final_path = os.path.join(videos_dir, filename)
            print('Info ok')

            if os.path.exists(final_path):
                return {
                    "videoId": video_id,
                    "filename": filename,
                }

            print('Lets download')
            result = ydl.extract_info(target, download=True)
            if not result:
                raise RuntimeError("Não foi possível baixar o vídeo")

            requested = result.get("requested_downloads")
            if requested:
                real_file_path = requested[0]["filepath"]
            elif "filepath" in result:
                real_file_path = result["filepath"]
            else:
                real_file_path = output_template.replace("%(ext)s", "mp4")

            os.rename(real_file_path, final_path)

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {e}") from e

    return {
        "videoId": video_id,
        "filename": filename,
    }


@app.get("/search")
def search_youtube_yt_dlp(
    q: str = Query(..., description="Termo de busca"),
    max_results: int = Query(5, ge=1, le=10, description="Número de resultados (máx 10)"),
):
    """Search YouTube through yt-dlp and return a summary of each hit.

    Args:
        q: Search term.
        max_results: Number of results to return (1 to 10).

    Returns:
        A dict whose ``results`` list holds one dict per video with the keys
        ``videoId``, ``title``, ``duration``, ``url``, ``channel`` and
        ``thumbnail``.

    Raises:
        HTTPException: 500 when the search fails.
    """
    ydl_opts = {
        "quiet": True,
        "extract_flat": "in_playlist",
        "skip_download": True,
    }

    search_query = f"ytsearch{max_results}:{q}"

    try:
        with YoutubeDL(ydl_opts) as ydl:
            search_result = ydl.extract_info(search_query, download=False)

        entries = ((search_result or {}).get("entries") or [])[:max_results]

        results = [
            {
                "videoId": item.get("id"),
                "title": item.get("title"),
                "duration": item.get("duration"),
                # Fall back to "url" for entries that carry no "webpage_url".
                "url": item.get("webpage_url") or item.get("url"),
                "channel": item.get("uploader"),
                "thumbnail": item.get("thumbnail"),
            }
            for item in entries
            if item
        ]

        return {"results": results}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao buscar vídeos: {e}") from e