import os
import uuid
import re
from typing import Optional

from fastapi import FastAPI, HTTPException, Query
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
from yt_dlp import YoutubeDL
from unidecode import unidecode

app = FastAPI(
    title="YouTube Transcript, Download and Metadata API",
    version="1.0.0"
)

# Directory where downloaded videos are stored.
VIDEOS_DIR = "/app/videos"

# Maximum video height (in pixels) accepted for each quality label.
QUALITY_TARGETS = {
    "low": 480,
    "medium": 720,
    "high": 1080
}


def extract_video_id(url: str) -> str:
    """Return the 11-character video ID found in a YouTube URL.

    Recognizes ``...v=<id>`` and ``youtu.be/<id>`` forms.

    Raises:
        ValueError: if no video ID can be found in *url*.
    """
    match = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", url)
    if not match:
        raise ValueError("URL inválida do YouTube")
    return match.group(1)


def _select_format_id(info, target_height):
    """Pick a yt-dlp format selector string not exceeding *target_height*.

    Prefers the tallest format that carries audio; otherwise combines the
    tallest video-only format with the highest-bitrate audio-only format.
    Returns ``'best'`` when nothing suitable is found.
    """
    if not info or 'formats' not in info:
        return 'best'

    formats = info['formats']

    # First choice: a single format that already includes an audio track.
    best_format = None
    best_height = 0
    for fmt in formats:
        height = fmt.get('height')
        if height and fmt.get('acodec') != 'none':
            if best_height < height <= target_height:
                best_format = fmt
                best_height = height
    if best_format:
        return best_format['format_id']

    # Second choice: separate video-only and audio-only streams to be merged.
    video_format = None
    audio_format = None
    for fmt in formats:
        vcodec = fmt.get('vcodec')
        acodec = fmt.get('acodec')
        if vcodec != 'none' and acodec == 'none':
            height = fmt.get('height')
            if height and height <= target_height and (
                video_format is None or height > video_format.get('height', 0)
            ):
                video_format = fmt
        elif acodec != 'none' and vcodec == 'none':
            # 'tbr' may be present with a None value, hence the `or 0`.
            if audio_format is None or (fmt.get('tbr') or 0) > (audio_format.get('tbr') or 0):
                audio_format = fmt

    if video_format and audio_format:
        return f"{video_format['format_id']}+{audio_format['format_id']}"
    return 'best'


@app.get("/get-transcript")
def get_transcript(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube")
):
    """Return the Portuguese transcript of a video formatted as SRT.

    Responds with 400 when neither parameter is given or the URL is invalid,
    404 when transcripts are disabled or missing, and 500 on other failures.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
    else:
        video_id = videoId

    try:
        ytt_api = YouTubeTranscriptApi()
        result = ytt_api.fetch(video_id, languages=['pt'])
    except TranscriptsDisabled as e:
        raise HTTPException(status_code=404, detail="Transcrição desativada para este vídeo") from e
    except NoTranscriptFound as e:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao obter transcrição: {e}") from e

    # An empty transcript is reported as "not found". The HTTP error is raised
    # directly because NoTranscriptFound cannot be built from a message alone.
    if not result:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada")

    formatter = SRTFormatter()
    return {
        "video_id": video_id,
        "transcript": formatter.format_transcript(result),
    }


@app.get("/get-video-metadata")
def get_video_metadata(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube")
):
    """Return the metadata yt-dlp extracts for a video.

    When yt-dlp fails, falls back to scraping the watch page title and returns
    a reduced payload; responds with 500 if that fallback also fails.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        target = url
    else:
        target = f"https://www.youtube.com/watch?v={videoId}"

    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'skip_download': True,
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'format': 'best[ext=mp4]/best[ext=webm]/best',
        'allow_unplayable_formats': True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            # Cheapest attempt first: unprocessed extraction.
            info = ydl.extract_info(target, download=False, process=False)

            if not info or 'title' not in info:
                info = ydl.extract_info(target, download=False)

            if not info or 'title' not in info:
                simple_ydl_opts = {
                    'quiet': True,
                    'skip_download': True,
                    'extract_flat': True,
                    'force_generic_extractor': True,
                }
                with YoutubeDL(simple_ydl_opts) as simple_ydl:
                    info = simple_ydl.extract_info(target, download=False)

            if not info:
                raise RuntimeError("Não foi possível extrair as informações do vídeo")

            if isinstance(info, dict):
                if 'title' not in info and 'url' in info:
                    # The result only points elsewhere; follow the redirect.
                    with YoutubeDL(ydl_opts) as ydl_redirect:
                        info = ydl_redirect.extract_info(info['url'], download=False)
                    # 'ignoreerrors' is on, so a failed redirect yields None.
                    if not info:
                        raise RuntimeError("Não foi possível extrair as informações do vídeo")

                if 'title' not in info:
                    info['title'] = f"Vídeo {videoId or 'desconhecido'}"

    except Exception as e:
        error_msg = str(e).replace('\n', ' ').strip()

        try:
            # Imported lazily: only the fallback path needs these packages.
            import requests
            from bs4 import BeautifulSoup

            video_id = videoId or (url.split('v=')[-1].split('&')[0] if 'v=' in url else '')
            if video_id:
                response = requests.get(f'https://www.youtube.com/watch?v={video_id}', timeout=10)
                soup = BeautifulSoup(response.text, 'html.parser')
                title = soup.find('title').text.replace(' - YouTube', '').strip()

                if title and title != 'YouTube':
                    return {
                        'id': video_id,
                        'title': title,
                        'url': f'https://www.youtube.com/watch?v={video_id}',
                        'thumbnail': f'https://img.youtube.com/vi/{video_id}/hqdefault.jpg',
                        'error': f'Informações limitadas: {error_msg}'
                    }
        except Exception:
            # Best-effort fallback: any failure here is deliberately ignored so
            # that the original extraction error is the one reported below.
            pass

        raise HTTPException(
            status_code=500,
            detail=f"Erro ao processar o vídeo: {error_msg}"
        ) from e

    return info


@app.get("/download-video")
def download_video(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
    qualidade: str = Query("high", description="Qualidade do vídeo: low, medium, high")
):
    """Download a video into the videos directory and return its filename.

    The file is named after the sanitized video title plus the quality label.
    If a file with that name already exists, it is reused without downloading.
    Responds with 400 on invalid input, 404 when no video information is
    available, and 500 on download failures.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        target = url
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
    else:
        target = f"https://www.youtube.com/watch?v={videoId}"
        video_id = videoId

    qualidade = qualidade.lower()
    if qualidade not in QUALITY_TARGETS:
        raise HTTPException(status_code=400, detail="Qualidade deve ser: low, medium ou high")
    target_height = QUALITY_TARGETS[qualidade]

    videos_dir = VIDEOS_DIR
    os.makedirs(videos_dir, exist_ok=True)

    # Download under a unique temporary name, then rename to the final name.
    unique_id = str(uuid.uuid4())
    output_template = os.path.join(videos_dir, f"{unique_id}.%(ext)s")

    ydl_opts = {
        'outtmpl': output_template,
        'quiet': True,
        'no_warnings': True,
        'ignoreerrors': False,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'allow_unplayable_formats': True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            extracted = ydl.extract_info(target, download=False)

        if not extracted:
            raise HTTPException(status_code=404, detail="Não foi possível obter informações do vídeo. Verifique a URL ou o ID do vídeo.")

        base = extracted
        if base.get('_type') == 'playlist':
            entries = base.get('entries')
            if entries is not None and len(entries) > 0:
                base = entries[0]
            else:
                raise HTTPException(status_code=404, detail="Nenhum vídeo encontrado na playlist.")

        title = base.get("title", unique_id)
        if not title:
            title = f"video_{unique_id[:8]}"

        clean_title = unidecode(title)
        clean_title = re.sub(r"[^\w\s-]", "", clean_title)
        clean_title = clean_title.replace(" ", "_")
        clean_title = re.sub(r"_+", "_", clean_title)
        clean_title = clean_title.strip("_")
        if not clean_title:
            # The title consisted only of characters that were stripped out.
            clean_title = f"video_{unique_id[:8]}"

        filename = f"{clean_title}_{qualidade}.mp4"
        final_path = os.path.join(videos_dir, filename)

        print('Informações do vídeo obtidas com sucesso')

        if os.path.exists(final_path):
            return {
                "videoId": video_id,
                "filename": filename
            }

        print(f'Buscando melhor formato disponível para qualidade: {qualidade}')
        best_format = _select_format_id(extracted, target_height)
        print(f'Melhor formato encontrado: {best_format}')

        # The format must be supplied at construction time; changing
        # `params['format']` on an existing instance may not take effect.
        with YoutubeDL({**ydl_opts, 'format': best_format}) as downloader:
            result = downloader.extract_info(target, download=True)

        if "requested_downloads" in result and len(result["requested_downloads"]) > 0:
            real_file_path = result["requested_downloads"][0]["filepath"]
        elif "filepath" in result:
            real_file_path = result["filepath"]
        else:
            real_file_path = output_template.replace("%(ext)s", "mp4")

        os.rename(real_file_path, final_path)

    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. 404) instead of masking as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {e}") from e

    return {
        "videoId": video_id,
        "filename": filename
    }


@app.get("/search")
def search_youtube_yt_dlp(
    q: str = Query(..., description="Termo de busca"),
    max_results: int = Query(5, ge=1, le=10, description="Número de resultados (máx 10)")
):
    """Search YouTube through yt-dlp and return basic data for each hit.

    Responds with 500 when the search fails.
    """
    ydl_opts = {
        "quiet": True,
        "extract_flat": "in_playlist",
        "skip_download": True,
    }

    search_query = f"ytsearch{max_results}:{q}"

    try:
        with YoutubeDL(ydl_opts) as ydl:
            search_result = ydl.extract_info(search_query, download=False)

        entries = ((search_result or {}).get("entries") or [])[:max_results]

        results = [
            {
                "videoId": item.get("id"),
                "title": item.get("title"),
                "duration": item.get("duration"),
                # Flat entries may carry only 'url', so use it as a fallback.
                "url": item.get("webpage_url") or item.get("url"),
                "channel": item.get("uploader"),
                "thumbnail": item.get("thumbnail"),
            }
            for item in entries
            if item  # skip entries that failed to extract
        ]

        return {"results": results}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao buscar vídeos: {e}") from e