# youtube-api/main.py

import os
import uuid
import re

from typing import Optional
from fastapi import FastAPI, HTTPException, Query
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
from yt_dlp import YoutubeDL
from unidecode import unidecode

# FastAPI application instance; the @app.get route decorators in this module
# register their endpoints on it.
app = FastAPI(
    title="YouTube Transcript, Download and Metadata API",
    version="1.0.0"
)

# Matches the 11-character video ID that follows any supported URL marker:
# the "v=" query parameter, youtu.be short links, and the /shorts/, /embed/
# and /live/ path forms. Compiled once at import time.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]{11})"
)


def extract_video_id(url: str) -> str:
    """Extract the 11-character YouTube video ID from a URL.

    Args:
        url: A YouTube URL such as ``https://www.youtube.com/watch?v=<id>``,
            ``https://youtu.be/<id>``, or a ``/shorts/<id>``, ``/embed/<id>``
            or ``/live/<id>`` link.

    Returns:
        The video ID (the first 11 ID characters after the matched marker).

    Raises:
        ValueError: If no video ID can be found in ``url``.
    """
    match = _VIDEO_ID_PATTERN.search(url)
    if not match:
        raise ValueError("URL inválida do YouTube")
    return match.group(1)

@app.get("/get-transcript")
def get_transcript(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube")
):
    """Return the Portuguese ('pt') transcript of a video, formatted as SRT.

    Args:
        url: Full video URL; when given, the video ID is parsed from it.
        videoId: Video ID, used only when ``url`` is not provided.

    Returns:
        A dict with ``video_id`` and ``transcript`` (the SRT text).

    Raises:
        HTTPException: 400 when neither parameter is given or the URL has no
            recognisable video ID; 404 when transcripts are disabled, missing
            or empty; 500 for any other failure while fetching.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")
    if url:
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e))
    else:
        video_id = videoId

    try:
        result = YouTubeTranscriptApi().fetch(video_id, languages=['pt'])
    except TranscriptsDisabled:
        raise HTTPException(status_code=404, detail="Transcrição desativada para este vídeo")
    except NoTranscriptFound:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao obter transcrição: {e}")

    # The empty-result check lives outside the try block on purpose: raising
    # inside it would be swallowed by the generic `except Exception` handler
    # and reported as a 500. NoTranscriptFound is not raised by hand either,
    # since the library's exception is built from library-internal arguments
    # rather than a free-form message.
    if not result:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada")

    return {
        "video_id": video_id,
        "transcript": SRTFormatter().format_transcript(result),
    }

@app.get("/get-video-metadata")
def get_video_metadata(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube")
):
    """Return metadata for a video, trying progressively simpler strategies.

    Extraction order:
      1. yt-dlp without processing (``process=False``).
      2. yt-dlp with full processing.
      3. yt-dlp with a reduced option set.
      4. If the result has a ``url`` but no ``title``, follow that URL.
      5. If anything above raises, scrape the ``<title>`` of the watch page
         and return a limited payload that carries the original error text.

    Args:
        url: Full video URL; used as the extraction target when given.
        videoId: Video ID, used to build a watch URL when ``url`` is absent.

    Returns:
        The yt-dlp info dict, or the limited fallback dict
        (``id``, ``title``, ``url``, ``thumbnail``, ``error``).

    Raises:
        HTTPException: 400 when neither parameter is given; 500 when both
            the yt-dlp extraction and the HTML fallback fail.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")
    if url:
        target = url
    else:
        target = f"https://www.youtube.com/watch?v={videoId}"

    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'skip_download': True,
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'format': 'best[ext=mp4]/best[ext=webm]/best',
        'allow_unplayable_formats': True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(target, download=False, process=False)

            if not info or 'title' not in info:
                info = ydl.extract_info(target, download=False)

            if not info or 'title' not in info:
                simple_ydl_opts = {
                    'quiet': True,
                    'skip_download': True,
                    'extract_flat': True,
                    'force_generic_extractor': True,
                }
                with YoutubeDL(simple_ydl_opts) as simple_ydl:
                    info = simple_ydl.extract_info(target, download=False)

            if not info:
                raise Exception("Não foi possível extrair as informações do vídeo")

            if isinstance(info, dict):
                if 'title' not in info and 'url' in info:
                    with YoutubeDL(ydl_opts) as ydl_redirect:
                        info = ydl_redirect.extract_info(info['url'], download=False)
                    # With 'ignoreerrors' enabled the redirect may yield
                    # nothing; fail with the explicit message instead of a
                    # TypeError from the membership test below.
                    if not info:
                        raise Exception("Não foi possível extrair as informações do vídeo")

                if 'title' not in info:
                    info['title'] = f"Vídeo {videoId or 'desconhecido'}"

    except Exception as e:
        error_msg = str(e).replace('\n', ' ').strip()
        try:
            # Imported lazily: only the fallback path needs these packages.
            import requests
            from bs4 import BeautifulSoup

            video_id = videoId
            if not video_id:
                try:
                    # Shared parser first, so youtu.be-style links also work.
                    video_id = extract_video_id(url)
                except ValueError:
                    # Legacy parsing, kept for URLs the shared parser rejects.
                    video_id = url.split('v=')[-1].split('&')[0] if 'v=' in url else ''
            if video_id:
                response = requests.get(f'https://www.youtube.com/watch?v={video_id}', timeout=10)
                soup = BeautifulSoup(response.text, 'html.parser')
                title_tag = soup.find('title')
                title = title_tag.text.replace(' - YouTube', '').strip() if title_tag else ''
                if title and title != 'YouTube':
                    return {
                        'id': video_id,
                        'title': title,
                        'url': f'https://www.youtube.com/watch?v={video_id}',
                        'thumbnail': f'https://img.youtube.com/vi/{video_id}/hqdefault.jpg',
                        'error': f'Informações limitadas: {error_msg}'
                    }
        except Exception as fallback_error:
            # The fallback is best-effort: record why it failed, then report
            # the original extraction error to the client below.
            print(f'Fallback de metadados falhou: {fallback_error}')

        raise HTTPException(
            status_code=500,
            detail=f"Erro ao processar o vídeo: {error_msg}"
        )

    return info

@app.get("/download-video")
def download_video(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
    qualidade: str = Query("high", description="Qualidade do vídeo: low, medium, high")
):
    """Download a video into ``/app/videos`` at the requested quality.

    The file is first written under a random UUID name and then moved to
    ``<sanitised title>_<qualidade>.mp4``. If a file with that final name
    already exists, no download happens and the existing name is returned.

    Args:
        url: Full video URL; when given, the video ID is parsed from it.
        videoId: Video ID, used to build a watch URL when ``url`` is absent.
        qualidade: ``low`` (up to 480p), ``medium`` (up to 720p) or ``high``
            (up to 1080p); case-insensitive.

    Returns:
        A dict with ``videoId`` and the final ``filename``.

    Raises:
        HTTPException: 400 for missing/invalid parameters; 404 when no video
            information (or no playlist entry) is found; 500 for any other
            failure during extraction, download or rename.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")
    if url:
        target = url
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e))
    else:
        target = f"https://www.youtube.com/watch?v={videoId}"
        video_id = videoId

    quality_targets = {
        "low": 480,
        "medium": 720,
        "high": 1080
    }

    qualidade = qualidade.lower()
    if qualidade not in quality_targets:
        raise HTTPException(status_code=400, detail="Qualidade deve ser: low, medium ou high")
    # Maximum frame height accepted for the requested quality.
    target_height = quality_targets[qualidade]

    videos_dir = "/app/videos"
    os.makedirs(videos_dir, exist_ok=True)

    unique_id = str(uuid.uuid4())
    output_template = os.path.join(videos_dir, f"{unique_id}.%(ext)s")

    ydl_opts = {
        'outtmpl': output_template,
        'quiet': True,
        'no_warnings': True,
        'ignoreerrors': False,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'allow_unplayable_formats': True,
    }

    def get_best_format(info, max_height):
        """Pick a yt-dlp format selector no taller than ``max_height``.

        Prefers the tallest format that carries audio; otherwise pairs the
        tallest video-only format with the highest-bitrate audio-only
        format; otherwise falls back to ``'best'``.
        """
        if not info or 'formats' not in info:
            return 'best'

        formats = info['formats']

        with_audio = [
            f for f in formats
            if f.get('height') and f.get('acodec') != 'none' and f['height'] <= max_height
        ]
        if with_audio:
            # max() keeps the first of equally tall candidates.
            return max(with_audio, key=lambda f: f['height'])['format_id']

        video_only = [
            f for f in formats
            if f.get('vcodec') != 'none' and f.get('acodec') == 'none'
            and f.get('height') and f['height'] <= max_height
        ]
        audio_only = [
            f for f in formats
            if f.get('acodec') != 'none' and f.get('vcodec') == 'none'
        ]
        if video_only and audio_only:
            video_format = max(video_only, key=lambda f: f['height'])
            # 'tbr' may be missing or None; treat both as 0 for comparison.
            audio_format = max(audio_only, key=lambda f: f.get('tbr') or 0)
            return f"{video_format['format_id']}+{audio_format['format_id']}"

        return 'best'

    try:
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(target, download=False)

        if not info:
            raise HTTPException(status_code=404, detail="Não foi possível obter informações do vídeo. Verifique a URL ou o ID do vídeo.")

        # For playlists the title comes from the first entry.
        base = info
        if info.get('_type') == 'playlist':
            entries = info.get('entries') or []
            if len(entries) > 0:
                base = entries[0]
            else:
                raise HTTPException(status_code=404, detail="Nenhum vídeo encontrado na playlist.")

        title = base.get("title", unique_id)
        if not title:
            title = f"video_{unique_id[:8]}"

        clean_title = unidecode(title)
        clean_title = re.sub(r"[^\w\s-]", "", clean_title)
        clean_title = clean_title.replace(" ", "_")
        clean_title = re.sub(r"_+", "_", clean_title)
        clean_title = clean_title.strip("_")
        if not clean_title:
            # Title consisted only of characters removed above.
            clean_title = f"video_{unique_id[:8]}"
        filename = f"{clean_title}_{qualidade}.mp4"
        final_path = os.path.join(videos_dir, filename)

        print('Informações do vídeo obtidas com sucesso')

        if os.path.exists(final_path):
            return {
                "videoId": video_id,
                "filename": filename
            }

        print(f'Buscando melhor formato disponível para qualidade: {qualidade}')

        # Reuse the info already fetched instead of extracting it again.
        best_format = get_best_format(info, target_height)
        print(f'Melhor formato encontrado: {best_format}')

        # NOTE(review): yt-dlp appears to build its format selector when the
        # YoutubeDL object is constructed, so the chosen format is passed via
        # the constructor options rather than by mutating `params` later.
        with YoutubeDL({**ydl_opts, 'format': best_format}) as downloader:
            result = downloader.extract_info(target, download=True)

        if not result:
            raise Exception("Não foi possível obter informações do vídeo. Verifique a URL ou o ID do vídeo.")

        if "requested_downloads" in result and len(result["requested_downloads"]) > 0:
            real_file_path = result["requested_downloads"][0]["filepath"]
        elif "filepath" in result:
            real_file_path = result["filepath"]
        else:
            real_file_path = output_template.replace("%(ext)s", "mp4")

        os.replace(real_file_path, final_path)

    except HTTPException:
        # Keep the intended status codes (e.g. 404) instead of rewrapping as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {e}")

    return {
        "videoId": video_id,
        "filename": filename
    }

@app.get("/search")
def search_youtube_yt_dlp(
    q: str = Query(..., description="Termo de busca"),
    max_results: int = Query(5, ge=1, le=10, description="Número de resultados (máx 10)")
):
    """Search YouTube through yt-dlp's ``ytsearch`` and return basic results.

    Args:
        q: Search term.
        max_results: Number of results to return (1 to 10).

    Returns:
        ``{"results": [...]}`` where each item has ``videoId``, ``title``,
        ``duration``, ``url``, ``channel`` and ``thumbnail``; fields that the
        search entry does not provide are ``None``.

    Raises:
        HTTPException: 500 when the search or result handling fails.
    """
    ydl_opts = {
        "quiet": True,
        "extract_flat": "in_playlist",
        "skip_download": True,
    }

    search_query = f"ytsearch{max_results}:{q}"

    def first_thumbnail(item):
        # NOTE(review): flat entries may expose only a 'thumbnails' list
        # instead of a single 'thumbnail'; confirm against yt-dlp output.
        if item.get("thumbnail"):
            return item["thumbnail"]
        thumbnails = item.get("thumbnails") or []
        return thumbnails[-1].get("url") if thumbnails else None

    try:
        with YoutubeDL(ydl_opts) as ydl:
            search_result = ydl.extract_info(search_query, download=False)

        # Tolerate a missing result or a missing/None 'entries' value.
        entries = ((search_result or {}).get("entries") or [])[:max_results]

        results = [
            {
                "videoId": item.get("id"),
                "title": item.get("title"),
                "duration": item.get("duration"),
                # Fall back to the entry's plain 'url' when 'webpage_url'
                # is not present on a flat entry.
                "url": item.get("webpage_url") or item.get("url"),
                "channel": item.get("uploader") or item.get("channel"),
                "thumbnail": first_thumbnail(item),
            }
            for item in entries
            if item  # skip empty entries
        ]
        return {"results": results}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao buscar vídeos: {e}")