309 lines
11 KiB
Python
309 lines
11 KiB
Python
import os
|
|
import uuid
|
|
import re
|
|
|
|
from typing import Optional
|
|
from fastapi import FastAPI, HTTPException, Query
|
|
from youtube_transcript_api import YouTubeTranscriptApi
|
|
from youtube_transcript_api.formatters import SRTFormatter
|
|
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
|
|
from yt_dlp import YoutubeDL
|
|
from unidecode import unidecode
|
|
|
|
# ASGI application object; the title and version are passed straight to FastAPI.
app = FastAPI(title="YouTube Transcript, Download and Metadata API", version="1.0.0")
|
|
|
|
def extract_video_id(url: str) -> str:
    """Extract the 11-character YouTube video ID from a URL.

    Recognised forms are ``...v=<id>``, ``youtu.be/<id>``, ``/shorts/<id>``,
    ``/embed/<id>`` and ``/live/<id>``.

    Args:
        url: Any string expected to contain a YouTube video URL.

    Returns:
        The 11-character video ID.

    Raises:
        ValueError: If no video ID can be located in ``url``.
    """
    # "videoseries" is exactly 11 characters long, so the playlist embed path
    # /embed/videoseries must be excluded or it would be mistaken for an ID.
    match = re.search(
        r"(?:v=|youtu\.be/|/shorts/|/embed/(?!videoseries)|/live/)([A-Za-z0-9_-]{11})",
        url,
    )
    if not match:
        raise ValueError("URL inválida do YouTube")
    return match.group(1)
|
|
|
|
@app.get("/get-transcript")
def get_transcript(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
):
    """Return the Portuguese ('pt') transcript of a video, formatted as SRT.

    Args:
        url: Full video URL; takes precedence over ``videoId`` when both are given.
        videoId: Video ID, used when ``url`` is not supplied.

    Returns:
        A dict with ``video_id`` and ``transcript`` (the SRT text).

    Raises:
        HTTPException: 400 when neither parameter is given or the URL has no
            video ID; 404 when transcripts are disabled, missing or empty;
            500 for any other failure while fetching.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
    else:
        video_id = videoId

    # Only the network call lives in the try block, so the HTTP errors raised
    # further down are never re-wrapped by the generic handler.
    try:
        result = YouTubeTranscriptApi().fetch(video_id, languages=['pt'])
    except TranscriptsDisabled as e:
        raise HTTPException(status_code=404, detail="Transcrição desativada para este vídeo") from e
    except NoTranscriptFound as e:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao obter transcrição: {e}") from e

    # An empty transcript is reported as a 404 directly instead of going
    # through a library exception that would need to be constructed by hand.
    if not result:
        raise HTTPException(status_code=404, detail="Nenhuma transcrição encontrada")

    return {
        "video_id": video_id,
        "transcript": SRTFormatter().format_transcript(result),
    }
|
|
|
|
def _scrape_basic_metadata(video_id: str, error_msg: str) -> Optional[dict]:
    """Best-effort fallback: read the video title from the public watch page.

    Args:
        video_id: Video ID used to build the watch URL; falsy means "unknown".
        error_msg: Message of the extraction error, echoed in the result.

    Returns:
        A reduced metadata dict (id, title, url, thumbnail, error), or ``None``
        when the ID is unknown, the page cannot be fetched or parsed, or no
        usable title is found.
    """
    if not video_id:
        return None

    try:
        # Imported lazily: these packages are only needed on this fallback path.
        import requests
        from bs4 import BeautifulSoup

        response = requests.get(f'https://www.youtube.com/watch?v={video_id}', timeout=10)
        title_tag = BeautifulSoup(response.text, 'html.parser').find('title')
    except Exception:
        # Deliberately broad: whatever goes wrong here (missing package,
        # network error, parse failure), the caller reports the original error.
        return None

    title = title_tag.text.replace(' - YouTube', '').strip() if title_tag else ''
    if not title or title == 'YouTube':
        return None

    return {
        'id': video_id,
        'title': title,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'thumbnail': f'https://img.youtube.com/vi/{video_id}/hqdefault.jpg',
        'error': f'Informações limitadas: {error_msg}'
    }


@app.get("/get-video-metadata")
def get_video_metadata(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
):
    """Return metadata for a video, as extracted by yt-dlp.

    Extraction is attempted in up to three passes (unprocessed, processed,
    then a minimal option set). If every pass fails, the watch page is scraped
    for a title as a last resort.

    Args:
        url: Full video URL; takes precedence over ``videoId`` when both are given.
        videoId: Video ID, used to build a watch URL when ``url`` is not supplied.

    Returns:
        The yt-dlp info dict, or a reduced dict carrying an ``error`` key when
        only the scraping fallback succeeded.

    Raises:
        HTTPException: 400 when neither parameter is given; 500 when neither
            yt-dlp nor the fallback produced any metadata.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    target = url if url else f"https://www.youtube.com/watch?v={videoId}"

    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'skip_download': True,
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'format': 'best[ext=mp4]/best[ext=webm]/best',
        'allow_unplayable_formats': True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(target, download=False, process=False)
            if not info or 'title' not in info:
                info = ydl.extract_info(target, download=False)

        if not info or 'title' not in info:
            simple_ydl_opts = {
                'quiet': True,
                'skip_download': True,
                'extract_flat': True,
                'force_generic_extractor': True,
            }
            with YoutubeDL(simple_ydl_opts) as simple_ydl:
                info = simple_ydl.extract_info(target, download=False)

        if not info:
            raise RuntimeError("Não foi possível extrair as informações do vídeo")

        if isinstance(info, dict):
            if 'title' not in info and 'url' in info:
                # The result only points at another URL; follow it once.
                with YoutubeDL(ydl_opts) as ydl_redirect:
                    info = ydl_redirect.extract_info(info['url'], download=False)
                # The result is checked for emptiness again here so a failed
                # redirect reports the extraction error, not a TypeError.
                if not info:
                    raise RuntimeError("Não foi possível extrair as informações do vídeo")

            if 'title' not in info:
                info['title'] = f"Vídeo {videoId or 'desconhecido'}"

    except Exception as e:
        error_msg = str(e).replace('\n', ' ').strip()

        fallback_id = videoId
        if not fallback_id:
            try:
                fallback_id = extract_video_id(url)
            except ValueError:
                fallback_id = ''

        limited = _scrape_basic_metadata(fallback_id, error_msg)
        if limited is not None:
            return limited

        raise HTTPException(
            status_code=500,
            detail=f"Erro ao processar o vídeo: {error_msg}"
        ) from e

    return info
|
|
|
|
def _select_format_id(info, target_height):
    """Pick a yt-dlp format selector string no taller than ``target_height``.

    Preference order: the tallest format that carries audio; otherwise the
    tallest video-only format combined with the highest-bitrate audio-only
    format (``"<video>+<audio>"``); otherwise ``'best'``.
    """
    formats = (info or {}).get('formats')
    if not formats:
        return 'best'

    def fits(fmt):
        return bool(fmt.get('height')) and fmt['height'] <= target_height

    with_audio = [f for f in formats if fits(f) and f.get('acodec') != 'none']
    if with_audio:
        return max(with_audio, key=lambda f: f['height'])['format_id']

    video_only = [
        f for f in formats
        if f.get('vcodec') != 'none' and f.get('acodec') == 'none' and fits(f)
    ]
    audio_only = [
        f for f in formats
        if f.get('acodec') != 'none' and f.get('vcodec') == 'none'
    ]
    if video_only and audio_only:
        video = max(video_only, key=lambda f: f['height'])
        # 'tbr' may be present but None; treat that as 0 so max() cannot fail.
        audio = max(audio_only, key=lambda f: f.get('tbr') or 0)
        return f"{video['format_id']}+{audio['format_id']}"

    return 'best'


def _build_video_filename(title, qualidade, unique_id):
    """Turn a video title into an ASCII, filesystem-safe ``*.mp4`` file name."""
    clean_title = unidecode(title or f"video_{unique_id[:8]}")
    clean_title = re.sub(r"[^\w\s-]", "", clean_title)
    clean_title = clean_title.replace(" ", "_")
    clean_title = re.sub(r"_+", "_", clean_title)
    clean_title = clean_title.strip("_")
    if not clean_title:
        # The title consisted only of characters removed above.
        clean_title = f"video_{unique_id[:8]}"
    return f"{clean_title}_{qualidade}.mp4"


@app.get("/download-video")
def download_video(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
    qualidade: str = Query("high", description="Qualidade do vídeo: low, medium, high")
):
    """Download a video into ``/app/videos`` and return the stored file name.

    The file name is derived from the video title and the requested quality.
    If a file with that name already exists, nothing is downloaded.

    Args:
        url: Full video URL; takes precedence over ``videoId`` when both are given.
        videoId: Video ID, used to build a watch URL when ``url`` is not supplied.
        qualidade: ``low`` (up to 480p), ``medium`` (up to 720p) or ``high``
            (up to 1080p); case-insensitive.

    Returns:
        A dict with ``videoId`` and ``filename``.

    Raises:
        HTTPException: 400 for missing/invalid parameters; 404 when no video
            information (or no playlist entry) is found; 500 for any other
            failure during extraction or download.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        target = url
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
    else:
        target = f"https://www.youtube.com/watch?v={videoId}"
        video_id = videoId

    quality_targets = {
        "low": 480,
        "medium": 720,
        "high": 1080
    }

    qualidade = qualidade.lower()
    if qualidade not in quality_targets:
        raise HTTPException(status_code=400, detail="Qualidade deve ser: low, medium ou high")
    target_height = quality_targets[qualidade]

    videos_dir = "/app/videos"
    os.makedirs(videos_dir, exist_ok=True)

    # Download under a unique temporary name, then move to the final name.
    unique_id = str(uuid.uuid4())
    output_template = os.path.join(videos_dir, f"{unique_id}.%(ext)s")

    ydl_opts = {
        'outtmpl': output_template,
        'quiet': True,
        'no_warnings': True,
        'ignoreerrors': False,
        'no_color': True,
        'extract_flat': 'in_playlist',
        'force_generic_extractor': True,
        'allow_unplayable_formats': True,
    }

    try:
        with YoutubeDL(ydl_opts) as ydl:
            extracted = ydl.extract_info(target, download=False)

        if not extracted:
            raise HTTPException(status_code=404, detail="Não foi possível obter informações do vídeo. Verifique a URL ou o ID do vídeo.")

        base = extracted
        if base.get('_type') == 'playlist':
            entries = [entry for entry in (base.get('entries') or []) if entry]
            if not entries:
                raise HTTPException(status_code=404, detail="Nenhum vídeo encontrado na playlist.")
            base = entries[0]

        filename = _build_video_filename(base.get("title", unique_id), qualidade, unique_id)
        final_path = os.path.join(videos_dir, filename)

        print('Informações do vídeo obtidas com sucesso')

        if os.path.exists(final_path):
            return {
                "videoId": video_id,
                "filename": filename
            }

        print(f'Buscando melhor formato disponível para qualidade: {qualidade}')

        # Reuse the info already extracted for `target` instead of fetching it again.
        best_format = _select_format_id(extracted, target_height)
        print(f'Melhor formato encontrado: {best_format}')

        # Pass the format through the constructor options rather than mutating
        # `params` on a live instance, so the choice is in place from the start.
        with YoutubeDL({**ydl_opts, 'format': best_format}) as downloader:
            result = downloader.extract_info(target, download=True) or {}

        if result.get("requested_downloads"):
            real_file_path = result["requested_downloads"][0]["filepath"]
        elif "filepath" in result:
            real_file_path = result["filepath"]
        else:
            real_file_path = output_template.replace("%(ext)s", "mp4")

        os.replace(real_file_path, final_path)

    except HTTPException:
        # Keep the intended status codes (e.g. 404) instead of turning them into 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao baixar vídeo: {e}") from e

    return {
        "videoId": video_id,
        "filename": filename
    }
|
|
|
|
@app.get("/search")
def search_youtube_yt_dlp(
    q: str = Query(..., description="Termo de busca"),
    max_results: int = Query(5, ge=1, le=10, description="Número de resultados (máx 10)")
):
    """Search YouTube through yt-dlp and return a compact list of results.

    Args:
        q: Search term.
        max_results: Number of results to return (1-10).

    Returns:
        ``{"results": [...]}`` where each item has ``videoId``, ``title``,
        ``duration``, ``url``, ``channel`` and ``thumbnail``; a field is
        ``None`` when the search entry does not provide it.

    Raises:
        HTTPException: 500 when the search fails.
    """
    ydl_opts = {
        "quiet": True,
        "extract_flat": "in_playlist",
        "skip_download": True,
    }

    search_query = f"ytsearch{max_results}:{q}"

    try:
        with YoutubeDL(ydl_opts) as ydl:
            search_result = ydl.extract_info(search_query, download=False)

        # Tolerate a missing result and empty entries rather than failing.
        raw_entries = (search_result or {}).get("entries") or []
        entries = [item for item in raw_entries if item][:max_results]

        results = []
        for item in entries:
            # Flat entries may not carry the fully-resolved fields, so fall
            # back to the alternative keys when the primary one is absent.
            thumbnails = item.get("thumbnails") or []
            thumbnail = item.get("thumbnail")
            if not thumbnail and thumbnails:
                thumbnail = thumbnails[-1].get("url")

            results.append({
                "videoId": item.get("id"),
                "title": item.get("title"),
                "duration": item.get("duration"),
                "url": item.get("webpage_url") or item.get("url"),
                "channel": item.get("uploader") or item.get("channel"),
                "thumbnail": thumbnail,
            })
        return {"results": results}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao buscar vídeos: {e}") from e