init repo

This commit is contained in:
Leonardo Mortari
2025-07-31 19:53:12 -03:00
commit b71a7fb71e
4 changed files with 122 additions and 0 deletions

20
docker-compose.yml Normal file
View File

@@ -0,0 +1,20 @@
# Compose definition for the single "api" service, built from this repository.
version: "3.8"
services:
api:
build:
context: .
# The Dockerfile in this repository is named in lowercase.
dockerfile: dockerfile
container_name: youtube-api
ports:
# Host port 3011 -> container port 8000 (the port uvicorn is started on).
- "3011:8000"
volumes:
# NOTE(review): host-specific absolute path mounted over /app; it hides the
# code copied into the image at build time — confirm this is intended.
- /home/well/youtube-api:/app
environment:
# Also set in the Dockerfile; repeated here for the running container.
- PYTHONUNBUFFERED=1
networks:
- dokploy-network
networks:
dokploy-network:
# Declared external: this file does not create the network, so it must
# already exist on the host.
external: true

15
dockerfile Normal file
View File

@@ -0,0 +1,15 @@
# Image for the FastAPI service; uvicorn serves main:app on port 8000.
FROM python:3.10-slim
# Do not write .pyc files inside the container.
ENV PYTHONDONTWRITEBYTECODE=1
# Unbuffered stdout/stderr so log lines reach the container log immediately.
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Dependencies are copied and installed before the source so this layer is
# only rebuilt when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Must match the --port passed to uvicorn below.
EXPOSE 8000
# Bind to 0.0.0.0 so the server is reachable from outside the container.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

83
main.py Normal file
View File

@@ -0,0 +1,83 @@
from typing import Optional
from fastapi import FastAPI, HTTPException, Query
from youtube_transcript_api import YouTubeTranscriptApi, \
TranscriptsDisabled, NoTranscriptFound
from yt_dlp import YoutubeDL
# FastAPI application instance; uvicorn loads it as ``main:app``.
app = FastAPI(title="YouTube Transcript & Metadata API", version="1.0.0")
def extract_video_id(url: str) -> str:
    """Return the 11-character YouTube video ID embedded in *url*.

    Recognised forms:
      * a ``v=<id>`` query parameter (``.../watch?v=<id>``)
      * ``youtu.be/<id>`` short links
      * ``/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>`` paths

    Args:
        url: Text expected to contain a YouTube video URL.

    Returns:
        The first 11-character ID found after one of the markers above.

    Raises:
        ValueError: If none of the recognised forms occurs in *url*.
    """
    # Kept function-scoped, as in the original, so the block does not rely on
    # a module-level ``import re``.
    import re

    match = re.search(
        r"(?:v=|youtu\.be/|/(?:shorts|embed|live)/)([A-Za-z0-9_-]{11})",
        url,
    )
    if match is None:
        raise ValueError("URL inválida do YouTube")
    return match.group(1)
@app.get("/get-transcript")
def get_transcript(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
):
    """Return the transcript of a YouTube video, preferring Portuguese then English.

    Query parameters:
        url: Full video URL; when given it takes precedence over ``videoId``
            and the ID is extracted with ``extract_video_id``.
        videoId: Video ID, used as-is when ``url`` is absent.

    Returns:
        A dict with ``video_id``, ``transcript`` (the list of snippet dicts)
        and ``full_text`` (all snippet texts joined by single spaces).

    Raises:
        HTTPException: 400 when neither parameter is supplied or the URL
            contains no video ID; 404 when transcripts are disabled or none
            is found for the requested languages; 500 for any other failure
            reported by the transcript library.
    """
    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        try:
            video_id = extract_video_id(url)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e)) from e
    else:
        video_id = videoId

    languages = ["pt", "en"]
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Current youtube-transcript-api releases expose an instance-based
            # API; the static get_transcript() is no longer available there.
            # to_raw_data() yields the same list-of-dicts shape as before.
            fetched = YouTubeTranscriptApi().fetch(video_id, languages=languages)
            transcript_list = fetched.to_raw_data()
        else:
            # Legacy releases only provide the static helper.
            transcript_list = YouTubeTranscriptApi.get_transcript(
                video_id, languages=languages
            )
    except TranscriptsDisabled as e:
        raise HTTPException(
            status_code=404, detail="Transcrição desativada para este vídeo"
        ) from e
    except NoTranscriptFound as e:
        raise HTTPException(
            status_code=404, detail="Nenhuma transcrição encontrada"
        ) from e
    except Exception as e:
        # Boundary handler: anything else the library raises becomes a 500.
        raise HTTPException(
            status_code=500, detail=f"Erro ao obter transcrição: {e}"
        ) from e

    full_text = " ".join(item["text"] for item in transcript_list)
    return {
        "video_id": video_id,
        "transcript": transcript_list,
        "full_text": full_text,
    }
def _normalize_youtube_url(url: str) -> Optional[str]:
    """Return *url* with an explicit scheme if it points at YouTube, else None."""
    from urllib.parse import urlparse

    candidate = url.strip()
    if "://" not in candidate:
        # Tolerate scheme-less input such as "youtube.com/watch?v=...".
        candidate = f"https://{candidate}"
    try:
        parsed = urlparse(candidate)
    except ValueError:
        # urlparse rejects some malformed authorities (e.g. a broken IPv6 literal).
        return None
    if parsed.scheme not in ("http", "https"):
        return None
    host = (parsed.hostname or "").lower()
    is_youtube = host in ("youtube.com", "youtu.be", "youtube-nocookie.com") or host.endswith(
        (".youtube.com", ".youtube-nocookie.com")
    )
    return candidate if is_youtube else None


@app.get("/get-video-metadata")
def get_video_metadata(
    url: Optional[str] = Query(None, description="URL completa do vídeo"),
    videoId: Optional[str] = Query(None, alias="videoId", description="ID do vídeo no YouTube"),
):
    """Return the metadata yt-dlp extracts for a YouTube video, without downloading it.

    Query parameters:
        url: Full video URL; takes precedence over ``videoId``. Only http(s)
            URLs on YouTube hosts are accepted, because the value is
            caller-supplied and yt-dlp would otherwise fetch any address it
            is given.
        videoId: Video ID, turned into a ``watch?v=`` URL when ``url`` is absent.

    Returns:
        The extracted info dict, passed through ``sanitize_info``.

    Raises:
        HTTPException: 400 when neither parameter is supplied or ``url`` is
            not a YouTube URL; 500 when extraction fails.
    """
    from urllib.parse import quote

    if not url and not videoId:
        raise HTTPException(status_code=400, detail="Informe 'url' ou 'videoId'")

    if url:
        target = _normalize_youtube_url(url)
        if target is None:
            raise HTTPException(status_code=400, detail="URL inválida do YouTube")
    else:
        # Percent-encode so the ID cannot inject extra query parameters;
        # a well-formed ID passes through unchanged.
        target = f"https://www.youtube.com/watch?v={quote(videoId, safe='')}"

    # "nocheckcertificate" was dropped on purpose: it disabled TLS certificate
    # verification for every outgoing request.
    ydl_opts = {
        "quiet": True,
        "skip_download": True,
        # A watch URL carrying "&list=" must yield the single video only.
        "noplaylist": True,
        "restrictfilenames": True,
        "simulate": True,
        "forcejson": True,
    }
    try:
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(target, download=False)
            # The raw result may hold objects that are not JSON-serializable;
            # sanitize_info converts it into plain JSON-compatible data.
            info = ydl.sanitize_info(info)
    except Exception as e:
        # Boundary handler: any extraction failure becomes a 500.
        raise HTTPException(
            status_code=500, detail=f"Erro ao extrair metadata: {e}"
        ) from e
    return info
# To run: uvicorn main:app --reload --host 0.0.0.0 --port 8000

4
requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
# NOTE(review): no versions are pinned, so every image build installs whatever
# releases are current at that time — consider pinning known-good versions.
fastapi
uvicorn[standard]
youtube-transcript-api
yt-dlp