From 0c0a9c3b5c5e6e50ee7308bbff077b85621e3a02 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Fri, 17 Oct 2025 09:27:50 -0300 Subject: [PATCH] Inicia novos recursos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dentre eles estão recurso de adicao do faster-whisper, geração de legenda e integracao com Gemini e Open Router --- .DS_Store | Bin 0 -> 6148 bytes __init__.py | 1 + __pycache__/llm.cpython-311.pyc | Bin 0 -> 11635 bytes __pycache__/main.cpython-311.pyc | Bin 0 -> 13063 bytes __pycache__/render.cpython-311.pyc | Bin 0 -> 8779 bytes __pycache__/transcribe.cpython-311.pyc | Bin 0 -> 5529 bytes __pycache__/utils.cpython-311.pyc | Bin 0 -> 4567 bytes docker-compose.yml | 35 ++++ dockerfile | 45 +++++ llm.py | 234 ++++++++++++++++++++++ main.py | 266 +++++++++++++++++++++++++ render.py | 205 +++++++++++++++++++ requirements.txt | 7 + transcribe.py | 111 +++++++++++ utils.py | 93 +++++++++ 15 files changed, 997 insertions(+) create mode 100644 .DS_Store create mode 100644 __init__.py create mode 100644 __pycache__/llm.cpython-311.pyc create mode 100644 __pycache__/main.cpython-311.pyc create mode 100644 __pycache__/render.cpython-311.pyc create mode 100644 __pycache__/transcribe.cpython-311.pyc create mode 100644 __pycache__/utils.cpython-311.pyc create mode 100644 docker-compose.yml create mode 100644 dockerfile create mode 100644 llm.py create mode 100644 main.py create mode 100644 render.py create mode 100644 requirements.txt create mode 100644 transcribe.py create mode 100644 utils.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..38734ca2de71d90578b12a191d5ff30a57f26d5c GIT binary patch literal 6148 zcmeHKJ8Hu~5S@u#2;8`IxmU;y7U7)02atb(6aoc8igc=cE+5TrJ{W}TCXgn)ftj~E znx|!7q0xwlw%_Mhk+q04a6`FRn43K}pV>=h6bQ#VPI7>M$h&m2>c}_IdtuEgkdX>d0V;4;z`hR!Zden?K>u_g_y_=8 zBJGB?&l13531Cee1CfDgP=P_!95FQL$d|0EiDO{UMRWMjJXv!>Q9m8;FJ3NM0~x6R 
z6__fpi0#_?{~P?t{68geM+KA!1r*gxx&q`b_#;GW1zQV hY^)t`yeR65t?|4jj)6``-swR8445u7D)83|+yF)|6_)@2 literal 0 HcmV?d00001 diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..b437409 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +"""Top-level package for the video processing pipeline.""" \ No newline at end of file diff --git a/__pycache__/llm.cpython-311.pyc b/__pycache__/llm.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36d44a6197b1726f4c07bd4fd0b04731d74e0161 GIT binary patch literal 11635 zcmd5?TWlLwdLCXyQsU@B*>Y~X6Du2=j6_?`U5>SpEZJ6KNnTkAHp;rxh%*#N8O|^> zL)lWOKpX6X8@Nc&E)eexRkn*wO;m-3m+luo4s+ZEWu-W*%4@PJ z6lJw68>Udu4MEW?*%0%VqOAy56{{#%MOip6mlRDAL`@Rr%Ca`ES5Qe9zcg)T6N!bQ zVhSZ)s;IIcYk9G3R#efFO+i#uK`$@?s=lJ+1x=PESrRN=$Ws|fzIbs44@qSm?V5>- zNySA&udEc&2CE^K%c_zWEk)O|!n7rzj>!tBC*#w?rFE;QYeHUCWDTX7gq}fiqNwYV zuq@|AJRmIV7|HENAzv%&sNqN(&g-Qz+FHg~yy0Edg+fJxMsy8hGB8N?gfyB+WQ3(9 zQ?l`BG>D*i2VX-TlemuUQ&aCv@VSXEw<1=BJ}qsM8?mLY0p-cZUUk~AbH1Yu2) zWIfGPY{*u{ptb~6F)ga#3m5d|RkUV0Q)619VX;S`LddX`l8k$$GPO;LlodnB>xLnl zWnGh~1D1TvYFDGH6-*#3!O|fHJ<%+sur6FJD*2*NMkA&-k*u3c3nd+Wqp_*$G^4V% zBCg07hJ2MwN!K9A)bom{3ME-m#4JmMXWX>KleiLupqDM7wrniY3|C ziL4puFD!`Jt%$j?^zs4*j2S5?vMLR^Bg*TVB`Tz0>ZGih%(~E1)+^=)G*2+8a=WhD zjo)jBsg5L-<-CH1BxpfoCc3mVcWG*Ne(t%2srlT@+~m|nbVb(I6hqfaWUgzXp@=Xx zxX}ceXuF|d-W12Iq2Q9J!at!)0iIE=U`h&xUSiF#4wzYCp(tZ6jP(S~$2FK)-f^LV zVqmVGxjZ+EnKQ(7j1YbTS8qNI4U>()w@wQR`K)Lq3ZkN#Loh>0cFlE-j6W|sh6}GZ zLD{=)2`VWe2>44oSYMO4b88c_oAv-j#t=r*AC2!f`i}07c8wA-`!1)pQ zvJV$;aWc0UK$QICj)=8w1gv%e$(; zCCAZf6v9gN<)7j1TU?dfrn?6YtUkeaf>>Gb?;P9SMccOoWwzJb3|Nrw9jpHHQn(th z{9hcm>ep&t!8U`{pxJH3Jqa9Ex_8jm5ByQQ7N`cd-Bq<&i>{<-d(F*IoU@Y6I;$Pt zbv4vp^D;NcSzTT!dPnMK#{--(uo-UaPe(QUGwv6`b_uTqq}XQo)$rB8Rqk5o1@3Af z9TydJdg>e9ksMic*9e3H0SAl^ZlsrF7S!DUNK20@YNDkZaCFQw>=9On5NHTuf!1Vl z7;<#-NyTK6oYmb~H$@0q!SZa7B-pk|D-i6spg8LVE+|x(m1TFqVu67AgUWIVk&2e3 zmPm43$;w&uoUIWo?jC#bn71Uk%ha(e$I=u>S#P{Z&H`-P(&R4W&I(6C$gJe^=whLw 
z4haa3vT+sB#t}lENsHL&o&vWjkodcGQ$bBVV+mlXDB>7cVj|wj(RZZOR5YvK>)%Q8Mjr zdVZ4R$t(_~BSsGucjQ%B)MUwyAgDl2jE0mWw+MkFM`ocwNp7a;OAI17Uqqk zC^?3I^KWt5;`aKv?$jIV>*~#iw}W+lu)z<0#y|8a|ImBqKAx!ak2mHU;B)?PZQ%QL{)GnrLM`^fZg>A1ue|=s&5=KU z^~S4P7xsdo$m!3!Q#Uql9ilSf zAFahk|A=ERgqrv^kD%B8bSgCwntjhh2lj6S!ZAPR-H7s@hJScTlBlNuz+>FM# zYG^g;jj0*_w*BztmC<)KSdD&|+}BQr_5ea7Eu!wh0%FWMZ_XXv$nb)PSWtH#?)Hxt&k zTS^m_(EQat=6$X7wadZslR*BE5`PT?I_sesHI#y~tWMzDY?15-rl13? zD;9c{O$_4!w(MetDkX8q(JUhvkb79LLZ<2!88eC=U|>6M8w%v=6=qAgA~>$fR0QB{ zW-}Qk6xO91nv4PYtF(Mg1n_0L(P*8gOdLi6e1SzA7qY4&1FEnFGMORh3;Kjjx?`z8 zQb}J!0;P%2j;QXuUlO3%GN7w7l(Tf-8E{kCs3CztL2nI=6%ChmKGKwuEO?1bqz?kSvYDTgiMbDS2oR&ti5QGlXp@plI@z4r6dCJ+By; z99E4~0bJ%6T>^0fS_WYb%06B&(1;_hAS;$W1lo#h#%Thm9AeV0&}nASG&6rEo5T5{ zxF$m?4QgIqfgE9e=k2nBW@#|)w80j1m5hj)8ze*U!=9J8)=ueXMM3B-De&Xc4l@gg zI4@!`z&w#A^DVYkP}m(fD6D8^=PzcbXXXx?*UZijzfT3N=0)~2D{Dnq0vH42trSfW z1w7kkSv;bgs5+B15AMiNoB@-{Cq2WY5oR*}PuIwi8ABwC0I)QgOnAyD)ZAkG2Ogx+ z-toaFPr{>HN5Is*!0eLxaEZKGgU#sZj;lH9XlocEZ$XHp*ofK1G*QfAKIKe~=s2U} zrT*N&pCEsVFdCQ}%$h3_Ps5)yk*w!H&~!{4CLoB*qH+xZkE~;Q=F;5!!uagM6eP9= z4L?xaK1^Uuyi=S2b}~(!JQFBQG4mCR;TP826ag4XB=sz~*#z?wkdu{vL2AM(T^6wb zusUAkgjSg4!=|Bgq8TwOz9yRyfW@zpA5Y0-hMK{iSSBbJq1DrXAM%MYlGnroah>=N z7&L$?Ge(>Y(FyCbkOKQ#uE0i*Qt8nlj5n_$Gq=5XpW8wM_U^@ zI^+~0#qo-~jLIuwgT`ZjQV)(o+-BlR0yuLWNkIuIMA*<(8MqO7A+%(NL3|tOAn+qm zyLQYY*Ea6+P%NTa2>@0ks#|b!R{&WD>I&n{#T6#*X0~gxqJy;D0H0eqfz)Zv!Od9B z5%(z-LFDr*v(+6|i?!b|v24264(g`e;ZmS>XhpW{#PgzBaj>r)brK>w>d^6a7+tjN zPA3gu47?qMVSzEU<5M0J=|Jlaq#_FV0=vTc5;b7+Fj;%L(-@>u zY}0VYNt_^-F|BseQ6~{`r81gk*_iXg7@>+WmUDn9HSVW}sSzUDu<2o$1Z>Cz+{1z9 zy$5y(j49MRgMU+ni*9jW9p<`wZ>+wdy{^^n8?N)&2A|zJzuVbIczFt-`$+Asvvq#5 z!B5s=lZ4Qpd1LnV*_&gvhtAZy#u{B?TT@@fIKKbJmF>rCkLtB#xt=UHlI5*)d$DNz z@x2fi>DxQGxBQ$C@Vw^Pl?ItPMPc zPn|#0;Lp@zXFQlcMsPpMJvP>ObgIVBT`xBHZ{2*k!QbED(+z&4#*h5=5nmCf9nQe# z3O-K?+XMAvx{*w8o%_7=(3^!{NcGOaM&}^4|BS;`T&VNY4Su>7o8Im0-I{SYh0*&b z?&WUXo0vEgy7ge+#1o-gPegESC;WXDGmnY!55nLfqD%i1=l^f~K|3yWiPH|h?;Q9A 
zjO7D=Jix~-wCbP2FWiAV<)qk$@pjC-8L5V=kqtDy8I=-Jr_==|B3X^nH<*YH#zgq6 z!>ZqpZMJdR?6Y$PCZa>)KkVK|4aXQ4K@^&MFfJl)HOT``>Z!(W%SH5V#$S!Ma1pU4 zy8tet<(Jgg6t~KIEv*vQ#tv{1y_<<@0xUtZA}6H|==y%|2{0>(_R`Hxi-M4QR6DH) zylakY*XzDk`rGBa7FgjnyQD)u0+Bz-y#pa1_<}Z*)y_%o%FRu_%HM(EI@~Vpj`arG z>lK2V-PPpnWmdb}>u&Z`d$s~MCaXQ$DZgti7yJ6$d&fQ;k?v~O%yxf^82_?eUbR;` z@)XLdUE3|Zkzbjs?qiSw+>M`9yTI<;E&e^+#%Fi-?fxN8@wPJs;27G_#Qv_c%2ij#= zS}3C1^DKCpvJQ_tXIuH+9whLwV5qfCy! zJt|BW7~?~Yc-xZNezwQkj%1J8eJxwB9@{g7Nk4e|8yKISmfDQPbw>*R=d1t`^;Xr-z zGO=S|6+Etmuz?GKkZKtf2S^eynRAnUhJcVap(`x&w3IbKa0kr;`w|vmL=YeDo zG(^JYM0PpNqg4ZMlF^}n z;O?9mz}{H}tinOq)w;_?66a~h%ORb7&kM6Z{REVD zPBin1Vmw3vfucfdKxpB8cRoRa zM64O|#{IVie$8O*gTDvH+rVG5hR_NCA4aRU2Y@XP90Y*}Qfs6HfiDS!y~Qa=VXmZT zlTG59c@=&In!gIcYT&PeH~O(a1KcvOjV)14GH9a+w}RP|b{OPZ$)p`UcZ2ln#8b3d z5s)0)81&X{_cf&KQ>qTPHpjBfO8<;!&0{!iakZXjozHK=Z$9wm)gKRU_ixL;x~JAV z{GR2PId5Rh-~nS6C%eOqrv{}Q>BGh;Dj|&-kI?B+Iz2|G$LU1btR0amcy-Hof^IxX zr%^hcrW57N2IVjY!Nw-CHOA;C?Qa7r>*aOQ2VN|*qjohw_5@O!FR(6~{mn(^7;}VsG*dB&% zV5tJLXeYgjIW{A-gGfnG`{Cf^?tQaj->cVf1wt;@VSSQYxfM-yRwb0tiO$Y-l;oP=g&3xbG6vH-z8HHa=u(kF4U6?jpPC$ z=ic}bL(X4ixW2c&R}$ZM`R+aP-9*CE2Q z6M@z>|KRbh3%krm{xJc6sP{bE=y`T)dbj8B9v6=sW2fukUA%j6?e%Ln zJL=s+qg&X1;N9cDJpSIJZ=d+pi5h?EdT2Lw?^|d7^2~Osp88HB^_}an-Og04^PVrd z`ge~Af1Uhk@+aLt@4lY=V({3zPyF(U_ohEK>Vu<=!O_nKpZ;|4>H6SAV{qbn@=c{4 z6Lw=gZ*;%jeKT2)J=lmnSYscE+nQte|JJvUo(*z4!Nl3Y&`ww1*>8n*z7@fBI?Ud@ zFnVEg23ab5aSC?{rM288n1;K|v*_BX%kK6Vv~Nu>eHAJca&tKY*`4FxP6(iv8tkaG zUPda*>hk*d0;Bz4O zPg2#hZ;Ch_f((Z}Jw4sk)zw|qUsdlfT`oHX*S&vyGWFlx6!jnYQo0(A4E@d728y~% z@zfZ_(|kjmPBe@)(3m&HO=BkVWX2fsG>@6dvvI7EJS}4ucrtNo!Zv10*vIS%$C#to z);Z?n&3t3rm2i)_6P__o!aL@LZ)V;S_a%H|K9aY_{fWR>AQ2o3CYr{YNSQ6(oM;(q zp(%m694fJ!3zu@$Pu|WuJ_PenYWH`Q|SW18Q@hu&t!I{3|e8$3Jt zZk~l_7vIBoz;hGd`8GASS?K1wt`KQUPN1{-LlD=;C81F;f2I-gABk}PW2UH~q#|8o z(^ z#d7RfZel`IPMl%i%m^8Q&`JtdSJP5B`HI}fUYHV+c8*Oz58R}{a*`xY3vw?jDr{Pi zfRhB!1zs+t;_=i~WCHq1%X{qh9ewN}N#GPghF-6xq$~K57bPL4q+lqugP7oCfoD^f zF2O(0mf1^UTu5>W7|IKM?9hxNaWN>xp12GzrdTYv!d?}XDfaN;6Q_rcuw0U7r3?)7 
zFb6D1+b)0vfJGcNh=|X`1Ut(DE6_4&2u4gL4GO>=zb!6I3vpHvVWFo7@= zipa&u_=V&o2sHsEee5WF7f8K0)Kyqnn8XhtJc&?{gtP=Tk_yO)Ly_XFVuz`T%OFyj zjir(b2RxD~0B*JmAdt#%P(H|sBFrd#_MKHjJCzoaXHyv%Pi1(MLQ;@m?w}v&Q|1(q z6)>%cO59j80vDSq4U90{$DT^YuaV)8k1MJ4DD*Qv4)Z9)c^1~<6(OzcC7*erB7RXu zBCx1YD%b^(NF_uCM+r+hMg+*lxHNu+$%@C>34xu+NJ*#w&0ryv6rs<)3szjFxJpuz zI4L5_hLVt~Vv-l(H3OxwxR^$fNg&g73d%uhKy1~hj8z4a6eO|^66~}fDPoK;N>(2% zC?XVYA_dcpi;!daYp`_17!Iw(`h)=L(P#gDAA3480aFLF4HZjjRUAhq9Zzw5Q8Nj3 zF^MWh#Lq$x8KtzKN)yBzLqX}YawdkBpcm?Zz)Q4?>%v&ZFI%y5#3#;c;?*DxuNSeCAH1yQEw`LI(Pc` zse!?0rG}wh8NhE;xe}6Nlb`)H8uUwGJoyxxgk3T*A;Q!NdZQ#LU~41>+Qi_$d|4o8 zqA2>CpM${fQc4-_P}JpeUcqL?ODV!ce658Nv$d_uZ4?4IjpmB8NCqBR0x$2IJ_JW#Y%Y!I-gfnnvQQz8$$q>wJ|;J{#|K2V7R@aiLE zzTO9t`MMY6{*hT{rcyE}KNwL(cT|}r-8Llv3YY){rngSS(Zul43PEB5U_Ut-aYJMA z3a5Kd#{oV7WGfjqDFEF_O_1opLuJlSa}t0V$cngo;*El#Dh z1Dc>{<=;XwM}6z00xg=q`&X{NpH=-snty2S)n#9k=G(IDYK6NH+*t4j3!R%+9F6|Q zCln-qz+`UtTQ}uz)_gsSt*URQ=G!?pTyTdTx!EN*tGYWhcgNh3LU-Sz?*66j{(st_ zb|2NckE%@9-0@qzIi_F({Tx+Jm=rGSVXnbR}NmQ!*>&sH?O$nE2@?0^eX z&STWBP3ui0ZwIsIxIR#Xn_z{8M^2sG20#ZuizuV90;h#63p76nqs!*6dh6_yTA?Fp z9VTs%j7A!CGw~gD3!j0#JTBRTFR&bpEW3O+ z#}+KAi`880GT5#__X92;=vM>%TA+W$MwwfmP-Zh&DM(gG;-t);I=ca;18(UB9JaOO zg?<-AboqL%`FKM&fhnmmABY%;`OqM9mztzz=`1x!y>V)`Mvb!#JYCU~+9f}0s#XrD z*|26!CQIk4jTiK3Tv^jkA$XEyAZ37pxkBNm)cfYz{%GpvX}&R64t$kp;Ike*fl668 z3v_ufBZ6DYCE1a)qub)*6#>E?5^mfCOH`cmlKhHTVhDHqVi_w>r7}Q-gbBvSA?P51 z!;Ib+U}u0C=x=~0UFAlDn+8z=0s+dXz3hIVUkctwQI*HXj~?A~;>4bjk={fi0n`jP zk`-psum?iDiE9R(N@xSLF_DHq#7I@(0eHn-*k}MIWFpQbuR!DhLrkb0x|y)8n}Fr( zW_H`Q1MGLSU1yN%>kW2$#3bR0&>OilKp`G<5&~GDc0*WbMmjRAAz>008V*0I$a*8x zgJ-=0+9puqQCU)?5omk`{^eOn=BO1nAYz<(7k9{JaXTu26kzI zUGvsu-=fDA2NW z@pYTrk3pMqrH?gF-c-A)0q3t(lc}p|s`gVZmwV4KH60XRcr)k(=t?C}Jj<-n-u$fg z*}C3kzf#^LVFLuu0W@^gsMqMB?B!JVRIjtVrK0__X2n;jo2|11vyE%?^K402K^Bc4 zg1kO1%WT%7;O14lvP{-eE2|%Xi~PYZ`PT0 z^0vyza%G>Y)DoYypa5GSzJ;g#Pi|?)mN)k+2n32()=|fs>raxO1Eg2GlH9Ogc$D=e z`oO7+ciyi%UKpfq(Qkx_B)zjv1vl^Fm38Jy*_tPBtL3xyZPxc(H0tJ|j)osN4`gjH 
zGx6DAHdrm+tnUihqhu$PSIt<~e}!m>MBiMev{xAB-F5YR*C7&6C#j}YEvjZT`>Ynt z*+8yp?N|ECHf5WkM^9x%@ZNiM)&%y*L^dF|twjTP_8YzOzGt;oHdXbXz$7P5=+miLHF`|~**`Z;@fQ-AVc<|KL$kJI)wjpccgM1U; ze9tl)QX-Ywm&@9kweT(XL$&-S1>u4(J?WC$>IbD=wqN$5ilDSIF zm@ZOR>1+rvM)>+;fT|eFWw}zY!U$*LsSJdwFn~S*ySSJHh=AMA_;_8k0`YtmyoWd} zo*k6nL3NO>vZn)LhfqDnC@@T6#DtB&J%%bEssZs0BWf;-=tN*7BgQGQ zDTF_8ECjfP(1BPc;(Y{3Bs^SVr-&S^rNHF%u0wF#l483{1>JDOLk>FFBu2>L^a0LD zbW=u(>lPw~W4zvI1amW;Am{<<&5SsW5j=MKvtQwn7zyYJNeCK16q8TwVI@R&x(QH- zbPRIRFuq#AA%P&cEa5g1p~;s?$dXHE@K6ZwOS!)E1Nb1l1c`1bo^M2~x}7i*O>@dr zrK)a)3Q+_>x*5sHq^~H%ok{RRh+;%pH;Mer;gcU(^cG_<#VHr~GZ_v-%gQzBJa*;= zV1`RU;16SjT)Kd4yoO(_r7*p2CK4X~2s|)xGMSQuC@GL5ExHN99J>8H7taVr1h*k2 zL&OL9J&8mbg&9EhvhG9bCsWGdR3^zAHLY?wF5)O8R5}S!4FO*g?$`+Z;B-+!=T)}} z$xMPA7)cnl)J+L)MmmA@(O1{h zk&3+ZXV}<*XM@pXDvIK?mEzjGZpAZ2lqo@z(m2+&06vTYU%I_2KFz=w5_+Q%oYw8e ztdq&`ltoLzz-DSe95Q2cd32o9o#4B%lg{ZoR>GXKu_a*i}!b=INX}T5v#sh&B(< z%kEG<{IcqPMRUKBx4u%ayWj1((Q|9pJKJw=pF0F$A=1vKdO9>u$K27ce1SVHciV2a zEle$14GcUtyzFq@-2Lu>8wVD)sE$pVV-u1d1=1Vy?AcE*Ljt$z9?{$*dF#km{@|VN zNB)i_e@Cug_4jK2-o-PTe>()myzRNIs<%(`_RSqz_V{jIeK&g}yD*gNU%aS#c59y9 zz(6ybYh8RzZT`O2{QV!Dnmal#eC2Lgu&VBM&D}nCYqQh+#Fu$TzE6rbZ=%cqHcUqZQiLh@0>qUaJD{j zwl6u`3*nAJ%Z5TIys-J6ZNqPjo=}X-^9M}ewUjt1?*^D&+ZbJLzIf;p%cl{w z|JD4t3;B-M;8vX%HRr{=^Wt(~L!qU;;9pnpwiKGecVD~x+Jd;~SDSjZrrv^|Ed)0d znl~1_ffb)EXn#UM@&`=jtt&yw7J6jeuw>n^Fr!*~G;2@Z+5?$y8>z1D+*@krZmn~7 zA=p-EjTAioLSS7X7%uplRytS4Y2Iy}LBqaEngdu<8W~qIRKB^>A>4mrQHgFPQ-YPC0P@>>~%M3p(c{aDtrep^?HO>y9W63s3 z&DBu;a#_OJMu>&i{U!@!Ygx0t<(c)FKao!xfY#M~TC-MZXpQlrQmwkObxy5>>$~D% zP?yVYa(&I`0m-~2%S^7l&YZP#zvg;$)z;%1szHCY&IRJ#;1<|m6c{KjUfDXwvQ7&< z3dWO*7lb0c@7K|Uu}11OO=^PxKvhta_hs#?Ln5frunz4%v?r~`^D14^Z}P?TL({E> zn`ht~Z80Icw@;W9WCS?>Z@$h=Rt4tld5X9ar zIWUNEKpeanaBVoY`+7rn-}O$!)+|@906)4aF;;B4hrQlbCig5dt*b~>x-fO`VFC4< z(-4hT$cZLIW8pV#0N3Miwk!<*w-9-S!$4WL4V7Fy2~nqR18)fgf(NrU(jFjpOu|sB z;Q%Bsm&D{vOwfIhrZKsSNeYu0OpahOhzSO=A`WugLr^~cphAXUv_QZZC=}qbHC2*jr!Vh6tU8Zq&Lepz0J7lx(G@dP0kpBwxCw|fb>&9Y 
zrY&02mifc)95zUF+M1qFkgQM-Ts^fn5qb+-uw(R+&|C0lsO6rzjZOib(9om)?YUrdEI+=%id}97s;n4AX z%lW^Q?p(c_y`9YsE%raSsJ6VUwY;2%`|Nq3`SQ_x`0V@sdo3Tdz1Nl#AB5HLUM;*g zAKv?rpC5jVQFPk1Z`s>?$F(r^fX{pPs@}btcQ0fLo-lqqV0^s%36uBkSH1f+?|#TU z-anKNpDGicdSF$%A0*#PE{>@i`n3)H2v5U+>U`nl zVDp`03zyYkL<>gp%MmAvJE*dqMMF$dmi&asXi_w5m=?n`586nr}V^x9T0%yuBlyd5p^i|K2%D%8$^n9V}76gh|T=F#sPwf5hJX1(w~01fmMp@b@}| z2MMU4hXdo_^)wLic1U7$ARYi}?d5Dc;3b>DU`y!F6tShzVC zIJ37*w+`MJzI*ca$=u5i2G!t8TJR;+yGQfx!O#A38;k1g*S!5Tg@zGn`wyM4Zn(x4zyAW!o+E!NCW>i=VzOBN7hVK~J^%m! literal 0 HcmV?d00001 diff --git a/__pycache__/render.cpython-311.pyc b/__pycache__/render.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..634cd2e2f3f00636d37b7efb82fc306d76aa9ef5 GIT binary patch literal 8779 zcmbVRYit`wlAhuFDN-awQty#uTc#~hl4ECMJIO}zBk@{JvWYj|l@m}S&QKbeuk9J? zwL`!MWMRq(u-csOtmLu?cVYM(*f{8C_HYOIXZ9ay5HK--0RxBp&_4x9a5?|EtLouX zj&1WUXPE!OO#LzH3HF;?khsYR%q%0Yf+5MK z46_Cn)5fH6)(CG?(v&jKnp2ipORd~GYlCug(w=h6I%t_C=}ft1T`Bjho93;_j+AHC zL(?{>^Uiu{+Meu8`DT47|Exb1m<>RgLvSX$Qo-3E%h>n)2@&CtaPR~3Y)CM^!$c1A zFN4r;i)le5QhI?)W)`G4m&gzfECbO&DqO`CW;pD~43hK>silo2*3u0O%ydnzR z{2Iu_&CFbgQU)nW5TIV?${jrZe3M6QU}U2#id=?B3sM>+qzaC4GME$iDH#XHB_xGg zga+#>>F|KeRgOs60iGy_r^smoMa!nU#KBFj^R70R2R%*}G zMY)S0U~L{j96^$^Nq&vaT~ZQoe6WU)S>ZB?ni-W02)!(FD;XllT!O%msH8&pv>Yc= zRv806@k}z4qiYK5;yD8Iu7GxOHgiQx)4niPA|ckwfvG_F70PK@5?4erDgzr{QJ^2p z8eL&63n?eVqCnOcGSK|3H7+U56P~PrQDN*fi3Bu}Clrzs zm9?xmA&5)-a&F=@yxz^d1JgJDY9=W}6(zCrf{9^vu3CXS29`Vb!A)pZu<`1V=Z|XU z=O!m7uV@ZxvKX31Gs+6l%(B80MKdRo8D7zhu+udgY%?l{W~S@qNe~Tk8EtbJx{PtO z?fdU*)=W-`2@=ul^Sms^z`ux^Q4&^xq!rDMlItTm!ME_q*oq{;ur58bD8kl-fpM@! 
z&8F{6OzB$!Qchjgct#N8nsr`+)vAb^MW5oh7w53{IN=T2vfn%a6I?T0vtM&x8@$F{ z8@e`pZR94SpoMGCQX{RJ1@@ldwt->p!oU8ojAE{*8AfGpL+iVZ9>ssG}RqaX`_HtAF%{Ho0u-&t_TPGhB97TGT zHrwebHMoaYNC`FDs3FxRI0cvB2ECo2cgH=?ZIJnHL++xh*}9oheeGPIp^LY@RX(ir zHhC;{)YBkC=RIG$M%RY~|Mfv3aGl#2DthWITi-%gJ9oj--KhS_eXPwsI78QmpV@P- z9lOLsUlXfwk7xu6@2& z?R|z#t*T?gjr^ZlCB4c>3s=7yP{V5Pf=OS|d+nR=gFWD@x6k*h{r}LV_7z*!S-lQ+ zTYtNy^LivU%K;d%tKPfytq!Oi0;hU}Az^rd5k~&P z3j6NuzYRO)Ze!Q54D=4R_Nu3EL*i~d!$7;vpVm(3$G>(NStjiqVWt?_v|?B_{)Sm$ zS>`vu6Sxil<{Rbz2i%g5`M3r`i^D;rV>=!7Ao$t~6&!FNbKF}z0pF~M1kY!V8*e-` z+dL9ZaNyr*nck~;e#(Fh;&3$Mi3m^v4^eDF504Z2fzasp4xT80#8QBw5P$%dKuM)E zCyRix1c08{7mk{}JwRcS@ z?w}MEqxuv9DB~t0y7sk0yWM=JB?5po17K`r5%3)~8#K)36gu08uxo?MpqikITfZVS zG>ub~g&+@P0ANsKDJ3{S%tA(#)1wL}u7Y88Q5vwJU6@w^O>l@N>g%T+jB^KNJ}oKI zx+p}V+^=K`%encS1lED+Zb;K2KQhyY1bA`+)`ik&bq+34tM!5O$$V2+d__to(cIV> zrwc1%zXJyOXexVjZcb;|KsoI?yww6eF9LYG2;dDC4cw7n(f|q51q4tB5GuX0;EJe0 z`WoLRWNH`q5~8TOz!Xc>wk53gth6d7Yx}DeX0>y8okFjlz=C44AM0C_9i|=kTI=X_ zJojV0b#DS7_o?1>4ESTcYu4g^sDBG`Y@e31XH!{m0j?W>DeE%m5bEX5%GskC(j&mk}G? zHnGf=&y57(YLi5ann{MupqcRkM>Ly|qxa4<^fFV(z0>Hha}c^hxjn_N#^P|lke{%Q zowHB~O&8$;qgiWa)hu{r6-mURnQ+-^Hl#QXnZ)K6I8PQc0-2zF`J}Xvju@Ik?Yu|_ zqSX=O&i_J=yiz9-BTer-!~&G<3Cn5h^bUexf_P11719ImcmsMv2Eif#2lktJX!lPx z2-dK9wiViMgxdkcY3BXs-UIe|L*58**3bk_Dib%X8m>_8#07Q6X5AGS2E?$YA^ejz zsfIiYZ;#6CX+^II#DMi59#v5C+9R+k1d9+lwIA_$Q|r}2X+U3w*(*2nHzR!C)8D)i zR!yq;_bqzlEelNEs#-wPQ+eY?SFv&3s7r%ykh}%7w%aIg16XWoY7aTKnL{n|cGX&J zxfC@ytM+zYEc4-M#obbGZLi-5sutpF)m*i-w-Bs2Vwx4$br_LeVH`>eEMIECGQqat z-{^#^g#Dgl&jrfadW~5Ee)^+2+sDegTCXS@e$|0v!_2`oZBI*gTg%Rk&@)?hw64d! 
z+QBSM)Ai_q^=MK(sGrJS!3_Z0U?Y%bH+*oxbUouT1&0 zAfsEgs-EU7ZUk|pb(opF`@>hCaXH#!9RQuK^|9A#aWsG=oP-gKhRbU>-x|}eEYRmNfZhrEOjH`GEUMa04b9Zk7gIsxfBiEX-05SK%}&0 zOp0mEx*#g^^O;r6!e_I9jv#uGOA)+MYsR#=Lhu3!@f!%OkYSp{K#FEaXEifV2)`ET zCTFnR3g>qWVp|$3X)J_cDCCM^(83B_X~LpM&ol?h9YdEVYbH9jUZRIFoC-va>M;$Y zlv>bC7!A|dRn5E>gJ71%E>Rr``drNo0ahNuRx#?N$v!OeVDu6^=VGl?vt^}ZjCxzA zoaN(UEn?zCU1EB`gg^wFkuJ3lXmwz4^R;TAd-DwR*a zrOlUU@%LYad$*>yR_=6u{JV1aa3y@WaPCpC`%fcV;>U+RJycxSX8(Mp96VA99x1$5 z?dsV&Uc6N59jkPW7tTKJ?k)VH+SR+u7~N4yHrZ-u@L_25L1^?2d*{GCdpUHZ5<0SJ zs`d{Rhd#YjRPIdQS}FHOHyu@P&qMFf1Mg7rP}w_P@s5}1?@>?x*3!olpHAF4|7D_d z^74N^_r>yGPyWZrFFXJG^Dlm0n!Q{ed#~K{Ug7*#zR-=x)|uk@?Qm)2#QjLgH&gb_ zRD3gqQ;+<8H(oA|l>G-P{sV>6kG%fP%f*xBuITnqIq+QBd$i&`T9|$m2yKOn=gZv_ z+f(K6(Q@EeC2*{81}%P|9FA1N5maf4lA6WGj(x#?e&LIS2Zzs;4xicbYh0W|EwVS>FJ3NpO_sfrFn{bL zf7gu{wod)=r5i7mT--M>_Y8zT9dudHbwj40=nX&|2#MECh=+Fuphz?Fd|b1`IwyRB zhbXFT-8`=3(xT=B;E;i9i zFUa6*&;?Lr5eaDq*rGO62`(x!u^>0AoSR3LsCopCXbd+}aK%F4QWhbenUMh~X3_+Y zCC#*ykZOI#uC3tudJ64#f)Zl!8p`J9+h^&W3xnmUnp)JtB z5EYN?BN$C1P3PK&S@Z;C5DA9_gFR#f2|5?DAISkEqe##Rkq8oMHxsG}`s&|pG$RPE z8Ri!>Hyt~sD=ld3n&!r^bXF8%N&G(4%u?vDIp~YJ5Ar^8NBE?<>Q07^Vx~6{tri5< z^|b+Jrm;7_;?*^PASgn!`n*>I>c+`)F({SB#f^ zhbq291#7jVtI{!0Fn;9@Ze6}JR_goN7i*<=-YvN=mED&r?n?#3hfh?(CkpQG zY)toX@!IeJI9?7G47_oJ@ z?yvfKc0IPB<7)=Uw@Cim%Q!>V$2R4%qqpMd{cPrwnLEL9-(;n4a<`MQ`@ah^f&Pkr ze^G)l{m0Ax;}!q$f~^`JEx4;aqXiGT`oV|60}p}+igG!4xDq^kXSx!MZigREO+T2L zzCTi)I$N1Kd;hnUsdMGvxq`Fm2|e@-KJW|{jb+ciif3QJR1NlQP2E^1nu@=?Wi1Cs z3(j5BfPH9}`TvveUSXN=P-*PtFa4#^g>vXZC3K?KNz+~afSKuUncJsxLUMyNmkrUf~Ao{QDKUwjgEZ82~ z+z)O24{R`B0hptz*H;bnR>S>O|G=)n+}i;MW}Ud0uHbGjV;}gM>9E7x0okR5W9%J& za9($AxIc8moY*{na9?+Ccs}&N+?+S!TGdyqPQRwB_ghL^%b%5oo$;)y-5Dd}3S50t z#^~+8y*_-d&-k}K(>c!iw-FX-#76J~4CzFIO^6=}25I#a23!foO(Vm)^9o}D^iB4idOw3bh4&h z2FU@29Vs!xt-r^N{VM&dGX13=`aNcRrTVYRyijWYJzb(Vs50kE?Y}BBR%-uMnZu=L s{~j}arS{)$9}}Db^ap#dui6=?2157?aaDiM?*1hM8`x$3KBY$cKW8Q}y8r+H literal 0 HcmV?d00001 diff --git a/__pycache__/transcribe.cpython-311.pyc 
b/__pycache__/transcribe.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cac6337d8390c796bed9c63974cc35c18a71588e GIT binary patch literal 5529 zcmb7I-ES1v6`$Fi{a$-9#(X+35Yu>x?Ly)pK*$n;xj_Sm~Kn>(|1 zY^~dEgj5tEg&-u{mP&p|DHW)#`cze|ls}*kyJ(}VMzVy|=AmznXkPNv-?_81_8O>? z@%YaDJonsl?m6do_V*nfQ2|f)-@chjbqT`1sNp?6w<13K0F`@!DvS%NsQU6^!8h&` zsqWAF$Nl^(jZ64S`9L8!9xQ~$Lxu2on9m91BZcUA6!k7Oc0(GEshiafeB)}jx(VM7 z)qhn;Zc+Y;?XKB+Ubl74l5-}LwU2D3WNqD;l$DaIn{tks1$kCiHB(j$Ri4xgjVXK_ z6AdfN^rEeshBXw8j!o&7TrkyAUX!OVIS-oloGIIiHDeu%Mh^|i*DOswefr|%)EU_} zz0G8MN^9;$o6op=D$QMUI>_k;rYrJ$C*GB*9hGI-D(SW|fiYq`3l2&J&WSuR&k4Q< zZi*TshP3>UJl5I+Oft<<3dOvpf(xQ`=&(F< zs@`JGgS(9Av{tI~hEmY8knx1A+t6bY>OGh9R0oC((}q&ObDpG#U2}?_w+6shCUd1^ z(1B8nnJkl$iG4-4G}S#@mX@%ntA!%;rni)s*A#;oFBQ24ZLLr=nZo9|&Mi61G{wel zI^+utKz=ir9aW*GtZlOSq3D0edXl0OQw%W3Rbh=L{f=}>&)SZ30U~n(V+E$q;>{2QwF7ekUVAy{)z*L2&c}%d;OS9|uips2^|kkWEUKH}HM*2OOnR{)uX7oh z%$h*y5|9CBGHWu;WN@W$=eS{-CA(O%VHx?nJV6$--XJ}rC%uz#UOja9LW-=upx8t5 z31gm<(LRE5>GE)Yq1YQXYtc4mH8#g|vV$QRS2~v~6tzj1J1N-Wq!iq4V{k!ZPUgVtC z%Ou^BRhi&NT)7SbpQ9jxcmdDO165YZQl#D(U)?Ca<}r9%MwD__soJcb)#R*VkpBQ0 z$mx?Mir%w2!Y>GBGVh%|clC1WO8VlZQ>hE^NB}6@O4mkCrQSVvGKC?{nAMqSP)wC) z6{Zu+XhvpL%PFP2?V<{VwkC-)7e~E6u_2_$mW6x&jY171*fC! z^b28+U#HU>a1RfAFP5AwUgyb67cXBMOQpwtayd2XNW`-f%3*K&&|yb{Pd`F#$~~GB z&zc2zaV>4n7d1Gc(3D~UZ!l*wF`J%q@mQL3%l6^&I6l*ILQP>i{z=W|#+a0xD4&!j z^|Aw)$zG@F0p8p4Lu@yV40^>|s93*7Q4zk~B!m-RCiX5Lsf7mWp@CIF3?x?)d+Uk* zYPf%;tNZTGd%N%KUQYaZXRYgaz3X`8{7OgS*7&C%eDXoHci^E`>lm(g3|CHn-Q9Ed z*u9ZEBh`IJYTZZc-A5}Io`$;NI~sil9_F6(4OjbyZ_nSJ|Lc!###iFomv%lp{Y7sz zK3a>9*5jj;tD#JEuRz$rh!>z@)Gk#DAzPof5A9aWPyDE|c$m?y-?mc~3b! 
z-9kd`j%~xNsJEhh7S^{;u}oO+xu^YPYT&-VodV=cp|AOr!z#|uj^T>pq<|BhT8xxK z-{VA)?{%Wc^vg~ZneOzsQA5vjdnOvMJsxq?B>)imL+T(c>DXTStiaTmxg+T!k`l_C^H%^qXqa(Nu@Mx>IR3xJZ3r3wLP4mQY_Lj16*2b$79Ec5(I%?S)338 zB*jy9oH`}TWOg#@LiIP%!|^(MgKzEKIEv9Z(+Q)Urr!&kD3_pH_oLSl=lD@bJMQB~ z)f5q=;%)p}S0N-s)vZDxbmQ&Y;pO3~)L)bO>ry`|6i&C@>%G&vH1#l6>l&_i4Oh;8 zEg=>zkJqGwb?IPLI{0;bcP%d0<8meSv}fm6J^P>Z>|dV#qOaPszt(f6-gD*`qm{Ea z)y8Yd%GpNp^~(81V%w?^4)_V5Wd-kt%Z(r)ZzwNV; zyCaP)+wPvdH+pBZwxzGWrEfJP?TD@lD83P-NbJ8M#m8Ngv~N3U$f#bGgpf<_^<%rxQx2RWqATJJEDHS3*v!rPGW; zI=_@`D{4*<01dXyC>3Fpg)mA`oX#BoNy`Ij9E+{pSAHAV2+fwMAn*l?0VKoEeIx7| znq*4WUr?<2MNwS!B}8depkkjOzFHObt$&{ikqZB72whe0Z3unUm%mSi*Q)LBYH*u) bRa_OGD~G$qu2rGE80Zt_RpBKCKhysJ%Fw%g literal 0 HcmV?d00001 diff --git a/__pycache__/utils.cpython-311.pyc b/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c4f202100b86f021106aaf057e75124886032ee GIT binary patch literal 4567 zcmb_fO>7&-72YM6eOhxX804?Xl$kO6@Z3m7n31gLK+bknKd+ofnz zQH&lsO2ZJIfYaBBsb~i{3MUM zC+SIfC%q|QQs6notNIi{_00&1@47JQSNu*rfO-J+pb}IA&HfPTA*D+RV?;#lKr7}B zuCL3@_?6D|o8yw|xLe~P<}^4(U)ZScC= z;+Oa2JQZ5~TSOxC0SN=%N!PEL&sdp z6fHwc8gnX>B~u;7xO7sIG0;p?=J1)*tb~}+6)YtijF{-kBZm5ds6zlLnNCQ0CZ#e> z7UhJ*B-z3QQJt67wB_*G$U-qJbHtT0C0zwyE1`;t##Grd7`S35kVBeLNydz+iiYm+ zNg1Z)?A2T_Ej8uJlaees(d2PiO-R=?Bf~^RHD#t+hMLE9R3hr&q@jvEn@;tY^P2q5EJlStJqieFEsETAs zc1Si-mo;6}XVK#sIcW2;NmeI{Ykj&pH0wE3xQ^qqH4?w^$QXAkZlof^5cFYaey^x7faU@0jHR4FM+~N1piWVvh8U1<1-mZ zfm?}jk=P2y`+6GD?jrjpwyhr9U75L~)M7{Ku_J}o8{NG(`#oWy#7tB=Tm>RKYr)%orPL#v>qD;qi|nU=wsa^ zm&Y={WiRBbtFOU533Cb_#WR7tg2r#Bb_B`080C&{A<5&j6t5y|zz*WQIZFB3Z4r3p z-wd~4%YjF0Y_wn{aMj_?`f~iwu(P};?*#^VlwcM|l=CVf%ClgQ6Nmy|pTw=B$*mLI zz}Ml`-pF^5HXQj*X2So0u3*rBWi+JH|@;A+CFnNDhir2sI~VsLKA3b7KvAamFSElr06E><{eGPH{` zNduc9PghOTMXc7g$TP-g*N<(DA70}}*7##>wn#42i*I!pM%f~&e6SHkT-2^4a9oIUJClP|7l=O-0-j?2`EO>HL@sDt*H8KuH=F=bm zp~!CcU*7Taw@Zf>FD_qfH93y;keujYtPA8gmB;gusF0lRCu`& z?j(%qb$TA4XJ~1t^h&w67TR79ZAX8k>&BU-Go^{evE{MCs|{zLtbXPUF>wYLw 
ze4Capy|?sC@tH=nZ&9EbH2(17wKBh$tp*2I{9J5H>CNT&V$_AEK=UZtY=QujRJ-~m z2;q~$Uqy+OtMK`@yAk?4S*Uy?iaZ|qoH5|3csCh|8x`8ooA<(}h0k#B0dBatSMh%) z{MuL99OVK^upQ-q0-@~5b_gKvUw0h1)@DEe-$Cev>s($CC*Ha+;#d$PJ7mv;*xrMO z6}A(ltnc89BS&V~4(bv?5R1L=SjWiX0(*hl4-z>Cq>Rg9e1n_pov2xEB5rEARck@7W875i$pGloMJCuOxBKV9NeECZ#E~ zA-1?dsTm$8++a05O$IVO?S?8zB;clZL{igLJP=e#PKXFd&TV^ox@AVw(@s2l?24Z> z^x0-OR1AbT-LNnbH}kX;n-dG^1fC_9E0rn!v+xT@L=rlTXf`6Vh1Y}FwhUr%GASCe zoJlW;6kUxu-4Tw~Qt<$xz$spnk{Q)ugvZW?O*qjUfnT(kq+wgL;#@*Q5T>V%`v`Cl z;vu0DqkA)4e?OH+Ky1I1#>GKlTc{(zj#0-;)Zru9P21-v$_~gG#_*=H!_C4Jf|D&c zW9Vu`n1STNMu^U2iuBG-q3?(I%r%gW_t{V*61#DB>Fnb967guLN|5y-V!{XGL%eGJ;6?+jgHx@ zgRFRYo^NoWg7a%|J=OJpgL}HV*?+~~&2K5cvce&&xS3OajO-oY2g(yG9J0#WPPUP} s803#tcCT>A?tllfjb!U~zI%lODWM-3Ersk}-#z(HgHG`~iL>kf0!IUFhyVZp literal 0 HcmV?d00001 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5d575cc --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,35 @@ +services: + video-render-new: + restart: unless-stopped + build: . + container_name: video-render-new + environment: + # RabbitMQ credentials + - RABBITMQ_PASS=${RABBITMQ_PASS} + - RABBITMQ_HOST=${RABBITMQ_HOST} + - RABBITMQ_USER=${RABBITMQ_USER} + - RABBITMQ_PORT=${RABBITMQ_PORT} + - RABBITMQ_QUEUE=${RABBITMQ_QUEUE} + - RABBITMQ_UPLOAD_QUEUE=${RABBITMQ_UPLOAD_QUEUE} + # API keys for the LLMs + - GEMINI_API_KEY=${GEMINI_API_KEY} + - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + - OPENROUTER_MODEL=${OPENROUTER_MODEL} + # Optional whisper settings + - WHISPER_MODEL=${WHISPER_MODEL} + - WHISPER_DEVICE=${WHISPER_DEVICE} + - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE} + volumes: + # Mount host directories into the container so that videos can be + # provided and outputs collected. These paths can be customised when + # deploying the stack. The defaults assume /root/videos and + # /root/outputs on the host. 
+ - "/root/videos:/app/videos" + - "/root/outputs:/app/outputs" + command: "python -u main.py" + networks: + - dokploy-network + +networks: + dokploy-network: + external: true \ No newline at end of file diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..dc30f99 --- /dev/null +++ b/dockerfile @@ -0,0 +1,45 @@ +FROM python:3.11-slim + +# Create and set the working directory +WORKDIR /app + +# Prevent some interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Install ffmpeg and other system dependencies. The list largely mirrors +# the original project but omits PostgreSQL development headers which are +# unused here. We include libgl1 and libglib2.0-0 so that MoviePy +# (through its dependencies) can find OpenGL and GLib when using the +# Pillow and numpy backends. +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + build-essential \ + xvfb \ + xdg-utils \ + wget \ + unzip \ + libmagick++-dev \ + imagemagick \ + fonts-liberation \ + sox \ + bc \ + gsfonts && \ + rm -rf /var/lib/apt/lists/* + +# Copy dependency specification and install Python dependencies +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application code +COPY . . + +# Declare volumes for videos and outputs. These paths correspond to the +# mount points defined in the docker-compose file. Using VOLUME here +# documents the intended persistent storage locations. +VOLUME ["/app/videos", "/app/outputs"] + +# The default command starts the consumer loop +CMD ["python", "-u", "main.py"] \ No newline at end of file diff --git a/llm.py b/llm.py new file mode 100644 index 0000000..f0a5a2a --- /dev/null +++ b/llm.py @@ -0,0 +1,234 @@ +"""High-level helpers for interacting with the Gemini and OpenRouter APIs. + +This module encapsulates all of the logic needed to call the LLM endpoints +used throughout the application. 
It uses the OpenAI Python client under the +hood because both Gemini and OpenRouter expose OpenAI-compatible APIs. + +Two functions are exposed: + +* ``select_highlights`` takes an SRT-like string (the transcription of a + video) and returns a list of highlight objects with start and end + timestamps and their corresponding text. It uses the Gemini model to + identify which parts of the video are most likely to engage viewers on + social media. +* ``generate_titles`` takes a list of highlight objects and returns a list + of the same objects enriched with a ``topText`` field, which contains a + sensational title for the clip. It uses the OpenRouter API with a model + specified via the ``OPENROUTER_MODEL`` environment variable. + +Both functions are resilient to malformed outputs from the models. They try +to extract the first JSON array found in the model responses; if that +fails, a descriptive exception is raised. These exceptions should be +handled by callers to post appropriate error messages back to the queue. +""" + +from __future__ import annotations + +import json +import os +import re +from typing import Any, Dict, List + +import openai + + +class LLMError(Exception): + """Raised when the LLM response cannot be parsed into the expected format.""" + + +def _extract_json_array(text: str) -> Any: + """Extract the first JSON array from a string. + + LLMs sometimes return explanatory text before or after the JSON. This + helper uses a regular expression to find the first substring that + resembles a JSON array (i.e. starts with '[' and ends with ']'). It + returns the corresponding Python object if successful, otherwise + raises a ``LLMError``. + """ + # Remove Markdown code fences and other formatting noise + cleaned = text.replace("`", "").replace("json", "") + # Find the first [ ... 
] block + match = re.search(r"\[.*\]", cleaned, re.DOTALL) + if not match: + raise LLMError("Não foi possível encontrar um JSON válido na resposta da IA.") + json_str = match.group(0) + try: + return json.loads(json_str) + except json.JSONDecodeError as exc: + raise LLMError(f"Erro ao decodificar JSON: {exc}") + + +def select_highlights(srt_text: str) -> List[Dict[str, Any]]: + """Call the Gemini API to select highlight segments from a transcription. + + The input ``srt_text`` should be a string containing the transcription + formatted like an SRT file, with lines of the form + ``00:00:10,140 --> 00:01:00,990`` followed by the spoken text. + + Returns a list of dictionaries, each with ``start``, ``end`` and + ``text`` keys. On failure to parse the response, a ``LLMError`` is + raised. + """ + api_key = os.environ.get("GEMINI_API_KEY") + if not api_key: + raise ValueError("GEMINI_API_KEY não definido no ambiente") + + model = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash") + + # Initialise client for Gemini. The base_url points to the + # generativelanguage API; see the official docs for details. + client = openai.OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/") + + # System prompt: instructs Gemini how to behave. + system_prompt = ( + "Você é um assistente especializado em selecionar **HIGHLIGHTS** de vídeo " + "a partir da transcrição com timestamps.\n" + "Sua única função é **selecionar os trechos** conforme solicitado.\n" + "- **Não resuma, não interprete, não gere comentários ou textos complementares.**\n" + "- **Retorne a resposta exatamente no formato proposto pelo usuário**, sem adicionar ou remover nada além do pedido.\n" + "- Cada trecho selecionado deve ter **no mínimo 60 segundos e no máximo 120 segundos** de duração.\n" + "- Sempre responda **em português (PT-BR)**." + ) + + # Base prompt: describes how to select highlights and the format to return. 
+ base_prompt = ( + "Você assumirá o papel de um especialista em Marketing e Social Media, " + "sua tarefa é selecionar as melhores partes de uma transcrição que irei fornecer.\n\n" + "## Critérios de Seleção\n\n" + "- Escolha trechos baseando-se em:\n" + " - **Picos de emoção ou impacto**\n" + " - **Viradas de assunto**\n" + " - **Punchlines** (frases de efeito, momentos de virada)\n" + " - **Informações-chave**\n\n" + "## Regras Rápidas\n\n" + "- Sempre devolver pelo menos 3 trechos, não possui limite máximo\n" + "- Garanta que cada trecho fique com no MÍNIMO 60 segundos e no MÁXIMO 120 segundos.\n" + "- Nenhum outro texto além do JSON final.\n\n" + "## Restrições de Duração\n\n" + "- **Duração mínima do trecho escolhido:** 60 segundos\n" + "- **Duração máxima do trecho escolhido:** 90 a 120 segundos\n\n" + "## Tarefa\n\n" + "- Proponha o **máximo de trechos** com potencial, mas **sempre devolva no mínimo 3 trechos**.\n" + "- Extraia os trechos **apenas** da transcrição fornecida abaixo.\n\n" + "## IMPORTANTE\n" + "- Cada trecho deve ter no mínimo 60 segundos, e no máximo 120 segundos. 
def generate_titles(highlights: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Generate a sensational title (``topText``) per highlight via OpenRouter.

    Parameters
    ----------
    highlights:
        Dicts as returned by ``select_highlights``; each must contain
        ``start``, ``end`` and ``text``.

    Returns
    -------
    list of dict
        Copies of the matching input highlights with an added ``topText``
        key. Entries whose (start, end) pair does not match any input
        highlight are silently dropped.

    Raises
    ------
    ValueError
        If ``OPENROUTER_API_KEY`` or ``OPENROUTER_MODEL`` is unset.
    LLMError
        On API failure, empty response, or a non-list JSON payload.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY não definido no ambiente")
    model = os.environ.get("OPENROUTER_MODEL")
    if not model:
        raise ValueError("OPENROUTER_MODEL não definido no ambiente")
    # OpenRouter exposes an OpenAI-compatible endpoint.
    client = openai.OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1")

    # Prompt: generate titles only, strict JSON output.
    prompt_header = (
        "Você é um especialista em Marketing Digital e Criação de Conteúdo Viral.\n\n"
        "Sua tarefa é criar **títulos sensacionalistas** (*topText*) para cada trecho "
        "de transcrição recebido em formato JSON.\n\n"
        "## Instruções\n\n"
        "- O texto deve ser **chamativo, impactante** e com alto potencial de viralização "
        "em redes sociais, **mas sem sair do contexto do trecho**.\n"
        "- Use expressões fortes e curiosas, mas **nunca palavras de baixo calão**.\n"
        "- Cada *topText* deve ter **no máximo 2 linhas**.\n"
        "- Utilize **exclusivamente** o conteúdo do trecho; não invente fatos.\n"
        "- Não adicione comentários, explicações, ou qualquer texto extra na resposta.\n"
        "- Responda **apenas** no seguinte formato (mantendo as chaves e colchetes):\n\n"
        "[\n  {\n    \"start\": \"00:00:10,140\",\n    \"end\": \"00:01:00,990\",\n    \"topText\": \"Título impactante\"\n  }\n]\n\n"
        "## Observações:\n\n"
        "- Nunca fuja do contexto do trecho.\n"
        "- Não invente informações.\n"
        "- Não utilize palavrões.\n"
        "- Não escreva nada além do JSON de saída.\n\n"
        "Aqui estão os trechos em JSON:\n"
    )
    json_input = json.dumps(highlights, ensure_ascii=False)
    messages = [
        {"role": "system", "content": "Você é um assistente útil e objetivo."},
        {"role": "user", "content": prompt_header + json_input},
    ]
    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.7,
        )
    except Exception as exc:
        raise LLMError(f"Erro ao chamar a API OpenRouter: {exc}") from exc
    content = response.choices[0].message.content if response.choices else None
    if not content:
        raise LLMError("A resposta da OpenRouter veio vazia.")
    result = _extract_json_array(content)
    if not isinstance(result, list):
        raise LLMError("O JSON retornado pela OpenRouter não é uma lista.")
    # Merge topText back into the originals, matching on (start, end).
    # pop() guards against the model returning the same pair twice.
    input_map = {(item["start"], item["end"]): item for item in highlights}
    enriched: List[Dict[str, Any]] = []
    for item in result:
        original = input_map.pop((item.get("start"), item.get("end")), None)
        if original is None:
            continue  # unexpected entry from the model — skip it
        enriched_item = original.copy()
        # topText may come back missing or null; normalise to a stripped str
        # (the original called .strip() directly and crashed on null).
        enriched_item["topText"] = (item.get("topText") or "").strip()
        enriched.append(enriched_item)
    return enriched
Only the ``topText`` field is kept; the description is + intentionally omitted since the caption will be burned into the video. +5. Cuts the original video into individual clips corresponding to each + highlight and renders them vertically with a title above and a dynamic + caption below. +6. Publishes a message to the upload queue with information about the + generated clips. On success, this message contains the list of output + files. On failure, ``hasError`` will be set to ``True`` and the + ``error`` field will describe what went wrong. +7. Cleans up temporary files (audio, transcript, working directory) and + deletes the original source video from the ``videos`` directory to + conserve disk space. + +The queue names and RabbitMQ credentials are configured via environment +variables. See the accompanying ``docker-compose.yml`` for defaults. +""" + +from __future__ import annotations + +import json +import os +import shutil +import time +import traceback +from typing import Any, Dict, List + +import pika + +from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds +from .transcribe import transcribe +from .llm import LLMError, select_highlights, generate_titles +from .render import render_clip + + +# Environment variables with sensible defaults +RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq") +RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672)) +RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin") +RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS") +RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render") +RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload") + +if not RABBITMQ_PASS: + raise RuntimeError("RABBITMQ_PASS não definido no ambiente") + + +def get_next_message() -> Any: + """Retrieve a single message from the RABBITMQ_QUEUE. + + Returns ``None`` if no messages are available. This helper opens a new + connection for each call to avoid keeping stale connections alive. 
+ """ + credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS) + parameters = pika.ConnectionParameters( + host=RABBITMQ_HOST, + port=RABBITMQ_PORT, + credentials=credentials, + heartbeat=60, + blocked_connection_timeout=300, + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE) + if method_frame: + channel.basic_ack(method_frame.delivery_tag) + connection.close() + return body + connection.close() + return None + + +def publish_to_queue(payload: Dict[str, Any]) -> None: + """Publish a JSON-serialisable payload to the RABBITMQ_UPLOAD_QUEUE.""" + credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS) + parameters = pika.ConnectionParameters( + host=RABBITMQ_HOST, + port=RABBITMQ_PORT, + credentials=credentials, + heartbeat=60, + blocked_connection_timeout=300, + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True) + channel.basic_publish( + exchange="", + routing_key=RABBITMQ_UPLOAD_QUEUE, + body=json.dumps(payload), + properties=pika.BasicProperties(delivery_mode=2), + ) + connection.close() + + +def build_srt(segments: List[Dict[str, Any]]) -> str: + """Build an SRT-like string from a list of segments. + + Each segment should have ``start``, ``end`` and ``text`` fields. The + timestamps are converted to the ``HH:MM:SS,mmm`` format expected by + the Gemini prompt. Segments are separated by a blank line. + """ + lines = [] + for seg in segments: + start_ts = seconds_to_timestamp(seg["start"]) + end_ts = seconds_to_timestamp(seg["end"]) + lines.append(f"{start_ts} --> {end_ts}\n{seg['text']}") + return "\n\n".join(lines) + + +def process_message(data: Dict[str, Any]) -> Dict[str, Any]: + """Process a single video task described in ``data``. + + Returns the payload to be sent to the upload queue. 
def process_message(data: Dict[str, Any]) -> Dict[str, Any]:
    """Run the full pipeline for one video task and return the upload payload.

    Raises on failure; the caller catches and publishes an error payload.
    """
    filename = data.get("filename")
    if not filename:
        raise ValueError("Campo 'filename' ausente na mensagem")
    url = data.get("url")
    video_id = data.get("videoId")
    # n8n drops source videos into the 'videos' directory.
    video_path = os.path.join("videos", filename)
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}")
    sanitized = sanitize_filename(os.path.splitext(filename)[0])
    work_dir = os.path.join("app", "videos", sanitized)
    segments, words = transcribe(video_path, work_dir)
    srt_str = build_srt(segments)
    highlights = select_highlights(srt_str)
    # Normalise timestamp strings; they stay strings here — conversion to
    # seconds happens per-clip below. (The original comment claimed a
    # float conversion that never happened.)
    for item in highlights:
        item["start"] = str(item["start"]).strip()
        item["end"] = str(item["end"]).strip()
    titles = generate_titles(highlights)
    output_dir = os.path.join("outputs", sanitized)
    processed_files: List[str] = []
    for idx, item in enumerate(titles, start=1):
        start_sec = timestamp_to_seconds(item.get("start"))
        end_sec = timestamp_to_seconds(item.get("end"))
        # Collect the words overlapping this clip, re-based to clip time 0.
        relative_words = []
        for w in words:
            if w["end"] <= start_sec or w["start"] >= end_sec:
                continue  # word does not overlap the clip interval
            relative_words.append({
                "start": max(0.0, w["start"] - start_sec),
                "end": max(0.0, w["end"] - start_sec),
                "word": w["word"],
            })
        if not relative_words:
            # e.g. pure silence: a dummy word keeps the caption renderer happy
            relative_words.append({"start": 0.0, "end": end_sec - start_sec, "word": ""})
        out_path = render_clip(
            video_path=video_path,
            start=start_sec,
            end=end_sec,
            top_text=item.get("topText", ""),
            words=relative_words,
            out_dir=output_dir,
            base_name=sanitized,
            idx=idx,
        )
        processed_files.append(out_path)
    payload = {
        "videosProcessedQuantity": len(processed_files),
        "filename": filename,
        "processedFiles": processed_files,
        "url": url,
        "videoId": video_id,
        "hasError": False,
        "error": None,
    }
    # Conserve disk space: drop temp dir and the consumed source video.
    shutil.rmtree(work_dir, ignore_errors=True)
    try:
        os.remove(video_path)
    except FileNotFoundError:
        pass
    return payload


def _cleanup_artifacts(filename: "str | None") -> None:
    """Best-effort removal of working dirs, outputs and the source video."""
    if not filename:
        return
    sanitized = sanitize_filename(os.path.splitext(filename)[0])
    if sanitized:
        shutil.rmtree(os.path.join("app", "videos", sanitized), ignore_errors=True)
        shutil.rmtree(os.path.join("outputs", sanitized), ignore_errors=True)
    try:
        os.remove(os.path.join("videos", filename))
    except OSError:
        pass


def main() -> None:
    """Poll RABBITMQ_QUEUE forever; process each task and publish the result."""
    print(" [*] Esperando mensagens. Para sair: CTRL+C")
    while True:
        body = get_next_message()
        if body is None:
            time.sleep(5)
            continue
        try:
            data = json.loads(body)
        except Exception:
            print("⚠️ Mensagem inválida recebida (não é JSON)")
            continue
        try:
            result = process_message(data)
        except Exception as exc:
            # Log for debugging, clean up partial artifacts, report failure.
            traceback.print_exc()
            _cleanup_artifacts(data.get("filename"))
            error_payload = {
                "videosProcessedQuantity": 0,
                "filename": data.get("filename"),
                "processedFiles": [],
                "url": data.get("url"),
                "videoId": data.get("videoId"),
                "hasError": True,
                "error": str(exc),
            }
            try:
                publish_to_queue(error_payload)
                print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
            except Exception as publish_err:
                print(f"Erro ao publicar mensagem de erro: {publish_err}")
            continue
        try:
            publish_to_queue(result)
            print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.")
        except Exception as publish_err:
            print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # The startup banner promises a clean CTRL+C exit; honour it
        # instead of dumping a KeyboardInterrupt traceback.
        print("Encerrando.")
+""" + +from __future__ import annotations + +import os +from typing import Dict, List + +import numpy as np +from moviepy.video.io.VideoFileClip import VideoFileClip +from moviepy.video.VideoClip import ColorClip, VideoClip +from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip +from moviepy.video.VideoClip import TextClip +from PIL import Image, ImageDraw, ImageFont + +from .utils import wrap_text + + +def render_clip( + video_path: str, + start: float, + end: float, + top_text: str, + words: List[Dict[str, float]], + out_dir: str, + base_name: str, + idx: int, + # Use a widely available system font by default. DejaVuSans is installed + # in most Debian-based containers. The caller can override this path. + font_path: str = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", + final_width: int = 1080, + final_height: int = 1920, + top_h: int = 480, + middle_h: int = 960, + bottom_h: int = 480, + video_codec: str = "libx264", + bitrate: str = "3000k", +) -> str: + """Render a single clip with title and dynamic caption. + + Parameters + ---------- + video_path: str + Path to the source video file. + start: float + Start time of the clip in seconds. + end: float + End time of the clip in seconds. + top_text: str + The title to display in the top region. + words: List[Dict[str, float]] + List of word-level timestamps for this clip. Each dict must have + ``start``, ``end`` and ``word`` keys. The start and end values + should be relative to the beginning of this clip (i.e. start at 0). + out_dir: str + Directory where the output file should be saved. The function + creates this directory if it doesn't exist. + base_name: str + Base name of the original video (sanitized). Used to build the + output filename. + idx: int + Index of the clip. Output will be named ``clip_{idx}.mp4``. + font_path: str + Path to the TrueType font to use for both title and caption. + final_width: int + Width of the final video in pixels. 
+ final_height: int + Height of the final video in pixels. + top_h: int + Height of the title area in pixels. + middle_h: int + Height of the video area in pixels. + bottom_h: int + Height of the caption area in pixels. + video_codec: str + FFmpeg codec to use when writing the video. + bitrate: str + Bitrate for the output video. + + Returns + ------- + str + The path to the rendered video file. + """ + os.makedirs(out_dir, exist_ok=True) + # Extract the segment from the source video + with VideoFileClip(video_path) as clip: + segment = clip.subclip(start, end) + dur = segment.duration + # Background + bg = ColorClip(size=(final_width, final_height), color=(0, 0, 0), duration=dur) + # Resize video to fit width + video_resized = segment.resize(width=final_width) + # Compute vertical position to centre in the middle region + y = top_h + (middle_h - video_resized.h) // 2 + video_resized = video_resized.set_position((0, y)) + + # Build title clip + # Wrap the title to avoid overflow + wrapped_lines = wrap_text(top_text, max_chars=40) + wrapped_title = "\n".join(wrapped_lines) + title_clip = TextClip( + wrapped_title, + font=font_path, + fontsize=70, + color="white", + method="caption", + size=(final_width, top_h), + align="center", + ).set_duration(dur).set_position((0, 0)) + + # Prepare font for caption rendering + pil_font = ImageFont.truetype(font_path, size=60) + default_color = (255, 255, 255) # white + highlight_color = (255, 215, 0) # gold-like yellow + + # Precompute widths of a space and bounding box height for vertical centering + space_width = pil_font.getbbox(" ")[2] - pil_font.getbbox(" ")[0] + bbox = pil_font.getbbox("A") + text_height = bbox[3] - bbox[1] + + def make_caption_frame(t: float): + """Generate an image for the caption at time t.""" + # Determine current word index + idx_cur = 0 + for i, w in enumerate(words): + if w["start"] <= t < w["end"]: + idx_cur = i + break + if t >= w["end"]: + idx_cur = i + # Define window of words to display: show up 
to 5 words + start_idx = max(0, idx_cur - 2) + end_idx = min(len(words), idx_cur + 3) + window = words[start_idx:end_idx] + # Compute widths for each word + word_sizes = [] + for w in window: + bbox = pil_font.getbbox(w["word"]) + word_width = bbox[2] - bbox[0] + word_sizes.append(word_width) + total_width = sum(word_sizes) + space_width * (len(window) - 1 if window else 0) + # Create blank image for caption area + img = Image.new("RGB", (final_width, bottom_h), color=(0, 0, 0)) + draw = ImageDraw.Draw(img) + x = int((final_width - total_width) / 2) + y_pos = int((bottom_h - text_height) / 2) + for j, w in enumerate(window): + color = highlight_color if (start_idx + j) == idx_cur else default_color + draw.text((x, y_pos), w["word"], font=pil_font, fill=color) + x += word_sizes[j] + space_width + return np.array(img) + + caption_clip = VideoClip(make_frame=make_caption_frame, duration=dur) + caption_clip = caption_clip.set_position((0, final_height - bottom_h)) + + # Compose final clip + final = CompositeVideoClip([ + bg, + video_resized, + title_clip, + caption_clip, + ], size=(final_width, final_height)) + # Use the original audio from the video segment + final_audio = segment.audio + if final_audio is not None: + final = final.set_audio(final_audio) + # Define output path + out_path = os.path.join(out_dir, f"clip_{idx}.mp4") + # Write to disk + final.write_videofile( + out_path, + codec=video_codec, + fps=30, + bitrate=bitrate, + audio_codec="aac", + preset="ultrafast", + ffmpeg_params=[ + "-tune", "zerolatency", + "-pix_fmt", "yuv420p", + "-profile:v", "high", + "-level", "4.1", + ], + threads=4, + ) + # Close clips to free resources + final.close() + segment.close() + return out_path \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f5ce0c5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +pika==1.3.2 +moviepy==2.0.0 +faster-whisper==1.2.0 +openai==1.16.0 +numpy==1.26.4 +Pillow==10.1.0 
+unidecode==1.3.6 \ No newline at end of file diff --git a/transcribe.py b/transcribe.py new file mode 100644 index 0000000..8cb4739 --- /dev/null +++ b/transcribe.py @@ -0,0 +1,111 @@ +"""Utilities for extracting audio from video and generating transcriptions. + +This module handles two tasks: + +1. Use FFMPEG to extract the audio track from a video file into a WAV file + suitable for consumption by the Whisper model. The audio is resampled to + 16 kHz mono PCM as required by Whisper. +2. Use the Faster-Whisper implementation to generate a transcription with + word-level timestamps. The transcription is returned both as a list of + segments (for building an SRT) and as a flattened list of words (for + building dynamic subtitles). + +If FFMPEG is not installed or fails, a ``RuntimeError`` is raised. The caller +is responsible for cleaning up the temporary files created in the working +directory. +""" + +from __future__ import annotations + +import os +import subprocess +from typing import Dict, List, Tuple + +from faster_whisper import WhisperModel + + +def extract_audio_ffmpeg(video_path: str, audio_path: str) -> None: + """Use FFMPEG to extract audio from ``video_path`` into ``audio_path``. + + The output will be a 16 kHz mono WAV file in PCM S16LE format. Any + existing file at ``audio_path`` will be overwritten. If ffmpeg returns + a non-zero exit code, a ``RuntimeError`` is raised with the stderr. + """ + cmd = [ + "ffmpeg", + "-y", # overwrite output + "-i", + video_path, + "-vn", # disable video recording + "-acodec", + "pcm_s16le", + "-ar", + "16000", + "-ac", + "1", + audio_path, + ] + proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if proc.returncode != 0: + raise RuntimeError(f"FFMPEG error: {proc.stderr.decode(errors='ignore')}") + + +def load_whisper_model() -> WhisperModel: + """Instantiate and cache a Faster-Whisper model. 
+ + The model name and device can be configured via the ``WHISPER_MODEL`` and + ``WHISPER_DEVICE`` environment variables. The default model is + ``large-v3`` for best accuracy. The device can be ``cuda`` or ``cpu``. + A module-level cache is used to prevent loading the model multiple times. + """ + if hasattr(load_whisper_model, "_cache"): + return load_whisper_model._cache # type: ignore[attr-defined] + model_name = os.environ.get("WHISPER_MODEL", "large-v3") + device = os.environ.get("WHISPER_DEVICE", "cpu") + # Compute type can be set via WHISPER_COMPUTE_TYPE; default to float16 on GPU + compute_type = os.environ.get("WHISPER_COMPUTE_TYPE") + # If not explicitly set, choose sensible defaults + if compute_type is None: + compute_type = "float16" if device == "cuda" else "int8" + model = WhisperModel(model_name, device=device, compute_type=compute_type) + load_whisper_model._cache = model # type: ignore[attr-defined] + return model + + +def transcribe(video_path: str, work_dir: str) -> Tuple[List[Dict[str, float]], List[Dict[str, float]]]: + """Transcribe a video file using Faster-Whisper. + + ``video_path`` is the path to the video to transcribe. ``work_dir`` is a + directory where temporary files will be stored (audio file and + transcription). The function returns a tuple ``(segments, words)`` where + ``segments`` is a list of dictionaries with ``start``, ``end`` and + ``text`` fields, and ``words`` is a flat list of dictionaries with + ``start``, ``end`` and ``word`` fields covering the entire video. + The timestamps are expressed in seconds as floats. 
+ """ + os.makedirs(work_dir, exist_ok=True) + audio_path = os.path.join(work_dir, "audio.wav") + # Extract audio + extract_audio_ffmpeg(video_path, audio_path) + # Load Whisper model + model = load_whisper_model() + # Run transcription with word-level timestamps + segments, info = model.transcribe(audio_path, word_timestamps=True) + seg_list: List[Dict[str, float]] = [] + words_list: List[Dict[str, float]] = [] + for seg in segments: + seg_list.append({ + "start": float(seg.start), + "end": float(seg.end), + "text": seg.text.strip(), + }) + # Each segment may contain words attribute + for w in getattr(seg, "words", []) or []: + words_list.append({ + "start": float(w.start), + "end": float(w.end), + "word": w.word, + }) + # Sort words by start time to be safe + words_list.sort(key=lambda d: d["start"]) + return seg_list, words_list \ No newline at end of file diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..c8f9dbc --- /dev/null +++ b/utils.py @@ -0,0 +1,93 @@ +import re +import unicodedata +from typing import List, Tuple + + +def sanitize_filename(name: str) -> str: + """Return a sanitized version of a filename. + + This helper removes accents, converts to lowercase, replaces spaces + with underscores and removes any non alphanumeric characters except + underscores and dots. This makes the directory names safe to use on + most filesystems and matches the behaviour described in the spec. + """ + if not name: + return "" + # Decompose Unicode characters and strip accents + nfkd_form = unicodedata.normalize("NFKD", name) + no_accents = "".join(c for c in nfkd_form if not unicodedata.combining(c)) + # Replace spaces with underscores + no_spaces = no_accents.replace(" ", "_") + # Lowercase and remove any character that is not a letter, digit, dot or underscore + sanitized = re.sub(r"[^A-Za-z0-9_.]+", "", no_spaces) + return sanitized + + +def timestamp_to_seconds(ts: str) -> float: + """Convert a timestamp in HH:MM:SS,mmm format to seconds. 
from typing import List


def timestamp_to_seconds(ts: str) -> float:
    """Convert an ``HH:MM:SS,mmm`` timestamp to float seconds.

    The Gemini/OpenRouter prompts use a comma as the decimal separator; a
    dot is accepted too. ``MM:SS`` and bare-seconds forms are also handled.
    ``None`` or an empty/blank string yields ``0.0``.
    """
    if ts is None:
        return 0.0
    ts = ts.strip()
    if not ts:
        return 0.0
    parts = [float(p) for p in ts.replace(",", ".").split(":")]
    if len(parts) == 3:
        hours, minutes, secs = parts
        return hours * 3600 + minutes * 60 + secs
    if len(parts) == 2:
        minutes, secs = parts
        return minutes * 60 + secs
    return parts[0]  # bare seconds


def seconds_to_timestamp(seconds: float) -> str:
    """Convert seconds to the ``HH:MM:SS,mmm`` form expected by SRT.

    Negative input clamps to zero. Milliseconds are rounded with the carry
    propagated into seconds/minutes/hours — the previous float formatting
    could emit the invalid ``00:00:60,000`` for values like 59.9996.
    """
    total_ms = max(0, round(seconds * 1000))
    hours, rem = divmod(total_ms, 3_600_000)
    minutes, rem = divmod(rem, 60_000)
    secs, ms = divmod(rem, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{ms:03d}"


def wrap_text(text: str, max_chars: int = 80) -> List[str]:
    """Word-wrap ``text`` into lines of at most ``max_chars`` characters.

    No hyphenation: a single word longer than ``max_chars`` occupies its
    own line. Returns a list of lines without trailing whitespace; empty
    input yields an empty list.
    """
    if not text:
        return []
    lines: List[str] = []
    current: List[str] = []
    current_len = 0
    for word in text.split():
        # Flush the line when appending (with a joining space) would overflow.
        if current and current_len + 1 + len(word) > max_chars:
            lines.append(" ".join(current))
            current = [word]
            current_len = len(word)
        else:
            current_len += (1 + len(word)) if current else len(word)
            current.append(word)
    if current:
        lines.append(" ".join(current))
    return lines