From 0c0a9c3b5c5e6e50ee7308bbff077b85621e3a02 Mon Sep 17 00:00:00 2001
From: LeoMortari
Date: Fri, 17 Oct 2025 09:27:50 -0300
Subject: [PATCH 01/15] Start new features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Among them are the addition of faster-whisper, caption generation, and
integration with Gemini and Open Router.
---
 .DS_Store                              | Bin 0 -> 6148 bytes
 __init__.py                            |   1 +
 __pycache__/llm.cpython-311.pyc        | Bin 0 -> 11635 bytes
 __pycache__/main.cpython-311.pyc       | Bin 0 -> 13063 bytes
 __pycache__/render.cpython-311.pyc     | Bin 0 -> 8779 bytes
 __pycache__/transcribe.cpython-311.pyc | Bin 0 -> 5529 bytes
 __pycache__/utils.cpython-311.pyc      | Bin 0 -> 4567 bytes
 docker-compose.yml                     |  35 ++++
 dockerfile                             |  45 +++++
 llm.py                                 | 234 ++++++++++++++++++++++
 main.py                                | 266 +++++++++++++++++++++++++
 render.py                              | 205 +++++++++++++++++++
 requirements.txt                       |   7 +
 transcribe.py                          | 111 +++++++++++
 utils.py                               |  93 +++++++++
 15 files changed, 997 insertions(+)
 create mode 100644 .DS_Store
 create mode 100644 __init__.py
 create mode 100644 __pycache__/llm.cpython-311.pyc
 create mode 100644 __pycache__/main.cpython-311.pyc
 create mode 100644 __pycache__/render.cpython-311.pyc
 create mode 100644 __pycache__/transcribe.cpython-311.pyc
 create mode 100644 __pycache__/utils.cpython-311.pyc
 create mode 100644 docker-compose.yml
 create mode 100644 dockerfile
 create mode 100644 llm.py
 create mode 100644 main.py
 create mode 100644 render.py
 create mode 100644 requirements.txt
 create mode 100644 transcribe.py
 create mode 100644 utils.py

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..38734ca2de71d90578b12a191d5ff30a57f26d5c

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..b437409
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1 @@
+"""Top-level package for the video processing pipeline."""
\ No newline at end of file
z^b28+U#HU>a1RfAFP5AwUgyb67cXBMOQpwtayd2XNW`-f%3*K&&|yb{Pd`F#$~~GB z&zc2zaV>4n7d1Gc(3D~UZ!l*wF`J%q@mQL3%l6^&I6l*ILQP>i{z=W|#+a0xD4&!j z^|Aw)$zG@F0p8p4Lu@yV40^>|s93*7Q4zk~B!m-RCiX5Lsf7mWp@CIF3?x?)d+Uk* zYPf%;tNZTGd%N%KUQYaZXRYgaz3X`8{7OgS*7&C%eDXoHci^E`>lm(g3|CHn-Q9Ed z*u9ZEBh`IJYTZZc-A5}Io`$;NI~sil9_F6(4OjbyZ_nSJ|Lc!###iFomv%lp{Y7sz zK3a>9*5jj;tD#JEuRz$rh!>z@)Gk#DAzPof5A9aWPyDE|c$m?y-?mc~3b! z-9kd`j%~xNsJEhh7S^{;u}oO+xu^YPYT&-VodV=cp|AOr!z#|uj^T>pq<|BhT8xxK z-{VA)?{%Wc^vg~ZneOzsQA5vjdnOvMJsxq?B>)imL+T(c>DXTStiaTmxg+T!k`l_C^H%^qXqa(Nu@Mx>IR3xJZ3r3wLP4mQY_Lj16*2b$79Ec5(I%?S)338 zB*jy9oH`}TWOg#@LiIP%!|^(MgKzEKIEv9Z(+Q)Urr!&kD3_pH_oLSl=lD@bJMQB~ z)f5q=;%)p}S0N-s)vZDxbmQ&Y;pO3~)L)bO>ry`|6i&C@>%G&vH1#l6>l&_i4Oh;8 zEg=>zkJqGwb?IPLI{0;bcP%d0<8meSv}fm6J^P>Z>|dV#qOaPszt(f6-gD*`qm{Ea z)y8Yd%GpNp^~(81V%w?^4)_V5Wd-kt%Z(r)ZzwNV; zyCaP)+wPvdH+pBZwxzGWrEfJP?TD@lD83P-NbJ8M#m8Ngv~N3U$f#bGgpf<_^<%rxQx2RWqATJJEDHS3*v!rPGW; zI=_@`D{4*<01dXyC>3Fpg)mA`oX#BoNy`Ij9E+{pSAHAV2+fwMAn*l?0VKoEeIx7| znq*4WUr?<2MNwS!B}8depkkjOzFHObt$&{ikqZB72whe0Z3unUm%mSi*Q)LBYH*u) bRa_OGD~G$qu2rGE80Zt_RpBKCKhysJ%Fw%g literal 0 HcmV?d00001 diff --git a/__pycache__/utils.cpython-311.pyc b/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c4f202100b86f021106aaf057e75124886032ee GIT binary patch literal 4567 zcmb_fO>7&-72YM6eOhxX804?Xl$kO6@Z3m7n31gLK+bknKd+ofnz zQH&lsO2ZJIfYaBBsb~i{3MUM zC+SIfC%q|QQs6notNIi{_00&1@47JQSNu*rfO-J+pb}IA&HfPTA*D+RV?;#lKr7}B zuCL3@_?6D|o8yw|xLe~P<}^4(U)ZScC= z;+Oa2JQZ5~TSOxC0SN=%N!PEL&sdp z6fHwc8gnX>B~u;7xO7sIG0;p?=J1)*tb~}+6)YtijF{-kBZm5ds6zlLnNCQ0CZ#e> z7UhJ*B-z3QQJt67wB_*G$U-qJbHtT0C0zwyE1`;t##Grd7`S35kVBeLNydz+iiYm+ zNg1Z)?A2T_Ej8uJlaees(d2PiO-R=?Bf~^RHD#t+hMLE9R3hr&q@jvEn@;tY^P2q5EJlStJqieFEsETAs zc1Si-mo;6}XVK#sIcW2;NmeI{Ykj&pH0wE3xQ^qqH4?w^$QXAkZlof^5cFYaey^x7faU@0jHR4FM+~N1piWVvh8U1<1-mZ zfm?}jk=P2y`+6GD?jrjpwyhr9U75L~)M7{Ku_J}o8{NG(`#oWy#7tB=Tm>RKYr)%orPL#v>qD;qi|nU=wsa^ zm&Y={WiRBbtFOU533Cb_#WR7tg2r#Bb_B`080C&{A<5&j6t5y|zz*WQIZFB3Z4r3p z-wd~4%YjF0Y_wn{aMj_?`f~iwu(P};?*#^VlwcM|l=CVf%ClgQ6Nmy|pTw=B$*mLI zz}Ml`-pF^5HXQj*X2So0u3*rBWi+JH|@;A+CFnNDhir2sI~VsLKA3b7KvAamFSElr06E><{eGPH{` zNduc9PghOTMXc7g$TP-g*N<(DA70}}*7##>wn#42i*I!pM%f~&e6SHkT-2^4a9oIUJClP|7l=O-0-j?2`EO>HL@sDt*H8KuH=F=bm zp~!CcU*7Taw@Zf>FD_qfH93y;keujYtPA8gmB;gusF0lRCu`& z?j(%qb$TA4XJ~1t^h&w67TR79ZAX8k>&BU-Go^{evE{MCs|{zLtbXPUF>wYLw ze4Capy|?sC@tH=nZ&9EbH2(17wKBh$tp*2I{9J5H>CNT&V$_AEK=UZtY=QujRJ-~m z2;q~$Uqy+OtMK`@yAk?4S*Uy?iaZ|qoH5|3csCh|8x`8ooA<(}h0k#B0dBatSMh%) z{MuL99OVK^upQ-q0-@~5b_gKvUw0h1)@DEe-$Cev>s($CC*Ha+;#d$PJ7mv;*xrMO z6}A(ltnc89BS&V~4(bv?5R1L=SjWiX0(*hl4-z>Cq>Rg9e1n_pov2xEB5rEARck@7W875i$pGloMJCuOxBKV9NeECZ#E~ zA-1?dsTm$8++a05O$IVO?S?8zB;clZL{igLJP=e#PKXFd&TV^ox@AVw(@s2l?24Z> z^x0-OR1AbT-LNnbH}kX;n-dG^1fC_9E0rn!v+xT@L=rlTXf`6Vh1Y}FwhUr%GASCe zoJlW;6kUxu-4Tw~Qt<$xz$spnk{Q)ugvZW?O*qjUfnT(kq+wgL;#@*Q5T>V%`v`Cl z;vu0DqkA)4e?OH+Ky1I1#>GKlTc{(zj#0-;)Zru9P21-v$_~gG#_*=H!_C4Jf|D&c zW9Vu`n1STNMu^U2iuBG-q3?(I%r%gW_t{V*61#DB>Fnb967guLN|5y-V!{XGL%eGJ;6?+jgHx@ zgRFRYo^NoWg7a%|J=OJpgL}HV*?+~~&2K5cvce&&xS3OajO-oY2g(yG9J0#WPPUP} s803#tcCT>A?tllfjb!U~zI%lODWM-3Ersk}-#z(HgHG`~iL>kf0!IUFhyVZp literal 0 HcmV?d00001 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5d575cc --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,35 @@ +services: + video-render-new: + restart: unless-stopped + build: . 
+ container_name: video-render-new + environment: + # RabbitMQ credentials + - RABBITMQ_PASS=${RABBITMQ_PASS} + - RABBITMQ_HOST=${RABBITMQ_HOST} + - RABBITMQ_USER=${RABBITMQ_USER} + - RABBITMQ_PORT=${RABBITMQ_PORT} + - RABBITMQ_QUEUE=${RABBITMQ_QUEUE} + - RABBITMQ_UPLOAD_QUEUE=${RABBITMQ_UPLOAD_QUEUE} + # API keys for the LLMs + - GEMINI_API_KEY=${GEMINI_API_KEY} + - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + - OPENROUTER_MODEL=${OPENROUTER_MODEL} + # Optional whisper settings + - WHISPER_MODEL=${WHISPER_MODEL} + - WHISPER_DEVICE=${WHISPER_DEVICE} + - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE} + volumes: + # Mount host directories into the container so that videos can be + # provided and outputs collected. These paths can be customised when + # deploying the stack. The defaults assume /root/videos and + # /root/outputs on the host. + - "/root/videos:/app/videos" + - "/root/outputs:/app/outputs" + command: "python -u main.py" + networks: + - dokploy-network + +networks: + dokploy-network: + external: true \ No newline at end of file diff --git a/dockerfile b/dockerfile new file mode 100644 index 0000000..dc30f99 --- /dev/null +++ b/dockerfile @@ -0,0 +1,45 @@ +FROM python:3.11-slim + +# Create and set the working directory +WORKDIR /app + +# Prevent some interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Install ffmpeg and other system dependencies. The list largely mirrors +# the original project but omits PostgreSQL development headers which are +# unused here. We include libgl1 and libglib2.0-0 so that MoviePy +# (through its dependencies) can find OpenGL and GLib when using the +# Pillow and numpy backends. +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + build-essential \ + xvfb \ + xdg-utils \ + wget \ + unzip \ + libmagick++-dev \ + imagemagick \ + fonts-liberation \ + sox \ + bc \ + gsfonts && \ + rm -rf /var/lib/apt/lists/* + +# Copy dependency specification and install Python dependencies +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application code +COPY . . + +# Declare volumes for videos and outputs. These paths correspond to the +# mount points defined in the docker-compose file. Using VOLUME here +# documents the intended persistent storage locations. +VOLUME ["/app/videos", "/app/outputs"] + +# The default command starts the consumer loop +CMD ["python", "-u", "main.py"] \ No newline at end of file diff --git a/llm.py b/llm.py new file mode 100644 index 0000000..f0a5a2a --- /dev/null +++ b/llm.py @@ -0,0 +1,234 @@ +"""High-level helpers for interacting with the Gemini and OpenRouter APIs. + +This module encapsulates all of the logic needed to call the LLM endpoints +used throughout the application. It uses the OpenAI Python client under the +hood because both Gemini and OpenRouter expose OpenAI-compatible APIs. + +Two functions are exposed: + +* ``select_highlights`` takes an SRT-like string (the transcription of a + video) and returns a list of highlight objects with start and end + timestamps and their corresponding text. It uses the Gemini model to + identify which parts of the video are most likely to engage viewers on + social media. +* ``generate_titles`` takes a list of highlight objects and returns a list + of the same objects enriched with a ``topText`` field, which contains a + sensational title for the clip. 
It uses the OpenRouter API with a model + specified via the ``OPENROUTER_MODEL`` environment variable. + +Both functions are resilient to malformed outputs from the models. They try +to extract the first JSON array found in the model responses; if that +fails, a descriptive exception is raised. These exceptions should be +handled by callers to post appropriate error messages back to the queue. +""" + +from __future__ import annotations + +import json +import os +import re +from typing import Any, Dict, List + +import openai + + +class LLMError(Exception): + """Raised when the LLM response cannot be parsed into the expected format.""" + + +def _extract_json_array(text: str) -> Any: + """Extract the first JSON array from a string. + + LLMs sometimes return explanatory text before or after the JSON. This + helper uses a regular expression to find the first substring that + resembles a JSON array (i.e. starts with '[' and ends with ']'). It + returns the corresponding Python object if successful, otherwise + raises a ``LLMError``. + """ + # Remove Markdown code fences and other formatting noise + cleaned = text.replace("`", "").replace("json", "") + # Find the first [ ... ] block + match = re.search(r"\[.*\]", cleaned, re.DOTALL) + if not match: + raise LLMError("Não foi possível encontrar um JSON válido na resposta da IA.") + json_str = match.group(0) + try: + return json.loads(json_str) + except json.JSONDecodeError as exc: + raise LLMError(f"Erro ao decodificar JSON: {exc}") + + +def select_highlights(srt_text: str) -> List[Dict[str, Any]]: + """Call the Gemini API to select highlight segments from a transcription. + + The input ``srt_text`` should be a string containing the transcription + formatted like an SRT file, with lines of the form + ``00:00:10,140 --> 00:01:00,990`` followed by the spoken text. + + Returns a list of dictionaries, each with ``start``, ``end`` and + ``text`` keys. On failure to parse the response, a ``LLMError`` is + raised. + """ + api_key = os.environ.get("GEMINI_API_KEY") + if not api_key: + raise ValueError("GEMINI_API_KEY não definido no ambiente") + + model = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash") + + # Initialise client for Gemini. The base_url points to the + # generativelanguage API; see the official docs for details. + client = openai.OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/") + + # System prompt: instructs Gemini how to behave. + system_prompt = ( + "Você é um assistente especializado em selecionar **HIGHLIGHTS** de vídeo " + "a partir da transcrição com timestamps.\n" + "Sua única função é **selecionar os trechos** conforme solicitado.\n" + "- **Não resuma, não interprete, não gere comentários ou textos complementares.**\n" + "- **Retorne a resposta exatamente no formato proposto pelo usuário**, sem adicionar ou remover nada além do pedido.\n" + "- Cada trecho selecionado deve ter **no mínimo 60 segundos e no máximo 120 segundos** de duração.\n" + "- Sempre responda **em português (PT-BR)**." + ) + + # Base prompt: describes how to select highlights and the format to return. 
+ base_prompt = ( + "Você assumirá o papel de um especialista em Marketing e Social Media, " + "sua tarefa é selecionar as melhores partes de uma transcrição que irei fornecer.\n\n" + "## Critérios de Seleção\n\n" + "- Escolha trechos baseando-se em:\n" + " - **Picos de emoção ou impacto**\n" + " - **Viradas de assunto**\n" + " - **Punchlines** (frases de efeito, momentos de virada)\n" + " - **Informações-chave**\n\n" + "## Regras Rápidas\n\n" + "- Sempre devolver pelo menos 3 trechos, não possui limite máximo\n" + "- Garanta que cada trecho fique com no MÍNIMO 60 segundos e no MÁXIMO 120 segundos.\n" + "- Nenhum outro texto além do JSON final.\n\n" + "## Restrições de Duração\n\n" + "- **Duração mínima do trecho escolhido:** 60 segundos\n" + "- **Duração máxima do trecho escolhido:** 90 a 120 segundos\n\n" + "## Tarefa\n\n" + "- Proponha o **máximo de trechos** com potencial, mas **sempre devolva no mínimo 3 trechos**.\n" + "- Extraia os trechos **apenas** da transcrição fornecida abaixo.\n\n" + "## IMPORTANTE\n" + "- Cada trecho deve ter no mínimo 60 segundos, e no máximo 120 segundos. Isso é indiscutível\n\n" + "## Entrada\n\n" + "- Transcrição:\n\n" + f"{srt_text}\n\n" + "## Saída\n\n" + "- Retorne **somente** a lista de trechos selecionados em formato JSON, conforme o exemplo abaixo.\n" + "- **Não escreva comentários ou qualquer texto extra.**\n" + "- No atributo \"text\", inclua o texto presente no trecho escolhido.\n\n" + "### Exemplo de Conversão\n\n" + "#### De SRT:\n" + "00:00:10,140 --> 00:01:00,990\n" + "Exemplo de escrita presente no trecho\n\n" + "#### Para JSON:\n" + "[\n" + " {\n" + " \"start\": \"00:00:10,140\",\n" + " \"end\": \"00:01:00,990\",\n" + " \"text\": \"Exemplo de escrita presente no trecho\"\n" + " }\n" + "]\n" + ) + + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": base_prompt}, + ] + try: + response = client.chat.completions.create(model=model, messages=messages) + except Exception as exc: + raise LLMError(f"Erro ao chamar a API Gemini: {exc}") + # Extract message content + content = response.choices[0].message.content if response.choices else None + if not content: + raise LLMError("A resposta da Gemini veio vazia.") + result = _extract_json_array(content) + if not isinstance(result, list): + raise LLMError("O JSON retornado pela Gemini não é uma lista.") + return result + + +def generate_titles(highlights: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Call the OpenRouter API to generate a title (topText) for each highlight. + + The ``highlights`` argument should be a list of dictionaries as returned + by ``select_highlights``, each containing ``start``, ``end`` and ``text``. + This function adds a ``topText`` field to each dictionary using the + OpenRouter model specified via the ``OPENROUTER_MODEL`` environment + variable. If parsing fails, an ``LLMError`` is raised. 
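
    For illustration only, using the sample values that appear in the prompt
    texts of this module (not a real API response), the round trip looks like:

        highlights = [
            {"start": "00:00:10,140", "end": "00:01:00,990",
             "text": "Exemplo de escrita presente no trecho"},
        ]
        generate_titles(highlights)
        # -> [{"start": "00:00:10,140", "end": "00:01:00,990",
        #      "text": "Exemplo de escrita presente no trecho",
        #      "topText": "Título impactante"}]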
+ """ + api_key = os.environ.get("OPENROUTER_API_KEY") + if not api_key: + raise ValueError("OPENROUTER_API_KEY não definido no ambiente") + model = os.environ.get("OPENROUTER_MODEL") + if not model: + raise ValueError("OPENROUTER_MODEL não definido no ambiente") + # Create client for OpenRouter + client = openai.OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1") + + # Compose prompt: instruct to generate titles only + prompt_header = ( + "Você é um especialista em Marketing Digital e Criação de Conteúdo Viral.\n\n" + "Sua tarefa é criar **títulos sensacionalistas** (*topText*) para cada trecho " + "de transcrição recebido em formato JSON.\n\n" + "## Instruções\n\n" + "- O texto deve ser **chamativo, impactante** e com alto potencial de viralização " + "em redes sociais, **mas sem sair do contexto do trecho**.\n" + "- Use expressões fortes e curiosas, mas **nunca palavras de baixo calão**.\n" + "- Cada *topText* deve ter **no máximo 2 linhas**.\n" + "- Utilize **exclusivamente** o conteúdo do trecho; não invente fatos.\n" + "- Não adicione comentários, explicações, ou qualquer texto extra na resposta.\n" + "- Responda **apenas** no seguinte formato (mantendo as chaves e colchetes):\n\n" + "[\n {\n \"start\": \"00:00:10,140\",\n \"end\": \"00:01:00,990\",\n \"topText\": \"Título impactante\"\n }\n]\n\n" + "## Observações:\n\n" + "- Nunca fuja do contexto do trecho.\n" + "- Não invente informações.\n" + "- Não utilize palavrões.\n" + "- Não escreva nada além do JSON de saída.\n\n" + "Aqui estão os trechos em JSON:\n" + ) + # Compose input JSON for the model + json_input = json.dumps(highlights, ensure_ascii=False) + full_message = prompt_header + json_input + messages = [ + { + "role": "system", + "content": "Você é um assistente útil e objetivo." + }, + { + "role": "user", + "content": full_message + }, + ] + try: + response = client.chat.completions.create( + model=model, + messages=messages, + temperature=0.7, + ) + except Exception as exc: + raise LLMError(f"Erro ao chamar a API OpenRouter: {exc}") + content = response.choices[0].message.content if response.choices else None + if not content: + raise LLMError("A resposta da OpenRouter veio vazia.") + result = _extract_json_array(content) + if not isinstance(result, list): + raise LLMError("O JSON retornado pela OpenRouter não é uma lista.") + # Merge topText back into highlights + # We assume the result list has the same order and length as input highlights + enriched: List[Dict[str, Any]] = [] + input_map = {(item["start"], item["end"]): item for item in highlights} + for item in result: + key = (item.get("start"), item.get("end")) + original = input_map.get(key) + if original is None: + # If the model returns unexpected entries, skip them + continue + enriched_item = original.copy() + # Only topText is expected + enriched_item["topText"] = item.get("topText", "").strip() + enriched.append(enriched_item) + return enriched \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..32fd1d1 --- /dev/null +++ b/main.py @@ -0,0 +1,266 @@ +"""Entry point for the video processing pipeline. + +This script listens to a RabbitMQ queue for new video processing tasks. When +a message arrives, it performs the following steps: + +1. Creates a working directory for the video based off of its filename. +2. Extracts the audio track with FFMPEG and runs Faster-Whisper to produce + a transcription with word-level timestamps. +3. 
Uses the Gemini model to determine which parts of the video have the + highest potential for engagement. These highlight segments are + represented as a list of objects containing start/end timestamps and + text. +4. Uses the OpenRouter model to generate a sensational title for each + highlight. Only the ``topText`` field is kept; the description is + intentionally omitted since the caption will be burned into the video. +5. Cuts the original video into individual clips corresponding to each + highlight and renders them vertically with a title above and a dynamic + caption below. +6. Publishes a message to the upload queue with information about the + generated clips. On success, this message contains the list of output + files. On failure, ``hasError`` will be set to ``True`` and the + ``error`` field will describe what went wrong. +7. Cleans up temporary files (audio, transcript, working directory) and + deletes the original source video from the ``videos`` directory to + conserve disk space. + +The queue names and RabbitMQ credentials are configured via environment +variables. See the accompanying ``docker-compose.yml`` for defaults. +""" + +from __future__ import annotations + +import json +import os +import shutil +import time +import traceback +from typing import Any, Dict, List + +import pika + +from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds +from .transcribe import transcribe +from .llm import LLMError, select_highlights, generate_titles +from .render import render_clip + + +# Environment variables with sensible defaults +RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq") +RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672)) +RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin") +RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS") +RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render") +RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload") + +if not RABBITMQ_PASS: + raise RuntimeError("RABBITMQ_PASS não definido no ambiente") + + +def get_next_message() -> Any: + """Retrieve a single message from the RABBITMQ_QUEUE. + + Returns ``None`` if no messages are available. This helper opens a new + connection for each call to avoid keeping stale connections alive. 
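
    A minimal polling sketch (mirroring how ``main`` below consumes this
    helper, including its five-second back-off):

        body = get_next_message()
        if body is None:
            time.sleep(5)  # queue is empty, try again later
        else:
            data = json.loads(body)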
+ """ + credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS) + parameters = pika.ConnectionParameters( + host=RABBITMQ_HOST, + port=RABBITMQ_PORT, + credentials=credentials, + heartbeat=60, + blocked_connection_timeout=300, + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE) + if method_frame: + channel.basic_ack(method_frame.delivery_tag) + connection.close() + return body + connection.close() + return None + + +def publish_to_queue(payload: Dict[str, Any]) -> None: + """Publish a JSON-serialisable payload to the RABBITMQ_UPLOAD_QUEUE.""" + credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS) + parameters = pika.ConnectionParameters( + host=RABBITMQ_HOST, + port=RABBITMQ_PORT, + credentials=credentials, + heartbeat=60, + blocked_connection_timeout=300, + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True) + channel.basic_publish( + exchange="", + routing_key=RABBITMQ_UPLOAD_QUEUE, + body=json.dumps(payload), + properties=pika.BasicProperties(delivery_mode=2), + ) + connection.close() + + +def build_srt(segments: List[Dict[str, Any]]) -> str: + """Build an SRT-like string from a list of segments. + + Each segment should have ``start``, ``end`` and ``text`` fields. The + timestamps are converted to the ``HH:MM:SS,mmm`` format expected by + the Gemini prompt. Segments are separated by a blank line. + """ + lines = [] + for seg in segments: + start_ts = seconds_to_timestamp(seg["start"]) + end_ts = seconds_to_timestamp(seg["end"]) + lines.append(f"{start_ts} --> {end_ts}\n{seg['text']}") + return "\n\n".join(lines) + + +def process_message(data: Dict[str, Any]) -> Dict[str, Any]: + """Process a single video task described in ``data``. + + Returns the payload to be sent to the upload queue. Raises an + exception on failure; the caller is responsible for catching it and + posting an error payload. 
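
    For illustration, an incoming message and the resulting success payload
    have roughly this shape (the field names come from the code below; the
    values here are invented):

        data = {
            "filename": "episodio01.mp4",
            "url": "https://example.com/episodio01",
            "videoId": "abc123",
        }
        # process_message(data) -> {
        #     "videosProcessedQuantity": 3,
        #     "filename": "episodio01.mp4",
        #     "processedFiles": ["outputs/episodio01/clip_1.mp4", "..."],
        #     "url": "https://example.com/episodio01",
        #     "videoId": "abc123",
        #     "hasError": False,
        #     "error": None,
        # }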
+ """ + filename = data.get("filename") + if not filename: + raise ValueError("Campo 'filename' ausente na mensagem") + url = data.get("url") + video_id = data.get("videoId") + # Determine source video path; n8n stores videos in the 'videos' directory + video_path = os.path.join("videos", filename) + if not os.path.exists(video_path): + raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}") + # Sanitize the filename to use as directory name + base_no_ext = os.path.splitext(filename)[0] + sanitized = sanitize_filename(base_no_ext) + work_dir = os.path.join("app", "videos", sanitized) + # Transcribe video + segments, words = transcribe(video_path, work_dir) + # Build SRT string + srt_str = build_srt(segments) + # Call Gemini to select highlights + highlights = select_highlights(srt_str) + # Convert start/end times to floats and keep original strings for openrouter + for item in highlights: + item["start"] = item["start"].strip() + item["end"] = item["end"].strip() + # Generate titles + titles = generate_titles(highlights) + # Render clips + output_dir = os.path.join("outputs", sanitized) + processed_files: List[str] = [] + for idx, item in enumerate(titles, start=1): + start_sec = timestamp_to_seconds(item.get("start")) + end_sec = timestamp_to_seconds(item.get("end")) + # Extract relative words for caption + relative_words = [] + for w in words: + # Word must overlap clip interval + if w["end"] <= start_sec or w["start"] >= end_sec: + continue + rel_start = max(0.0, w["start"] - start_sec) + rel_end = max(0.0, w["end"] - start_sec) + relative_words.append({ + "start": rel_start, + "end": rel_end, + "word": w["word"], + }) + # If no words found (e.g. silence), create a dummy word to avoid errors + if not relative_words: + relative_words.append({"start": 0.0, "end": end_sec - start_sec, "word": ""}) + out_path = render_clip( + video_path=video_path, + start=start_sec, + end=end_sec, + top_text=item.get("topText", ""), + words=relative_words, + out_dir=output_dir, + base_name=sanitized, + idx=idx, + ) + processed_files.append(out_path) + # Compose payload + payload = { + "videosProcessedQuantity": len(processed_files), + "filename": filename, + "processedFiles": processed_files, + "url": url, + "videoId": video_id, + "hasError": False, + "error": None, + } + # Clean up working directory and original video + shutil.rmtree(work_dir, ignore_errors=True) + try: + os.remove(video_path) + except FileNotFoundError: + pass + return payload + + +def main(): + print(" [*] Esperando mensagens. 
Para sair: CTRL+C") + while True: + body = get_next_message() + if body is None: + time.sleep(5) + continue + try: + data = json.loads(body) + except Exception: + print("⚠️ Mensagem inválida recebida (não é JSON)") + continue + try: + result = process_message(data) + except Exception as exc: + # Print stack trace for debugging + traceback.print_exc() + # Attempt to clean up any directories based on filename + filename = data.get("filename") + sanitized = sanitize_filename(os.path.splitext(filename or "")[0]) if filename else "" + work_dir = os.path.join("app", "videos", sanitized) if sanitized else None + output_dir = os.path.join("outputs", sanitized) if sanitized else None + # Remove working and output directories + if work_dir: + shutil.rmtree(work_dir, ignore_errors=True) + if output_dir: + shutil.rmtree(output_dir, ignore_errors=True) + # Remove original video if present + video_path = os.path.join("videos", filename) if filename else None + if video_path and os.path.exists(video_path): + try: + os.remove(video_path) + except Exception: + pass + # Build error payload + error_payload = { + "videosProcessedQuantity": 0, + "filename": filename, + "processedFiles": [], + "url": data.get("url"), + "videoId": data.get("videoId"), + "hasError": True, + "error": str(exc), + } + try: + publish_to_queue(error_payload) + print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.") + except Exception as publish_err: + print(f"Erro ao publicar mensagem de erro: {publish_err}") + continue + # On success publish payload + try: + publish_to_queue(result) + print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.") + except Exception as publish_err: + print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}") + # Loop continues + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/render.py b/render.py new file mode 100644 index 0000000..539324e --- /dev/null +++ b/render.py @@ -0,0 +1,205 @@ +"""Rendering logic for producing vertical clips with dynamic captions. + +This module defines a single function ``render_clip`` which takes a video +segment and produces a vertical clip suitable for social media. Each clip +contains three regions: + +* A top region (480px high) showing a title generated by an LLM. +* A middle region (960px high) containing the original video, scaled to + fit horizontally while preserving aspect ratio and centred vertically. +* A bottom region (480px high) showing a dynamic caption. The caption + displays a sliding window of three to five words from the transcript, + colouring the currently spoken word differently to draw the viewer's + attention. + +The function uses the MoviePy library to compose the various elements and +writes the resulting video to disk. It returns the path to the created +file. +""" + +from __future__ import annotations + +import os +from typing import Dict, List + +import numpy as np +from moviepy.video.io.VideoFileClip import VideoFileClip +from moviepy.video.VideoClip import ColorClip, VideoClip +from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip +from moviepy.video.VideoClip import TextClip +from PIL import Image, ImageDraw, ImageFont + +from .utils import wrap_text + + +def render_clip( + video_path: str, + start: float, + end: float, + top_text: str, + words: List[Dict[str, float]], + out_dir: str, + base_name: str, + idx: int, + # Use a widely available system font by default. DejaVuSans is installed + # in most Debian-based containers. 
The caller can override this path. + font_path: str = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", + final_width: int = 1080, + final_height: int = 1920, + top_h: int = 480, + middle_h: int = 960, + bottom_h: int = 480, + video_codec: str = "libx264", + bitrate: str = "3000k", +) -> str: + """Render a single clip with title and dynamic caption. + + Parameters + ---------- + video_path: str + Path to the source video file. + start: float + Start time of the clip in seconds. + end: float + End time of the clip in seconds. + top_text: str + The title to display in the top region. + words: List[Dict[str, float]] + List of word-level timestamps for this clip. Each dict must have + ``start``, ``end`` and ``word`` keys. The start and end values + should be relative to the beginning of this clip (i.e. start at 0). + out_dir: str + Directory where the output file should be saved. The function + creates this directory if it doesn't exist. + base_name: str + Base name of the original video (sanitized). Used to build the + output filename. + idx: int + Index of the clip. Output will be named ``clip_{idx}.mp4``. + font_path: str + Path to the TrueType font to use for both title and caption. + final_width: int + Width of the final video in pixels. + final_height: int + Height of the final video in pixels. + top_h: int + Height of the title area in pixels. + middle_h: int + Height of the video area in pixels. + bottom_h: int + Height of the caption area in pixels. + video_codec: str + FFmpeg codec to use when writing the video. + bitrate: str + Bitrate for the output video. + + Returns + ------- + str + The path to the rendered video file. + """ + os.makedirs(out_dir, exist_ok=True) + # Extract the segment from the source video + with VideoFileClip(video_path) as clip: + segment = clip.subclip(start, end) + dur = segment.duration + # Background + bg = ColorClip(size=(final_width, final_height), color=(0, 0, 0), duration=dur) + # Resize video to fit width + video_resized = segment.resize(width=final_width) + # Compute vertical position to centre in the middle region + y = top_h + (middle_h - video_resized.h) // 2 + video_resized = video_resized.set_position((0, y)) + + # Build title clip + # Wrap the title to avoid overflow + wrapped_lines = wrap_text(top_text, max_chars=40) + wrapped_title = "\n".join(wrapped_lines) + title_clip = TextClip( + wrapped_title, + font=font_path, + fontsize=70, + color="white", + method="caption", + size=(final_width, top_h), + align="center", + ).set_duration(dur).set_position((0, 0)) + + # Prepare font for caption rendering + pil_font = ImageFont.truetype(font_path, size=60) + default_color = (255, 255, 255) # white + highlight_color = (255, 215, 0) # gold-like yellow + + # Precompute widths of a space and bounding box height for vertical centering + space_width = pil_font.getbbox(" ")[2] - pil_font.getbbox(" ")[0] + bbox = pil_font.getbbox("A") + text_height = bbox[3] - bbox[1] + + def make_caption_frame(t: float): + """Generate an image for the caption at time t.""" + # Determine current word index + idx_cur = 0 + for i, w in enumerate(words): + if w["start"] <= t < w["end"]: + idx_cur = i + break + if t >= w["end"]: + idx_cur = i + # Define window of words to display: show up to 5 words + start_idx = max(0, idx_cur - 2) + end_idx = min(len(words), idx_cur + 3) + window = words[start_idx:end_idx] + # Compute widths for each word + word_sizes = [] + for w in window: + bbox = pil_font.getbbox(w["word"]) + word_width = bbox[2] - bbox[0] + 
word_sizes.append(word_width) + total_width = sum(word_sizes) + space_width * (len(window) - 1 if window else 0) + # Create blank image for caption area + img = Image.new("RGB", (final_width, bottom_h), color=(0, 0, 0)) + draw = ImageDraw.Draw(img) + x = int((final_width - total_width) / 2) + y_pos = int((bottom_h - text_height) / 2) + for j, w in enumerate(window): + color = highlight_color if (start_idx + j) == idx_cur else default_color + draw.text((x, y_pos), w["word"], font=pil_font, fill=color) + x += word_sizes[j] + space_width + return np.array(img) + + caption_clip = VideoClip(make_frame=make_caption_frame, duration=dur) + caption_clip = caption_clip.set_position((0, final_height - bottom_h)) + + # Compose final clip + final = CompositeVideoClip([ + bg, + video_resized, + title_clip, + caption_clip, + ], size=(final_width, final_height)) + # Use the original audio from the video segment + final_audio = segment.audio + if final_audio is not None: + final = final.set_audio(final_audio) + # Define output path + out_path = os.path.join(out_dir, f"clip_{idx}.mp4") + # Write to disk + final.write_videofile( + out_path, + codec=video_codec, + fps=30, + bitrate=bitrate, + audio_codec="aac", + preset="ultrafast", + ffmpeg_params=[ + "-tune", "zerolatency", + "-pix_fmt", "yuv420p", + "-profile:v", "high", + "-level", "4.1", + ], + threads=4, + ) + # Close clips to free resources + final.close() + segment.close() + return out_path \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f5ce0c5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +pika==1.3.2 +moviepy==2.0.0 +faster-whisper==1.2.0 +openai==1.16.0 +numpy==1.26.4 +Pillow==10.1.0 +unidecode==1.3.6 \ No newline at end of file diff --git a/transcribe.py b/transcribe.py new file mode 100644 index 0000000..8cb4739 --- /dev/null +++ b/transcribe.py @@ -0,0 +1,111 @@ +"""Utilities for extracting audio from video and generating transcriptions. + +This module handles two tasks: + +1. Use FFMPEG to extract the audio track from a video file into a WAV file + suitable for consumption by the Whisper model. The audio is resampled to + 16 kHz mono PCM as required by Whisper. +2. Use the Faster-Whisper implementation to generate a transcription with + word-level timestamps. The transcription is returned both as a list of + segments (for building an SRT) and as a flattened list of words (for + building dynamic subtitles). + +If FFMPEG is not installed or fails, a ``RuntimeError`` is raised. The caller +is responsible for cleaning up the temporary files created in the working +directory. +""" + +from __future__ import annotations + +import os +import subprocess +from typing import Dict, List, Tuple + +from faster_whisper import WhisperModel + + +def extract_audio_ffmpeg(video_path: str, audio_path: str) -> None: + """Use FFMPEG to extract audio from ``video_path`` into ``audio_path``. + + The output will be a 16 kHz mono WAV file in PCM S16LE format. Any + existing file at ``audio_path`` will be overwritten. If ffmpeg returns + a non-zero exit code, a ``RuntimeError`` is raised with the stderr. 
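
    A typical call (the paths are illustrative) and the command it assembles:

        extract_audio_ffmpeg("videos/episodio01.mp4", "/tmp/work/audio.wav")
        # runs: ffmpeg -y -i videos/episodio01.mp4 -vn -acodec pcm_s16le
        #       -ar 16000 -ac 1 /tmp/work/audio.wav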
+ """ + cmd = [ + "ffmpeg", + "-y", # overwrite output + "-i", + video_path, + "-vn", # disable video recording + "-acodec", + "pcm_s16le", + "-ar", + "16000", + "-ac", + "1", + audio_path, + ] + proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if proc.returncode != 0: + raise RuntimeError(f"FFMPEG error: {proc.stderr.decode(errors='ignore')}") + + +def load_whisper_model() -> WhisperModel: + """Instantiate and cache a Faster-Whisper model. + + The model name and device can be configured via the ``WHISPER_MODEL`` and + ``WHISPER_DEVICE`` environment variables. The default model is + ``large-v3`` for best accuracy. The device can be ``cuda`` or ``cpu``. + A module-level cache is used to prevent loading the model multiple times. + """ + if hasattr(load_whisper_model, "_cache"): + return load_whisper_model._cache # type: ignore[attr-defined] + model_name = os.environ.get("WHISPER_MODEL", "large-v3") + device = os.environ.get("WHISPER_DEVICE", "cpu") + # Compute type can be set via WHISPER_COMPUTE_TYPE; default to float16 on GPU + compute_type = os.environ.get("WHISPER_COMPUTE_TYPE") + # If not explicitly set, choose sensible defaults + if compute_type is None: + compute_type = "float16" if device == "cuda" else "int8" + model = WhisperModel(model_name, device=device, compute_type=compute_type) + load_whisper_model._cache = model # type: ignore[attr-defined] + return model + + +def transcribe(video_path: str, work_dir: str) -> Tuple[List[Dict[str, float]], List[Dict[str, float]]]: + """Transcribe a video file using Faster-Whisper. + + ``video_path`` is the path to the video to transcribe. ``work_dir`` is a + directory where temporary files will be stored (audio file and + transcription). The function returns a tuple ``(segments, words)`` where + ``segments`` is a list of dictionaries with ``start``, ``end`` and + ``text`` fields, and ``words`` is a flat list of dictionaries with + ``start``, ``end`` and ``word`` fields covering the entire video. + The timestamps are expressed in seconds as floats. + """ + os.makedirs(work_dir, exist_ok=True) + audio_path = os.path.join(work_dir, "audio.wav") + # Extract audio + extract_audio_ffmpeg(video_path, audio_path) + # Load Whisper model + model = load_whisper_model() + # Run transcription with word-level timestamps + segments, info = model.transcribe(audio_path, word_timestamps=True) + seg_list: List[Dict[str, float]] = [] + words_list: List[Dict[str, float]] = [] + for seg in segments: + seg_list.append({ + "start": float(seg.start), + "end": float(seg.end), + "text": seg.text.strip(), + }) + # Each segment may contain words attribute + for w in getattr(seg, "words", []) or []: + words_list.append({ + "start": float(w.start), + "end": float(w.end), + "word": w.word, + }) + # Sort words by start time to be safe + words_list.sort(key=lambda d: d["start"]) + return seg_list, words_list \ No newline at end of file diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..c8f9dbc --- /dev/null +++ b/utils.py @@ -0,0 +1,93 @@ +import re +import unicodedata +from typing import List, Tuple + + +def sanitize_filename(name: str) -> str: + """Return a sanitized version of a filename. + + This helper removes accents, converts to lowercase, replaces spaces + with underscores and removes any non alphanumeric characters except + underscores and dots. This makes the directory names safe to use on + most filesystems and matches the behaviour described in the spec. 
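
    Example with an illustrative input:

        >>> sanitize_filename("meu vídeo #01")
        'meu_video_01'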
+ """ + if not name: + return "" + # Decompose Unicode characters and strip accents + nfkd_form = unicodedata.normalize("NFKD", name) + no_accents = "".join(c for c in nfkd_form if not unicodedata.combining(c)) + # Replace spaces with underscores + no_spaces = no_accents.replace(" ", "_") + # Lowercase and remove any character that is not a letter, digit, dot or underscore + sanitized = re.sub(r"[^A-Za-z0-9_.]+", "", no_spaces) + return sanitized + + +def timestamp_to_seconds(ts: str) -> float: + """Convert a timestamp in HH:MM:SS,mmm format to seconds. + + The Gemini and OpenRouter prompts use timestamps formatted with a comma + as the decimal separator. This helper splits the string into hours, + minutes and seconds and returns a float expressed in seconds. + """ + if ts is None: + return 0.0 + ts = ts.strip() + if not ts: + return 0.0 + # Replace comma by dot for decimal seconds + ts = ts.replace(",", ".") + parts = ts.split(":") + parts = [float(p) for p in parts] + if len(parts) == 3: + h, m, s = parts + return h * 3600 + m * 60 + s + elif len(parts) == 2: + m, s = parts + return m * 60 + s + else: + # only seconds + return parts[0] + + +def seconds_to_timestamp(seconds: float) -> str: + """Convert a time in seconds to HH:MM:SS,mmm format expected by SRT.""" + if seconds < 0: + seconds = 0 + h = int(seconds // 3600) + m = int((seconds % 3600) // 60) + s = seconds % 60 + # Format with comma as decimal separator and three decimal places + return f"{h:02d}:{m:02d}:{s:06.3f}".replace(".", ",") + + +def wrap_text(text: str, max_chars: int = 80) -> List[str]: + """Simple word-wrap for a string. + + Splits ``text`` into a list of lines, each at most ``max_chars`` + characters long. This does not attempt to hyphenate words – a word + longer than ``max_chars`` will occupy its own line. The return value + is a list of lines without trailing whitespace. 
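
    Example with this implementation:

        >>> wrap_text("one two three four five", max_chars=14)
        ['one two three', 'four five']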
+ """ + if not text: + return [] + words = text.split() + lines: List[str] = [] + current: List[str] = [] + current_len = 0 + for word in words: + # If adding this word would exceed the max, flush current line + if current and current_len + 1 + len(word) > max_chars: + lines.append(" ".join(current)) + current = [word] + current_len = len(word) + else: + # Add to current line + if current: + current_len += 1 + len(word) + else: + current_len = len(word) + current.append(word) + if current: + lines.append(" ".join(current)) + return lines \ No newline at end of file From 7ccc745a5db38ee56486b1509af933650809d31a Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Fri, 17 Oct 2025 09:32:06 -0300 Subject: [PATCH 02/15] Add git ignore --- .gitignore | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d4d72bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,98 @@ +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +*.manifest +*.spec +pip-log.txt +pip-delete-this-directory.txt +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ +*.mo +*.pot +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal +instance/ +.webassets-cache +.scrapy +docs/_build/ +.pybuilder/ +target/ +.ipynb_checkpoints +profile_default/ +ipython_config.py + +.pdm.toml + +__pypackages__/ + +celerybeat-schedule +celerybeat.pid + +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +.spyderproject +.spyproject +.ropeproject + +/site + +.mypy_cache/ +.dmypy.json +dmypy.json + +.pyre/ + +.pytype/ + +cython_debug/ +.idea/ +.vscode/ +*.code-workspace +*.local +*.mp4 +*.wav +*.mp3 +*.srt +*.vtt +*.json +*.csv +*.xlsx +*.db +*.sqlite3 From 2b99d2ad78b808d6b53b6679ef3a1c094b9677be Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 17 Oct 2025 14:32:36 +0200 Subject: [PATCH 03/15] Eliminar .DS_Store --- .DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 38734ca2de71d90578b12a191d5ff30a57f26d5c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ8Hu~5S@u#2;8`IxmU;y7U7)02atb(6aoc8igc=cE+5TrJ{W}TCXgn)ftj~E znx|!7q0xwlw%_Mhk+q04a6`FRn43K}pV>=h6bQ#VPI7>M$h&m2>c}_IdtuEgkdX>d0V;4;z`hR!Zden?K>u_g_y_=8 zBJGB?&l13531Cee1CfDgP=P_!95FQL$d|0EiDO{UMRWMjJXv!>Q9m8;FJ3NM0~x6R z6__fpi0#_?{~P?t{68geM+KA!1r*gxx&q`b_#;GW1zQV hY^)t`yeR65t?|4jj)6``-swR8445u7D)83|+yF)|6_)@2 From b090f7c2cbdd6b95df298a0dd3047997e97f1491 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Mon, 20 Oct 2025 17:56:36 -0300 Subject: [PATCH 04/15] Cria novos components --- .gitignore | 98 ----- __init__.py | 1 - __pycache__/llm.cpython-311.pyc | Bin 11635 -> 0 bytes __pycache__/main.cpython-311.pyc | Bin 13063 -> 0 bytes __pycache__/render.cpython-311.pyc | Bin 8779 -> 0 bytes __pycache__/transcribe.cpython-311.pyc | Bin 5529 -> 0 bytes __pycache__/utils.cpython-311.pyc | Bin 4567 -> 0 bytes docker-compose.yml | 40 +- dockerfile | 4 + llm.py | 234 ---------- main.py | 269 +----------- prompts/generate.txt | 35 ++ render.py | 205 --------- requirements.txt | 13 +- transcribe.py | 111 ----- utils.py | 93 ---- video_render/__init__.py 
| 4 + .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 186 bytes .../__pycache__/config.cpython-39.pyc | Bin 0 -> 4948 bytes .../__pycache__/ffmpeg.cpython-39.pyc | Bin 0 -> 1516 bytes video_render/__pycache__/llm.cpython-39.pyc | Bin 0 -> 5533 bytes .../__pycache__/logging_utils.cpython-39.pyc | Bin 0 -> 505 bytes video_render/__pycache__/media.cpython-39.pyc | Bin 0 -> 2017 bytes .../__pycache__/messaging.cpython-39.pyc | Bin 0 -> 2939 bytes .../__pycache__/pipeline.cpython-39.pyc | Bin 0 -> 7889 bytes .../__pycache__/rendering.cpython-39.pyc | Bin 0 -> 10198 bytes .../__pycache__/transcription.cpython-39.pyc | Bin 0 -> 3819 bytes video_render/__pycache__/utils.cpython-39.pyc | Bin 0 -> 1284 bytes video_render/config.py | 103 +++++ video_render/ffmpeg.py | 54 +++ video_render/llm.py | 187 ++++++++ video_render/logging_utils.py | 13 + video_render/media.py | 64 +++ video_render/messaging.py | 85 ++++ video_render/pipeline.py | 236 ++++++++++ video_render/rendering.py | 406 ++++++++++++++++++ video_render/transcription.py | 122 ++++++ video_render/utils.py | 38 ++ 38 files changed, 1391 insertions(+), 1024 deletions(-) delete mode 100644 .gitignore delete mode 100644 __init__.py delete mode 100644 __pycache__/llm.cpython-311.pyc delete mode 100644 __pycache__/main.cpython-311.pyc delete mode 100644 __pycache__/render.cpython-311.pyc delete mode 100644 __pycache__/transcribe.cpython-311.pyc delete mode 100644 __pycache__/utils.cpython-311.pyc delete mode 100644 llm.py create mode 100644 prompts/generate.txt delete mode 100644 render.py delete mode 100644 transcribe.py delete mode 100644 utils.py create mode 100644 video_render/__init__.py create mode 100644 video_render/__pycache__/__init__.cpython-39.pyc create mode 100644 video_render/__pycache__/config.cpython-39.pyc create mode 100644 video_render/__pycache__/ffmpeg.cpython-39.pyc create mode 100644 video_render/__pycache__/llm.cpython-39.pyc create mode 100644 video_render/__pycache__/logging_utils.cpython-39.pyc create mode 100644 video_render/__pycache__/media.cpython-39.pyc create mode 100644 video_render/__pycache__/messaging.cpython-39.pyc create mode 100644 video_render/__pycache__/pipeline.cpython-39.pyc create mode 100644 video_render/__pycache__/rendering.cpython-39.pyc create mode 100644 video_render/__pycache__/transcription.cpython-39.pyc create mode 100644 video_render/__pycache__/utils.cpython-39.pyc create mode 100644 video_render/config.py create mode 100644 video_render/ffmpeg.py create mode 100644 video_render/llm.py create mode 100644 video_render/logging_utils.py create mode 100644 video_render/media.py create mode 100644 video_render/messaging.py create mode 100644 video_render/pipeline.py create mode 100644 video_render/rendering.py create mode 100644 video_render/transcription.py create mode 100644 video_render/utils.py diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d4d72bc..0000000 --- a/.gitignore +++ /dev/null @@ -1,98 +0,0 @@ -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST -*.manifest -*.spec -pip-log.txt -pip-delete-this-directory.txt -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ -*.mo -*.pot -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal -instance/ -.webassets-cache -.scrapy -docs/_build/ -.pybuilder/ 
-target/
-.ipynb_checkpoints
-profile_default/
-ipython_config.py
-
-.pdm.toml
-
-__pypackages__/
-
-celerybeat-schedule
-celerybeat.pid
-
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-.spyderproject
-.spyproject
-.ropeproject
-
-/site
-
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-.pyre/
-
-.pytype/
-
-cython_debug/
-.idea/
-.vscode/
-*.code-workspace
-*.local
-*.mp4
-*.wav
-*.mp3
-*.srt
-*.vtt
-*.json
-*.csv
-*.xlsx
-*.db
-*.sqlite3
diff --git a/__init__.py b/__init__.py
deleted file mode 100644
index b437409..0000000
--- a/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Top-level package for the video processing pipeline."""
\ No newline at end of file
diff --git a/__pycache__/llm.cpython-311.pyc b/__pycache__/llm.cpython-311.pyc
deleted file mode 100644
index 36d44a6197b1726f4c07bd4fd0b04731d74e0161..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 11635
[... binary patch data omitted ...]
diff --git a/__pycache__/render.cpython-311.pyc b/__pycache__/render.cpython-311.pyc
deleted file mode 100644
index 634cd2e2f3f00636d37b7efb82fc306d76aa9ef5..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 8779
[... binary patch data omitted ...]
diff --git a/__pycache__/transcribe.cpython-311.pyc b/__pycache__/transcribe.cpython-311.pyc
deleted file mode 100644
index cac6337d8390c796bed9c63974cc35c18a71588e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 5529
[... binary patch data omitted ...]
diff --git a/__pycache__/utils.cpython-311.pyc b/__pycache__/utils.cpython-311.pyc
deleted file mode 100644
index 3c4f202100b86f021106aaf057e75124886032ee..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
literal 4567
[... binary patch data omitted ...]
diff --git a/docker-compose.yml b/docker-compose.yml
index 5d575cc..b74bd0d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,3 +1,8 @@
+# GEMINI_API_KEY="AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw"
+# YOUTUBE_API="https://totally-real-dingo.ngrok-free.app"
+# OPENROUTER_API_KEY="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"
+# 
OPENROUTER_MODEL="openai/gpt-oss-20b:free" + services: video-render-new: restart: unless-stopped @@ -6,19 +11,13 @@ services: environment: # RabbitMQ credentials - RABBITMQ_PASS=${RABBITMQ_PASS} - - RABBITMQ_HOST=${RABBITMQ_HOST} - - RABBITMQ_USER=${RABBITMQ_USER} - - RABBITMQ_PORT=${RABBITMQ_PORT} - - RABBITMQ_QUEUE=${RABBITMQ_QUEUE} - - RABBITMQ_UPLOAD_QUEUE=${RABBITMQ_UPLOAD_QUEUE} - # API keys for the LLMs - GEMINI_API_KEY=${GEMINI_API_KEY} + - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro} - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - - OPENROUTER_MODEL=${OPENROUTER_MODEL} - # Optional whisper settings - - WHISPER_MODEL=${WHISPER_MODEL} - - WHISPER_DEVICE=${WHISPER_DEVICE} - - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE} + - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free} + - FASTER_WHISPER_MODEL_SIZE=${FASTER_WHISPER_MODEL_SIZE:-small} + ports: + - "5000:5000" volumes: # Mount host directories into the container so that videos can be # provided and outputs collected. These paths can be customised when @@ -27,9 +26,18 @@ services: - "/root/videos:/app/videos" - "/root/outputs:/app/outputs" command: "python -u main.py" - networks: - - dokploy-network + # runtime: nvidia -networks: - dokploy-network: - external: true \ No newline at end of file + # networks: + # - dokploy-network + + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: all + # capabilities: [gpu] +# networks: +# dokploy-network: +# external: true diff --git a/dockerfile b/dockerfile index dc30f99..048fdd3 100644 --- a/dockerfile +++ b/dockerfile @@ -21,6 +21,10 @@ RUN apt-get update && \ xdg-utils \ wget \ unzip \ + ffmpeg \ + libgomp1 \ + libpq-dev \ + vim \ libmagick++-dev \ imagemagick \ fonts-liberation \ diff --git a/llm.py b/llm.py deleted file mode 100644 index f0a5a2a..0000000 --- a/llm.py +++ /dev/null @@ -1,234 +0,0 @@ -"""High-level helpers for interacting with the Gemini and OpenRouter APIs. - -This module encapsulates all of the logic needed to call the LLM endpoints -used throughout the application. It uses the OpenAI Python client under the -hood because both Gemini and OpenRouter expose OpenAI-compatible APIs. - -Two functions are exposed: - -* ``select_highlights`` takes an SRT-like string (the transcription of a - video) and returns a list of highlight objects with start and end - timestamps and their corresponding text. It uses the Gemini model to - identify which parts of the video are most likely to engage viewers on - social media. -* ``generate_titles`` takes a list of highlight objects and returns a list - of the same objects enriched with a ``topText`` field, which contains a - sensational title for the clip. It uses the OpenRouter API with a model - specified via the ``OPENROUTER_MODEL`` environment variable. - -Both functions are resilient to malformed outputs from the models. They try -to extract the first JSON array found in the model responses; if that -fails, a descriptive exception is raised. These exceptions should be -handled by callers to post appropriate error messages back to the queue. -""" - -from __future__ import annotations - -import json -import os -import re -from typing import Any, Dict, List - -import openai - - -class LLMError(Exception): - """Raised when the LLM response cannot be parsed into the expected format.""" - - -def _extract_json_array(text: str) -> Any: - """Extract the first JSON array from a string. - - LLMs sometimes return explanatory text before or after the JSON. 
This - helper uses a regular expression to find the first substring that - resembles a JSON array (i.e. starts with '[' and ends with ']'). It - returns the corresponding Python object if successful, otherwise - raises a ``LLMError``. - """ - # Remove Markdown code fences and other formatting noise - cleaned = text.replace("`", "").replace("json", "") - # Find the first [ ... ] block - match = re.search(r"\[.*\]", cleaned, re.DOTALL) - if not match: - raise LLMError("Não foi possível encontrar um JSON válido na resposta da IA.") - json_str = match.group(0) - try: - return json.loads(json_str) - except json.JSONDecodeError as exc: - raise LLMError(f"Erro ao decodificar JSON: {exc}") - - -def select_highlights(srt_text: str) -> List[Dict[str, Any]]: - """Call the Gemini API to select highlight segments from a transcription. - - The input ``srt_text`` should be a string containing the transcription - formatted like an SRT file, with lines of the form - ``00:00:10,140 --> 00:01:00,990`` followed by the spoken text. - - Returns a list of dictionaries, each with ``start``, ``end`` and - ``text`` keys. On failure to parse the response, a ``LLMError`` is - raised. - """ - api_key = os.environ.get("GEMINI_API_KEY") - if not api_key: - raise ValueError("GEMINI_API_KEY não definido no ambiente") - - model = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash") - - # Initialise client for Gemini. The base_url points to the - # generativelanguage API; see the official docs for details. - client = openai.OpenAI(api_key=api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/") - - # System prompt: instructs Gemini how to behave. - system_prompt = ( - "Você é um assistente especializado em selecionar **HIGHLIGHTS** de vídeo " - "a partir da transcrição com timestamps.\n" - "Sua única função é **selecionar os trechos** conforme solicitado.\n" - "- **Não resuma, não interprete, não gere comentários ou textos complementares.**\n" - "- **Retorne a resposta exatamente no formato proposto pelo usuário**, sem adicionar ou remover nada além do pedido.\n" - "- Cada trecho selecionado deve ter **no mínimo 60 segundos e no máximo 120 segundos** de duração.\n" - "- Sempre responda **em português (PT-BR)**." - ) - - # Base prompt: describes how to select highlights and the format to return. - base_prompt = ( - "Você assumirá o papel de um especialista em Marketing e Social Media, " - "sua tarefa é selecionar as melhores partes de uma transcrição que irei fornecer.\n\n" - "## Critérios de Seleção\n\n" - "- Escolha trechos baseando-se em:\n" - " - **Picos de emoção ou impacto**\n" - " - **Viradas de assunto**\n" - " - **Punchlines** (frases de efeito, momentos de virada)\n" - " - **Informações-chave**\n\n" - "## Regras Rápidas\n\n" - "- Sempre devolver pelo menos 3 trechos, não possui limite máximo\n" - "- Garanta que cada trecho fique com no MÍNIMO 60 segundos e no MÁXIMO 120 segundos.\n" - "- Nenhum outro texto além do JSON final.\n\n" - "## Restrições de Duração\n\n" - "- **Duração mínima do trecho escolhido:** 60 segundos\n" - "- **Duração máxima do trecho escolhido:** 90 a 120 segundos\n\n" - "## Tarefa\n\n" - "- Proponha o **máximo de trechos** com potencial, mas **sempre devolva no mínimo 3 trechos**.\n" - "- Extraia os trechos **apenas** da transcrição fornecida abaixo.\n\n" - "## IMPORTANTE\n" - "- Cada trecho deve ter no mínimo 60 segundos, e no máximo 120 segundos. 
Isso é indiscutível\n\n" - "## Entrada\n\n" - "- Transcrição:\n\n" - f"{srt_text}\n\n" - "## Saída\n\n" - "- Retorne **somente** a lista de trechos selecionados em formato JSON, conforme o exemplo abaixo.\n" - "- **Não escreva comentários ou qualquer texto extra.**\n" - "- No atributo \"text\", inclua o texto presente no trecho escolhido.\n\n" - "### Exemplo de Conversão\n\n" - "#### De SRT:\n" - "00:00:10,140 --> 00:01:00,990\n" - "Exemplo de escrita presente no trecho\n\n" - "#### Para JSON:\n" - "[\n" - " {\n" - " \"start\": \"00:00:10,140\",\n" - " \"end\": \"00:01:00,990\",\n" - " \"text\": \"Exemplo de escrita presente no trecho\"\n" - " }\n" - "]\n" - ) - - messages = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": base_prompt}, - ] - try: - response = client.chat.completions.create(model=model, messages=messages) - except Exception as exc: - raise LLMError(f"Erro ao chamar a API Gemini: {exc}") - # Extract message content - content = response.choices[0].message.content if response.choices else None - if not content: - raise LLMError("A resposta da Gemini veio vazia.") - result = _extract_json_array(content) - if not isinstance(result, list): - raise LLMError("O JSON retornado pela Gemini não é uma lista.") - return result - - -def generate_titles(highlights: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Call the OpenRouter API to generate a title (topText) for each highlight. - - The ``highlights`` argument should be a list of dictionaries as returned - by ``select_highlights``, each containing ``start``, ``end`` and ``text``. - This function adds a ``topText`` field to each dictionary using the - OpenRouter model specified via the ``OPENROUTER_MODEL`` environment - variable. If parsing fails, an ``LLMError`` is raised. 
- """ - api_key = os.environ.get("OPENROUTER_API_KEY") - if not api_key: - raise ValueError("OPENROUTER_API_KEY não definido no ambiente") - model = os.environ.get("OPENROUTER_MODEL") - if not model: - raise ValueError("OPENROUTER_MODEL não definido no ambiente") - # Create client for OpenRouter - client = openai.OpenAI(api_key=api_key, base_url="https://openrouter.ai/api/v1") - - # Compose prompt: instruct to generate titles only - prompt_header = ( - "Você é um especialista em Marketing Digital e Criação de Conteúdo Viral.\n\n" - "Sua tarefa é criar **títulos sensacionalistas** (*topText*) para cada trecho " - "de transcrição recebido em formato JSON.\n\n" - "## Instruções\n\n" - "- O texto deve ser **chamativo, impactante** e com alto potencial de viralização " - "em redes sociais, **mas sem sair do contexto do trecho**.\n" - "- Use expressões fortes e curiosas, mas **nunca palavras de baixo calão**.\n" - "- Cada *topText* deve ter **no máximo 2 linhas**.\n" - "- Utilize **exclusivamente** o conteúdo do trecho; não invente fatos.\n" - "- Não adicione comentários, explicações, ou qualquer texto extra na resposta.\n" - "- Responda **apenas** no seguinte formato (mantendo as chaves e colchetes):\n\n" - "[\n {\n \"start\": \"00:00:10,140\",\n \"end\": \"00:01:00,990\",\n \"topText\": \"Título impactante\"\n }\n]\n\n" - "## Observações:\n\n" - "- Nunca fuja do contexto do trecho.\n" - "- Não invente informações.\n" - "- Não utilize palavrões.\n" - "- Não escreva nada além do JSON de saída.\n\n" - "Aqui estão os trechos em JSON:\n" - ) - # Compose input JSON for the model - json_input = json.dumps(highlights, ensure_ascii=False) - full_message = prompt_header + json_input - messages = [ - { - "role": "system", - "content": "Você é um assistente útil e objetivo." - }, - { - "role": "user", - "content": full_message - }, - ] - try: - response = client.chat.completions.create( - model=model, - messages=messages, - temperature=0.7, - ) - except Exception as exc: - raise LLMError(f"Erro ao chamar a API OpenRouter: {exc}") - content = response.choices[0].message.content if response.choices else None - if not content: - raise LLMError("A resposta da OpenRouter veio vazia.") - result = _extract_json_array(content) - if not isinstance(result, list): - raise LLMError("O JSON retornado pela OpenRouter não é uma lista.") - # Merge topText back into highlights - # We assume the result list has the same order and length as input highlights - enriched: List[Dict[str, Any]] = [] - input_map = {(item["start"], item["end"]): item for item in highlights} - for item in result: - key = (item.get("start"), item.get("end")) - original = input_map.get(key) - if original is None: - # If the model returns unexpected entries, skip them - continue - enriched_item = original.copy() - # Only topText is expected - enriched_item["topText"] = item.get("topText", "").strip() - enriched.append(enriched_item) - return enriched \ No newline at end of file diff --git a/main.py b/main.py index 32fd1d1..1ef531b 100644 --- a/main.py +++ b/main.py @@ -1,265 +1,16 @@ -"""Entry point for the video processing pipeline. - -This script listens to a RabbitMQ queue for new video processing tasks. When -a message arrives, it performs the following steps: - -1. Creates a working directory for the video based off of its filename. -2. Extracts the audio track with FFMPEG and runs Faster-Whisper to produce - a transcription with word-level timestamps. -3. 
Uses the Gemini model to determine which parts of the video have the - highest potential for engagement. These highlight segments are - represented as a list of objects containing start/end timestamps and - text. -4. Uses the OpenRouter model to generate a sensational title for each - highlight. Only the ``topText`` field is kept; the description is - intentionally omitted since the caption will be burned into the video. -5. Cuts the original video into individual clips corresponding to each - highlight and renders them vertically with a title above and a dynamic - caption below. -6. Publishes a message to the upload queue with information about the - generated clips. On success, this message contains the list of output - files. On failure, ``hasError`` will be set to ``True`` and the - ``error`` field will describe what went wrong. -7. Cleans up temporary files (audio, transcript, working directory) and - deletes the original source video from the ``videos`` directory to - conserve disk space. - -The queue names and RabbitMQ credentials are configured via environment -variables. See the accompanying ``docker-compose.yml`` for defaults. -""" - -from __future__ import annotations - -import json -import os -import shutil -import time -import traceback -from typing import Any, Dict, List - -import pika - -from .utils import sanitize_filename, seconds_to_timestamp, timestamp_to_seconds -from .transcribe import transcribe -from .llm import LLMError, select_highlights, generate_titles -from .render import render_clip +from video_render.config import load_settings +from video_render.logging_utils import setup_logging +from video_render.messaging import RabbitMQWorker +from video_render.pipeline import VideoPipeline -# Environment variables with sensible defaults -RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "rabbitmq") -RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", 5672)) -RABBITMQ_USER = os.environ.get("RABBITMQ_USER", "admin") -RABBITMQ_PASS = os.environ.get("RABBITMQ_PASS") -RABBITMQ_QUEUE = os.environ.get("RABBITMQ_QUEUE", "to-render") -RABBITMQ_UPLOAD_QUEUE = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload") +def main() -> None: + setup_logging() + settings = load_settings() -if not RABBITMQ_PASS: - raise RuntimeError("RABBITMQ_PASS não definido no ambiente") - - -def get_next_message() -> Any: - """Retrieve a single message from the RABBITMQ_QUEUE. - - Returns ``None`` if no messages are available. This helper opens a new - connection for each call to avoid keeping stale connections alive. 
- """ - credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS) - parameters = pika.ConnectionParameters( - host=RABBITMQ_HOST, - port=RABBITMQ_PORT, - credentials=credentials, - heartbeat=60, - blocked_connection_timeout=300, - ) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - method_frame, _, body = channel.basic_get(RABBITMQ_QUEUE) - if method_frame: - channel.basic_ack(method_frame.delivery_tag) - connection.close() - return body - connection.close() - return None - - -def publish_to_queue(payload: Dict[str, Any]) -> None: - """Publish a JSON-serialisable payload to the RABBITMQ_UPLOAD_QUEUE.""" - credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS) - parameters = pika.ConnectionParameters( - host=RABBITMQ_HOST, - port=RABBITMQ_PORT, - credentials=credentials, - heartbeat=60, - blocked_connection_timeout=300, - ) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - channel.queue_declare(queue=RABBITMQ_UPLOAD_QUEUE, durable=True) - channel.basic_publish( - exchange="", - routing_key=RABBITMQ_UPLOAD_QUEUE, - body=json.dumps(payload), - properties=pika.BasicProperties(delivery_mode=2), - ) - connection.close() - - -def build_srt(segments: List[Dict[str, Any]]) -> str: - """Build an SRT-like string from a list of segments. - - Each segment should have ``start``, ``end`` and ``text`` fields. The - timestamps are converted to the ``HH:MM:SS,mmm`` format expected by - the Gemini prompt. Segments are separated by a blank line. - """ - lines = [] - for seg in segments: - start_ts = seconds_to_timestamp(seg["start"]) - end_ts = seconds_to_timestamp(seg["end"]) - lines.append(f"{start_ts} --> {end_ts}\n{seg['text']}") - return "\n\n".join(lines) - - -def process_message(data: Dict[str, Any]) -> Dict[str, Any]: - """Process a single video task described in ``data``. - - Returns the payload to be sent to the upload queue. Raises an - exception on failure; the caller is responsible for catching it and - posting an error payload. 
- """ - filename = data.get("filename") - if not filename: - raise ValueError("Campo 'filename' ausente na mensagem") - url = data.get("url") - video_id = data.get("videoId") - # Determine source video path; n8n stores videos in the 'videos' directory - video_path = os.path.join("videos", filename) - if not os.path.exists(video_path): - raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {video_path}") - # Sanitize the filename to use as directory name - base_no_ext = os.path.splitext(filename)[0] - sanitized = sanitize_filename(base_no_ext) - work_dir = os.path.join("app", "videos", sanitized) - # Transcribe video - segments, words = transcribe(video_path, work_dir) - # Build SRT string - srt_str = build_srt(segments) - # Call Gemini to select highlights - highlights = select_highlights(srt_str) - # Convert start/end times to floats and keep original strings for openrouter - for item in highlights: - item["start"] = item["start"].strip() - item["end"] = item["end"].strip() - # Generate titles - titles = generate_titles(highlights) - # Render clips - output_dir = os.path.join("outputs", sanitized) - processed_files: List[str] = [] - for idx, item in enumerate(titles, start=1): - start_sec = timestamp_to_seconds(item.get("start")) - end_sec = timestamp_to_seconds(item.get("end")) - # Extract relative words for caption - relative_words = [] - for w in words: - # Word must overlap clip interval - if w["end"] <= start_sec or w["start"] >= end_sec: - continue - rel_start = max(0.0, w["start"] - start_sec) - rel_end = max(0.0, w["end"] - start_sec) - relative_words.append({ - "start": rel_start, - "end": rel_end, - "word": w["word"], - }) - # If no words found (e.g. silence), create a dummy word to avoid errors - if not relative_words: - relative_words.append({"start": 0.0, "end": end_sec - start_sec, "word": ""}) - out_path = render_clip( - video_path=video_path, - start=start_sec, - end=end_sec, - top_text=item.get("topText", ""), - words=relative_words, - out_dir=output_dir, - base_name=sanitized, - idx=idx, - ) - processed_files.append(out_path) - # Compose payload - payload = { - "videosProcessedQuantity": len(processed_files), - "filename": filename, - "processedFiles": processed_files, - "url": url, - "videoId": video_id, - "hasError": False, - "error": None, - } - # Clean up working directory and original video - shutil.rmtree(work_dir, ignore_errors=True) - try: - os.remove(video_path) - except FileNotFoundError: - pass - return payload - - -def main(): - print(" [*] Esperando mensagens. 
Para sair: CTRL+C") - while True: - body = get_next_message() - if body is None: - time.sleep(5) - continue - try: - data = json.loads(body) - except Exception: - print("⚠️ Mensagem inválida recebida (não é JSON)") - continue - try: - result = process_message(data) - except Exception as exc: - # Print stack trace for debugging - traceback.print_exc() - # Attempt to clean up any directories based on filename - filename = data.get("filename") - sanitized = sanitize_filename(os.path.splitext(filename or "")[0]) if filename else "" - work_dir = os.path.join("app", "videos", sanitized) if sanitized else None - output_dir = os.path.join("outputs", sanitized) if sanitized else None - # Remove working and output directories - if work_dir: - shutil.rmtree(work_dir, ignore_errors=True) - if output_dir: - shutil.rmtree(output_dir, ignore_errors=True) - # Remove original video if present - video_path = os.path.join("videos", filename) if filename else None - if video_path and os.path.exists(video_path): - try: - os.remove(video_path) - except Exception: - pass - # Build error payload - error_payload = { - "videosProcessedQuantity": 0, - "filename": filename, - "processedFiles": [], - "url": data.get("url"), - "videoId": data.get("videoId"), - "hasError": True, - "error": str(exc), - } - try: - publish_to_queue(error_payload) - print(f"Mensagem de erro publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.") - except Exception as publish_err: - print(f"Erro ao publicar mensagem de erro: {publish_err}") - continue - # On success publish payload - try: - publish_to_queue(result) - print(f"Mensagem publicada na fila '{RABBITMQ_UPLOAD_QUEUE}'.") - except Exception as publish_err: - print(f"Erro ao publicar na fila '{RABBITMQ_UPLOAD_QUEUE}': {publish_err}") - # Loop continues + pipeline = VideoPipeline(settings) + worker = RabbitMQWorker(settings) + worker.consume_forever(pipeline.process_message) if __name__ == "__main__": diff --git a/prompts/generate.txt b/prompts/generate.txt new file mode 100644 index 0000000..ed2853b --- /dev/null +++ b/prompts/generate.txt @@ -0,0 +1,35 @@ +Voce e um estrategista de conteudo especializado em identificar cortes curtos de videos longos que performam bem em redes sociais. + +FUNCAO: +- Analisar a transcricao completa de um video. +- Escolher trechos curtos (entre 20s e 90s) com maior chance de engajamento. +- Responder APENAS em JSON valido. + +FORMATO DA RESPOSTA: +{ + "highlights": [ + { + "start": , + "end": , + "summary": "Resumo conciso do porque este trecho engaja" + } + ] +} + +REGRAS: +- Liste no maximo 6 destaques. +- Respeite a ordem cronologica. +- Nunca deixe listas vazias; se nada for relevante, inclua uma entrada com start = 0, end = 0 e summary explicando a ausencia de cortes. +- Utilize apenas valores numericos simples (ponto como separador decimal). +- Nao repita um mesmo trecho. + +PERSPECTIVA DE ANALISE: +- Concentre-se em momentos com gatilhos emocionais, insights, storytelling ou chamadas para acao fortes. +- Prefira trechos com comeco, meio e fim claros. +- Evite partes redundantes, silenciosas ou extremamente tecnicas. + +TAREFA: +- Leia a transcricao recebida no campo "transcript". +- Use a lista de marcas de tempo detalhadas no campo "segments" para embasar suas escolhas. +- Produza a saida JSON descrita acima. + diff --git a/render.py b/render.py deleted file mode 100644 index 539324e..0000000 --- a/render.py +++ /dev/null @@ -1,205 +0,0 @@ -"""Rendering logic for producing vertical clips with dynamic captions. 
- -This module defines a single function ``render_clip`` which takes a video -segment and produces a vertical clip suitable for social media. Each clip -contains three regions: - -* A top region (480px high) showing a title generated by an LLM. -* A middle region (960px high) containing the original video, scaled to - fit horizontally while preserving aspect ratio and centred vertically. -* A bottom region (480px high) showing a dynamic caption. The caption - displays a sliding window of three to five words from the transcript, - colouring the currently spoken word differently to draw the viewer's - attention. - -The function uses the MoviePy library to compose the various elements and -writes the resulting video to disk. It returns the path to the created -file. -""" - -from __future__ import annotations - -import os -from typing import Dict, List - -import numpy as np -from moviepy.video.io.VideoFileClip import VideoFileClip -from moviepy.video.VideoClip import ColorClip, VideoClip -from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip -from moviepy.video.VideoClip import TextClip -from PIL import Image, ImageDraw, ImageFont - -from .utils import wrap_text - - -def render_clip( - video_path: str, - start: float, - end: float, - top_text: str, - words: List[Dict[str, float]], - out_dir: str, - base_name: str, - idx: int, - # Use a widely available system font by default. DejaVuSans is installed - # in most Debian-based containers. The caller can override this path. - font_path: str = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", - final_width: int = 1080, - final_height: int = 1920, - top_h: int = 480, - middle_h: int = 960, - bottom_h: int = 480, - video_codec: str = "libx264", - bitrate: str = "3000k", -) -> str: - """Render a single clip with title and dynamic caption. - - Parameters - ---------- - video_path: str - Path to the source video file. - start: float - Start time of the clip in seconds. - end: float - End time of the clip in seconds. - top_text: str - The title to display in the top region. - words: List[Dict[str, float]] - List of word-level timestamps for this clip. Each dict must have - ``start``, ``end`` and ``word`` keys. The start and end values - should be relative to the beginning of this clip (i.e. start at 0). - out_dir: str - Directory where the output file should be saved. The function - creates this directory if it doesn't exist. - base_name: str - Base name of the original video (sanitized). Used to build the - output filename. - idx: int - Index of the clip. Output will be named ``clip_{idx}.mp4``. - font_path: str - Path to the TrueType font to use for both title and caption. - final_width: int - Width of the final video in pixels. - final_height: int - Height of the final video in pixels. - top_h: int - Height of the title area in pixels. - middle_h: int - Height of the video area in pixels. - bottom_h: int - Height of the caption area in pixels. - video_codec: str - FFmpeg codec to use when writing the video. - bitrate: str - Bitrate for the output video. - - Returns - ------- - str - The path to the rendered video file. 
- """ - os.makedirs(out_dir, exist_ok=True) - # Extract the segment from the source video - with VideoFileClip(video_path) as clip: - segment = clip.subclip(start, end) - dur = segment.duration - # Background - bg = ColorClip(size=(final_width, final_height), color=(0, 0, 0), duration=dur) - # Resize video to fit width - video_resized = segment.resize(width=final_width) - # Compute vertical position to centre in the middle region - y = top_h + (middle_h - video_resized.h) // 2 - video_resized = video_resized.set_position((0, y)) - - # Build title clip - # Wrap the title to avoid overflow - wrapped_lines = wrap_text(top_text, max_chars=40) - wrapped_title = "\n".join(wrapped_lines) - title_clip = TextClip( - wrapped_title, - font=font_path, - fontsize=70, - color="white", - method="caption", - size=(final_width, top_h), - align="center", - ).set_duration(dur).set_position((0, 0)) - - # Prepare font for caption rendering - pil_font = ImageFont.truetype(font_path, size=60) - default_color = (255, 255, 255) # white - highlight_color = (255, 215, 0) # gold-like yellow - - # Precompute widths of a space and bounding box height for vertical centering - space_width = pil_font.getbbox(" ")[2] - pil_font.getbbox(" ")[0] - bbox = pil_font.getbbox("A") - text_height = bbox[3] - bbox[1] - - def make_caption_frame(t: float): - """Generate an image for the caption at time t.""" - # Determine current word index - idx_cur = 0 - for i, w in enumerate(words): - if w["start"] <= t < w["end"]: - idx_cur = i - break - if t >= w["end"]: - idx_cur = i - # Define window of words to display: show up to 5 words - start_idx = max(0, idx_cur - 2) - end_idx = min(len(words), idx_cur + 3) - window = words[start_idx:end_idx] - # Compute widths for each word - word_sizes = [] - for w in window: - bbox = pil_font.getbbox(w["word"]) - word_width = bbox[2] - bbox[0] - word_sizes.append(word_width) - total_width = sum(word_sizes) + space_width * (len(window) - 1 if window else 0) - # Create blank image for caption area - img = Image.new("RGB", (final_width, bottom_h), color=(0, 0, 0)) - draw = ImageDraw.Draw(img) - x = int((final_width - total_width) / 2) - y_pos = int((bottom_h - text_height) / 2) - for j, w in enumerate(window): - color = highlight_color if (start_idx + j) == idx_cur else default_color - draw.text((x, y_pos), w["word"], font=pil_font, fill=color) - x += word_sizes[j] + space_width - return np.array(img) - - caption_clip = VideoClip(make_frame=make_caption_frame, duration=dur) - caption_clip = caption_clip.set_position((0, final_height - bottom_h)) - - # Compose final clip - final = CompositeVideoClip([ - bg, - video_resized, - title_clip, - caption_clip, - ], size=(final_width, final_height)) - # Use the original audio from the video segment - final_audio = segment.audio - if final_audio is not None: - final = final.set_audio(final_audio) - # Define output path - out_path = os.path.join(out_dir, f"clip_{idx}.mp4") - # Write to disk - final.write_videofile( - out_path, - codec=video_codec, - fps=30, - bitrate=bitrate, - audio_codec="aac", - preset="ultrafast", - ffmpeg_params=[ - "-tune", "zerolatency", - "-pix_fmt", "yuv420p", - "-profile:v", "high", - "-level", "4.1", - ], - threads=4, - ) - # Close clips to free resources - final.close() - segment.close() - return out_path \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f5ce0c5..f329669 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ -pika==1.3.2 -moviepy==2.0.0 -faster-whisper==1.2.0 -openai==1.16.0 
-numpy==1.26.4 -Pillow==10.1.0 -unidecode==1.3.6 \ No newline at end of file +moviepy==2.2.0 +pillow==10.3.0 +numpy>=1.26.0 +requests>=2.31.0 +pika>=1.3.2 +faster-whisper==1.0.0 diff --git a/transcribe.py b/transcribe.py deleted file mode 100644 index 8cb4739..0000000 --- a/transcribe.py +++ /dev/null @@ -1,111 +0,0 @@ -"""Utilities for extracting audio from video and generating transcriptions. - -This module handles two tasks: - -1. Use FFMPEG to extract the audio track from a video file into a WAV file - suitable for consumption by the Whisper model. The audio is resampled to - 16 kHz mono PCM as required by Whisper. -2. Use the Faster-Whisper implementation to generate a transcription with - word-level timestamps. The transcription is returned both as a list of - segments (for building an SRT) and as a flattened list of words (for - building dynamic subtitles). - -If FFMPEG is not installed or fails, a ``RuntimeError`` is raised. The caller -is responsible for cleaning up the temporary files created in the working -directory. -""" - -from __future__ import annotations - -import os -import subprocess -from typing import Dict, List, Tuple - -from faster_whisper import WhisperModel - - -def extract_audio_ffmpeg(video_path: str, audio_path: str) -> None: - """Use FFMPEG to extract audio from ``video_path`` into ``audio_path``. - - The output will be a 16 kHz mono WAV file in PCM S16LE format. Any - existing file at ``audio_path`` will be overwritten. If ffmpeg returns - a non-zero exit code, a ``RuntimeError`` is raised with the stderr. - """ - cmd = [ - "ffmpeg", - "-y", # overwrite output - "-i", - video_path, - "-vn", # disable video recording - "-acodec", - "pcm_s16le", - "-ar", - "16000", - "-ac", - "1", - audio_path, - ] - proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if proc.returncode != 0: - raise RuntimeError(f"FFMPEG error: {proc.stderr.decode(errors='ignore')}") - - -def load_whisper_model() -> WhisperModel: - """Instantiate and cache a Faster-Whisper model. - - The model name and device can be configured via the ``WHISPER_MODEL`` and - ``WHISPER_DEVICE`` environment variables. The default model is - ``large-v3`` for best accuracy. The device can be ``cuda`` or ``cpu``. - A module-level cache is used to prevent loading the model multiple times. - """ - if hasattr(load_whisper_model, "_cache"): - return load_whisper_model._cache # type: ignore[attr-defined] - model_name = os.environ.get("WHISPER_MODEL", "large-v3") - device = os.environ.get("WHISPER_DEVICE", "cpu") - # Compute type can be set via WHISPER_COMPUTE_TYPE; default to float16 on GPU - compute_type = os.environ.get("WHISPER_COMPUTE_TYPE") - # If not explicitly set, choose sensible defaults - if compute_type is None: - compute_type = "float16" if device == "cuda" else "int8" - model = WhisperModel(model_name, device=device, compute_type=compute_type) - load_whisper_model._cache = model # type: ignore[attr-defined] - return model - - -def transcribe(video_path: str, work_dir: str) -> Tuple[List[Dict[str, float]], List[Dict[str, float]]]: - """Transcribe a video file using Faster-Whisper. - - ``video_path`` is the path to the video to transcribe. ``work_dir`` is a - directory where temporary files will be stored (audio file and - transcription). 
The function returns a tuple ``(segments, words)`` where - ``segments`` is a list of dictionaries with ``start``, ``end`` and - ``text`` fields, and ``words`` is a flat list of dictionaries with - ``start``, ``end`` and ``word`` fields covering the entire video. - The timestamps are expressed in seconds as floats. - """ - os.makedirs(work_dir, exist_ok=True) - audio_path = os.path.join(work_dir, "audio.wav") - # Extract audio - extract_audio_ffmpeg(video_path, audio_path) - # Load Whisper model - model = load_whisper_model() - # Run transcription with word-level timestamps - segments, info = model.transcribe(audio_path, word_timestamps=True) - seg_list: List[Dict[str, float]] = [] - words_list: List[Dict[str, float]] = [] - for seg in segments: - seg_list.append({ - "start": float(seg.start), - "end": float(seg.end), - "text": seg.text.strip(), - }) - # Each segment may contain words attribute - for w in getattr(seg, "words", []) or []: - words_list.append({ - "start": float(w.start), - "end": float(w.end), - "word": w.word, - }) - # Sort words by start time to be safe - words_list.sort(key=lambda d: d["start"]) - return seg_list, words_list \ No newline at end of file diff --git a/utils.py b/utils.py deleted file mode 100644 index c8f9dbc..0000000 --- a/utils.py +++ /dev/null @@ -1,93 +0,0 @@ -import re -import unicodedata -from typing import List, Tuple - - -def sanitize_filename(name: str) -> str: - """Return a sanitized version of a filename. - - This helper removes accents, converts to lowercase, replaces spaces - with underscores and removes any non alphanumeric characters except - underscores and dots. This makes the directory names safe to use on - most filesystems and matches the behaviour described in the spec. - """ - if not name: - return "" - # Decompose Unicode characters and strip accents - nfkd_form = unicodedata.normalize("NFKD", name) - no_accents = "".join(c for c in nfkd_form if not unicodedata.combining(c)) - # Replace spaces with underscores - no_spaces = no_accents.replace(" ", "_") - # Lowercase and remove any character that is not a letter, digit, dot or underscore - sanitized = re.sub(r"[^A-Za-z0-9_.]+", "", no_spaces) - return sanitized - - -def timestamp_to_seconds(ts: str) -> float: - """Convert a timestamp in HH:MM:SS,mmm format to seconds. - - The Gemini and OpenRouter prompts use timestamps formatted with a comma - as the decimal separator. This helper splits the string into hours, - minutes and seconds and returns a float expressed in seconds. - """ - if ts is None: - return 0.0 - ts = ts.strip() - if not ts: - return 0.0 - # Replace comma by dot for decimal seconds - ts = ts.replace(",", ".") - parts = ts.split(":") - parts = [float(p) for p in parts] - if len(parts) == 3: - h, m, s = parts - return h * 3600 + m * 60 + s - elif len(parts) == 2: - m, s = parts - return m * 60 + s - else: - # only seconds - return parts[0] - - -def seconds_to_timestamp(seconds: float) -> str: - """Convert a time in seconds to HH:MM:SS,mmm format expected by SRT.""" - if seconds < 0: - seconds = 0 - h = int(seconds // 3600) - m = int((seconds % 3600) // 60) - s = seconds % 60 - # Format with comma as decimal separator and three decimal places - return f"{h:02d}:{m:02d}:{s:06.3f}".replace(".", ",") - - -def wrap_text(text: str, max_chars: int = 80) -> List[str]: - """Simple word-wrap for a string. - - Splits ``text`` into a list of lines, each at most ``max_chars`` - characters long. 
This does not attempt to hyphenate words – a word - longer than ``max_chars`` will occupy its own line. The return value - is a list of lines without trailing whitespace. - """ - if not text: - return [] - words = text.split() - lines: List[str] = [] - current: List[str] = [] - current_len = 0 - for word in words: - # If adding this word would exceed the max, flush current line - if current and current_len + 1 + len(word) > max_chars: - lines.append(" ".join(current)) - current = [word] - current_len = len(word) - else: - # Add to current line - if current: - current_len += 1 + len(word) - else: - current_len = len(word) - current.append(word) - if current: - lines.append(" ".join(current)) - return lines \ No newline at end of file diff --git a/video_render/__init__.py b/video_render/__init__.py new file mode 100644 index 0000000..e6a2b67 --- /dev/null +++ b/video_render/__init__.py @@ -0,0 +1,4 @@ +""" +Core package for the revamped video rendering pipeline. +""" + diff --git a/video_render/__pycache__/__init__.cpython-39.pyc b/video_render/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c56007a85d9bfe6bfae0e16141f734b1d89add0d GIT binary patch literal 186 zcmYe~<>g`kf`xwHG8}>QV-N=!FakLaKwQiLBvKfn7*ZI688n%yEV-QXi&7N|5|gtN z(^D1F@{1HoGExckymWmZKhR)r+MZ? z{Vw}dd60XMGxI#pQkNC8fZsHJi}=mpx5Sey%W~hSw-lCVg|8J>IMGg2vmE0WSqc1- z62*JeR!G~Vkw-r;0GEgI17=$pE+zICd?$U%7|_&)c0hgQ!W4}Ehc(%aJO9pk26 zH%_mUV(yUJ!r$YzZ!ShUH@92sxAhInXzb{%UBh_~a|V^YzS}$E%<`Q9hwn9)YeCks zhBj7j1$oOFxNO{~cG1G>?EXZexn7Xr!-tM=he6_i`yxjlu2@#g*DXs>mJkIZMIvP) z6(UF)ikKlXM`Rx4Uy68%+C+X_TwRkPVTEV9h9li~4pv7`L>gk6$D`4CZ9%=HE=>L| zp)KVgtBkXx{4nm`8jqeYMndhdiiwHr(=w?{$qSQ z|Havnn^$70JM}+WSWSD^u#DDiz1clgkk+JCg4)1-Y=!mI^4%j&#kch9jD60|?~)~7 zWZh*5-XZH!mZO}MR+B#!6Uas+dCsmGFmF)p{xx<*-pMkcBX&L)5ib|ZMR{1Z$O@N{ zZKTCZtO$(~>{>>~s05XJhmMEPi;Q}4v)(0x-rH()5wDiCn$>NXx|tqu=8Okn0?Esi zV0q3lirTp`8~WYGx-MSD&Vn@K51k%2FQ3y{M_j^lEaQG#4~jhmCVb2CpNzP9<(%q9 z>t0jV%1*0g9A{S_g-aHMD}xeq9}TH+TEcZvvdX_;)D;X0BY2fsYeZP&NPU+M!5IQBgH)cC@GLQr zoWV1aNJCE~XW>f{Dm~#`vhV<+mB6L46LO~zjkpBSLhclz5tkrZ$dv%iEXx@Po`qZq z&QNSfm!Mh5mB7rbN-kg-2TYbby2@Ubt~SG#p#KUa(;dCaUXzkpx!2cYk~i3!QZgqc zZ^a~Uvt=onmy&m4l6TpAQnDZ=D{$#mxb#|3>qz7UNEkVFv7>Xx> z!oDDI^T=U<-KB}rmfpCvWjKFDUw>rm}*ETmd zKDc%*s2pN9ecBN|*>{CG8;z0ff2O{PC&=&F9-nAdrzg;APKs)Eb#2ET`T)pS-Ky{J zhhdr92UFHmb73+jT-7Fxq;zsh`7WsVjt@vFwPh~3IMI%lbT;4y4Vdx9d*{d0*6VGf z(P~-%JJ!8cXQO+XAl+Ke9??b10w}TwsC(v_DvCv@D$~fBD(Zw(Z7yFppGstXKZgiD z#D~IsLPMeZpcA}A7oj(aW(EdTGe==oN7P zO37Pd5xo*nZ%U?~G7>`p?IQH5W=*zHU-*nUE89p!tb)L-2H9ymN?0za@JXl>q(jsx z>tm32fi#3f8Zx5>nQ6YK%>}ve4uI$m9W5VTmM!Lppb!>#Pj<3|DXG<5o;zRMEX#BJgfL%cCzZ4)a;h$U3np^+~# z=q3bhydK?r@>*G){AoqCs0xzOWnXaM;b$-Vs38jZEBrXf9P^rNYOd;rm1RcvV01VQVytv1X zNM_ZNEay|{pP=p0w2GS6vT9bVs`%5yU7BUy#I6O1=i_~)C!XM-t;=~zgk$QpmJS`NhE&Dy&&-Q!2_p_s3&nHmc{Nly*6^D@D zad5l1F!+cNT0R4X6HZf-(STCaS;_)tq!n03+JOzyN}bFNTw`OWt;`ENN(8%T3;&V^ z9qw?Kx6T>&c>Bx_y4*Llb8hi2@4-wD#%oY$NqC78D zq-I54j$v$n8L25q@3A`M}??(HsRPu?o$3-p%Oxux6O6^LaDw)R=kGn1w$QHDZ zG2U-M;gmCOaXYbV@-sc9id<0i{(|uq_s&R7KOrl6NiP{C$~v@b%G=!kk)5)d9Xh=8 z)RJE-mv?dWoczRA^cm#@qUenScQVQ5B3X8a(-{}x0fZyuvOP?TBo)UZ)sB#|kjuM| z79y@xl=GtBZ26#nzr3RT|GCb6j} zm50|hdub6zX}Lf8*YMK;4V-u?;-?bfUhXuO{YjKgi>e3KTB-gO+vygG5N} zaB)y2+CD61x%SKIU@nVTl%=-dI_+O)5H{)VS5>ZNnRtYd4J?VQ)mEJGpbe+bQ=tSm zfi>MaE_ZG@-GzzLJOsKycjy}RseBdYBvN8v>CP;lS1L4+Xuqfwj=qnMCDJ8E@NWw0 z9cYF_?|`BVixjPaDafUU{V=90nzUE&l4=TR@y1tdcmi#Nlo%4Bj4{fn3+<-5f3d&q zo_qsVwKGYJNDUlwJ`hC#E3^eH(4FhJ=kU;)z>#q`1wIdgFq**kLBYUlR?}^=VyCt; 
[GIT binary patch literals omitted: compiled bytecode added under video_render/__pycache__/ (llm.cpython-39.pyc, media.cpython-39.pyc, messaging.cpython-39.pyc, pipeline.cpython-39.pyc, rendering.cpython-39.pyc)]
z)ExD_8FXR0(QeZU_~g;orzpu;`U+YX`Fg-Y zC=-4XkD{EN#E@*mDTjXV3N z-txxq+?VM#^vHQoCP{t9I2)EuhF%c-Vk2y7J1PsG#KYv50YG=0Y~lYk0BA!5681F6 z)ejI1wf7MJxS@p}Am~uUJ}&GNY|4mI)VexyVkfP236QGw`u-)v?ybm%m2VLCz`!*g zZX@<^0gTiY`aO=w>|a8E7PPpAdj@+C_Y3HOM}@}5tpsdsl<(!~S`&Bh);-d>bOT^- zeq_#YA4L<6g-S@m)5D<>a%xCNrQVzjk%zqfW4undB~AJsS9k${W?Gd)(-%P*6JJ!#vhpxL0 zP8qI!(%OfW$tMo8_g<+2ObY2t()*I!Bui1EyfXFEB3!GG;ZIi(k2;|x%#@S`ljvUn z;5a_MC>ZO)gqdp`YKJO#20{$aQvea}CSv6rcSrCrwgI^j!fc3d!wg0a7h5o!5x#?= zV4pX#?^ z4zvKz5F@~9Lq}juvyh@eWxcM`JIDAMaBELroQMRAODXUXuO5`Jb69pjODCqkc ze6vBHp2!_)K}_psZl(2YMF1yA_PeOlC6c8ujOMO@NG*eiv*k{Hg#%DZmTZVoT0qL za+jW6`hy@(@>M7b^w5J25_%|n?O#wJ|HWQ=^0hz{AZeZc-jEb&OG!}@GxO%n=X>*h z@4bzdmV67ps~>%O&t0~xe^6)fC(yZJSuFoM2x&={SUnanMl?@&z)fohHd;FoJtuI? zm`L1SC8!wMNxWV)sP_E8H+?s$_3A+#bVYi}Qg1m}W|p!KR#fAV1*^)DRq5}s4=h=e z^(U6B@A6$1tS#`Der?j)Xa)KXS{Jvil>VJe z%iD1eyO&NU$)cik@*>hjX{%J0c3$XGJj9&Mr=s-2FpYXD3`;)@dzl<0MAyUc{vb-G zBP(IJAbA)*we%TW$W))7Tfg!kmMRN1Hdp#ep`$c!>)6P@-hZrXnA$+eT~t2zn8yt7 ztEK%BCY~Uz4Jierq}E7_wt~Kgwu;t=^tE#J_R$5l)J{*O5IaW2W2x)dST9iptF`nB zs!|l{Q88@LtNnJaxb4y*4dt79m1Z7^JE2}7-ZxPZkrNgsav9l(|JAa62cusjn*$l# z^3+@g7|}0PK1hn;E9Q#xk*RdsrI%0S%qId>JA)(%Nvf~8{A;*I6D238(&aUAj$PXU z^9C8<#~^0cmP~SK?{g`n^F#o|uB_}c>B%a3Liz+ij1t^L;3yY;A1hY&ZP}2kPXu6D zk!u93^7LFB2)d>8gSf496^E5#GfNfl1nDL~DP66Kflk{T!?PdJRNkXYl9i3v$O_?S zPy;PEgl)FCK97;oCHqzhVKi4s$1oROmfn)!5T|hwhL_MY%CVTshOeD;&-K|{UBToQ z)89qYCVbC+M3dE<+x0<=tS;_0uynUz2VCN=UArr~&a`#=uS-6qEBy^=9|~p_-mzI9 zF3kG1w2SI=hQG_C*b}4ya7cP*bNKW3BCXXK9YQ7;A zDgP%I^0%7ptk)kDDl8uNiP0)WG$^vNB2_1XDYop=oy;!li&M)bnLSM5aAc^ntZ3Er z>yWv0lWb>4!RN$jCo@_PDi0?vG4Rce!aN=--NI!3CRLSvjbW z+(SkyrME|stGfA&5PZAEgX&!s^^Bf_74mFh5%(0LLa(2{V6r46seg+AwGLtjqHSLs zvk~iB2kg+&g2e0EC}PHw)}H-GeqRvWp4tl@mJr!;G&K~15EYE;pU;4v`T9o59=`m# z{pciCy~h4X1uprGuW*5s*X6CL4U8rn*tTkKkt_!omsUQA^Ry;uJ_Q z7JLPqz%dqU4lG$1tn?$DD;dt5c(8KhzhQg2$PvEeqp~s`UpV_kJm?=|^LJ5l3ZE7b zAmF(;5|8Rcn6!4ED0qaI~ zCu@7~hj-vX1^&A>v(m=fR{r}9i<$p#P><||^=HQpa6{ge5l)V+{PW_<0$7vv5j*6R zZ^D$9Hiv67Bba0}-8@T&t1t43N5wG!k>mzI8z2mT-k1OgfFU3m07>L;&;VdW8jyV7 znL@}uwjXKJ>_=8=_B-?9e$v`M0$^(qfX2$-#wrBCchDFpzJxXXLkz?yCf)&LUk=|r z(GHNq<^a@5_*#IPCi^qh?987M<0WgQGuw3L8Y;KOKO<|TzstpQ@x^fC|Gj~}N~in) z)zBFfoy%8|S9{2*Gl@L-<}>nZaD#P~<^!$5C~wDc>BdM|(_-)~49?E`&Y6WI#GV4MQk}9Q69g6Lk!i@D`nZnGSLF4WfUBDyYGDP2Wn>ew z(vKcf&?$MR|B_zH%2dhQSe{cVX;H>(yEXAIu_XTKt~S^Pu>2+;o}Jy;*yc2MTAwc8 z#H~rQ`V>{Csk%rNxi;e|$~v5X5x0A)xR*)&A@S!=WGIv6HZedeO_uAihFESi&U2=T z-#QcEqoFfER)&uTO)AzmQ3Vx>_)vRLJNiOWxwbI``4CT&_^t`HrHc#$^E5>%O;#Q( zcT8>)P7?%^3IuLDOFQw7NeqMPjw)`MjAA08(LBZDiT@?9Yi@UvIHh+D-WT6E*C{p2 Y2~KtMMS^UBHFzC=--eVfiihI-7e|$J4FCWD literal 0 HcmV?d00001 diff --git a/video_render/__pycache__/utils.cpython-39.pyc b/video_render/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ce9f5a8ca3f121c356e57b8ece40ab6a7bb0122 GIT binary patch literal 1284 zcmZ`&&1)Pt6qhugJG0;|D3)H8T*Tj>jOUa zBV*j2L6J=Iiq$;loODr%T=X)?1IVBX>nM->x;FKwkD&+&~ z%tZLGDQo3pqujzYGfv@GBY=$XJ)$~tD2}bTPqB_M%`JEg!8-|jXxi5K;c;6b zSwT@I!Ol!z`8en(F{x%Xq zy^rd1sSV2f0x1mT$p|)1ntHv&jDn1?f99~NRF%*I#zJ|6Of27kh z;@6=Mr){Hfe>8__Hn)sOs>oK=k)L`zYo&u!>@2C$7ajgg1tlWU^++@(?*TS|}MXi+~yTS*@+r%?ZHr z{98CclnZ67nxvo-!DxM=ZQos=L9v&0rJHjfFPe?Ct#L~F$edK|5jboM;0?3hz Settings: + settings = Settings() + + if not settings.rabbitmq.password: + raise RuntimeError("RABBITMQ_PASS must be provided") + + settings.videos_dir.mkdir(parents=True, exist_ok=True) + settings.outputs_dir.mkdir(parents=True, exist_ok=True) + settings.temp_dir.mkdir(parents=True, exist_ok=True) + + return settings diff --git 
a/video_render/ffmpeg.py b/video_render/ffmpeg.py new file mode 100644 index 0000000..358d7a5 --- /dev/null +++ b/video_render/ffmpeg.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import logging +import shlex +import subprocess +from pathlib import Path +from typing import Sequence + +logger = logging.getLogger(__name__) + + +def _run_ffmpeg(args: Sequence[str]) -> None: + cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", *args] + logger.debug("Executando ffmpeg: %s", " ".join(shlex.quote(part) for part in cmd)) + completed = subprocess.run(cmd, check=False) + if completed.returncode != 0: + raise RuntimeError(f"ffmpeg falhou com exit code {completed.returncode}") + + +def extract_audio_to_wav(input_video: Path, output_wav: Path) -> Path: + _run_ffmpeg( + [ + "-y", + "-i", + str(input_video), + "-ac", + "1", + "-ar", + "16000", + "-vn", + str(output_wav), + ] + ) + return output_wav + + +def create_video_segment(input_video: Path, start: float, end: float, output_path: Path) -> Path: + duration = max(0.01, end - start) + _run_ffmpeg( + [ + "-y", + "-i", + str(input_video), + "-ss", + f"{start:.3f}", + "-t", + f"{duration:.3f}", + "-c", + "copy", + str(output_path), + ] + ) + return output_path + diff --git a/video_render/llm.py b/video_render/llm.py new file mode 100644 index 0000000..8a7b143 --- /dev/null +++ b/video_render/llm.py @@ -0,0 +1,187 @@ +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Dict, List + +import requests + +from .config import BASE_DIR, Settings +from .transcription import TranscriptionResult + +logger = logging.getLogger(__name__) + +GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent" +OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions" + + +class GeminiHighlighter: + def __init__(self, settings: Settings) -> None: + if not settings.gemini.api_key: + raise RuntimeError("GEMINI_API_KEY nao foi definido") + + prompt_path = Path(settings.gemini.prompt_path) + + if not prompt_path.is_absolute(): + prompt_path = BASE_DIR / prompt_path + + if not prompt_path.exists(): + raise FileNotFoundError(f"Prompt do Gemini nao encontrado: {prompt_path}") + + self.prompt_template = prompt_path.read_text(encoding="utf-8") + self.settings = settings + + def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]: + payload = { + "transcript": transcription.full_text, + "segments": [ + { + "start": segment.start, + "end": segment.end, + "text": segment.text, + } + for segment in transcription.segments + ], + } + + body = { + "contents": [ + { + "role": "user", + "parts": [ + {"text": self.prompt_template}, + {"text": json.dumps(payload, ensure_ascii=False)}, + ], + } + ] + } + + if self.settings.gemini.temperature is not None: + body["generationConfig"] = { + "temperature": self.settings.gemini.temperature, + } + if self.settings.gemini.top_p is not None: + body["generationConfig"]["topP"] = self.settings.gemini.top_p + if self.settings.gemini.top_k is not None: + body["generationConfig"]["topK"] = self.settings.gemini.top_k + + url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model) + params = {"key": self.settings.gemini.api_key} + + response = requests.post(url, params=params, json=body, timeout=120) + response.raise_for_status() + data = response.json() + + candidates = data.get("candidates") or [] + if not candidates: + raise RuntimeError("Gemini nao retornou candidatos") + + text_parts = 
candidates[0].get("content", {}).get("parts", []) + if not text_parts: + raise RuntimeError("Resposta do Gemini sem conteudo") + + raw_text = text_parts[0].get("text") + if not raw_text: + raise RuntimeError("Resposta do Gemini sem texto") + + parsed = self._extract_json(raw_text) + highlights = parsed.get("highlights") + if not isinstance(highlights, list): + raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente") + return highlights + + @staticmethod + def _extract_json(response_text: str) -> Dict: + try: + return json.loads(response_text) + except json.JSONDecodeError: + start = response_text.find("{") + end = response_text.rfind("}") + if start == -1 or end == -1: + raise + subset = response_text[start : end + 1] + return json.loads(subset) + + +class OpenRouterCopywriter: + def __init__(self, settings: Settings) -> None: + if not settings.openrouter.api_key: + raise RuntimeError("OPENROUTER_API_KEY nao foi definido") + self.settings = settings + + def generate_titles(self, highlights: List[Dict]) -> List[str]: + if not highlights: + return [] + + prompt = ( + "Voce e um copywriter especializado em titulos curtos e virais para reels.\n" + "Recebera uma lista de trechos destacados de um video com resumo e tempo.\n" + "Produza um titulo envolvente (ate 60 caracteres) para cada item.\n" + "Responda apenas em JSON com a seguinte estrutura:\n" + '{"titles": ["titulo 1", "titulo 2"]}\n' + "Titulos devem ser em portugues, usar verbos fortes e refletir o resumo." + ) + + user_payload = { + "highlights": [ + { + "start": item.get("start"), + "end": item.get("end"), + "summary": item.get("summary"), + } + for item in highlights + ] + } + + body = { + "model": self.settings.openrouter.model, + "temperature": self.settings.openrouter.temperature, + "max_tokens": self.settings.openrouter.max_output_tokens, + "messages": [ + {"role": "system", "content": prompt}, + { + "role": "user", + "content": json.dumps(user_payload, ensure_ascii=False), + }, + ], + } + + headers = { + "Authorization": f"Bearer {self.settings.openrouter.api_key}", + "Content-Type": "application/json", + "HTTP-Referer": "https://localhost", + "X-Title": "video-render-pipeline", + } + + response = requests.post( + OPENROUTER_ENDPOINT, json=body, headers=headers, timeout=120 + ) + response.raise_for_status() + data = response.json() + + choices = data.get("choices") or [] + if not choices: + raise RuntimeError("OpenRouter nao retornou escolhas") + + message = choices[0].get("message", {}).get("content") + if not message: + raise RuntimeError("Resposta do OpenRouter sem conteudo") + + parsed = self._extract_json(message) + titles = parsed.get("titles") + if not isinstance(titles, list): + raise ValueError("Resposta do OpenRouter invalida: campo 'titles'") + return [str(title) for title in titles] + + @staticmethod + def _extract_json(response_text: str) -> Dict: + try: + return json.loads(response_text) + except json.JSONDecodeError: + start = response_text.find("{") + end = response_text.rfind("}") + if start == -1 or end == -1: + raise + subset = response_text[start : end + 1] + return json.loads(subset) diff --git a/video_render/logging_utils.py b/video_render/logging_utils.py new file mode 100644 index 0000000..c3967f4 --- /dev/null +++ b/video_render/logging_utils.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import logging +import os + + +def setup_logging() -> None: + log_level = os.environ.get("LOG_LEVEL", "INFO").upper() + logging.basicConfig( + level=log_level, + format="%(asctime)s 
[%(levelname)s] %(name)s: %(message)s", + ) + diff --git a/video_render/media.py b/video_render/media.py new file mode 100644 index 0000000..360b231 --- /dev/null +++ b/video_render/media.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import logging +import shutil +from dataclasses import dataclass +from pathlib import Path + +from .config import Settings +from .ffmpeg import extract_audio_to_wav +from .utils import ensure_workspace, remove_paths, sanitize_filename + +logger = logging.getLogger(__name__) + + +@dataclass +class VideoWorkspace: + original_filename: str + sanitized_name: str + workspace_dir: Path + output_dir: Path + source_path: Path + working_video_path: Path + audio_path: Path + + +class MediaPreparer: + def __init__(self, settings: Settings) -> None: + self.settings = settings + + def prepare(self, filename: str) -> VideoWorkspace: + source_path = self.settings.videos_dir / filename + if not source_path.exists(): + raise FileNotFoundError(f"Arquivo de vídeo não encontrado: {source_path}") + + sanitized_name = sanitize_filename(Path(filename).stem) + workspace_dir = ensure_workspace(self.settings.videos_dir, sanitized_name) + + existing_children = list(workspace_dir.iterdir()) + if existing_children: + logger.info("Limpando workspace existente para %s", sanitized_name) + remove_paths(existing_children) + + destination_name = f"{sanitized_name}{source_path.suffix.lower()}" + working_video_path = workspace_dir / destination_name + shutil.copy2(source_path, working_video_path) + logger.info("Cópia do vídeo criada em %s", working_video_path) + + output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name) + existing_outputs = list(output_dir.iterdir()) + if existing_outputs: + remove_paths(existing_outputs) + + audio_path = workspace_dir / "audio.wav" + extract_audio_to_wav(working_video_path, audio_path) + + return VideoWorkspace( + original_filename=filename, + sanitized_name=sanitized_name, + workspace_dir=workspace_dir, + output_dir=output_dir, + source_path=source_path, + working_video_path=working_video_path, + audio_path=audio_path, + ) diff --git a/video_render/messaging.py b/video_render/messaging.py new file mode 100644 index 0000000..28470f5 --- /dev/null +++ b/video_render/messaging.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import json +import logging +from typing import Any, Callable, Dict + +import pika + +from .config import Settings + +logger = logging.getLogger(__name__) + +MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]] + + +class RabbitMQWorker: + def __init__(self, settings: Settings) -> None: + self.settings = settings + self._params = pika.ConnectionParameters( + host=settings.rabbitmq.host, + port=settings.rabbitmq.port, + credentials=pika.PlainCredentials( + settings.rabbitmq.user, settings.rabbitmq.password + ), + heartbeat=settings.rabbitmq.heartbeat, + blocked_connection_timeout=settings.rabbitmq.blocked_timeout, + ) + + def consume_forever(self, handler: MessageHandler) -> None: + while True: + try: + with pika.BlockingConnection(self._params) as connection: + channel = connection.channel() + channel.queue_declare(queue=self.settings.rabbitmq.consume_queue, durable=True) + channel.queue_declare(queue=self.settings.rabbitmq.publish_queue, durable=True) + channel.basic_qos(prefetch_count=self.settings.rabbitmq.prefetch_count) + + def _on_message(ch: pika.adapters.blocking_connection.BlockingChannel, method, properties, body): + try: + message = json.loads(body) + except json.JSONDecodeError: + 
logger.error("Mensagem inválida recebida: %s", body) + ch.basic_ack(delivery_tag=method.delivery_tag) + return + + logger.info("Mensagem recebida: %s", message.get("filename", "")) + try: + response = handler(message) + except Exception: + logger.exception("Erro não tratado durante o processamento") + response = { + "hasError": True, + "error": "Erro não tratado no pipeline", + "filename": message.get("filename"), + "videoId": message.get("videoId"), + "url": message.get("url"), + "processedFiles": [], + } + + try: + payload = json.dumps(response) + ch.basic_publish( + exchange="", + routing_key=self.settings.rabbitmq.publish_queue, + body=payload, + properties=pika.BasicProperties(delivery_mode=2), + ) + logger.info("Resposta publicada para '%s'", self.settings.rabbitmq.publish_queue) + except Exception: + logger.exception("Falha ao publicar a resposta na fila de upload") + finally: + ch.basic_ack(delivery_tag=method.delivery_tag) + + channel.basic_consume( + queue=self.settings.rabbitmq.consume_queue, + on_message_callback=_on_message, + auto_ack=False, + ) + logger.info("Consumidor iniciado. Aguardando mensagens...") + channel.start_consuming() + except pika.exceptions.AMQPConnectionError: + logger.exception("Conexão com RabbitMQ perdida. Tentando reconectar...") + except KeyboardInterrupt: + logger.info("Encerrando consumidor por interrupção do usuário.") + break diff --git a/video_render/pipeline.py b/video_render/pipeline.py new file mode 100644 index 0000000..0d4b7cd --- /dev/null +++ b/video_render/pipeline.py @@ -0,0 +1,236 @@ +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .config import Settings +from .llm import GeminiHighlighter, OpenRouterCopywriter +from .media import MediaPreparer, VideoWorkspace +from .transcription import TranscriptionResult, TranscriptionService +from .utils import remove_paths, sanitize_filename +from .rendering import VideoRenderer + +logger = logging.getLogger(__name__) + + +@dataclass +class JobMessage: + filename: str + url: Optional[str] + video_id: Optional[str] + extras: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class HighlightWindow: + start: float + end: float + summary: str + title: Optional[str] = None + + +@dataclass +class RenderedClip: + path: Path + start: float + end: float + title: str + summary: str + index: int + + +@dataclass +class PipelineContext: + job: JobMessage + workspace: Optional[VideoWorkspace] = None + transcription: Optional[TranscriptionResult] = None + highlight_windows: List[HighlightWindow] = field(default_factory=list) + rendered_clips: List[RenderedClip] = field(default_factory=list) + + +class VideoPipeline: + def __init__(self, settings: Settings) -> None: + self.settings = settings + self.media_preparer = MediaPreparer(settings) + self.transcriber = TranscriptionService(settings) + self.highlighter = GeminiHighlighter(settings) + self.copywriter = OpenRouterCopywriter(settings) + self.renderer = VideoRenderer(settings) + + def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]: + context = PipelineContext(job=self._parse_job(message)) + try: + self._prepare_workspace(context) + self._generate_transcription(context) + self._determine_highlights(context) + self._generate_titles(context) + self._render_clips(context) + return self._build_success_payload(context) + except Exception as exc: + logger.exception("Falha ao processar vídeo %s", 
context.job.filename) + return self._handle_failure(context, exc) + + def _parse_job(self, message: Dict[str, Any]) -> JobMessage: + filename = message.get("filename") + if not filename: + raise ValueError("Mensagem inválida: 'filename' é obrigatório") + + url = message.get("url") + video_id = message.get("videoId") or message.get("video_id") + extras = { + key: value + for key, value in message.items() + if key not in {"filename", "url", "videoId", "video_id"} + } + return JobMessage(filename=filename, url=url, video_id=video_id, extras=extras) + + def _prepare_workspace(self, context: PipelineContext) -> None: + context.workspace = self.media_preparer.prepare(context.job.filename) + + def _generate_transcription(self, context: PipelineContext) -> None: + if not context.workspace: + raise RuntimeError("Workspace não preparado") + transcription = self.transcriber.transcribe(context.workspace.audio_path) + TranscriptionService.persist(transcription, context.workspace.workspace_dir) + context.transcription = transcription + + def _determine_highlights(self, context: PipelineContext) -> None: + if not context.transcription: + raise RuntimeError("Transcricao nao disponivel") + + highlights_raw = self.highlighter.generate_highlights(context.transcription) + windows: List[HighlightWindow] = [] + + for item in highlights_raw: + try: + start = float(item.get("start", 0)) # type: ignore[arg-type] + end = float(item.get("end", start)) # type: ignore[arg-type] + except (TypeError, ValueError): + logger.warning("Highlight invalido ignorado: %s", item) + continue + + summary = str(item.get("summary", "")).strip() + if end <= start: + logger.debug("Highlight com intervalo invalido ignorado: %s", item) + continue + + windows.append(HighlightWindow(start=start, end=end, summary=summary)) + + if not windows: + last_end = ( + context.transcription.segments[-1].end + if context.transcription.segments + else 0 + ) + windows.append( + HighlightWindow( + start=0.0, + end=max(last_end, 10.0), + summary="Sem destaque identificado; fallback automatico.", + ) + ) + + context.highlight_windows = windows + + def _generate_titles(self, context: PipelineContext) -> None: + if not context.highlight_windows: + return + + highlight_dicts = [ + {"start": window.start, "end": window.end, "summary": window.summary} + for window in context.highlight_windows + ] + titles = self.copywriter.generate_titles(highlight_dicts) + + for window, title in zip(context.highlight_windows, titles): + window.title = title.strip() + + + def _render_clips(self, context: PipelineContext) -> None: + if not context.workspace or not context.highlight_windows or not context.transcription: + return + + titles = [ + window.title or window.summary for window in context.highlight_windows + ] + + render_results = self.renderer.render( + workspace_path=str(context.workspace.working_video_path), + highlight_windows=context.highlight_windows, + transcription=context.transcription, + titles=titles, + output_dir=context.workspace.output_dir, + ) + + context.rendered_clips = [ + RenderedClip( + path=Path(path), + start=start, + end=end, + title=title, + summary=summary, + index=index, + ) + for path, start, end, title, summary, index in render_results + ] + + def _build_success_payload(self, context: PipelineContext) -> Dict[str, Any]: + return { + "hasError": False, + "videosProcessedQuantity": len(context.rendered_clips), + "filename": context.job.filename, + "videoId": context.job.video_id, + "url": context.job.url, + "workspaceFolder": 
context.workspace.sanitized_name if context.workspace else None, + "outputDirectory": self._relative_path(context.workspace.output_dir) if context.workspace else None, + "processedFiles": [ + { + "path": self._relative_path(clip.path), + "start": clip.start, + "end": clip.end, + "title": clip.title, + "summary": clip.summary, + "clipIndex": clip.index, + } + for clip in context.rendered_clips + ], + } + + def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]: + logger.error("Erro no pipeline: %s", exc) + cleanup_targets: List[Path] = [] + + if context.workspace: + cleanup_targets.append(context.workspace.workspace_dir) + cleanup_targets.append(context.workspace.output_dir) + original_path = context.workspace.source_path + if original_path.exists(): + cleanup_targets.append(original_path) + else: + sanitized = sanitize_filename(Path(context.job.filename).stem) + job_output_dir = self.settings.outputs_dir / sanitized + if job_output_dir.exists(): + cleanup_targets.append(job_output_dir) + original_path = self.settings.videos_dir / context.job.filename + if original_path.exists(): + cleanup_targets.append(original_path) + + remove_paths(cleanup_targets) + + return { + "hasError": True, + "error": str(exc), + "filename": context.job.filename, + "videoId": context.job.video_id, + "url": context.job.url, + "processedFiles": [], + } + + def _relative_path(self, path: Path) -> str: + base = self.settings.videos_dir.parent + try: + return str(path.relative_to(base)) + except ValueError: + return str(path) diff --git a/video_render/rendering.py b/video_render/rendering.py new file mode 100644 index 0000000..b59abb7 --- /dev/null +++ b/video_render/rendering.py @@ -0,0 +1,406 @@ +from __future__ import annotations + +import logging +import math +import re +from dataclasses import dataclass +from typing import Iterable, List, Sequence, Tuple + +import numpy as np +from moviepy.editor import ( + ColorClip, + CompositeVideoClip, + ImageClip, + TextClip, + VideoFileClip, +) +from PIL import Image, ImageColor, ImageDraw, ImageFont + +from .config import Settings +from .transcription import TranscriptionResult, WordTiming + +logger = logging.getLogger(__name__) + + +def clamp_time(value: float, minimum: float = 0.0) -> float: + return max(minimum, float(value)) + + +@dataclass +class CaptionClipSet: + base: ImageClip + highlights: List[ImageClip] + + +class CaptionBuilder: + def __init__(self, settings: Settings) -> None: + self.settings = settings + self.font_path = settings.rendering.font_path + if not self.font_path.exists(): + raise FileNotFoundError(f"Fonte nao encontrada: {self.font_path}") + + self.font = ImageFont.truetype( + str(self.font_path), settings.rendering.subtitle_font_size + ) + self.base_color = ImageColor.getrgb(settings.rendering.base_color) + self.highlight_color = ImageColor.getrgb(settings.rendering.highlight_color) + self.canvas_width = settings.rendering.frame_width - 160 + self.canvas_height = int(settings.rendering.subtitle_font_size * 2.2) + self.min_words = settings.rendering.caption_min_words + self.max_words = settings.rendering.caption_max_words + + bbox = self.font.getbbox("Ay") + self.text_height = bbox[3] - bbox[1] + self.baseline = (self.canvas_height - self.text_height) // 2 - bbox[1] + self.space_width = self.font.getbbox(" ")[2] - self.font.getbbox(" ")[0] + + def build(self, words: Sequence[WordTiming], clip_start: float) -> List[CaptionClipSet]: + grouped = self._group_words(words) + clip_sets: List[CaptionClipSet] = [] + + for 
group in grouped: + group_start = clamp_time(group[0].start, minimum=clip_start) + group_end = clamp_time(group[-1].end, minimum=group_start + 0.05) + duration = max(0.05, group_end - group_start) + start_offset = group_start - clip_start + + base_image, highlight_images = self._render_group(group) + + base_clip = ( + ImageClip(np.array(base_image)) + .with_start(start_offset) + .with_duration(duration) + ) + + highlight_clips: List[ImageClip] = [] + for word, image in zip(group, highlight_images): + h_start = clamp_time(word.start, minimum=clip_start) - clip_start + h_end = clamp_time(word.end, minimum=word.start + 0.02) - clip_start + h_duration = max(0.05, h_end - h_start) + highlight_clip = ( + ImageClip(np.array(image)) + .with_start(h_start) + .with_duration(h_duration) + ) + highlight_clips.append(highlight_clip) + + clip_sets.append(CaptionClipSet(base=base_clip, highlights=highlight_clips)) + + return clip_sets + + def _render_group(self, group: Sequence[WordTiming]) -> Tuple[Image.Image, List[Image.Image]]: + texts = [self._clean_word(word.word) for word in group] + + widths = [] + for text in texts: + bbox = self.font.getbbox(text) + widths.append(bbox[2] - bbox[0]) + + total_width = sum(widths) + if len(widths) > 1: + total_width += self.space_width * (len(widths) - 1) + + start_x = max(0, (self.canvas_width - total_width) // 2) + + base_image = Image.new("RGBA", (self.canvas_width, self.canvas_height), (0, 0, 0, 0)) + base_draw = ImageDraw.Draw(base_image) + highlight_images: List[Image.Image] = [] + + x = start_x + for text, width in zip(texts, widths): + base_draw.text((x, self.baseline), text, font=self.font, fill=self.base_color) + + highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0)) + highlight_draw = ImageDraw.Draw(highlight_image) + highlight_draw.text( + (x, self.baseline), text, font=self.font, fill=self.highlight_color + ) + highlight_images.append(highlight_image) + + x += width + self.space_width + + return base_image, highlight_images + + def _group_words(self, words: Sequence[WordTiming]) -> List[List[WordTiming]]: + if not words: + return [] + + grouped: List[List[WordTiming]] = [] + buffer: List[WordTiming] = [] + + for word in words: + buffer.append(word) + if len(buffer) == self.max_words: + grouped.append(buffer) + buffer = [] + + if buffer: + if len(buffer) == 1 and grouped: + grouped[-1].extend(buffer) + else: + grouped.append(buffer) + + # Rebalance groups to respect minimum size when possible + for idx, group in enumerate(grouped[:-1]): + if len(group) < self.min_words and len(grouped[idx + 1]) > self.min_words: + deficit = self.min_words - len(group) + transfer = grouped[idx + 1][:deficit] + grouped[idx] = group + transfer + grouped[idx + 1] = grouped[idx + 1][deficit:] + + grouped = [grp for grp in grouped if grp] + return grouped + + @staticmethod + def _clean_word(text: str) -> str: + text = text.strip() + text = re.sub(r"\s+", " ", text) + return text or "..." 
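A minimal standalone sketch of the buffering rule used by CaptionBuilder._group_words above. It assumes caption_max_words=5 and uses a made-up six-word list purely for illustration, and it does not reproduce the later rebalancing pass that enforces caption_min_words. The point it shows: a lone trailing word is merged into the previous group instead of becoming a one-word caption, so a group may end up slightly larger than max_words.

# Illustrative sketch only; the word list and max_words value are assumptions.
words = ["ola", "pessoal", "hoje", "vamos", "falar", "disso"]
max_words = 5
groups, buffer = [], []
for w in words:
    buffer.append(w)
    if len(buffer) == max_words:
        groups.append(buffer)
        buffer = []
if buffer:
    if len(buffer) == 1 and groups:
        groups[-1].extend(buffer)  # merge the lone trailing word into the previous group
    else:
        groups.append(buffer)
print(groups)  # [['ola', 'pessoal', 'hoje', 'vamos', 'falar', 'disso']]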
+ + +class VideoRenderer: + def __init__(self, settings: Settings) -> None: + self.settings = settings + self.captions = CaptionBuilder(settings) + + def render( + self, + workspace_path: str, + highlight_windows: Sequence, + transcription: TranscriptionResult, + titles: Sequence[str], + output_dir, + ) -> List[Tuple[str, float, float, str, str, int]]: + results: List[Tuple[str, float, float, str, str, int]] = [] + + with VideoFileClip(workspace_path) as base_clip: + video_duration = base_clip.duration or 0 + for index, window in enumerate(highlight_windows, start=1): + start = clamp_time(window.start) + end = clamp_time(window.end) + start = min(start, video_duration) + end = min(end, video_duration) + if end <= start: + logger.info("Janela ignorada por intervalo invalido: %s", window) + continue + + subclip = base_clip.subclipped(start, end) + try: + rendered_path = self._render_single_clip( + subclip=subclip, + start=start, + end=end, + title=titles[index - 1] if index - 1 < len(titles) else window.summary, + summary=window.summary, + index=index, + transcription=transcription, + output_dir=output_dir, + ) + finally: + subclip.close() + + results.append( + ( + rendered_path, + float(start), + float(end), + titles[index - 1] if index - 1 < len(titles) else window.summary, + window.summary, + index, + ) + ) + + return results + + def _render_single_clip( + self, + subclip: VideoFileClip, + start: float, + end: float, + title: str, + summary: str, + index: int, + transcription: TranscriptionResult, + output_dir, + ) -> str: + duration = end - start + frame_w = self.settings.rendering.frame_width + frame_h = self.settings.rendering.frame_height + top_h = int(frame_h * 0.18) + bottom_h = int(frame_h * 0.20) + video_area_h = frame_h - top_h - bottom_h + + scale_factor = min( + frame_w / subclip.w, + video_area_h / subclip.h, + ) + resized_clip = subclip.resized(scale_factor) + video_y = top_h + (video_area_h - resized_clip.h) // 2 + + video_clip = resized_clip.with_position( + ((frame_w - resized_clip.w) // 2, video_y) + ) + + background = ColorClip(size=(frame_w, frame_h), color=(0, 0, 0)).with_duration(duration) + top_panel = ( + ColorClip(size=(frame_w, top_h), color=(12, 12, 12)) + .with_duration(duration) + .with_opacity(0.85) + ) + bottom_panel = ( + ColorClip(size=(frame_w, bottom_h), color=(12, 12, 12)) + .with_position((0, frame_h - bottom_h)) + .with_duration(duration) + .with_opacity(0.85) + ) + + title_text = title or summary + wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160) + title_clip = ( + TextClip( + text=wrapped_title, + font=str(self.settings.rendering.font_path), + font_size=self.settings.rendering.title_font_size, + color=self.settings.rendering.base_color, + method="caption", + size=(frame_w - 160, top_h - 40), + ) + .with_duration(duration) + ) + title_clip = title_clip.with_position( + ((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2) + ) + + words = self._collect_words(transcription, start, end) + caption_sets = self.captions.build(words, clip_start=start) + + caption_clips = [] + caption_resources: List[ImageClip] = [] + caption_y = frame_h - bottom_h + (bottom_h - self.captions.canvas_height) // 2 + for clip_set in caption_sets: + base_positioned = clip_set.base.with_position(("center", caption_y)) + caption_clips.append(base_positioned) + caption_resources.append(clip_set.base) + for highlight in clip_set.highlights: + positioned = highlight.with_position(("center", caption_y)) + caption_clips.append(positioned) + 
caption_resources.append(highlight) + + if not caption_clips: + fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160) + caption_clips.append( + TextClip( + text=fallback_text, + font=str(self.settings.rendering.font_path), + font_size=self.settings.rendering.subtitle_font_size, + color=self.settings.rendering.base_color, + method="caption", + size=(frame_w - 160, bottom_h - 40), + ) + .with_duration(duration) + .with_position(("center", caption_y)) + ) + + composite = CompositeVideoClip( + [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips], + size=(frame_w, frame_h), + ) + + output_path = output_dir / f"clip_{index:02d}.mp4" + composite.write_videofile( + str(output_path), + codec=self.settings.rendering.video_codec, + audio_codec=self.settings.rendering.audio_codec, + fps=self.settings.rendering.fps, + bitrate=self.settings.rendering.bitrate, + ffmpeg_params=[ + "-preset", + self.settings.rendering.preset, + "-pix_fmt", + "yuv420p", + ], + temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"), + remove_temp=True, + threads=4, + ) + + composite.close() + resized_clip.close() + video_clip.close() + title_clip.close() + background.close() + top_panel.close() + bottom_panel.close() + for clip in caption_clips: + clip.close() + for clip in caption_resources: + clip.close() + + return str(output_path) + + def _collect_words( + self, transcription: TranscriptionResult, start: float, end: float + ) -> List[WordTiming]: + collected: List[WordTiming] = [] + for segment in transcription.segments: + if segment.end < start or segment.start > end: + continue + + if segment.words: + for word in segment.words: + if word.end < start or word.start > end: + continue + collected.append( + WordTiming( + start=max(start, word.start), + end=min(end, word.end), + word=word.word, + ) + ) + else: + collected.extend(self._fallback_words(segment.text, segment.start, segment.end, start, end)) + + collected.sort(key=lambda w: w.start) + return collected + + def _fallback_words( + self, + text: str, + segment_start: float, + segment_end: float, + window_start: float, + window_end: float, + ) -> Iterable[WordTiming]: + words = [w for w in re.split(r"\s+", text.strip()) if w] + if not words: + return [] + + seg_start = max(segment_start, window_start) + seg_end = min(segment_end, window_end) + duration = max(0.01, seg_end - seg_start) + step = duration / len(words) + + timings: List[WordTiming] = [] + for idx, word in enumerate(words): + w_start = seg_start + idx * step + w_end = min(seg_end, w_start + step) + timings.append(WordTiming(start=w_start, end=w_end, word=word)) + return timings + + @staticmethod + def _wrap_text(text: str, max_width: int) -> str: + text = text.strip() + if not text: + return "" + + words = text.split() + lines: List[str] = [] + current: List[str] = [] + for word in words: + current.append(word) + if len(" ".join(current)) > max_width // 18: + lines.append(" ".join(current[:-1])) + current = [current[-1]] + if current: + lines.append(" ".join(current)) + return "\n".join(lines) diff --git a/video_render/transcription.py b/video_render/transcription.py new file mode 100644 index 0000000..bf5d695 --- /dev/null +++ b/video_render/transcription.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional + +from faster_whisper import WhisperModel + +from .config import Settings + +logger = logging.getLogger(__name__) + + 
+@dataclass(frozen=True) +class WordTiming: + start: float + end: float + word: str + + +@dataclass(frozen=True) +class TranscriptSegment: + id: int + start: float + end: float + text: str + words: List[WordTiming] + + +@dataclass(frozen=True) +class TranscriptionResult: + segments: List[TranscriptSegment] + full_text: str + + +class TranscriptionService: + def __init__(self, settings: Settings) -> None: + self.settings = settings + self._model: Optional[WhisperModel] = None + + def _load_model(self) -> WhisperModel: + if self._model is None: + logger.info( + "Carregando modelo Faster-Whisper '%s' (device=%s, compute_type=%s)", + self.settings.whisper.model_size, + self.settings.whisper.device or "auto", + self.settings.whisper.compute_type or "default", + ) + self._model = WhisperModel( + self.settings.whisper.model_size, + device=self.settings.whisper.device or "auto", + compute_type=self.settings.whisper.compute_type or "default", + download_root=str(self.settings.whisper.download_root), + ) + return self._model + + def transcribe(self, audio_path: Path) -> TranscriptionResult: + model = self._load_model() + segments, _ = model.transcribe( + str(audio_path), + beam_size=5, + word_timestamps=True, + ) + + parsed_segments: List[TranscriptSegment] = [] + full_text_parts: List[str] = [] + + for idx, segment in enumerate(segments): + words = [ + WordTiming(start=w.start, end=w.end, word=w.word.strip()) + for w in segment.words or [] + if w.word.strip() + ] + text = segment.text.strip() + full_text_parts.append(text) + parsed_segments.append( + TranscriptSegment( + id=idx, + start=segment.start, + end=segment.end, + text=text, + words=words, + ) + ) + + return TranscriptionResult( + segments=parsed_segments, + full_text=" ".join(full_text_parts).strip(), + ) + + @staticmethod + def persist(result: TranscriptionResult, destination: Path) -> None: + json_path = destination / "transcription.json" + text_path = destination / "transcription.txt" + + payload = { + "segments": [ + { + "id": segment.id, + "start": segment.start, + "end": segment.end, + "text": segment.text, + "words": [ + {"start": word.start, "end": word.end, "text": word.word} + for word in segment.words + ], + } + for segment in result.segments + ], + "full_text": result.full_text, + } + + with json_path.open("w", encoding="utf-8") as fp: + json.dump(payload, fp, ensure_ascii=False, indent=2) + + with text_path.open("w", encoding="utf-8") as fp: + fp.write(result.full_text) + + logger.info("Transcrição salva em %s", destination) + diff --git a/video_render/utils.py b/video_render/utils.py new file mode 100644 index 0000000..8d8a4fd --- /dev/null +++ b/video_render/utils.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import re +import unicodedata +from pathlib import Path +from typing import Iterable + + +def sanitize_filename(name: str) -> str: + normalized = unicodedata.normalize("NFKD", name) + ascii_text = normalized.encode("ASCII", "ignore").decode() + ascii_text = ascii_text.lower() + ascii_text = ascii_text.replace(" ", "_") + ascii_text = re.sub(r"[^a-z0-9_\-\.]", "", ascii_text) + ascii_text = re.sub(r"_+", "_", ascii_text) + return ascii_text.strip("_") or "video" + + +def ensure_workspace(root: Path, folder_name: str) -> Path: + workspace = root / folder_name + workspace.mkdir(parents=True, exist_ok=True) + return workspace + + +def remove_paths(paths: Iterable[Path]) -> None: + for path in paths: + if not path.exists(): + continue + if path.is_file() or path.is_symlink(): + path.unlink(missing_ok=True) + 
else: + for child in sorted(path.rglob("*"), reverse=True): + if child.is_file() or child.is_symlink(): + child.unlink(missing_ok=True) + elif child.is_dir(): + child.rmdir() + path.rmdir() + From c641fd6331e8df3bc5201016677e5eed1a8875f3 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Wed, 22 Oct 2025 12:02:38 -0300 Subject: [PATCH 05/15] Ajusta docker --- docker-compose.yml | 23 ++++++++++++-------- dockerfile | 40 +++++++++++++++++------------------ requirements.txt | 8 +++---- video_render/llm.py | 4 ++-- video_render/media.py | 6 +++--- video_render/messaging.py | 2 +- video_render/pipeline.py | 12 +++++------ video_render/rendering.py | 4 ++-- video_render/transcription.py | 2 +- 9 files changed, 52 insertions(+), 49 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b74bd0d..3f4fbc5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,27 +4,32 @@ # OPENROUTER_MODEL="openai/gpt-oss-20b:free" services: - video-render-new: + video-render: restart: unless-stopped build: . - container_name: video-render-new + container_name: video-render environment: # RabbitMQ credentials - - RABBITMQ_PASS=${RABBITMQ_PASS} - - GEMINI_API_KEY=${GEMINI_API_KEY} + # - RABBITMQ_PASS=${RABBITMQ_PASS} + - RABBITMQ_PASS="L@l321321321" + # - GEMINI_API_KEY=${GEMINI_API_KEY} + - GEMINI_API_KEY="AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw" - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro} - - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + - OPENROUTER_API_KEY="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" - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free} - FASTER_WHISPER_MODEL_SIZE=${FASTER_WHISPER_MODEL_SIZE:-small} - ports: - - "5000:5000" + # ports: + # - "5000:5000" volumes: # Mount host directories into the container so that videos can be # provided and outputs collected. These paths can be customised when # deploying the stack. The defaults assume /root/videos and # /root/outputs on the host. - - "/root/videos:/app/videos" - - "/root/outputs:/app/outputs" + # - "/root/videos:/app/videos" + # - "/root/outputs:/app/outputs" + - "./videos:/app/videos" + - "./outputs:/app/outputs" command: "python -u main.py" # runtime: nvidia diff --git a/dockerfile b/dockerfile index 048fdd3..6ee410f 100644 --- a/dockerfile +++ b/dockerfile @@ -1,49 +1,47 @@ FROM python:3.11-slim -# Create and set the working directory WORKDIR /app -# Prevent some interactive prompts during package installation ENV DEBIAN_FRONTEND=noninteractive -# Install ffmpeg and other system dependencies. The list largely mirrors -# the original project but omits PostgreSQL development headers which are -# unused here. We include libgl1 and libglib2.0-0 so that MoviePy -# (through its dependencies) can find OpenGL and GLib when using the -# Pillow and numpy backends. 
RUN apt-get update && \ apt-get install -y --no-install-recommends \ ffmpeg \ + pkg-config \ + libavcodec-dev \ + libavdevice-dev \ + libavfilter-dev \ + libavformat-dev \ + libavutil-dev \ + libswresample-dev \ + libswscale-dev \ + gcc \ + g++ \ libgl1 \ libglib2.0-0 \ - build-essential \ - xvfb \ - xdg-utils \ - wget \ - unzip \ - ffmpeg \ libgomp1 \ libpq-dev \ - vim \ libmagick++-dev \ imagemagick \ fonts-liberation \ sox \ bc \ - gsfonts && \ + gsfonts \ + xvfb \ + xdg-utils \ + wget \ + unzip \ + vim && \ rm -rf /var/lib/apt/lists/* -# Copy dependency specification and install Python dependencies COPY requirements.txt ./ + +RUN pip install --no-cache-dir --upgrade pip setuptools wheel + RUN pip install --no-cache-dir -r requirements.txt -# Copy the rest of the application code COPY . . -# Declare volumes for videos and outputs. These paths correspond to the -# mount points defined in the docker-compose file. Using VOLUME here -# documents the intended persistent storage locations. VOLUME ["/app/videos", "/app/outputs"] -# The default command starts the consumer loop CMD ["python", "-u", "main.py"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f329669..1593182 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ moviepy==2.2.0 -pillow==10.3.0 +pillow==9.5.0 numpy>=1.26.0 -requests>=2.31.0 -pika>=1.3.2 -faster-whisper==1.0.0 +requests +pika +faster-whisper==1.2.0 \ No newline at end of file diff --git a/video_render/llm.py b/video_render/llm.py index 8a7b143..c0742bc 100644 --- a/video_render/llm.py +++ b/video_render/llm.py @@ -7,8 +7,8 @@ from typing import Dict, List import requests -from .config import BASE_DIR, Settings -from .transcription import TranscriptionResult +from video_render.config import BASE_DIR, Settings +from video_render.transcription import TranscriptionResult logger = logging.getLogger(__name__) diff --git a/video_render/media.py b/video_render/media.py index 360b231..7fb878e 100644 --- a/video_render/media.py +++ b/video_render/media.py @@ -5,9 +5,9 @@ import shutil from dataclasses import dataclass from pathlib import Path -from .config import Settings -from .ffmpeg import extract_audio_to_wav -from .utils import ensure_workspace, remove_paths, sanitize_filename +from video_render.config import Settings +from video_render.ffmpeg import extract_audio_to_wav +from video_render.utils import ensure_workspace, remove_paths, sanitize_filename logger = logging.getLogger(__name__) diff --git a/video_render/messaging.py b/video_render/messaging.py index 28470f5..08ead1d 100644 --- a/video_render/messaging.py +++ b/video_render/messaging.py @@ -6,7 +6,7 @@ from typing import Any, Callable, Dict import pika -from .config import Settings +from video_render.config import Settings logger = logging.getLogger(__name__) diff --git a/video_render/pipeline.py b/video_render/pipeline.py index 0d4b7cd..0b33843 100644 --- a/video_render/pipeline.py +++ b/video_render/pipeline.py @@ -5,12 +5,12 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional -from .config import Settings -from .llm import GeminiHighlighter, OpenRouterCopywriter -from .media import MediaPreparer, VideoWorkspace -from .transcription import TranscriptionResult, TranscriptionService -from .utils import remove_paths, sanitize_filename -from .rendering import VideoRenderer +from video_render.config import Settings +from video_render.llm import GeminiHighlighter, OpenRouterCopywriter +from 
video_render.media import MediaPreparer, VideoWorkspace +from video_render.transcription import TranscriptionResult, TranscriptionService +from video_render.utils import remove_paths, sanitize_filename +from video_render.rendering import VideoRenderer logger = logging.getLogger(__name__) diff --git a/video_render/rendering.py b/video_render/rendering.py index b59abb7..efd45e0 100644 --- a/video_render/rendering.py +++ b/video_render/rendering.py @@ -16,8 +16,8 @@ from moviepy.editor import ( ) from PIL import Image, ImageColor, ImageDraw, ImageFont -from .config import Settings -from .transcription import TranscriptionResult, WordTiming +from video_render.config import Settings +from video_render.transcription import TranscriptionResult, WordTiming logger = logging.getLogger(__name__) diff --git a/video_render/transcription.py b/video_render/transcription.py index bf5d695..b5d86db 100644 --- a/video_render/transcription.py +++ b/video_render/transcription.py @@ -8,7 +8,7 @@ from typing import List, Optional from faster_whisper import WhisperModel -from .config import Settings +from video_render.config import Settings logger = logging.getLogger(__name__) From b9e1dcd1e2feb87ae900a6e2ea483ee3486130c6 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Wed, 22 Oct 2025 13:14:56 -0300 Subject: [PATCH 06/15] Ajustes no dockerfile --- .gitignore | 31 +++++++++++++++++++++++++++++++ dockerfile | 35 ++++++++++++++++++----------------- 2 files changed, 49 insertions(+), 17 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b53c1fd --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Ignore Python files +*.pyc +*.pyo +*.pyd +__pycache__/ +*.egg-info/ +.eggs/ +dist/ +build/ +doc/ + +# Ignore virtual envs +venv/ +env/ + +# Ignore editor files +.idea/ +*.swp +*.swo + +# Ignore project files +*.tmproj +*.sublime-project +*.sublime-workspace + +# Ignore git itself +.git + +# Ignore mypy and pylint cache +.mypy_cache/ +.pylint.d/ diff --git a/dockerfile b/dockerfile index 6ee410f..ec261de 100644 --- a/dockerfile +++ b/dockerfile @@ -2,12 +2,15 @@ FROM python:3.11-slim WORKDIR /app -ENV DEBIAN_FRONTEND=noninteractive +# Set environment variables +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 +# Install system dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ ffmpeg \ - pkg-config \ libavcodec-dev \ libavdevice-dev \ libavfilter-dev \ @@ -15,33 +18,31 @@ RUN apt-get update && \ libavutil-dev \ libswresample-dev \ libswscale-dev \ - gcc \ - g++ \ libgl1 \ libglib2.0-0 \ libgomp1 \ - libpq-dev \ libmagick++-dev \ imagemagick \ fonts-liberation \ - sox \ - bc \ - gsfonts \ - xvfb \ - xdg-utils \ wget \ - unzip \ - vim && \ - rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* -COPY requirements.txt ./ +# Copy requirements first to leverage Docker cache +COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip setuptools wheel - -RUN pip install --no-cache-dir -r requirements.txt +# Install Python dependencies +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir setuptools wheel && \ + pip install --no-cache-dir -r requirements.txt +# Copy the rest of the application COPY . . 
+# Create necessary directories +RUN mkdir -p /app/videos /app/outputs + +# Set volumes VOLUME ["/app/videos", "/app/outputs"] +# Set the command to run your application CMD ["python", "-u", "main.py"] \ No newline at end of file From ba768cf093a73230893a60547a212f841bedbf9a Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Sat, 25 Oct 2025 00:54:30 -0300 Subject: [PATCH 07/15] Ajusta demais partes do projeto --- Montserrat.ttf | Bin 0 -> 29016 bytes docker-compose.yml | 20 +++++--------------- dockerfile | 8 -------- video_render/config.py | 4 ++-- video_render/llm.py | 7 ++++--- video_render/messaging.py | 2 ++ video_render/pipeline.py | 1 + video_render/rendering.py | 28 ++++++++++++++++------------ 8 files changed, 30 insertions(+), 40 deletions(-) create mode 100644 Montserrat.ttf diff --git a/Montserrat.ttf b/Montserrat.ttf new file mode 100644 index 0000000000000000000000000000000000000000..5b4b5afe6ee4b560b65b2f2040ad38f6c094b347 GIT binary patch literal 29016 zcmdUYcU+Xm_V>&z?4lsjK|okox=34klcETM3Wz935d;NAKtL=prWs?4n&?gSUQKeZ z>4`DT#Pn!lye6g?W6Cuq8Z~Ov*c;Ej-!soHQq1pu-rwi_=LH{jpWT@=r_Y%==NTDi zj5*?u6>}IlA#XJ9a5JY6uQoY3C1aoWjOA=M~@tp!(*8i_qn*&k1iZPA#`l} zSll--=KRU%2@|uL9_Uzu{!#S(_z96Q|9ZDF10Wy4{nTl*Yg#}3;=L1$Xw>m+mca&H7b*^#UF^D^ zpyNlli0ydMGr^tc9v?$`@)h^=84qS-cqbbxerChOEEcG7V&UQq7R`>cvAh6d7cf7* zg{6r^7R86KEdCSA|gjmFq0Mp0= z(Ibos`uS=Yn=Ia88Y>TGC!Sz~MXTwGc#1{xY505~^Tul^ zU%}kO*DP0)$x}f`sMy4cLaalQ-E|Lr+|49bsizk}? zBpKlKDqb}jH|A;KKgpoy1-hc-J!C*~uv}6GF9GIC@QwI=i%YW&pJ%H4CmC2S@cuYt z06CZr;kqJ9pkEgLlMH%Z&{26$GU$1c3`oZGx)icVG+h9!Qv_E`VcDc#xR)}Pa$bke zvzQOhU~WV!jb|0c;&4R>(luO=d8qj{Oe_R%A%D^{lC^jew(%s}ECN`a@L_d4kG1jz ztdM;NozSsrw2c)fn2+#e#o}95%nmRE`(C|x8{{}gy@V|b5Y{Y+zrZGnw^^1Ibdp!H zL|j?oHj^T*g8#=^ie?3Lq7l4zV?G)`Rw3Sp9TG1-XC9h5=)@8>NW8|%#BD4?%mBSJ zSbx1-B`egk`{R@8nenI}iU+X{E-^t(8-^YKLf3E*j z|N6k)*M-Rhm;_bNo?tKWU_dqU1fIc*c`2XDZwJ&*!~sBc)_dr^^?C_4sSi}g9#HE5 zl>@5AblLQq>0{GJrpHa|P3z#7SDF@^Za2*{RhaS=H$~eOaOmqpdk=ka=<`Fn4}Ey( ztwUQ5<^8W(ZVa?2h1iJ z^!(xQ_UeyS&yR2Mo+QN}A1!7V8#aIqWVXx>Jhf+oG21#aC*};|@%mp#b$YL>f#j^y=u1PGJrLa_% z#?sjc=tU;WV%cmY8^v^i%~jxZCO%@$$oXV}~9E>_1{SQq;d?7YgF+3oBszPX#-%N}N5vai^0>?k|I zPO@W|8{gtq>_dEefn8+h*m-V^8U8d|z|(TgC2S_p=pj4O`0|V5`~R*h6T& zo^4=%XOFX;>?igso5K#WpV=X{kR4{fu>UZ&jhpV>Vs$?n_WJRL9HxRBhj<8&vHBV~Z(CoGnR^zN1tTtKgusULO z!`jI@-g=yMh4p;v_16Eie$RTp^(C#FHdouE{fBmoc8~U;_J+<=7o{7ctI*BVEz+&l zJ*Im`w_kV7#=|Dnrp{)G%^NoRZB7qx8W1)he!%nrO9%XOz^(x&23ieF9#}qb;lNb` zpBebkz+VSmvkkJ1wVi5vx9vY{zp%Yv7h+dvS7*1??#n?ogJK7b8&oxD!=UE|?HY8% z-o`%MKH0w9exdy;`{(T6wm)EhZm{>@;=yYN?;3o{!QLU>p}}E;!ybooj^U04j!llM z9p7>M((#Dn4JW-*j#G!z2B#fPXPtwbE1Wx>-*EoHS#b$+DR-Ig@|eqh*MY9-uAQ#i zTyMB#xvg^h!0oKNk9(o}YWFAI-*ErJ{hUXbN0rACk7qo-93qCq4=Eq=(2)0r9PzaG z9O>EQxxsUv=M66(uMDqoUUz#v=e5V{hPR7%xOa|smG>g=C%tzMwHq2gbn4JMhrTrQ zYoCEW1wO4l5BhxMv(M+K&kbLD-*Dd?-&)^J-%Y+R_-^z4!uN>p6?m@B}Am~U8bc+#-Xa07l|jOhyd zS?qz04?^T^bk!I%PC>EpF>Wr}ajSd^eL}*+)562Wp1YKXd1Kd$bdwg2eq;Gg9*P)H zLvg2boYB>YYwWm;aeU_`{6NnHY@LbM0ox1SfN_J)s59sc3GzxXCKzxTb@IaJMzOJJ z=xuKEyj#52dAEAcaH|{I=GHQ_W$3!0t-h^i*ZHjDuWfGLgrDZk&ClVdc{9e)ut3u` zk%pMg8@>iJj&)p6aDtnATm+AePXI=4?z*60SECPi(dt~&c`R;()&&i?*YeqUC1rCO z7t~}l*Jfso)wDM)njV!Ho;uqYl9-!kUp*<${v)r+FE(4p7;UmEB5mLCs(88CI(J({ zPN>ss{P*G6!A>udIa#xCL=VS6rw-oH8Ppo^sf*U1*l5iS+$uK6A7jQ?KGT}-dHJNf zN+;dLKT_CTf+=tMN9+Bgg{}NhtKPIMsj6I3<=8b!Y=0>=G&D6-xg*_=dh>wd6;zMg!{4>sx&+;#4}sM=Hbx^k-94X88i4x8Ex$u$;OzJ5p(9G zC&w58BZ@sFY(z8Rm=I@(BaJq?8eGr3|9<)V??3cj)jRK0y+^oZnI1;OwFUv;vcgk`d!;4^hGy!A4NDz1t-+bv=CVtCJ+G!YI3do@HJ2A$YHXA+t^mgUWIge! 
zx%p=eOAilE4@e(|S7;Dt-lmlzS~HCLB6czG&+C-&b(&!}roy&TpmTGj&JC6vjRBYZ zg+sjF%|WYwal)3OqAfdDbD!0#m0$UHC)TbxwMHrBV|MOT-qtVgSZ+Cr{z$s`KFh;}1JE64n46moSz; z`s~!E2ae2+DXmHyKY3hmz>}i5vEp4#d*Q5wdDW>&vodVU-vLhoftM>_*#m#Gd<=1Z zaonikMXe42X0*bT;q-8;S4~+M@ZJ` zf*tu$<@@l&+STiu?k^c(<5f~uJn7!l81c@m7d$$5-&<8`;})3OKEf{^D=ooN0WV4H z0!`%G{I$AtZUG6qPl%6k*F|u0H3Xg4R!-(as*M5Bf!0jF&78DcYul@mzRk;J4=2#R%*glb(Zuix)pnqR;6#M=7&HybJ@ zw@$8{o}WKGUwk(6iS&$)@`Ve_J2KLr=rW}=r=~V@kye(PT1MjtX2dvA7TN=2;uGw- zfxMkD#vM%6k!XFm6DcI<=L7$W_PGV4YWtDSt%&p{Y(fZ@i@E7PSTpNDQMZ){v$f<( zYav^)*bMixm=5~p!A5^Z-+_oxds?Z3xqDuC+HkKlSgQn(1(QvVnAYIRt-E1x#Hn+I zmDsG#aDlGY_#Mu_Q#N7wFmCY=Z5wT$YLom(WtT*(aDW78E`)W^Zz{i{*w7^$s;Wpp z9Fgl>VNl_5SL?Vy{0f5-%BKS4p2gp{J!QKQ00j#$UFV;Q-RevdXn-#t!6i3!2Gs_2 z_8OhmRhZ{#9x=yRpXVM`V6<8=aNgEdKW7&|cb!vd#PAUj!@o}p3D)Ng8512DTvlH* zDK*5y#n;`&EkwCEEG-=WRC(U5DZ`$|Fht`tgOC@2+CZkk^oInC8Pn+K0(-@@q1QUV zaueL`xsy7v#q;-`IeeNI9V;Fgoh%Y>E4^pY^oU}osR_Qu50$<9cmHjEhMS>dOu@o2 zd5iNFAL4~)&b-icZ+7}!EB398^2=79%9%1|)4~19{!dc_eFFKOlG58Nr+^Q{+@sKa zo%FOi13AG+Zp(M%EatKFFDL4bJ2q}4ueEVD*DHr0fg1E#gFfbYIFvj30ePAV?YVS3 zIxP$pR1D*elAK|juT#F@4x?ubiHdc$cgh?alcK!-(4Fa-55JW;>Tl19?fygKM;3?J z>ul`Hvhor$`J?&M!{G4_Mm$|146S;Dr^h*df_x{^xtBrzsIjOv`W z?Qu{TQ8yxVLc+|ot3J)Udw;JZ{R{* zg1+LBU8A^<@~ed=Mm)j%M{`t7XD5kTrPtSVjqkyC;1=c-N7a`@E#u=H=kt<9H496# zM`lK3ilUi{apA(!%FOh1PV+A5$15rhfXR&mxIv?H1#F)2kaCO)iIuH*a=c$T|Oh{|INaW!{`GUP;-tRMlNP|-l=qScJhaaSgt4nuWCFUYr3j&vCR2Q#+31~ zLMoh_gRuK@&6+RQOi_02+s704iK*+qS-<`pW%ynmw|B4dZ;OuIEyqKY#NeujaaRmU zo?2?#*$4mj*rLv}b4Hg>oxfacU#N6`4y13-e;`%CcC~=6w`+1`On~UYTG#C;bi{Y4 zJ`}^rj1_-%@CJ#;;zs}2pgia7Nv#>3nG!=Smnc0p_w|tBA^OUST=6f7D)CXZgO?>A zN!wg>?uK9kkc1IyC7P~AvF717AA0)nPHwen#fQ0Lsvla%54<>BIeAiSZ@QYk+;~mO zBLsFi3b^{MpqHq!Nl}G$@YUgI<#^%n3)~Wm+W&XoYJ@qhOrrbUi{h;&W zE%ehe<_E_Ied3#zD94t3_uUfG2kh-|(lskPw86?P(3zS*-{{xKQ#Q=pu;q>mbJy9d zn|tAo9sIH~fM13-{jB(*J>d>~#Di*IKkDSHF}h4XN5^j@C9XuJaKk=0m6c1JX8#-A$chh3RD)!yTySimVk$NsMD} zf3olvztiX35S$>qr!VXjlcz?5RE&K^FyW238Jg#3h*Kxf6fDRngr=F)RTpRQccs8k zu<2PX6J~czd!TbnVNA_!Rr6XGFG!tLIC^~5-^aPkjL#WcJ^g{<1;L@I#CO4p0DBJh z*lCq1?bkn;*y9*4WR&Tz<=;Q@@Mndstyv>0mefvNS|PmGUwUkEp*X61Rj)izGxgE> zdZH@=aE1a$+fl|&CbmSJWgwW{+PHxoN=aAo3uwGErTU-KdxHUnw?$0CMCizH$-HF zmn%8^?efr)X>sEX!-=<1rZCtt$&FS>fCinK@1*2In5SUIFJ zdFHZZ-()FY6)m4wx~xdJC{g#{pFc6BxO`+};`*P~Pa3a$R$Z~Yx)vw`$07`rB7UQY z)R}y}qg*q%;w_Xz`(bOVM@?dROY6eqQVM3oZ^{?ZiOmhl+dN?1Si&(IP`(nEp?6So znq`RnaO3{yx;QL3EG${L+(9p4G@iiDbU53K_EKNmrR!0tvc%9ut>8$k|EK^5*?Wi> z-nADjDe(&z-*m%K@lKR_Cr5R}wvDqPScg#$tlj|^WZToL(-2887vv3O_+J0qgO~?<;CLR<2#8TNfSkDe22;umzwCrh6XcN z@^X{|JfL7SxO0=g-rf9_@ZEaw*Q+|A2Y(6FP4nwDKe$QHobe=CW+Yfzu2a7TI{d9X`5tU>98*| z9M!(ag*j?1=Jm^{R~J8vcCKlq14rR!jzxLg?dXx`KGhoA#~($Y-gm^SIdHpkVAMNh^Ym zy1KagXx!cSp61fz+oR$uCk2Jiw6WUUzO6VcvmhkK(u~y@n5O z98(rQVc5{2o^}Hq99*0NFc#)!*eb>7&;vL^7(SM$PA9zlfAI_1W32tN7c> zxuD^O@p02ixNZ5KwzS9N~nWA5D6v1AGT z+jb`!PzSXWZz9;BNqC_jEY#VZ=2I97CfChvo%g=|(TY13-Sh6N!sX?cl^e?0S6<FolZ*^npKiR}C~SRt^d0vpz4yXvmNJZvMuN^A*LtbYI*Qzj(d9a8>5>WnIf+ zQ_(*h1D_R6GM1+)R?e^)e3SAmPf=2_Ww3Q>z4BVkY;KO7Lrl%w8}p;wxfqHd%ME|r z;57ZyutPcw`zqx4VpF(l5O+z>${HH#zs$qa1OJx!Ys8~T-{;ub=A86&sa$fv&#Thg z|H~znoM@k9n!%n!t{rwy-e%H-M>S91C?orzby71A!I*k{XNZ&V6N1#?bQq5iz<5wG zyest&9h#Np=b&-2jQe1vi|5H4Tf3a^eZ55Gk}v(eE4};c!cOCYx9zZjf>%JnL~0$hkD{~ZkQ)*HxCibLF}8zJ#b*> zY;?A)tq-j9DI7XY?t$AZ=8f0{f6u%Hj(*Xmr9v;(V$SfwOxkG+ zis>11^0LxWvQn}pG|kH!owTsScHN-LTF~x2w`??y!F_!V|O4dnnF0d35>6mqwS5E;Ghg zP|lYTlHb%CNA?oJm#dD=XInXZR2CJZu{{ zCm7>`VbdBT_=&%+2Aw3r5NDYFp4Hn+KfCK)-HtoHyyq>=JMZ59-oAM4WIjQ8pJysB zPS(axUPQ?=;625JoS2OBXm15trNK1?J1YDoK3aK8Nv6MOXGE-)Am^x9w1YMiDZ!XZ 
zu-ok;u$!UoI?|3GzKg>m0A3AzpxFlzg5iO=Q({L&1mw14G{+@>K0RY(M8w?MHO(2d zRkN}`o}HCaGl(~M!m>l6$K}NoIXLD7j;g6GC>p5`ODY-{ zRcNc+2Bw&l)4+*A(RbT;+ZSMc9bz;F;kG zy2IC-(JOb2^er<^8=2r2Dw_ezs2%DMF&b-{3y>Uf=i8>w>=YdjK*u;MMU0by`3`fej7kBU zCT~NWJ;#cslmcLy!W%+968mY=9%G)w9sdnd(cfTND^lST9DySYm{fy)W!MsA=$y}o zRkoIUxFr<31dH&A9XB4=Xm4EOKWl<~l4bwV$gcmymqR|x$s9qCmfOHwZ{GdoEDW*p zNwlUp*|HcwWU52#>vl4|I>6I87+Ea4fYB~FbAyuHk_Ni#hggpcqOBa%NcppO!9(m7 zskxQV^_k$QGJ{tDB5Z;0;#rWDSwA2yT78-0%*S$u56>CCyse<1t$<&N$cu=`tIBWA z&!??8^v!}DowV3dV6f3$hb>Y1%WoqyYMLPbp0yYe`e?wfE1E>1^t{?ncY6}Yzw4*Sg2kaa?Ew6*%<3XPPWB|TItfN-DA!iQs~A&%5hcFVQB+`Iy*2v3I!4QDBBta}{!AbaZ~pxAe(ibZC7Cwz6$V7+a10UVp4sFPMkqK#%R3n>kkti0XtDtfs zY*j84pq3Nf;VtdS>}X@iFKrH!x&>R}JB0IS}#nDK*!iJzgTwpdwE z#DH{_n!E}`?n=h{nB6_cpM8fiSlomJ*0pO$Vez0w9!^;-9+Y_Y{l+&JSY&=!GY{)n zK&5$ucGZns)7LrNOpz?e?H?`yu-g| zGo%iz)VQFYhW6%zpkXp6guQ=RE`@yq1VTPMkaoG;+?|8mV}mucjMTX7+YlF(RF^lg zcHf4y)cCs51(TJv4Qo=2`vShK_{!U}VGW=5_+Gn(8zP>b@z`GHwnw8j{(b$-%vJ;c z#7`zp55~hM$y|`sLGtO)K$M>J&Io-E4a~G;g~$!u__c3ps2k3Gs^<00Dv^J< znOXW8I@dS5Bz&aKS@5|{)wq6eRE@g{>TXrn`ayfDTh~C4%qD zO^FQ*u^SrVmzpyp)23l$RcmeZyd+fSYs|T&-=+Tg|JUr2h7{~CGE5H8v;J8ozLSE- z|NC*^>i%M!Xb9m}n%|DJyyC} zY%j$k3^e1R@c0ad#pnLGMiGlEod+PV%2$kOn`F(sk7kTR{&4El>J4K& zD9bu#%suT>_1^Zb$h02M9GODT&qX3;_5n_M?Ckbh|5M>=&x9#rY5r)BIJthgIel9H zP*zrPgJX~FfpoidC%TiV#e@6?fOa(x`a9ma$fyTK%Ws_x19-TyM%3LEZ|as6GjUlsR*5;nH^{vwNO4n+S7{7ltu z`;5S6OQW?i^XD6L{DN8AvSVX02q%P08KpS{to{q0E$U$t%3Xb#C$RI(*e;pp z?-8T#f)oZJqX~w8i%b#C>qVFp62%e7P_{!jg4p%Fs+rBlsNJgQjcPm7A+$>s2hpx~ zj9%2*I=~#rxpbWp2hIM9<~fc`tfUX}+O+;`GZx30Un`b-b4+un=0J@rqq>cwS^d2jnp zi#}WG-Yt5q!zRbqJ*s*?r)}=n8rrJ>mgJ_q^)pRXQ78!}5SdW7;>`RV@jc zJF0yn|DfjS7#?q>{Ih05B+{s=UaNU$(ld6|&*m~P~D@D+BKi(jbLFGd?^Y|-zn2d6* zPg1{^+rH6{C@34{_+{-7?Tp}=lwOrXwE9pJMZEeS-8Dq5fq|{!Z#QAZ8^?hRa+{W)6kA z%dT`PpdNKs8XpOT^fz3nHCNDIqT5n+<=iV8MK%gVo6E4iycvd4Ug~C5mU5wcpH`-* z%xl<0wN8bgyY{c?a{0Bpq>Jncvi`fQ>9SSvv3LUvuO`nhT(afi2~IU$!XN8w)F5Fu zGQm|xo27Ah(r0$(k3aGeO8)rq$XUERvT@`Ctv`7*Vhf$CURAEm&3&NN=+V%cpWj+s zjOOpSHM%M1&bT*XuMs%WTK+Ij%ordmo#2Yi`-K$Dc2`k91UEs7+r2MXEBz(t5h?ID z()ZpXMj?;WGc0)lVEV-(e4vCChe_!I@TOg_o_qCFAZ^5sR|@d**BrNO3wQnN;m<#R z_{hq-va)*f#`P{fY}e&I5AZ)gmz?vhsD_ksPS|7U8CQ2dkm0U+04wv%tQJgKv73=@ zW?Nv<{qr|%n!lrG2DeMEYYs<$ULkB`l zT3PF+p=>bi;5qlIcnd|xpSz3Ru6_Km_*c)mHw&GMC7syg{|h>iXHB?8;kmzRzkM*{ zPnvBw4Dgo14-6t7+q)7k*dRgnD9Ece!|G9&_o5jN)w1Vm7OT-t|7vz~4NC-1Fas4? 
z*#~B?n6vqg{F&`ZBQp-nS>2d@FACacY0ZwSHf&-@Da%5!?&BOGhEz*^%L~N%IwJ#p!>!I zJdGtQZw7Nb+KdMfr#_Yv6Eu35b@V8^QLeV_>tmC>b0e+Y*4V8vHcyW$35fOQRh1iL z(Q!oJ#!BU3n&&vq^O!6Ia6~<#d40h3W`fLXLC)&L75)MEL2bn^+N$&KB@8#~!Cd;l4>L*b@u@fs;_0=E1UJrh zj~Nu@pA-u4Dn70D5Z0OFn!8OgjGlD zqgJ7wEncC*A!xWwi7TI*Q&y)G@-5Sp%Oi_8uL_BEGQc(fvzYygYK}mREuZf#=AFN0 zXUoqIi+g%}E}{=-W#MxJzsg<|ACiom)tUkDWT|&Re4tZGnwaI3X7Eq&oCwge?FF=b zN48b>((#~io|n1p7>5x(+u}Ke7jgas;8Il^ijI1;wRTKbn+h7A=g9G~%DssJJ-CS- zp&q!j^~I^Q2W|xC0v{!DBRJh7_8Gs=&})HXp{3M9 z6UXzC>HN5O7rp}ZMBNorj$b_Y+{`UoJ`!W5@7+7S>kXBsL1=pjZ84gxsWgYqmR>x5 z)~uP$&6_vRe2yX% zG8(X~ar?fi&w6i`7N`DIpZgA%5tRBD2 z`@Dgw|90L0zuxokfhKv>fIVXV?XReZ4_xhW_`v-=-h($(-{&RklFZe8mbyM!(5LdA zYL^1lHVLwF>GkenCDM130;h8a;waeC08}lDJLJI#a@Db&_#exx#2(a51w-TFjPl_D zJi&z>U=+B)c>P=rl&@1CBcaMuRJq1B_&qagW98`Z@g0@!rtqEQ&7Ac!~`G^tt`>0~#9F zRXW47`hT-->ZB-qovvHzDYCJ zub(;TKDXwU862#jXIHL>;m{FJs)6LF*s~XMdzVUEQNRz+HkhNfS3_<)2G0@)@%^C@ z-lRy##+O7@Yt>(9=n7uMEM)(P1kFJKS#1<6_`VJ zYFxlWW=#)$;u1R-DQgyq#(kQ3Heo?$c*zm2SJM;v0lq`|lS|wq6nCiqX%-=RrZy+>0 z^Xc*C%C0%ex62;k3DcCZ8v7e9{2Q|0(X3cAl)r_wh_9?Qqv}oCItZrc+TrGir|I09 zScxk0AZv=hF;lsT&VngbW78w`Mx(cJW!02|l(ML(GUZkOuwenI`IC5Yz_2iX=4j$3|Hnt9for3BEJw(?)J8v(~pe4%o^<}|D zj+J(v-ov)?)V1Y7V=*lYoR{>Tm?Em6)mUZ7Y`CnLR_m~+UNr%wkI*D4qwevcm{ODx zl$7ch?rA->q}gME|fuz3!x3aKaPTxbKiJcE))yaefHj zsZ1O;LFdF|yccnb`{dWO-=L%_-C8;;8_cLJ|EiR9H%~^ zPj#}{>Qnl7w4MrOEzjzYrsvlR&?mss$MO`u27N?;`AK})$MQIS*D~|7TT^ffTO{8{ z`jr6nox=C|@mH0xpab=ZVCDquQ#fP&hrIW#o)-2HG~Qs@v*`O&TGwNi zT?>*Gjgc*%Z^Ih%uSVzx`4%vbl8!Ov!GmBm>Q4(hWPZ=tKtA34UN9ZM%lux${P|w< zdn=qZ=V5-2Cnm&b^LtcU;T$*>rVi)Toi@L>VU8M#eCTHY-p@C`AH>#LyU+*yaN36YUNCz+s-(Wxu#u?WRo`2&0DjK=-kNy|IxmIbXmQ5fMDu$c ztHQa1>gP7hU*ltbKLGDb%onA#x3E^6z17AV*$kZS)xq?T!!%fn z9w(PY;ry~_oDreEi@^?^9_N$I#o1tutR8J^a6b`uH8}CB1*e6zvsr*e5N7sj6(R3> zx5~hHv(d8={Ub193+_VY2=nk+2XN6N@~j8UdSKUyzjgQ|2kn|AmgCTOw)|d?*>bG; zbG3CFzU!blG`j^Sd^XN#=+K8u3)RO&MMn?EpO|p{n7K`j^&K_(iA^=lEuHPN7U)A} zsvkwnwS1H@V|GnrQ^d5E*`eY3d5s+n`r`Wb`nJycI(<${bBBH$j!13S2am;jeB4&k zfdOxY2fNvVNs?xm+0lmE`WYAxZ+f$}KU0%I{H=h~=c;&+RyI@BO;ob}=FrJRKHA(0 z$nd^s-Rw5J8^cm5h;|5vL_niPKtV`FSZRG*dt*zpJ|-e63b+63A#T-Cj{(~;9O+sI zG@%B1P%ky74cev$GSl(5H~Vg-mB|0!q)Ok|uCLK|wAIwr&#q~krEi(uUn9`rzeBaK z{H7xPZ+n$M<5fy;>i7gnT?f_;z*$%axfYSsE z@H!qAPHlTyPeCsTWY>Mg8x8T&rDr!W>piXon;KZBYR7aMVUryDrVW$aa&XRJ5njnE zNny#)bO5Vl?14mrhH1dN5%(QrF zYMfTz+zykR+gw-QrtfH|*H6eR(2sAeZ&n)@s13sP7A1|2h>p-pVCKG389HI=HMLFk zs-f5Db25wcH66+NhK`QbE|9>fWlPvca~^*9mpE%pKX&}!Vof&!6U zY}HU()27YqXl$yh*EY%*(Kyp~%(TX~X>(^!Z>pbfJM-pyn_1K9>l&MyYV2msXsfSp z#&lQLIL)f5uBAgeu4dZYj(Tmg`cf=k?20i1&aSDM21DzxYwK~NEuL1>*3ztPlP^}I z=C-wve$olDw426Bh4;*UMQ-)%J`?2$!FEK(PwHq>ZSIvVchAoY*~hLO%HNAUdtPB+UF*I0?$G}BwiCA zVU2VSYdS>}rODDv)hyL)uo`GJ*=i|H`MhQwWnG1nJx^;(v~O##=(M_2U8jwMO}WjH z0p$ZW;FQkRfnV6#+0L}xV!PWe+;)rIYP(15cH12vG-l9_LA&j}?Q89q*?%yLWd6=JsqPQ(;W*PKXE+dc+~NVlaEt{Q?t_+r;nU| z0zG!7!>Ci!;*7HsIQ#Ado@YJ7(oM%;1B2Lcd}qsSO@A-fn~FYL(SH}(ab}M@JB+#yqn?tZPC?9Dg%!#& z(-*Kx8`yvzr_2sUpEJ;I7xeQ-KM(XnCJJM?V_Xkh-nfD=ek5p1#6D`0=~K{s6mPJgCBV$H8rDv^a(qXVKykS{%ivr;%B( z1J4FSZ=67rE530_pd3Ajq|o_dty+JZIlR(smhmTr*>SU2=f<>j>;G1N)O0=Q6Org5Fne(VKWlbQ5>3$+4=@maKLw z@Noh!e*=ep0WU8A!exn_I_`JqMZV_*^yw%tJq#?5LcW(kyFYx2Ga&p7x*g!*&V$o- zre~p1&j8~4(54g6q~m~m3Ov|m)}$Sf_b%|^J;?eUjC39{Azu6N zV0r^1T!$t-3r%_!n)Dn-u?J*lXp0MUJ_vdm2@WLT{-!n^!wA2_S6wjM<}uK9oe@vR z;j-|Rcv^`z)!cXtYZhc`!yfjZr?tC|_cfA27-LuWR zO-hHfg0$`&KBGN(^5bcMdmXwEiN8tsJO#9-qfMsdH0gmw!heDX*T4gk$tl426T0OB zoJfOG&?+6@=Hq^Xd9*S>AZe<;?vm8aDvVH#&w?;7or4TM1^%A^%4hIBhcGvNhMmse z0P!>6a~^Ub{NDuBlYKa|7c_kUnzjL_OBiJjMtKKvc^{*^3)#GbQMO6>EHmwfC7g#g 
zc)$`)nP(dEv-+(@BgS54u+zgpwtaG5@EO0*!yh$DofKIViIk;m74Bc=Sgn2m@-zFgf zNygaf4EYPJlyr*l`yEhD1Ha!9Ejr6t*c~3_Eb2(FgP-`nE})4tcO?O0GN?#_oKxXj z)mh>zc$mxZ5(nW&eh0oc0F`Y08lYYW)N9gT9Rs}MxGtfMBcPv$G$}f|B=tSXv;~l_ z0kS1>-3N%L0Fff68=(9mD8C_jN8bAq_^mbLcLh)gx9bwZ2te@w{NJG^XQ0tn(drtg zxQuaVetQ+I$V+_-3Vs3w6eWKO3eI7?pE2GsGX*y!1(g_ArQibm`e{&b8e^Zr2u|>@ z&XB(gUVVwn(t@wy|7dVV)D(gLr6C9!^D>=)DUUMP^4p;~8N8KCr(I?5_Z8@`#rK^B7=W2aKcWdkWZ5 z9HvUHKW7%ehZ1+rV&=Q@e<5FrI{#yq?{&!cs#(4i38?eYG2nL`*Cptu4|MM$M)?h+ zP~3VFb1Qk5ix`pS{&SE8>Ej2W=CWB9KR_0TK-CV&;uJ>v5u+W3EGYJ-2$5nBimU#> zi0aHyi_w*=iW50v(e#LlKrR>%D zktB2p+De}L8*uc9Dh-TJ8hZ**h>Gh{qg0)H%S^$q(57R6NOJiG*dGPyy2d*Uo*y;Sd>lO~Cb))PG^3HPqKNG(Xg&m*&w%FB&^p4w84_^8 zI6VM) zKxZY^TLFdQmrIbAjl`Csr199%E&&vZL@Ac0$a5DyBbzkhoe_AFpQgDo30E4fbUG*45+SI>lpWzEbB4V>!~Xj(o@xU|7qp~t*p)iB4Kda ztV6VZquGn%cCziO(s$Dug6!KGl5xT=>;QN&FJSBd4;7BMaWHZ&0l@weD&eef=Zfzg z@VyqF_#@V!^&YKW2jkTRue5qqy)i{Ov}Qj5oX7(l1Wt#|RDKVfPMfWQ*6-wRXg(pF z$ew6rMe&ttSIhAEP0#!%#-{&!N5$6@_zVR;PQYiF^bHOY&tPCZSo&AOldPo?xY2t3 z9CUzm>m=m=6Xbs!+t3{~wDdd&%^=C|2AyX>=SA@S z5czeCMloL!A{uo@JpvApj{X}QxCqK=wxG4pc~E~E)SmF^%nqB?qEQF)V+rb zQY(K0O!AArVusOSh8YZfbwpb?XpB30s(UA=q064IQB;zFJFe(Snc0E(-vdH0e}zH1 z?wF6F(Iysh_rV@|4nE7p|1LBf|F=*PzM=nDC=UDnwfLWjTJbv)yP$3OpNJOVHwU|& zccD^XEq-&cOZo(I2`^*MG9Rd@4JtOFXfNiBm6%wP}ipb literal 0 HcmV?d00001 diff --git a/docker-compose.yml b/docker-compose.yml index 3f4fbc5..9fe75ac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,31 +1,21 @@ -# GEMINI_API_KEY="AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw" -# YOUTUBE_API="https://totally-real-dingo.ngrok-free.app" -# OPENROUTER_API_KEY="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" -# OPENROUTER_MODEL="openai/gpt-oss-20b:free" - services: video-render: restart: unless-stopped build: . container_name: video-render environment: - # RabbitMQ credentials # - RABBITMQ_PASS=${RABBITMQ_PASS} - - RABBITMQ_PASS="L@l321321321" + - RABBITMQ_PASS=L@l321321321 + - RABBITMQ_HOST=154.12.229.181 + - RABBITMQ_PORT=32790 # - GEMINI_API_KEY=${GEMINI_API_KEY} - - GEMINI_API_KEY="AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw" + - GEMINI_API_KEY=AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro} # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - - OPENROUTER_API_KEY="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" + - OPENROUTER_API_KEY=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 - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free} - FASTER_WHISPER_MODEL_SIZE=${FASTER_WHISPER_MODEL_SIZE:-small} - # ports: - # - "5000:5000" volumes: - # Mount host directories into the container so that videos can be - # provided and outputs collected. These paths can be customised when - # deploying the stack. The defaults assume /root/videos and - # /root/outputs on the host. # - "/root/videos:/app/videos" # - "/root/outputs:/app/outputs" - "./videos:/app/videos" diff --git a/dockerfile b/dockerfile index ec261de..d146341 100644 --- a/dockerfile +++ b/dockerfile @@ -2,12 +2,10 @@ FROM python:3.11-slim WORKDIR /app -# Set environment variables ENV DEBIAN_FRONTEND=noninteractive \ PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 -# Install system dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ ffmpeg \ @@ -27,22 +25,16 @@ RUN apt-get update && \ wget \ && rm -rf /var/lib/apt/lists/* -# Copy requirements first to leverage Docker cache COPY requirements.txt . 
-# Install Python dependencies RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir setuptools wheel && \ pip install --no-cache-dir -r requirements.txt -# Copy the rest of the application COPY . . -# Create necessary directories RUN mkdir -p /app/videos /app/outputs -# Set volumes VOLUME ["/app/videos", "/app/outputs"] -# Set the command to run your application CMD ["python", "-u", "main.py"] \ No newline at end of file diff --git a/video_render/config.py b/video_render/config.py index ee42f54..37d560c 100644 --- a/video_render/config.py +++ b/video_render/config.py @@ -16,7 +16,7 @@ class RabbitMQSettings: host: str = os.environ.get("RABBITMQ_HOST", "rabbitmq") port: int = int(os.environ.get("RABBITMQ_PORT", 5672)) user: str = os.environ.get("RABBITMQ_USER", "admin") - password: str = os.environ.get("RABBITMQ_PASS", "") + password: str = os.environ.get("RABBITMQ_PASS") consume_queue: str = os.environ.get("RABBITMQ_QUEUE", "to-render") publish_queue: str = os.environ.get("RABBITMQ_UPLOAD_QUEUE", "to-upload") prefetch_count: int = int(os.environ.get("RABBITMQ_PREFETCH", 1)) @@ -27,7 +27,7 @@ class RabbitMQSettings: @dataclass(frozen=True) class GeminiSettings: api_key: str = os.environ.get("GEMINI_API_KEY", "") - model: str = os.environ.get("GEMINI_MODEL", "gemini-1.5-pro-latest") + model: str = os.environ.get("GEMINI_MODEL", "gemini-2.5-pro") safety_settings: str | None = os.environ.get("GEMINI_SAFETY_SETTINGS") temperature: float = float(os.environ.get("GEMINI_TEMPERATURE", 0.2)) top_k: int | None = ( diff --git a/video_render/llm.py b/video_render/llm.py index c0742bc..2437fb5 100644 --- a/video_render/llm.py +++ b/video_render/llm.py @@ -150,8 +150,6 @@ class OpenRouterCopywriter: headers = { "Authorization": f"Bearer {self.settings.openrouter.api_key}", "Content-Type": "application/json", - "HTTP-Referer": "https://localhost", - "X-Title": "video-render-pipeline", } response = requests.post( @@ -159,19 +157,22 @@ class OpenRouterCopywriter: ) response.raise_for_status() data = response.json() - choices = data.get("choices") or [] + if not choices: raise RuntimeError("OpenRouter nao retornou escolhas") message = choices[0].get("message", {}).get("content") + if not message: raise RuntimeError("Resposta do OpenRouter sem conteudo") parsed = self._extract_json(message) titles = parsed.get("titles") + if not isinstance(titles, list): raise ValueError("Resposta do OpenRouter invalida: campo 'titles'") + return [str(title) for title in titles] @staticmethod diff --git a/video_render/messaging.py b/video_render/messaging.py index 08ead1d..c37058d 100644 --- a/video_render/messaging.py +++ b/video_render/messaging.py @@ -15,6 +15,7 @@ MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]] class RabbitMQWorker: def __init__(self, settings: Settings) -> None: + print(settings) self.settings = settings self._params = pika.ConnectionParameters( host=settings.rabbitmq.host, @@ -27,6 +28,7 @@ class RabbitMQWorker: ) def consume_forever(self, handler: MessageHandler) -> None: + while True: try: with pika.BlockingConnection(self._params) as connection: diff --git a/video_render/pipeline.py b/video_render/pipeline.py index 0b33843..a7fc042 100644 --- a/video_render/pipeline.py +++ b/video_render/pipeline.py @@ -74,6 +74,7 @@ class VideoPipeline: def _parse_job(self, message: Dict[str, Any]) -> JobMessage: filename = message.get("filename") + if not filename: raise ValueError("Mensagem inválida: 'filename' é obrigatório") diff --git a/video_render/rendering.py 
b/video_render/rendering.py index efd45e0..070427c 100644 --- a/video_render/rendering.py +++ b/video_render/rendering.py @@ -1,19 +1,14 @@ from __future__ import annotations import logging -import math import re from dataclasses import dataclass from typing import Iterable, List, Sequence, Tuple import numpy as np -from moviepy.editor import ( - ColorClip, - CompositeVideoClip, - ImageClip, - TextClip, - VideoFileClip, -) +from moviepy.video.VideoClip import ColorClip, ImageClip, TextClip +from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip +from moviepy.video.io.VideoFileClip import VideoFileClip from PIL import Image, ImageColor, ImageDraw, ImageFont from video_render.config import Settings @@ -36,6 +31,7 @@ class CaptionBuilder: def __init__(self, settings: Settings) -> None: self.settings = settings self.font_path = settings.rendering.font_path + if not self.font_path.exists(): raise FileNotFoundError(f"Fonte nao encontrada: {self.font_path}") @@ -50,6 +46,7 @@ class CaptionBuilder: self.max_words = settings.rendering.caption_max_words bbox = self.font.getbbox("Ay") + self.text_height = bbox[3] - bbox[1] self.baseline = (self.canvas_height - self.text_height) // 2 - bbox[1] self.space_width = self.font.getbbox(" ")[2] - self.font.getbbox(" ")[0] @@ -73,6 +70,7 @@ class CaptionBuilder: ) highlight_clips: List[ImageClip] = [] + for word, image in zip(group, highlight_images): h_start = clamp_time(word.start, minimum=clip_start) - clip_start h_end = clamp_time(word.end, minimum=word.start + 0.02) - clip_start @@ -90,13 +88,14 @@ class CaptionBuilder: def _render_group(self, group: Sequence[WordTiming]) -> Tuple[Image.Image, List[Image.Image]]: texts = [self._clean_word(word.word) for word in group] - widths = [] + for text in texts: bbox = self.font.getbbox(text) widths.append(bbox[2] - bbox[0]) total_width = sum(widths) + if len(widths) > 1: total_width += self.space_width * (len(widths) - 1) @@ -105,8 +104,8 @@ class CaptionBuilder: base_image = Image.new("RGBA", (self.canvas_width, self.canvas_height), (0, 0, 0, 0)) base_draw = ImageDraw.Draw(base_image) highlight_images: List[Image.Image] = [] - x = start_x + for text, width in zip(texts, widths): base_draw.text((x, self.baseline), text, font=self.font, fill=self.base_color) @@ -130,6 +129,7 @@ class CaptionBuilder: for word in words: buffer.append(word) + if len(buffer) == self.max_words: grouped.append(buffer) buffer = [] @@ -140,7 +140,6 @@ class CaptionBuilder: else: grouped.append(buffer) - # Rebalance groups to respect minimum size when possible for idx, group in enumerate(grouped[:-1]): if len(group) < self.min_words and len(grouped[idx + 1]) > self.min_words: deficit = self.min_words - len(group) @@ -149,6 +148,7 @@ class CaptionBuilder: grouped[idx + 1] = grouped[idx + 1][deficit:] grouped = [grp for grp in grouped if grp] + return grouped @staticmethod @@ -175,16 +175,20 @@ class VideoRenderer: with VideoFileClip(workspace_path) as base_clip: video_duration = base_clip.duration or 0 + for index, window in enumerate(highlight_windows, start=1): start = clamp_time(window.start) end = clamp_time(window.end) start = min(start, video_duration) end = min(end, video_duration) + if end <= start: logger.info("Janela ignorada por intervalo invalido: %s", window) + continue subclip = base_clip.subclipped(start, end) + try: rendered_path = self._render_single_clip( subclip=subclip, @@ -236,7 +240,6 @@ class VideoRenderer: ) resized_clip = subclip.resized(scale_factor) video_y = top_h + (video_area_h - 
resized_clip.h) // 2 - video_clip = resized_clip.with_position( ((frame_w - resized_clip.w) // 2, video_y) ) @@ -277,6 +280,7 @@ class VideoRenderer: caption_clips = [] caption_resources: List[ImageClip] = [] caption_y = frame_h - bottom_h + (bottom_h - self.captions.canvas_height) // 2 + for clip_set in caption_sets: base_positioned = clip_set.base.with_position(("center", caption_y)) caption_clips.append(base_positioned) From 8caa8491481ab73fc8b467133b311007e14d33e8 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Mon, 27 Oct 2025 09:15:12 -0300 Subject: [PATCH 08/15] Ajustes de rendering --- prompts/generate.txt | 3 ++- video_render/config.py | 5 ++--- video_render/llm.py | 10 ++++++++-- video_render/pipeline.py | 36 ++++++++++++++++++------------------ video_render/rendering.py | 7 ++++--- 5 files changed, 34 insertions(+), 27 deletions(-) diff --git a/prompts/generate.txt b/prompts/generate.txt index ed2853b..2ab45f9 100644 --- a/prompts/generate.txt +++ b/prompts/generate.txt @@ -2,7 +2,8 @@ Voce e um estrategista de conteudo especializado em identificar cortes curtos de FUNCAO: - Analisar a transcricao completa de um video. -- Escolher trechos curtos (entre 20s e 90s) com maior chance de engajamento. +- Escolher trechos curtos (entre 60s e 90s) com maior chance de engajamento. +- O inicio do trecho deve ter um hook para engajar e prender a atenção do espectador. - Responder APENAS em JSON valido. FORMATO DA RESPOSTA: diff --git a/video_render/config.py b/video_render/config.py index 37d560c..4940751 100644 --- a/video_render/config.py +++ b/video_render/config.py @@ -43,15 +43,14 @@ class GeminiSettings: class OpenRouterSettings: api_key: str = os.environ.get("OPENROUTER_API_KEY", "") model: str = os.environ.get( - "OPENROUTER_MODEL", "anthropic/claude-3-haiku:beta" + "OPENROUTER_MODEL", "openai/gpt-oss-20b:free" ) temperature: float = float(os.environ.get("OPENROUTER_TEMPERATURE", 0.6)) - max_output_tokens: int = int(os.environ.get("OPENROUTER_MAX_OUTPUT_TOKENS", 256)) @dataclass(frozen=True) class WhisperSettings: - model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "medium") + model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "small") device: str | None = os.environ.get("FASTER_WHISPER_DEVICE") compute_type: str | None = os.environ.get("FASTER_WHISPER_COMPUTE_TYPE") download_root: Path = Path( diff --git a/video_render/llm.py b/video_render/llm.py index 2437fb5..de6c4ae 100644 --- a/video_render/llm.py +++ b/video_render/llm.py @@ -137,7 +137,6 @@ class OpenRouterCopywriter: body = { "model": self.settings.openrouter.model, "temperature": self.settings.openrouter.temperature, - "max_tokens": self.settings.openrouter.max_output_tokens, "messages": [ {"role": "system", "content": prompt}, { @@ -153,11 +152,18 @@ class OpenRouterCopywriter: } response = requests.post( - OPENROUTER_ENDPOINT, json=body, headers=headers, timeout=120 + url=OPENROUTER_ENDPOINT, + data=json.dumps(body), + headers=headers, + timeout=120, ) response.raise_for_status() data = response.json() choices = data.get("choices") or [] + print("Data:") + print(data) + print("Choices:") + print(choices) if not choices: raise RuntimeError("OpenRouter nao retornou escolhas") diff --git a/video_render/pipeline.py b/video_render/pipeline.py index a7fc042..6bd6689 100644 --- a/video_render/pipeline.py +++ b/video_render/pipeline.py @@ -70,7 +70,7 @@ class VideoPipeline: return self._build_success_payload(context) except Exception as exc: logger.exception("Falha ao processar vídeo %s", 
context.job.filename) - return self._handle_failure(context, exc) + # return self._handle_failure(context, exc) def _parse_job(self, message: Dict[str, Any]) -> JobMessage: filename = message.get("filename") @@ -200,25 +200,25 @@ class VideoPipeline: } def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]: - logger.error("Erro no pipeline: %s", exc) - cleanup_targets: List[Path] = [] + logger.error("Erro na pipeline: %s", exc) + # cleanup_targets: List[Path] = [] - if context.workspace: - cleanup_targets.append(context.workspace.workspace_dir) - cleanup_targets.append(context.workspace.output_dir) - original_path = context.workspace.source_path - if original_path.exists(): - cleanup_targets.append(original_path) - else: - sanitized = sanitize_filename(Path(context.job.filename).stem) - job_output_dir = self.settings.outputs_dir / sanitized - if job_output_dir.exists(): - cleanup_targets.append(job_output_dir) - original_path = self.settings.videos_dir / context.job.filename - if original_path.exists(): - cleanup_targets.append(original_path) + # if context.workspace: + # cleanup_targets.append(context.workspace.workspace_dir) + # cleanup_targets.append(context.workspace.output_dir) + # original_path = context.workspace.source_path + # if original_path.exists(): + # cleanup_targets.append(original_path) + # else: + # sanitized = sanitize_filename(Path(context.job.filename).stem) + # job_output_dir = self.settings.outputs_dir / sanitized + # if job_output_dir.exists(): + # cleanup_targets.append(job_output_dir) + # original_path = self.settings.videos_dir / context.job.filename + # if original_path.exists(): + # cleanup_targets.append(original_path) - remove_paths(cleanup_targets) + # remove_paths(cleanup_targets) return { "hasError": True, diff --git a/video_render/rendering.py b/video_render/rendering.py index 070427c..f09ab87 100644 --- a/video_render/rendering.py +++ b/video_render/rendering.py @@ -279,8 +279,9 @@ class VideoRenderer: caption_clips = [] caption_resources: List[ImageClip] = [] - caption_y = frame_h - bottom_h + (bottom_h - self.captions.canvas_height) // 2 - + margin = 20 + caption_y = max(0, video_y - self.captions.canvas_height - margin) + for clip_set in caption_sets: base_positioned = clip_set.base.with_position(("center", caption_y)) caption_clips.append(base_positioned) @@ -299,7 +300,7 @@ class VideoRenderer: font_size=self.settings.rendering.subtitle_font_size, color=self.settings.rendering.base_color, method="caption", - size=(frame_w - 160, bottom_h - 40), + size=(frame_w - 160, max(40, self.captions.canvas_height)), ) .with_duration(duration) .with_position(("center", caption_y)) From 2692cc4dfd85ef9356a2aa724680fcc42e5b9a59 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Mon, 27 Oct 2025 09:15:43 -0300 Subject: [PATCH 09/15] Ajusta git ignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b53c1fd..64e5617 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ __pycache__/ dist/ build/ doc/ +videos/ +outputs/ # Ignore virtual envs venv/ From b5a27fa9385ac9b9422838a6142b15904d023dde Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Mon, 27 Oct 2025 14:08:10 -0300 Subject: [PATCH 10/15] Ajustes do Gemini --- .gitignore | 4 +- docker-compose.yml | 3 +- requirements.txt | 3 +- video_render/__pycache__/llm.cpython-39.pyc | Bin 5533 -> 6370 bytes .../__pycache__/rendering.cpython-39.pyc | Bin 10198 -> 10656 bytes video_render/llm.py | 109 +++++++++++------- video_render/media.py | 8 +- 
video_render/messaging.py | 1 - video_render/pipeline.py | 37 +++--- video_render/rendering.py | 19 ++- 10 files changed, 115 insertions(+), 69 deletions(-) diff --git a/.gitignore b/.gitignore index 64e5617..7a2b6cf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ *.pyc *.pyo *.pyd -__pycache__/ +/__pycache__/ *.egg-info/ .eggs/ dist/ @@ -10,7 +10,7 @@ build/ doc/ videos/ outputs/ - +.DS_STORE # Ignore virtual envs venv/ env/ diff --git a/docker-compose.yml b/docker-compose.yml index 9fe75ac..338e355 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,6 @@ services: video-render: restart: unless-stopped build: . - container_name: video-render environment: # - RABBITMQ_PASS=${RABBITMQ_PASS} - RABBITMQ_PASS=L@l321321321 @@ -10,7 +9,7 @@ services: - RABBITMQ_PORT=32790 # - GEMINI_API_KEY=${GEMINI_API_KEY} - GEMINI_API_KEY=AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw - - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-pro} + - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-flash} # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - OPENROUTER_API_KEY=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 - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free} diff --git a/requirements.txt b/requirements.txt index 1593182..f38966b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ pillow==9.5.0 numpy>=1.26.0 requests pika -faster-whisper==1.2.0 \ No newline at end of file +faster-whisper==1.2.0 +google-genai diff --git a/video_render/__pycache__/llm.cpython-39.pyc b/video_render/__pycache__/llm.cpython-39.pyc index 72379c37fcd88c5dde3dbcb2a437e46a004cf331..21992660a7376088f93f3a87e25ad00805a9d07e 100644 GIT binary patch literal 6370 zcmcgw+ix6K8J{_G+1ZQN8#_sJY3Yzs+O1MMk)eXLe_E z&e=5Hu6d|a2_Y2$4+ue!YzZOdkKmm*#Ou8BR9<eZ0Q48w9!2j9qLX+Q>Hygr@M!v!?q=y zjU(ddrX4-Q%h9pR7O(K?1B+MJS#%t=8lOOI0<{yI-LRVVPw56tJFmvQUZ!K6WIeTr zx_d3w%S|VD&i7VxcRp$B+wYHoHm>x-Vu23p9OIGK(6$?Uz?X4}iLo^5B{ek#mK z+H|_zKNppAL}Q79up*mVpsURpcOEkCa_@l)`ue=Y1JujBg1XDAbZ^w_d=hmJwZ=o6 zAL3J}`J#mSG(U{~BmC$CcioNxv`zD8_%XB{=O;{CnV$r`p3M(k7Tu(md_U`vsp?m8`GD6=)x$X>BlY}L@X#k&2GtsT>XQ>?KO z+we9mbz1u)cG5ES66zOpz|r?d>raekjMz$IuwrKSs zVtF-Dt@yUeQWAwSb^GEz1Wo0KE+wg0%=D#f(Bo!$UX~)}EiLZrd~&>q7Lb-0b0;xZ zmXJjL#WWE;o%?N*LyBD`(xqa=5sY|b$>)%LHhnMQB5O&}<3i4+X?Lc-ng^{GD4|;| z8mSH=vFyMOSdCE{*mYL7eS3J~2|1YAnkcKZc!kvEJ<^<8P>~4`Z$}wq10(hbMt}yA zBO@5ULxI>wg8hI>_Pn)jlZdaeVSQw6fc=~Hn)8m;n{zGgtiEqsYp!h#Pic3<^)8Iin)`FNS1Ix{))@=yltvEL@lXmz9!mbm-zak*rIN0UN*ZQE z{Z`j_$p~5rA`~pPZJciFN3!kXyFv2&B@{YX_SVrgn~qNh#|OLj(D@C!>tee7$jLpW zW2tioRL@=Fr96d+pb?un6P1+cbiq8spdz7V^UB#2ZP`)y$?Ywo96rz2$U!4^TsNM!S)Eh22(){t%-pr=HkHWSdPDkjxlBlGIBlg-NcZo20yTNtR-6@g#Tzm1Ln9q%kG~1+8oAsW*z&EBJ92pGEEHj-v=Xe8m#oAZIe)f)WgGp; zU2s@Ry7;9>c2pBRHISket9Ft=30#?_BKJT>O|I4JW?ZCsK;{>60joGBB3x<Qi3ho>7 zy-7psV_j(7`5=}ZCBH{|>}9lk1#Rc>DLR+sFj&pTWA@nn!mS22d+d9#q_CyKFFnc9 z-7?;^3y$qKIK)!Ju|5ik1!Dt_tr$ZC`iEOJ@Xj_|pgw>u^qn2|!O3#A-By-!Y4dn>--pgrcGyO>wW>5y>&^+k-=d%))=&~_-60zy)-Jx2&=>O(CW;x&3D7P8)3df zuI(e7!=M_YOnIdp_c+X|7Ah}|1&<_9pTQHPZUIOWGQhDJ|^vK8DR;5XYy;l3J_=pH*WG=m3?Ny;VBK zhA$t$xC0HjB-8Z>xMIr?7aKppK++XrqG8|?3d5SVHlQ(^_B$XR(1W$Y2tUvtmdxQc zyg&LkTuvCnSin0>orAS=?l5`bBa8yb1lWGY`j{QJh`m@3IKl_YxB*;2OnKxj=JtoV zz4qxB_(-b_?g@V0Oh$SDSPC~s~9U^pyVq^B3}(| z16l94G_&aY0G}ePX)y+h){Vnx7`g)P?nmo0dmP|mvqFHc7k5Rgl~-CV7{WnHHPTF0emoqMj$C8e->xf1S2)@V~67tK67zAtBXq;>u zt=E}7HN|Yx+eXKpA93y+!73#GbEqS{y}=HG8J{S~oWcJS2A>B4Pk_0%y-VS9V2BDFz)PAifx`n{{=^^yAco3Am^@w` z&+#c_D?-sVPXin1GU0Wgb$jGdaQcaB5O`&ibkW~%fhu4IH>$j`Y4`sCLm2>zA6mzg zg07Kaq?}KU>jCAXK>bZm@M(kB1Ah3ph1o|EYm*_zv%p$;R964Oh-b_QYok6YqqQ=s 
zp#K<3RXsVX>c%?zwXLS~A%5JL*%O8aRiXiyeR46Dpwrt~TZAGUbi?+xJcR(8qMZQV z4iPJcWMdOONHZ0-2T~&w;k`u0i3(vXW8i9$s+r1l(H6JiPq9`U5S7NM4+y^JXw z$UAOhIV#e2CThc`UCdX5F1CRs2aukrAa>=0A+0msHx#{lS$dBUb@(zoL-?&T0NsR( z1?D!3Q()~lgf?{PBm|aGjiD?tj@u2%PMOW(5V64^p`~ylIe_<%=PDnb!VOcQPR)hy zQ4yC7UpsX=+$w+V)UCD3%`p+UxQ7)1|7hWUCiS3$xZ-p;P_Yc}33(fLTtc%TLP@bi z&mc0)#`l~t68uE`MF7^sUu2q0{B??a^M=J?>Zp+QIh33|JL(FCGc&XP8^@#ZmL zZWvrF>T|}3N;NjpoIV@1k>*lB!FbDQA{M$(L=XS)|5SYoitfM>a1 zkZ3sjl)+B~lEwI!o)8i8B}PQZ2N@Bmt=RxufNn)#ZdeLlUnWY7dF`3i4r2D-G3wv= ztmafBy|Q>SYLHbBEl7Wh{OUj% zxkDf%p>c(iFy8ewTJ~#{d>u)%R>UV1ZsvaRBox&sz{2ZN5vsbkGrlT2G{ayy0zopN zWQCG%Q*wury)Gx8+$X5N#29rGiA7Hi4*P;t0-t?`pr&RXh6sE-b|16P{fW8>atPWX z+_Bkze4jL;PC*a-fzbnY)TD@Q0S6HZePu_8cD(bx_*Y<2lx~b z2B^zxTn5^r*5=6wU~h?RIKZAHJYb|YD$!dCUNq^cL*s?_cJXBCBWgpdc}*x7KtLq& zJ6YCAMe*!dD=NgaIFYoayhKTlM(%lPoGHRP^W14(kE0X2Um|CA-lJfv%xhv)Mu_s3 zdFCmIahg`nC?Wh}?zD^ch6`ky^WbcOXx}6oWa4r2jtQK67=lNl*FY>%VXjl}{}-YR BV;KMd delta 2855 zcmZWr&2JmW72jDdcbCiMiu$x9mP$cI+@eJRCo#6@p>+!BQXrQWMQ;UqD9}G3$98Y+p+GLZ1jxZPq`o(#Y$YWz zzxjA?X5O25^LzWR$xlWdzfiCg_+423a^rU+_noK6hxbQctPq7NR1I@(j%KMITJ^k} zuiLH-<5_Big}UQ9L}8iT9#-5VZZ9?6KALm;=M`$v{7)2`zpJ_f(6ea)dIjhW-v8`O zpPFj3!J0_5g&-E66WBNzCgV#>8$vYWsfmdyYcTGM;5G~WMz!Tv*?2XIsv+~6K|H<| z)hBLGt}@|I)FaBmc;ZL0-k$2tW3NRG!5Yadc|F;5<_CJ`Sv_b3SA*(C2t_c?&E$mo z9pWZ8)VFo2+)&EdrEY4#R^BbPnOw?@6C+`n%Qp*PXE~Eo@c6dL) zrwTs~)x*K@iQ55Xk;hqsGCmQ8_3`FrYI+`QEj(`kl;S=Bg_y)tZ57cZj_M=@?Ig)1 z=d@=^4v5!ORORi(KHYLx_J zI|CZonqD)g(jt79T8I6Bv@hj!hq4j4vB&aUXMR_nYwvUv3e$?PYWeQ4 zO`UsWTcbVQegO=~3a8f7*4kNEvPgUHY1?{R-!1MbM1eZ9r)(Q-W4D*~?UCkxs>-(6 zHseR4kM`dsACR_52SyZ-4hG7e3iVJG>g`-R_vXhWeiqb@Nwr*8P2VHiR@(yE49JG1 zOxf*kTQHt)=V5^nXxU<*ZHvLXD(J0?A$nZu*>L|S5+hrgI0Z*tR#SESylcd)T4xO* zZ?^*{K$=?9Ap|YNU~}dh$uF|3FE!U>xQRH`S~0}dv&ph~dMC{bR&Qb$w>Wc)hanv` zAUM~9s;h~pxh(N}Ws+uP1g9B@xXqO~|`q=pFUENY3=TJsw;puS-7 z*2zv_ml}Rl;!H%m5w%Jk`3t^?VvbKMHM;ik=b^WCYK6tkC>DN+My2ljnAJ;CsYRpY z&-s%{S--M#%+0O0!q5}!j^JluDj!BTk4t80tKN(|kD!R7qX2FeT=JTo>Q*{DKX?7w z;x%t>advs>+G544%w1nzc%?Fz>gy4&`+}dtnZ3B4$=UlY7UL13ZE2DF&jm5_K+ywJ zMJrA-RVFyTpS0kiMgE%b@MMyI=%+Rcd<54z3E&z{e>04Hnrf>N-Apqr9`cFqCg6D^ zW~m0p=ksr&wuSC_aMU7pP453tx+~8fKDekNL+JSnY*^X)*m>Gxk5hyAk6nyax(Ex{uF58F}v51|tjpvOt6AR9@+glL zSD(VcIe^s0;|$j7Ol(9{>K&OYSHURfm<^EKVk95%tEYHbCEQBhakla}#PbMt@=u3P z;0?-~k?~MgJ9i-o@@r<}#lB_UmIbk6; z9bu+?mzlC%X0jzS?RJ^z_o5(~>6Jrjvb)q|(;;9MAg1^;Q0cIevg@>S9Y)%<=`eDT z=xOIg@va(w`KK?6_X)X}{IqXzp#;9kn@o+kRj>Pevz&Qyz5FJK%Ys%RH`~t#r09W`o0n1wFsmVub)%`o3h;Fe)ocyZ)?76Mp!>k)d zYks%^&u?o`X0owPCL3!8O%?_XR@Rcg^glhdWnZmSmd94uI^&FQ88^o&fe0BN$DJ}A zelRwe4@ofQlQ0Mbv=IesEKY_8Cdq~5^?_gRU|@h_SFq|jmKAcDuZ0YH_6I0mKvBwl z9m;tN+DBaKMVEjZn=R4xOj^OejcPJFU%=Lj0BQfy^4#Le)Y2Q3xs}J%#HUb>r`(BS zd_Mdogc*eI0+j7eSYJ$jJ2!dydB=7CXue)<|VuOoaP;j8vZKKNrYb?nuh6_nmUs31rJq1bu@ zK_1YPxw0h#@lBB4f^x_k+2YH<8#r1XaxKqWZ(%z0JdPhZ@|Ij9jH=b3QB8rk1$>~b z@(g?r{`c6x4&IupLt=t_$qk$!sRvn}BPqB!$O#KD8nq=KlbYU9CI- diff --git a/video_render/__pycache__/rendering.cpython-39.pyc b/video_render/__pycache__/rendering.cpython-39.pyc index 18da5d3d83d76b06c40d67abc91f0aa05449a052..6577a62a9fbb680d3d7a58da07edc64f949e30a3 100644 GIT binary patch delta 3108 zcmZuzYiwLc6}~fjcYXKn!|Q!}pMKkGCyok9o2J2px;RRqC8E%lOT+EP_j+@ky}O&a z*G*DpS%jNX9u^KG3N&qH^8>XAsYw2V5JLPy;_(j}#3BSg2|_~R2Pyr5Ip^*sv7PQ} zzBx1JF=x)4J7;cwYU`cd*PKwu15>xz3X#h9ba9x{hFUhhGfX~fIXlM z*ovmuK`m&jn(EEddPoZc-(Qc|Q7vi@YJ+x6i+Q|2eaMb$agQFTC+wt_^k}7?veR1H z&S)7st7YwBZP=R+)^m1V%X_q1FFa=#wW3|pN<=d0R@ofcCfcZxHp1RN^19E68iOzS zwK3CwS~6nB&`Xjrv`*KFHV)&sk$`bxooW*>N*XB`rC>B^(5IwIMvRc9hkf1PVzb_4 zi*>8j4W6;9HPa)N&zM(Sq$;EvUu@c~renG0)0Sa2d$ZyOIU%}ab2tT2t5BAPj%5Je 
zmRKIT_lLzbIvGg<=}-WPu%g)XCB<*)0tpJ$r%$7pC)3H9{D;ZoGoI%8^UW&< z*6J#{ZZ#}d*UKQ{i~&d_Kvb%d7?nwMH{gqr7>SZNQDHVrMSS2WNr}?~M@V+(GSDO= zeykKvp*kA@s6@MAy~dhrE&Xzn84fGLWY_PwRpxemX2W1PoQUWQXp{9sSx^Ln50R4i zSn&7hdm+JSbp7Yoth(`_m{q3<6`xkm!NNaLzeY%2oCweMq&-3PWZiPyWjLn=O}rTX zIGGc_3_n3~A{zO{anIM`rK32M7KPD0h7|);bVYhPGc7e;<5v>8^Nh;U!>v5t=g#AbCsRG71DY z3QGnc=n!zK(kz*PuqVN(5{**vyVxwrircY^p*;qLJ@im$6zniOI5sZc8JatCkMB|5 zb|-<2S>=QQ6cWqC$75r$G9_%%%g18;oe*Xhn?jfte~dr)pauB8azr)Z+pe;g# zoj>eYnPWsYq8kHZE4$bk+@S4TO={<%6?lk8vQoRqBkhtAYnOSEm$e>RMQM|;n|!oA-iWz=H^WEx=sM+N>$E-2bB{^qo@-C= ziPh{@(1`Hy&I;Wo?Ma^B8s?MRgh;#q6tF#dVigr;+EaY0C+*s1GggFze!sfm$_;L!F?4rI)j>Sl+U<>AR(=i)lO`k^vQg`j~gNI%u{kR zMi^#c|Lb&x8bcccTOl6YCik4drQHHg@FY+1IM3GnJpTj2RUQKQI4oJ=@zo+Y@=qS? z_hhx$p5?QKv_>|49Y%R&tHfs^3Kgyx2_7(#H)JFAhA%I*=Xhnc48G3Yb#kQdPi z>-XbEx=&3Q86#VhK}#`)`_^)3ZJii-qhJ&_f)M4(*5tMXeKpo6;{86`?-Tt#+3!>R zJ`V>HZ6D%?dgD#srMGzDzPX?GoQ9fG62HnlLn@+_FQvMHWwYU$tTMoK$VYY*pexSg z3&*?uYTc?eDuiL0-@+2HSBPCh@h>C1jPO;2=K+M9f6F(;W<`%v2<1ZWk%N~RlrjAUtYa%y-PA%JsV_GTbD4buHNT0lf$D)4 zGeMQLVNf}|Q!b`SC)U3R^eQu}y4!4NiG4E{OsjUm)uMMM?JBET4K2c|m-RbySG9<3 zHTDgwSN05p_XL$?D8V{7p$(p|I;JkpmOgU4=W=gDq2`*~T66VQ)xDs}D1Goc=HUIG zp*jB+KP?pq5$~3c9VYa@fgrt2cWGEAxm{U~dNofb3SP@_^(r*JOMQ4L$EX-DkCiA% zdMdIS_z&HcqZIDy!2bd=77LY%&zIY9@%;&|)mgkenS(JS>|015xaX9U^{Rc|s4jGb zfAsl40!4D-;^-HW`vuvzfn$lkjJ^rCKOg(oebwiKtb5hx>*71(j}syOHl8`*Wyn=r z;~avC;N=RgTZ#2#VL6^P@oUe$@i6_COzfSyyydi!xdE%hZ zbr5<@yf^u1XHR`XlaC^F5b#jE{V}x2>IkT|CstlCyb5LmV;hBzpPVt9J9t z&BlsVV_1Q;+&+%WstsqESzh5~ar6z(Obgab_mi+ElQA#8FCqtDTHfQ)tJhvoJ}8!5 p<>XPjX{^=F1p*q*1V9+-YX1S`y*cnH6vlUmFDXNo`=cF6{y#KT$teH; delta 2505 zcmZuzZ)_vQ72g@JH|w>%-uTaY?e)LdNiNRaC2E0k9H%29*E`Vyt>Dld3s;k5H$De% z?3>+CK;uiLPy~phxlRl8A9X7A5B*l9{elo*5MMw-AS967N)=MSpbEjK7EXxc&Dy$) z0$$DUy*Ka8doyol-nf77`#(^Z;&DZQU-6#*o%fGiRtxCU%Ll8QxGh?uE!d%U*p(~^ z3BZf(F;})^H)2Ix#Zug;6%G1vJ7&c}A8xB|!b-TBrMctQxSO<+ZpumpHc~t7W~_{> zTe_RIvaVqn&_8C&?VOvp@&O-dn{L4>xJ9dY8VL^Wmz?qrvMP4Qjs{=sw;?-jt51Zj z2}e35*a=&ELa?R zU3M9zGlR$-?o!j?@<2ZAJn9FUKcl1Jj}YXAeTcSA9hf>MSs=s!cv&C?AXm(}dG-vR ziswM^Fo1waiM@*NK?+NR8V9&hU^#>$7s~*g@k9Ux%ZQZOFGJ58Ne~C(-JNykWjv5Q z$M;+7O^@Vu7sXRZC*z?0h=;g>Duhq0o+tI)e)ur@c0Cxmyxe(oU#x-LXtdT_exor7 zCSDalKoU|AyDDW7V%f19kL4 zgTVh#o<^v^Zi~$htph{lSi9x9c{6V%E`z{)1Z^Z6GCQHR%I}$iO3;sxu&^v&8 z0Gc-kAfO}`k&I%_%5N?$V{20jT2%6_k9ib#bJ5`@r@hLw|JdZ<=7Y6!_hqs z7#Y7g%6Ej+I83s?j~|@Bt`E3idDEcgCSD9cMp`CW)h4tOMr4|)$=9R&+>iqtX4t=y z--+jet%nFd6kt(KWwe03?z+fMq>i8z>!mJ2Hoi!m<=MC|^HWq}|IAckmf~$JufT2S zI`+@?LvTP2>+UUkhJ)jQG~Dq&0q>mv5M&X@AO~q^!;dE)LmzujKM6ka*|{>mB`Xnb zJ^Jyb@l`Ftws~iFo7tNXz9g}}vAC&T!o7S`=$X(8G)m)!K;u+cORwn$RHeL-6?#RQ zKOvlZuvem`wQN6PCungM@1S0pntg*p=|BQ400-KDFRkUkex_HUl|9xIf%U{S>k8*K zx6y@np!KP_jlJ^_I8MXcxT{f-Ds}{>puhyHe5kL|s-FN0VGXTmTWG}PM>J_iha+g* zjv2x#w&Rziew0Rb(Dh01kiJP%G)*%!NsZ=an3{h;RH0EAph2h_mF;mV*~#CCcItN_ zQ|L|7+FAhuPhJaI9EB{6Xn8~{c6!83Kq=LmB8-yl?8uwJy=@^oXXkD6QUr2V>sMEI z1Yop&HK556HAXZwqUjOMjHn6eReMu(YAA1qE*zqTuk|eqCk++3z!uCiXqLTbmg04Z zG~g(Z8v!i#mRU&D5#dK`Agws-zC&2K@Rv}H)Y+Sb^C-%0C@vZmKHx46yjqbPIPmku z{#Yy*pW@Qf9G(HNq%LuwsFEHC_37OYi#lQtl+PaQZ1~*`ztLUt&s!ot_WQ3tg#jNx z^WJCwEf)}Ga^l-NJIu=Yn7_525K=P{U2Wy;~1)f^raML zf35W3!p%%zG|Mk3@%U5_d6x70?w5S5z2q+2OUEB(r>hT1X>MY&SE@hSUxR)In)}$H z+DlMj-l~1_wTkvl-h+zv2>Z|E_s}+*pUNKPRh&G=;T#8tLy%S;x2}-|-VyB1^PGEu z!;2h##eOpN{ryw=1!&gUzo+hoDa}p4iGIyKpFXEcfH)%`f965 zg7SNTt5O_z#;i!w@$c+3n+_qt>K^AB{&ohBKjCFBnAA6|WFQ}NJN8D~IgY^0s{q8{ ZCF9@lO)HEUP*!97ok@#uw!-SguK=n;4gLTC diff --git a/video_render/llm.py b/video_render/llm.py index de6c4ae..84d2d4f 100644 --- a/video_render/llm.py +++ b/video_render/llm.py @@ -3,8 +3,10 @@ from __future__ 
import annotations import json import logging from pathlib import Path -from typing import Dict, List +from typing import Any, Dict, List, Optional +from google import genai +from google.genai import types as genai_types import requests from video_render.config import BASE_DIR, Settings @@ -12,7 +14,6 @@ from video_render.transcription import TranscriptionResult logger = logging.getLogger(__name__) -GEMINI_ENDPOINT_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent" OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions" @@ -31,6 +32,7 @@ class GeminiHighlighter: self.prompt_template = prompt_path.read_text(encoding="utf-8") self.settings = settings + self.client = genai.Client() def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]: payload = { @@ -45,45 +47,13 @@ class GeminiHighlighter: ], } - body = { - "contents": [ - { - "role": "user", - "parts": [ - {"text": self.prompt_template}, - {"text": json.dumps(payload, ensure_ascii=False)}, - ], - } - ] - } + try: + response = self._call_gemini(payload) + except Exception as exc: + logger.error("Gemini API request falhou: %s", exc) + raise RuntimeError("Gemini API request falhou") from exc - if self.settings.gemini.temperature is not None: - body["generationConfig"] = { - "temperature": self.settings.gemini.temperature, - } - if self.settings.gemini.top_p is not None: - body["generationConfig"]["topP"] = self.settings.gemini.top_p - if self.settings.gemini.top_k is not None: - body["generationConfig"]["topK"] = self.settings.gemini.top_k - - url = GEMINI_ENDPOINT_TEMPLATE.format(model=self.settings.gemini.model) - params = {"key": self.settings.gemini.api_key} - - response = requests.post(url, params=params, json=body, timeout=120) - response.raise_for_status() - data = response.json() - - candidates = data.get("candidates") or [] - if not candidates: - raise RuntimeError("Gemini nao retornou candidatos") - - text_parts = candidates[0].get("content", {}).get("parts", []) - if not text_parts: - raise RuntimeError("Resposta do Gemini sem conteudo") - - raw_text = text_parts[0].get("text") - if not raw_text: - raise RuntimeError("Resposta do Gemini sem texto") + raw_text = self._extract_response_text(response) parsed = self._extract_json(raw_text) highlights = parsed.get("highlights") @@ -91,6 +61,61 @@ class GeminiHighlighter: raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente") return highlights + def _call_gemini(self, payload: Dict[str, Any]) -> Any: + contents = [ + { + "role": "user", + "parts": [ + {"text": self.prompt_template}, + {"text": json.dumps(payload, ensure_ascii=False)}, + ], + } + ] + + request_kwargs: Dict[str, Any] = { + "model": self.settings.gemini.model, + "contents": contents, + } + + config = self._build_generation_config() + if config is not None: + request_kwargs["config"] = config + + return self.client.models.generate_content(**request_kwargs) + + def _build_generation_config(self) -> Optional[genai_types.GenerateContentConfig]: + config_kwargs: Dict[str, Any] = {} + if self.settings.gemini.temperature is not None: + config_kwargs["temperature"] = self.settings.gemini.temperature + if self.settings.gemini.top_p is not None: + config_kwargs["top_p"] = self.settings.gemini.top_p + if self.settings.gemini.top_k is not None: + config_kwargs["top_k"] = self.settings.gemini.top_k + + if not config_kwargs: + return None + + return genai_types.GenerateContentConfig(**config_kwargs) + + @staticmethod + def 
_extract_response_text(response: Any) -> str: + text = getattr(response, "text", None) + if text: + return str(text).strip() + + candidates = getattr(response, "candidates", None) or [] + for candidate in candidates: + content = getattr(candidate, "content", None) + if not content: + continue + parts = getattr(content, "parts", None) or [] + for part in parts: + part_text = getattr(part, "text", None) + if part_text: + return str(part_text).strip() + + raise RuntimeError("Resposta do Gemini sem texto") + @staticmethod def _extract_json(response_text: str) -> Dict: try: @@ -160,10 +185,6 @@ class OpenRouterCopywriter: response.raise_for_status() data = response.json() choices = data.get("choices") or [] - print("Data:") - print(data) - print("Choices:") - print(choices) if not choices: raise RuntimeError("OpenRouter nao retornou escolhas") diff --git a/video_render/media.py b/video_render/media.py index 7fb878e..a79dd4f 100644 --- a/video_render/media.py +++ b/video_render/media.py @@ -38,7 +38,7 @@ class MediaPreparer: existing_children = list(workspace_dir.iterdir()) if existing_children: logger.info("Limpando workspace existente para %s", sanitized_name) - remove_paths(existing_children) + # remove_paths(existing_children) destination_name = f"{sanitized_name}{source_path.suffix.lower()}" working_video_path = workspace_dir / destination_name @@ -46,9 +46,9 @@ class MediaPreparer: logger.info("Cópia do vídeo criada em %s", working_video_path) output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name) - existing_outputs = list(output_dir.iterdir()) - if existing_outputs: - remove_paths(existing_outputs) + # existing_outputs = list(output_dir.iterdir()) + # if existing_outputs: + # remove_paths(existing_outputs) audio_path = workspace_dir / "audio.wav" extract_audio_to_wav(working_video_path, audio_path) diff --git a/video_render/messaging.py b/video_render/messaging.py index c37058d..b61599c 100644 --- a/video_render/messaging.py +++ b/video_render/messaging.py @@ -15,7 +15,6 @@ MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]] class RabbitMQWorker: def __init__(self, settings: Settings) -> None: - print(settings) self.settings = settings self._params = pika.ConnectionParameters( host=settings.rabbitmq.host, diff --git a/video_render/pipeline.py b/video_render/pipeline.py index 6bd6689..c8e309e 100644 --- a/video_render/pipeline.py +++ b/video_render/pipeline.py @@ -101,7 +101,15 @@ class VideoPipeline: if not context.transcription: raise RuntimeError("Transcricao nao disponivel") - highlights_raw = self.highlighter.generate_highlights(context.transcription) + try: + highlights_raw = self.highlighter.generate_highlights(context.transcription) + except Exception: + logger.exception( + "Falha ao gerar destaques com Gemini; aplicando fallback padrao." 
+ ) + context.highlight_windows = [self._build_fallback_highlight(context)] + return + windows: List[HighlightWindow] = [] for item in highlights_raw: @@ -120,18 +128,7 @@ class VideoPipeline: windows.append(HighlightWindow(start=start, end=end, summary=summary)) if not windows: - last_end = ( - context.transcription.segments[-1].end - if context.transcription.segments - else 0 - ) - windows.append( - HighlightWindow( - start=0.0, - end=max(last_end, 10.0), - summary="Sem destaque identificado; fallback automatico.", - ) - ) + windows.append(self._build_fallback_highlight(context)) context.highlight_windows = windows @@ -148,6 +145,20 @@ class VideoPipeline: for window, title in zip(context.highlight_windows, titles): window.title = title.strip() + def _build_fallback_highlight(self, context: PipelineContext) -> HighlightWindow: + if not context.transcription: + raise RuntimeError("Transcricao nao disponivel para criar fallback") + + last_end = ( + context.transcription.segments[-1].end + if context.transcription.segments + else 0.0 + ) + return HighlightWindow( + start=0.0, + end=max(last_end, 10.0), + summary="Sem destaque identificado; fallback automatico.", + ) def _render_clips(self, context: PipelineContext) -> None: if not context.workspace or not context.highlight_windows or not context.transcription: diff --git a/video_render/rendering.py b/video_render/rendering.py index f09ab87..723f17d 100644 --- a/video_render/rendering.py +++ b/video_render/rendering.py @@ -267,6 +267,7 @@ class VideoRenderer: color=self.settings.rendering.base_color, method="caption", size=(frame_w - 160, top_h - 40), + align="center", ) .with_duration(duration) ) @@ -279,8 +280,18 @@ class VideoRenderer: caption_clips = [] caption_resources: List[ImageClip] = [] - margin = 20 - caption_y = max(0, video_y - self.captions.canvas_height - margin) + caption_area_top = frame_h - bottom_h + caption_area_height = bottom_h + caption_margin = 20 + raw_caption_y = caption_area_top + (caption_area_height - self.captions.canvas_height) // 2 + min_caption_y = caption_area_top + caption_margin + max_caption_y = ( + caption_area_top + caption_area_height - self.captions.canvas_height - caption_margin + ) + if max_caption_y < min_caption_y: + caption_y = min_caption_y + else: + caption_y = min(max(raw_caption_y, min_caption_y), max_caption_y) for clip_set in caption_sets: base_positioned = clip_set.base.with_position(("center", caption_y)) @@ -300,6 +311,7 @@ class VideoRenderer: font_size=self.settings.rendering.subtitle_font_size, color=self.settings.rendering.base_color, method="caption", + align="center", size=(frame_w - 160, max(40, self.captions.canvas_height)), ) .with_duration(duration) @@ -310,6 +322,9 @@ class VideoRenderer: [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips], size=(frame_w, frame_h), ) + video_audio = video_clip.audio or resized_clip.audio or subclip.audio + if video_audio is not None: + composite = composite.set_audio(video_audio) output_path = output_dir / f"clip_{index:02d}.mp4" composite.write_videofile( From c18884e7780cb80f6f3c2267874f6ebadf6debe6 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Tue, 28 Oct 2025 17:34:13 -0300 Subject: [PATCH 11/15] Finaliza os ajustes para render de video --- video_render/messaging.py | 100 ++++++++--- video_render/pipeline.py | 9 + video_render/rendering.py | 302 +++++++++++++++++++++++++++++----- video_render/transcription.py | 72 +++++++- 4 files changed, 420 insertions(+), 63 deletions(-) diff --git a/video_render/messaging.py 
b/video_render/messaging.py index b61599c..d00283c 100644 --- a/video_render/messaging.py +++ b/video_render/messaging.py @@ -13,6 +13,22 @@ logger = logging.getLogger(__name__) MessageHandler = Callable[[Dict[str, Any]], Dict[str, Any]] +def _safe_ack( + channel: pika.adapters.blocking_connection.BlockingChannel, delivery_tag +) -> bool: + if not channel.is_open: + logger.warning( + "Canal fechado antes do ACK; mensagem sera reprocessada apos reconexao" + ) + return False + try: + channel.basic_ack(delivery_tag=delivery_tag) + return True + except Exception: + logger.exception("Falha ao confirmar mensagem") + return False + + class RabbitMQWorker: def __init__(self, settings: Settings) -> None: self.settings = settings @@ -27,50 +43,59 @@ class RabbitMQWorker: ) def consume_forever(self, handler: MessageHandler) -> None: - while True: try: with pika.BlockingConnection(self._params) as connection: channel = connection.channel() - channel.queue_declare(queue=self.settings.rabbitmq.consume_queue, durable=True) - channel.queue_declare(queue=self.settings.rabbitmq.publish_queue, durable=True) - channel.basic_qos(prefetch_count=self.settings.rabbitmq.prefetch_count) + channel.queue_declare( + queue=self.settings.rabbitmq.consume_queue, durable=True + ) + channel.queue_declare( + queue=self.settings.rabbitmq.publish_queue, durable=True + ) + channel.basic_qos( + prefetch_count=self.settings.rabbitmq.prefetch_count + ) - def _on_message(ch: pika.adapters.blocking_connection.BlockingChannel, method, properties, body): + def _on_message( + ch: pika.adapters.blocking_connection.BlockingChannel, + method, + properties, + body, + ) -> None: + """Consume message, ACK immediately, then process.""" try: message = json.loads(body) except json.JSONDecodeError: - logger.error("Mensagem inválida recebida: %s", body) - ch.basic_ack(delivery_tag=method.delivery_tag) + logger.error("Mensagem invalida recebida: %s", body) + _safe_ack(ch, method.delivery_tag) return - logger.info("Mensagem recebida: %s", message.get("filename", "")) + if not _safe_ack(ch, method.delivery_tag): + logger.warning( + "Nao foi possivel confirmar mensagem; abortando processamento" + ) + return + + logger.info( + "Mensagem recebida: %s", + message.get("filename", ""), + ) + try: response = handler(message) except Exception: - logger.exception("Erro não tratado durante o processamento") + logger.exception("Erro nao tratado durante o processamento") response = { "hasError": True, - "error": "Erro não tratado no pipeline", + "error": "Erro nao tratado no pipeline", "filename": message.get("filename"), "videoId": message.get("videoId"), "url": message.get("url"), "processedFiles": [], } - try: - payload = json.dumps(response) - ch.basic_publish( - exchange="", - routing_key=self.settings.rabbitmq.publish_queue, - body=payload, - properties=pika.BasicProperties(delivery_mode=2), - ) - logger.info("Resposta publicada para '%s'", self.settings.rabbitmq.publish_queue) - except Exception: - logger.exception("Falha ao publicar a resposta na fila de upload") - finally: - ch.basic_ack(delivery_tag=method.delivery_tag) + self._publish_response(response) channel.basic_consume( queue=self.settings.rabbitmq.consume_queue, @@ -80,7 +105,32 @@ class RabbitMQWorker: logger.info("Consumidor iniciado. Aguardando mensagens...") channel.start_consuming() except pika.exceptions.AMQPConnectionError: - logger.exception("Conexão com RabbitMQ perdida. Tentando reconectar...") + logger.exception( + "Conexao com RabbitMQ perdida. Tentando reconectar..." 
+ ) + except pika.exceptions.AMQPError: + logger.exception("Erro AMQP inesperado. Reiniciando consumo...") except KeyboardInterrupt: - logger.info("Encerrando consumidor por interrupção do usuário.") + logger.info("Encerrando consumidor por interrupcao do usuario.") break + + def _publish_response(self, response: Dict[str, Any]) -> None: + payload = json.dumps(response) + try: + with pika.BlockingConnection(self._params) as publish_connection: + publish_channel = publish_connection.channel() + publish_channel.queue_declare( + queue=self.settings.rabbitmq.publish_queue, durable=True + ) + publish_channel.basic_publish( + exchange="", + routing_key=self.settings.rabbitmq.publish_queue, + body=payload, + properties=pika.BasicProperties(delivery_mode=2), + ) + logger.info( + "Resposta publicada para '%s'", + self.settings.rabbitmq.publish_queue, + ) + except Exception: + logger.exception("Falha ao publicar a resposta na fila de upload apos ACK") diff --git a/video_render/pipeline.py b/video_render/pipeline.py index c8e309e..4401771 100644 --- a/video_render/pipeline.py +++ b/video_render/pipeline.py @@ -93,6 +93,15 @@ class VideoPipeline: def _generate_transcription(self, context: PipelineContext) -> None: if not context.workspace: raise RuntimeError("Workspace não preparado") + existing = TranscriptionService.load(context.workspace.workspace_dir) + if existing: + logger.info( + "Transcricao existente encontrada em %s; reutilizando resultado", + context.workspace.workspace_dir, + ) + context.transcription = existing + return + transcription = self.transcriber.transcribe(context.workspace.audio_path) TranscriptionService.persist(transcription, context.workspace.workspace_dir) context.transcription = transcription diff --git a/video_render/rendering.py b/video_render/rendering.py index 723f17d..1a80b9a 100644 --- a/video_render/rendering.py +++ b/video_render/rendering.py @@ -3,9 +3,11 @@ from __future__ import annotations import logging import re from dataclasses import dataclass -from typing import Iterable, List, Sequence, Tuple +from typing import Dict, Iterable, List, Sequence, Tuple, Optional import numpy as np +from moviepy.audio.AudioClip import AudioArrayClip, AudioClip +from moviepy.audio.io.AudioFileClip import AudioFileClip from moviepy.video.VideoClip import ColorClip, ImageClip, TextClip from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip from moviepy.video.io.VideoFileClip import VideoFileClip @@ -199,6 +201,7 @@ class VideoRenderer: index=index, transcription=transcription, output_dir=output_dir, + source_path=workspace_path, ) finally: subclip.close() @@ -226,13 +229,14 @@ class VideoRenderer: index: int, transcription: TranscriptionResult, output_dir, + source_path: str, ) -> str: duration = end - start frame_w = self.settings.rendering.frame_width frame_h = self.settings.rendering.frame_height top_h = int(frame_h * 0.18) bottom_h = int(frame_h * 0.20) - video_area_h = frame_h - top_h - bottom_h + video_area_h = max(1, frame_h - top_h - bottom_h) scale_factor = min( frame_w / subclip.w, @@ -257,19 +261,12 @@ class VideoRenderer: .with_opacity(0.85) ) - title_text = title or summary - wrapped_title = self._wrap_text(title_text, max_width=frame_w - 160) - title_clip = ( - TextClip( - text=wrapped_title, - font=str(self.settings.rendering.font_path), - font_size=self.settings.rendering.title_font_size, - color=self.settings.rendering.base_color, - method="caption", - size=(frame_w - 160, top_h - 40), - align="center", - ) - .with_duration(duration) + 
title_clip = self._build_title_clip( + title=title, + summary=summary, + duration=duration, + frame_width=frame_w, + top_panel_height=top_h, ) title_clip = title_clip.with_position( ((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2) @@ -305,43 +302,38 @@ class VideoRenderer: if not caption_clips: fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160) caption_clips.append( - TextClip( + self._make_textclip( text=fallback_text, - font=str(self.settings.rendering.font_path), + font_path=self.settings.rendering.font_path, font_size=self.settings.rendering.subtitle_font_size, color=self.settings.rendering.base_color, - method="caption", - align="center", size=(frame_w - 160, max(40, self.captions.canvas_height)), ) .with_duration(duration) .with_position(("center", caption_y)) ) + audio_clip, audio_needs_close = self._materialize_audio( + source_path=source_path, + start=start, + end=end, + duration=duration, + fallback_audio=video_clip.audio or resized_clip.audio or subclip.audio, + ) + composite = CompositeVideoClip( [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips], size=(frame_w, frame_h), ) - video_audio = video_clip.audio or resized_clip.audio or subclip.audio - if video_audio is not None: - composite = composite.set_audio(video_audio) + if audio_clip is not None: + composite = self._with_audio(composite, audio_clip) output_path = output_dir / f"clip_{index:02d}.mp4" - composite.write_videofile( - str(output_path), - codec=self.settings.rendering.video_codec, - audio_codec=self.settings.rendering.audio_codec, - fps=self.settings.rendering.fps, - bitrate=self.settings.rendering.bitrate, - ffmpeg_params=[ - "-preset", - self.settings.rendering.preset, - "-pix_fmt", - "yuv420p", - ], - temp_audiofile=str(output_dir / f"temp_audio_{index:02d}.m4a"), - remove_temp=True, - threads=4, + self._write_with_fallback( + composite=composite, + output_path=output_path, + index=index, + output_dir=output_dir, ) composite.close() @@ -355,9 +347,128 @@ class VideoRenderer: clip.close() for clip in caption_resources: clip.close() + if audio_clip is not None and audio_needs_close: + audio_clip.close() return str(output_path) + def _build_title_clip( + self, + *, + title: str, + summary: str, + duration: float, + frame_width: int, + top_panel_height: int, + ) -> ImageClip: + text = (title or summary or "").strip() + if not text: + text = summary or "" + + max_width = max(200, frame_width - 160) + font_size = self.settings.rendering.title_font_size + min_font_size = max(28, int(font_size * 0.6)) + target_height = max(80, top_panel_height - 40) + title_color = ImageColor.getrgb(self.settings.rendering.base_color) + font_path = self.settings.rendering.font_path + + while True: + font = ImageFont.truetype(str(font_path), font_size) + lines = self._split_title_lines(text, font, max_width) + line_height = font.getbbox("Ay")[3] - font.getbbox("Ay")[1] + spacing = max(4, int(line_height * 0.25)) + text_height = self._measure_text_height(len(lines), line_height, spacing) + + if text_height <= target_height or font_size <= min_font_size: + break + + font_size = max(min_font_size, font_size - 6) + + # Recompute dimensions with final font size to ensure consistency + font = ImageFont.truetype(str(font_path), font_size) + lines = self._split_title_lines(text, font, max_width) + line_height = font.getbbox("Ay")[3] - font.getbbox("Ay")[1] + spacing = max(4, int(line_height * 0.25)) + text_height = self._measure_text_height(len(lines), line_height, spacing) + 
canvas_height = max(1, text_height) + + image = Image.new("RGBA", (max_width, canvas_height), (0, 0, 0, 0)) + draw = ImageDraw.Draw(image) + y = 0 + for idx, line in enumerate(lines): + bbox = font.getbbox(line) + line_width = bbox[2] - bbox[0] + x = max(0, (max_width - line_width) // 2) + draw.text((x, y - bbox[1]), line, font=font, fill=title_color) + y += line_height + if idx < len(lines) - 1: + y += spacing + + return ImageClip(np.array(image)).with_duration(duration) + + @staticmethod + def _measure_text_height(line_count: int, line_height: int, spacing: int) -> int: + if line_count <= 0: + return line_height + return line_count * line_height + max(0, line_count - 1) * spacing + + @staticmethod + def _split_title_lines( + text: str, font: ImageFont.FreeTypeFont, max_width: int + ) -> List[str]: + words = text.split() + if not words: + return [""] + + lines: List[str] = [] + current: List[str] = [] + for word in words: + test_line = " ".join(current + [word]) if current else word + bbox = font.getbbox(test_line) + line_width = bbox[2] - bbox[0] + if line_width <= max_width or not current: + current.append(word) + if line_width > max_width and not current[:-1]: + lines.append(" ".join(current)) + current = [] + continue + + lines.append(" ".join(current)) + current = [word] + + if current: + lines.append(" ".join(current)) + + return lines + + def _materialize_audio( + self, + *, + source_path: str, + start: float, + end: float, + duration: float, + fallback_audio, + ) -> Tuple[Optional[AudioClip], bool]: + try: + with AudioFileClip(source_path) as audio_file: + segment = audio_file.subclipped(start, end) + fps = ( + getattr(segment, "fps", None) + or getattr(audio_file, "fps", None) + or 44100 + ) + samples = segment.to_soundarray(fps=fps) + except Exception: + logger.warning( + "Falha ao carregar audio independente; utilizando fluxo original", + exc_info=True, + ) + return fallback_audio, False + + audio_clip = AudioArrayClip(samples, fps=fps).with_duration(duration) + return audio_clip, True + def _collect_words( self, transcription: TranscriptionResult, start: float, end: float ) -> List[WordTiming]: @@ -424,3 +535,120 @@ class VideoRenderer: if current: lines.append(" ".join(current)) return "\n".join(lines) + + def _write_with_fallback( + self, + *, + composite: CompositeVideoClip, + output_path, + index: int, + output_dir, + ) -> None: + attempts = self._encoding_attempts() + temp_audio_path = output_dir / f"temp_audio_{index:02d}.m4a" + last_error: Exception | None = None + + for attempt in attempts: + codec = attempt["codec"] + bitrate = attempt["bitrate"] + preset = attempt["preset"] + + ffmpeg_params = ["-pix_fmt", "yuv420p"] + if preset: + ffmpeg_params = ["-preset", preset, "-pix_fmt", "yuv420p"] + + try: + logger.info( + "Renderizando clip %02d com codec %s (bitrate=%s, preset=%s)", + index, + codec, + bitrate, + preset or "default", + ) + composite.write_videofile( + str(output_path), + codec=codec, + audio_codec=self.settings.rendering.audio_codec, + fps=self.settings.rendering.fps, + bitrate=bitrate, + ffmpeg_params=ffmpeg_params, + temp_audiofile=str(temp_audio_path), + remove_temp=True, + threads=4, + ) + return + except Exception as exc: # noqa: BLE001 - propagate after fallbacks + last_error = exc + logger.warning( + "Falha ao renderizar com codec %s: %s", codec, exc, exc_info=True + ) + if output_path.exists(): + output_path.unlink(missing_ok=True) + if temp_audio_path.exists(): + temp_audio_path.unlink(missing_ok=True) + + raise RuntimeError("Todas as tentativas 
de renderizacao falharam") from last_error + + def _encoding_attempts(self) -> List[Dict[str, str | None]]: + settings = self.settings.rendering + attempts: List[Dict[str, str | None]] = [] + + attempts.append( + { + "codec": settings.video_codec, + "bitrate": settings.bitrate, + "preset": settings.preset, + } + ) + + deduped: List[Dict[str, str | None]] = [] + seen = set() + for attempt in attempts: + key = (attempt["codec"], attempt["bitrate"], attempt["preset"]) + if key in seen: + continue + seen.add(key) + deduped.append(attempt) + + return deduped + + @staticmethod + def _with_audio( + composite: CompositeVideoClip, + audio_clip, + ) -> CompositeVideoClip: + """Attach audio to a composite clip across MoviePy versions.""" + if hasattr(composite, "with_audio"): + return composite.with_audio(audio_clip) + if hasattr(composite, "set_audio"): + return composite.set_audio(audio_clip) + raise AttributeError("CompositeVideoClip does not support audio assignment") + + @staticmethod + def _make_textclip( + *, + text: str, + font_path, + font_size: int, + color: str, + size: Tuple[int, int], + ) -> TextClip: + """Create a TextClip compatible with MoviePy 1.x and 2.x. + + MoviePy 2.x removed the 'align' keyword from TextClip. We try with + 'align' for older versions and fall back to a call without it when + unsupported. + """ + kwargs = dict( + text=text, + font=str(font_path), + font_size=font_size, + color=color, + method="caption", + size=size, + ) + try: + return TextClip(**kwargs, align="center") # MoviePy 1.x style + except TypeError: + logger.debug("TextClip 'align' not supported; falling back without it") + return TextClip(**kwargs) # MoviePy 2.x style diff --git a/video_render/transcription.py b/video_render/transcription.py index b5d86db..a175659 100644 --- a/video_render/transcription.py +++ b/video_render/transcription.py @@ -118,5 +118,75 @@ class TranscriptionService: with text_path.open("w", encoding="utf-8") as fp: fp.write(result.full_text) - logger.info("Transcrição salva em %s", destination) + logger.info("Transcricao salva em %s", destination) + + @staticmethod + def load(source: Path) -> Optional[TranscriptionResult]: + json_path = source / "transcription.json" + if not json_path.exists(): + return None + + try: + with json_path.open("r", encoding="utf-8") as fp: + payload = json.load(fp) + except (OSError, json.JSONDecodeError) as exc: + logger.warning( + "Falha ao carregar transcricao existente de %s: %s", json_path, exc + ) + return None + + segments_payload = payload.get("segments", []) + if not isinstance(segments_payload, list): + logger.warning( + "Formato inesperado ao carregar transcricao de %s: 'segments' invalido", + json_path, + ) + return None + + segments: List[TranscriptSegment] = [] + for idx, segment_data in enumerate(segments_payload): + if not isinstance(segment_data, dict): + logger.debug("Segmento invalido ignorado ao carregar: %s", segment_data) + continue + try: + segment_id = int(segment_data.get("id", idx)) + start = float(segment_data["start"]) + end = float(segment_data["end"]) + except (KeyError, TypeError, ValueError): + logger.debug("Segmento sem dados obrigatorios ignorado: %s", segment_data) + continue + + text = str(segment_data.get("text", "")).strip() + words_payload = segment_data.get("words", []) + words: List[WordTiming] = [] + + if isinstance(words_payload, list): + for word_data in words_payload: + if not isinstance(word_data, dict): + continue + try: + w_start = float(word_data["start"]) + w_end = float(word_data["end"]) + except 
(KeyError, TypeError, ValueError): + logger.debug( + "Palavra sem dados obrigatorios ignorada: %s", word_data + ) + continue + word_text = str(word_data.get("text", "")).strip() + if not word_text: + continue + words.append(WordTiming(start=w_start, end=w_end, word=word_text)) + + segments.append( + TranscriptSegment( + id=segment_id, + start=start, + end=end, + text=text, + words=words, + ) + ) + + full_text = str(payload.get("full_text", "")).strip() + return TranscriptionResult(segments=segments, full_text=full_text) From 8abb8001d72cfa29eac1a1c2007661d22deaf59e Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Wed, 29 Oct 2025 08:27:02 -0300 Subject: [PATCH 12/15] Ajusta configs do compose --- docker-compose.yml | 41 ++++++++++++----------------------------- video_render/config.py | 2 +- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 338e355..0ebf9cb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,35 +3,18 @@ services: restart: unless-stopped build: . environment: - # - RABBITMQ_PASS=${RABBITMQ_PASS} - - RABBITMQ_PASS=L@l321321321 - - RABBITMQ_HOST=154.12.229.181 - - RABBITMQ_PORT=32790 - # - GEMINI_API_KEY=${GEMINI_API_KEY} - - GEMINI_API_KEY=AIzaSyB5TPjSPPZG1Qb6EtblhKFAjvCOdY15rcw - - GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-flash} - # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - - OPENROUTER_API_KEY=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 - - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free} - - FASTER_WHISPER_MODEL_SIZE=${FASTER_WHISPER_MODEL_SIZE:-small} + - FASTER_WHISPER_MODEL_SIZE=medium + - GEMINI_MODEL=gemini-2.5-flash + - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + - OPENROUTER_MODEL=openai/gpt-oss-20b:free + - RABBITMQ_PASS=${RABBITMQ_PASS} volumes: - # - "/root/videos:/app/videos" - # - "/root/outputs:/app/outputs" - - "./videos:/app/videos" - - "./outputs:/app/outputs" + - "/root/videos:/app/videos" + - "/root/outputs:/app/outputs" command: "python -u main.py" - # runtime: nvidia + networks: + - dokploy-network - # networks: - # - dokploy-network - - # deploy: - # resources: - # reservations: - # devices: - # - driver: nvidia - # count: all - # capabilities: [gpu] -# networks: -# dokploy-network: -# external: true +networks: + dokploy-network: + external: true diff --git a/video_render/config.py b/video_render/config.py index 4940751..8f346ad 100644 --- a/video_render/config.py +++ b/video_render/config.py @@ -27,7 +27,7 @@ class RabbitMQSettings: @dataclass(frozen=True) class GeminiSettings: api_key: str = os.environ.get("GEMINI_API_KEY", "") - model: str = os.environ.get("GEMINI_MODEL", "gemini-2.5-pro") + model: str = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash") safety_settings: str | None = os.environ.get("GEMINI_SAFETY_SETTINGS") temperature: float = float(os.environ.get("GEMINI_TEMPERATURE", 0.2)) top_k: int | None = ( From ae8b228ea141dcb7c31e743f7542e5589c80f825 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Wed, 29 Oct 2025 08:34:57 -0300 Subject: [PATCH 13/15] Add gemini api key env --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 0ebf9cb..b9264d9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,7 @@ services: build: . 
environment: - FASTER_WHISPER_MODEL_SIZE=medium + - GEMINI_API_KEY=${GEMINI_API_KEY} - GEMINI_MODEL=gemini-2.5-flash - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - OPENROUTER_MODEL=openai/gpt-oss-20b:free From 87c6a5e27c7ab19d648e1716a27177d82d26a864 Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Wed, 29 Oct 2025 23:58:06 -0300 Subject: [PATCH 14/15] Adiciona limpeza de arquivos apos sucesso ou falha --- video_render/media.py | 8 ++++---- video_render/pipeline.py | 32 ++++++++++++++++---------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/video_render/media.py b/video_render/media.py index a79dd4f..7fb878e 100644 --- a/video_render/media.py +++ b/video_render/media.py @@ -38,7 +38,7 @@ class MediaPreparer: existing_children = list(workspace_dir.iterdir()) if existing_children: logger.info("Limpando workspace existente para %s", sanitized_name) - # remove_paths(existing_children) + remove_paths(existing_children) destination_name = f"{sanitized_name}{source_path.suffix.lower()}" working_video_path = workspace_dir / destination_name @@ -46,9 +46,9 @@ class MediaPreparer: logger.info("Cópia do vídeo criada em %s", working_video_path) output_dir = ensure_workspace(self.settings.outputs_dir, sanitized_name) - # existing_outputs = list(output_dir.iterdir()) - # if existing_outputs: - # remove_paths(existing_outputs) + existing_outputs = list(output_dir.iterdir()) + if existing_outputs: + remove_paths(existing_outputs) audio_path = workspace_dir / "audio.wav" extract_audio_to_wav(working_video_path, audio_path) diff --git a/video_render/pipeline.py b/video_render/pipeline.py index 4401771..3c4f348 100644 --- a/video_render/pipeline.py +++ b/video_render/pipeline.py @@ -221,24 +221,24 @@ class VideoPipeline: def _handle_failure(self, context: PipelineContext, exc: Exception) -> Dict[str, Any]: logger.error("Erro na pipeline: %s", exc) - # cleanup_targets: List[Path] = [] + cleanup_targets: List[Path] = [] - # if context.workspace: - # cleanup_targets.append(context.workspace.workspace_dir) - # cleanup_targets.append(context.workspace.output_dir) - # original_path = context.workspace.source_path - # if original_path.exists(): - # cleanup_targets.append(original_path) - # else: - # sanitized = sanitize_filename(Path(context.job.filename).stem) - # job_output_dir = self.settings.outputs_dir / sanitized - # if job_output_dir.exists(): - # cleanup_targets.append(job_output_dir) - # original_path = self.settings.videos_dir / context.job.filename - # if original_path.exists(): - # cleanup_targets.append(original_path) + if context.workspace: + cleanup_targets.append(context.workspace.workspace_dir) + cleanup_targets.append(context.workspace.output_dir) + original_path = context.workspace.source_path + if original_path.exists(): + cleanup_targets.append(original_path) + else: + sanitized = sanitize_filename(Path(context.job.filename).stem) + job_output_dir = self.settings.outputs_dir / sanitized + if job_output_dir.exists(): + cleanup_targets.append(job_output_dir) + original_path = self.settings.videos_dir / context.job.filename + if original_path.exists(): + cleanup_targets.append(original_path) - # remove_paths(cleanup_targets) + remove_paths(cleanup_targets) return { "hasError": True, From c5d3e83a5f8f37da0f633ce17567e6dc7a987e6c Mon Sep 17 00:00:00 2001 From: LeoMortari Date: Wed, 12 Nov 2025 11:38:09 -0300 Subject: [PATCH 15/15] #v2 - Inicia testes da v2 - Adiciona rastreamento de objetos - Facial detection - Legenda interativa - Cortes mais precisos - Refinamento do Prompt 
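
Nota: a logica real de corte/rastreamento fica em video_render/smart_framing.py (nao reproduzida aqui). O esboco abaixo e apenas uma ilustracao hipotetica de como os dois parametros de ajuste introduzidos neste patch (SMART_FRAMING_FRAME_SKIP e SMART_FRAMING_SMOOTHING_WINDOW) devem se combinar: amostrar o ponto de foco detectado a cada N-esimo frame e suaviza-lo com media movel para o enquadramento nao "tremer". O nome smooth_focus_track e a assinatura sao hipoteticos, nao fazem parte do codigo do repositorio.

# Hypothetical sketch only -- not the implementation in video_render/smart_framing.py.
# Illustrates how SMART_FRAMING_FRAME_SKIP and SMART_FRAMING_SMOOTHING_WINDOW are
# expected to interact when stabilizing the per-frame focus point.
from collections import deque
from typing import List, Optional, Tuple

def smooth_focus_track(
    focus_points: List[Optional[Tuple[int, int]]],  # one (x, y) per frame, None = no face found
    smoothing_window: int = 20,                      # SMART_FRAMING_SMOOTHING_WINDOW
    frame_skip: int = 2,                             # SMART_FRAMING_FRAME_SKIP
) -> List[Tuple[int, int]]:
    """Moving average of detected face centers, sampling every `frame_skip`-th frame."""
    window: deque = deque(maxlen=smoothing_window)
    smoothed: List[Tuple[int, int]] = []
    last = (0, 0)
    for i, point in enumerate(focus_points):
        # Only every Nth frame feeds the average (CPU optimization), but an output
        # value is emitted for every frame so the crop position never jumps.
        if i % frame_skip == 0 and point is not None:
            window.append(point)
        if window:
            last = (
                sum(p[0] for p in window) // len(window),
                sum(p[1] for p in window) // len(window),
            )
        smoothed.append(last)
    return smoothed

# Exemplo de uso (valores ilustrativos):
# smooth_focus_track([(540, 300), None, (560, 310)], smoothing_window=20, frame_skip=1)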
--- .env.example | 47 ++ .gitignore | 3 +- docker-compose.yml | 13 +- dockerfile | 3 + main.py | 14 + prompts/generate.txt | 107 +++-- requirements.txt | 4 +- video_render/config.py | 34 +- video_render/context_detection.py | 398 +++++++++++++++++ video_render/llm.py | 221 ++++++---- video_render/media.py | 18 + video_render/pipeline.py | 37 +- video_render/rendering.py | 457 +++++++++++++------- video_render/smart_framing.py | 687 ++++++++++++++++++++++++++++++ video_render/transcription.py | 9 +- 15 files changed, 1739 insertions(+), 313 deletions(-) create mode 100644 .env.example create mode 100644 video_render/context_detection.py create mode 100644 video_render/smart_framing.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..45d20c8 --- /dev/null +++ b/.env.example @@ -0,0 +1,47 @@ +RABBITMQ_HOST=rabbitmq +RABBITMQ_PORT=5672 +RABBITMQ_USER=admin +RABBITMQ_PASS=your_password_here +RABBITMQ_QUEUE=to-render +RABBITMQ_UPLOAD_QUEUE=to-upload +RABBITMQ_PREFETCH=1 +RABBITMQ_HEARTBEAT=60 +RABBITMQ_BLOCKED_TIMEOUT=300 +OPENROUTER_API_URL=https://openrouter.ai/api/v1/chat/completions +OPENROUTER_API_KEY=your_openrouter_api_key_here + +# Model selection - Recommended options: +# - openai/gpt-oss-20b:free (Free tier, good quality) +# - qwen/qwen-2.5-72b-instruct:free (Free, excellent reasoning) +# - google/gemini-pro-1.5 (Best cost-benefit for podcasts) +# - anthropic/claude-3.5-sonnet (Premium quality, best reasoning) +OPENROUTER_MODEL=qwen/qwen-2.5-72b-instruct:free +OPENROUTER_TEMPERATURE=0.6 +OPENROUTER_PROMPT_PATH=prompts/generate.txt + +FASTER_WHISPER_MODEL_SIZE=medium +FASTER_WHISPER_DEVICE=auto + +RENDER_WIDTH=1080 +RENDER_HEIGHT=1920 + +RENDER_FPS=30 +RENDER_CODEC=libx264 +RENDER_AUDIO_CODEC=aac +RENDER_BITRATE=5000k +RENDER_PRESET=faster + +SUBTITLE_HIGHLIGHT_COLOR=#00FF00 +SUBTITLE_BASE_COLOR=#FFFFFF + +RENDER_FONT_PATH=./Montserrat.ttf +RENDER_TITLE_FONT_SIZE=110 +RENDER_SUBTITLE_FONT_SIZE=64 + +CAPTION_MIN_WORDS=2 +CAPTION_MAX_WORDS=2 + +ENABLE_SMART_FRAMING=true +SMART_FRAMING_MIN_CONFIDENCE=0.5 +SMART_FRAMING_SMOOTHING_WINDOW=20 +SMART_FRAMING_FRAME_SKIP=2 diff --git a/.gitignore b/.gitignore index 7a2b6cf..133b8c8 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ outputs/ # Ignore virtual envs venv/ env/ - +.claude # Ignore editor files .idea/ *.swp @@ -31,3 +31,4 @@ env/ # Ignore mypy and pylint cache .mypy_cache/ .pylint.d/ +CLAUDE.MD diff --git a/docker-compose.yml b/docker-compose.yml index b9264d9..628ee37 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,15 +3,18 @@ services: restart: unless-stopped build: . 
environment: - - FASTER_WHISPER_MODEL_SIZE=medium - - GEMINI_API_KEY=${GEMINI_API_KEY} - - GEMINI_MODEL=gemini-2.5-flash - - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - - OPENROUTER_MODEL=openai/gpt-oss-20b:free - RABBITMQ_PASS=${RABBITMQ_PASS} + - OPENROUTER_API_URL=${OPENROUTER_API_URL:-https://openrouter.ai/api/v1/chat/completions} + - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-oss-20b:free} + - OPENROUTER_PROMPT_PATH=${OPENROUTER_PROMPT_PATH:-prompts/generate.txt} + - FASTER_WHISPER_MODEL_SIZE=${FASTER_WHISPER_MODEL_SIZE:-medium} volumes: - "/root/videos:/app/videos" - "/root/outputs:/app/outputs" + - "/root/prompts:/app/prompts" + # - "./videos:/app/videos" + # - "./outputs:/app/outputs" command: "python -u main.py" networks: - dokploy-network diff --git a/dockerfile b/dockerfile index d146341..15bb4b8 100644 --- a/dockerfile +++ b/dockerfile @@ -23,6 +23,9 @@ RUN apt-get update && \ imagemagick \ fonts-liberation \ wget \ + libsm6 \ + libxext6 \ + libxrender-dev \ && rm -rf /var/lib/apt/lists/* COPY requirements.txt . diff --git a/main.py b/main.py index 1ef531b..1b5cb5a 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,17 @@ +import os +import warnings + +# Suppress FFmpeg/AV1 warnings for cleaner logs +os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'loglevel;quiet' +os.environ['OPENCV_LOG_LEVEL'] = 'ERROR' + +# Suppress MoviePy verbose logging +os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1' + +# Filter deprecation warnings +warnings.filterwarnings('ignore', category=DeprecationWarning) +warnings.filterwarnings('ignore', category=UserWarning, module='moviepy') + from video_render.config import load_settings from video_render.logging_utils import setup_logging from video_render.messaging import RabbitMQWorker diff --git a/prompts/generate.txt b/prompts/generate.txt index 2ab45f9..8638af2 100644 --- a/prompts/generate.txt +++ b/prompts/generate.txt @@ -1,36 +1,85 @@ -Voce e um estrategista de conteudo especializado em identificar cortes curtos de videos longos que performam bem em redes sociais. +Voce e especialista em viralidade de redes sociais (TikTok, Instagram Reels, YouTube Shorts). Analise a transcricao e selecione trechos com MAXIMO potencial viral, priorizando qualidade sobre quantidade. -FUNCAO: -- Analisar a transcricao completa de um video. -- Escolher trechos curtos (entre 60s e 90s) com maior chance de engajamento. -- O inicio do trecho deve ter um hook para engajar e prender a atenção do espectador. -- Responder APENAS em JSON valido. +PROCESSO DE ANALISE: +1. Mapear potenciais trechos na transcricao +2. Avaliar cada trecho usando sistema de pontuacao abaixo +3. Rankear do maior para menor score viral +4. Selecionar apenas os top-ranked baseado na duracao do video -FORMATO DA RESPOSTA: -{ - "highlights": [ - { - "start": , - "end": , - "summary": "Resumo conciso do porque este trecho engaja" - } - ] -} +SISTEMA DE PONTUACAO VIRAL (0-100 pontos): -REGRAS: -- Liste no maximo 6 destaques. -- Respeite a ordem cronologica. -- Nunca deixe listas vazias; se nada for relevante, inclua uma entrada com start = 0, end = 0 e summary explicando a ausencia de cortes. -- Utilize apenas valores numericos simples (ponto como separador decimal). -- Nao repita um mesmo trecho. 
+HOOK/ABERTURA (0-25 pontos): +[25] Frase choqueante, pergunta polemica ou promessa ousada +[20] Historia intrigante ou situacao inusitada +[15] Afirmacao interessante mas previsivel +[10] Introducao generica mas aceitavel +[0] "Oi", "entao", silencio ou conteudo fraco -PERSPECTIVA DE ANALISE: -- Concentre-se em momentos com gatilhos emocionais, insights, storytelling ou chamadas para acao fortes. -- Prefira trechos com comeco, meio e fim claros. -- Evite partes redundantes, silenciosas ou extremamente tecnicas. +GATILHO EMOCIONAL (0-25 pontos): +[25] Emocao extrema: raiva, choque, riso intenso, inspiracao profunda +[20] Emocao forte: surpresa, indignacao, humor, curiosidade intensa +[15] Emocao moderada: interesse, leve humor, curiosidade +[10] Emocao fraca: informativo sem impacto emocional +[0] Monotono, tecnico, sem apelo emocional + +VALOR/UTILIDADE (0-20 pontos): +[20] Segredo valioso, insight transformador ou informacao exclusiva +[15] Ensina algo pratico e imediatamente aplicavel +[10] Opiniao interessante ou perspectiva util +[5] Informacao generica ou conhecimento comum +[0] Nenhum valor pratico, puro enrolation + +ESTRUTURA NARRATIVA (0-15 pontos): +[15] Historia completa com inicio, conflito/climax e resolucao +[10] Segmento com comeco e fim coerentes +[5] Trecho com sentido mas cortado abruptamente +[0] Fragmento sem contexto ou conclusao + +RITMO E ENERGIA (0-15 pontos): +[15] Dinamico, sem pausas, alta energia, palavras impactantes +[10] Bom ritmo com pausas naturais curtas +[5] Ritmo lento mas aceitavel +[0] Muitas pausas, hesitacoes, monotonia, silencio + +REGRAS DE QUANTIDADE: +5-10 min: 3 clipes (minimo 1 se score alto) +10-20 min: 4 clipes +20-30 min: 5 clipes +30+ min: 6 clipes (maximo absoluto) + +IMPORTANTE: Priorize qualidade. Melhor 3 clipes score 80+ que 6 clipes score 50. Se poucos momentos virais, retorne apenas os melhores (minimo 1). + +CRITERIOS DE SELECAO: +- Score viral maior ou igual 60 pontos (idealmente maior ou igual 70) +- Duracao ideal: 60-90s +- Duracao minima: 60s | Duracao maxima: 120s +- Sem sobreposicao (end de um menor que start do proximo) +- Inicio e fim coerentes + +EVITE: +- Introducoes genericas +- Trechos com silencio/pausas maiores que 3s +- Explicacoes tecnicas sem gancho emocional +- Segmentos sem conclusao +- Momentos de transicao + +FORMATO JSON (retorne APENAS isto): +{"highlights":[{"start":,"end":,"summary":"Score estimado e gatilhos principais"}]} + +REGRAS TECNICAS: +- Float com ponto decimal (45.5 NAO 45,5) +- Timestamps exatos dos segments fornecidos +- Ordem cronologica (start crescente) +- Minimo 1, maximo 6 highlights +- Summary conciso (1-2 frases) TAREFA: -- Leia a transcricao recebida no campo "transcript". -- Use a lista de marcas de tempo detalhadas no campo "segments" para embasar suas escolhas. -- Produza a saida JSON descrita acima. +1. Leia transcricao e timestamps +2. Avalie e pontue trechos mentalmente +3. Rankear por score viral +4. Selecione top-ranked baseado na duracao +5. Retorne JSON +6. Se video fraco, retorne pelo menos 1 highlight + +Objetivo: MAXIMIZAR chance de viralizar. Seja criterioso, apenas melhores trechos. 
diff --git a/requirements.txt b/requirements.txt index f38966b..758aa59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ numpy>=1.26.0 requests pika faster-whisper==1.2.0 -google-genai +mediapipe==0.10.18 +opencv-python==4.10.0.84 +scipy>=1.11.0 diff --git a/video_render/config.py b/video_render/config.py index 8f346ad..547d10b 100644 --- a/video_render/config.py +++ b/video_render/config.py @@ -13,6 +13,8 @@ TEMP_ROOT = BASE_DIR / "temp" @dataclass(frozen=True) class RabbitMQSettings: + # host: str = os.environ.get("RABBITMQ_HOST", "154.12.229.181") + # port: int = int(os.environ.get("RABBITMQ_PORT", 32790)) host: str = os.environ.get("RABBITMQ_HOST", "rabbitmq") port: int = int(os.environ.get("RABBITMQ_PORT", 5672)) user: str = os.environ.get("RABBITMQ_USER", "admin") @@ -24,33 +26,19 @@ class RabbitMQSettings: blocked_timeout: int = int(os.environ.get("RABBITMQ_BLOCKED_TIMEOUT", 300)) -@dataclass(frozen=True) -class GeminiSettings: - api_key: str = os.environ.get("GEMINI_API_KEY", "") - model: str = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash") - safety_settings: str | None = os.environ.get("GEMINI_SAFETY_SETTINGS") - temperature: float = float(os.environ.get("GEMINI_TEMPERATURE", 0.2)) - top_k: int | None = ( - int(os.environ["GEMINI_TOP_K"]) if os.environ.get("GEMINI_TOP_K") else None - ) - top_p: float | None = ( - float(os.environ["GEMINI_TOP_P"]) if os.environ.get("GEMINI_TOP_P") else None - ) - prompt_path: str = os.environ.get("GEMINI_PROMPT_PATH", "prompts/generate.txt") - - @dataclass(frozen=True) class OpenRouterSettings: - api_key: str = os.environ.get("OPENROUTER_API_KEY", "") + api_key: str = os.environ.get("OPENROUTER_API_KEY", "https://openrouter.ai/api/v1/chat/completions") model: str = os.environ.get( "OPENROUTER_MODEL", "openai/gpt-oss-20b:free" ) temperature: float = float(os.environ.get("OPENROUTER_TEMPERATURE", 0.6)) + prompt_path: str = os.environ.get("OPENROUTER_PROMPT_PATH", "prompts/generate.txt") @dataclass(frozen=True) class WhisperSettings: - model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "small") + model_size: str = os.environ.get("FASTER_WHISPER_MODEL_SIZE", "medium") device: str | None = os.environ.get("FASTER_WHISPER_DEVICE") compute_type: str | None = os.environ.get("FASTER_WHISPER_COMPUTE_TYPE") download_root: Path = Path( @@ -67,19 +55,23 @@ class RenderingSettings: audio_codec: str = os.environ.get("RENDER_AUDIO_CODEC", "aac") bitrate: str = os.environ.get("RENDER_BITRATE", "5000k") preset: str = os.environ.get("RENDER_PRESET", "faster") - highlight_color: str = os.environ.get("SUBTITLE_HIGHLIGHT_COLOR", "#FFD200") + highlight_color: str = os.environ.get("SUBTITLE_HIGHLIGHT_COLOR", "#00FF00") base_color: str = os.environ.get("SUBTITLE_BASE_COLOR", "#FFFFFF") font_path: Path = Path(os.environ.get("RENDER_FONT_PATH", "./Montserrat.ttf")) title_font_size: int = int(os.environ.get("RENDER_TITLE_FONT_SIZE", 110)) subtitle_font_size: int = int(os.environ.get("RENDER_SUBTITLE_FONT_SIZE", 64)) - caption_min_words: int = int(os.environ.get("CAPTION_MIN_WORDS", 3)) - caption_max_words: int = int(os.environ.get("CAPTION_MAX_WORDS", 4)) + caption_min_words: int = int(os.environ.get("CAPTION_MIN_WORDS", 2)) + caption_max_words: int = int(os.environ.get("CAPTION_MAX_WORDS", 2)) + # Smart framing settings + enable_smart_framing: bool = os.environ.get("ENABLE_SMART_FRAMING", "true").lower() in ("true", "1", "yes") + smart_framing_min_confidence: float = float(os.environ.get("SMART_FRAMING_MIN_CONFIDENCE", 0.5)) + 
smart_framing_smoothing_window: int = int(os.environ.get("SMART_FRAMING_SMOOTHING_WINDOW", 20)) + smart_framing_frame_skip: int = int(os.environ.get("SMART_FRAMING_FRAME_SKIP", 2)) # Process every Nth frame (CPU optimization) @dataclass(frozen=True) class Settings: rabbitmq: RabbitMQSettings = RabbitMQSettings() - gemini: GeminiSettings = GeminiSettings() openrouter: OpenRouterSettings = OpenRouterSettings() whisper: WhisperSettings = WhisperSettings() rendering: RenderingSettings = RenderingSettings() diff --git a/video_render/context_detection.py b/video_render/context_detection.py new file mode 100644 index 0000000..e342b4c --- /dev/null +++ b/video_render/context_detection.py @@ -0,0 +1,398 @@ +""" +Context detection module for video analysis. + +This module provides functionality to detect faces, track people, +and identify who is speaking in video content using MediaPipe and audio analysis. +""" +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import List, Optional, Tuple + +import cv2 +import mediapipe as mp +import numpy as np +from scipy import signal + +logger = logging.getLogger(__name__) + + +@dataclass +class FaceDetection: + """Represents a detected face in a frame.""" + x: int + y: int + width: int + height: int + confidence: float + center_x: int + center_y: int + landmarks: Optional[List[Tuple[int, int]]] = None + + +@dataclass +class PersonTracking: + """Tracks a person across frames.""" + person_id: int + face: FaceDetection + is_speaking: bool + speaking_confidence: float + frame_number: int + + +@dataclass +class FrameContext: + """Context information for a video frame.""" + frame_number: int + timestamp: float + detected_faces: List[FaceDetection] + active_speakers: List[int] # indices of speaking faces + primary_focus: Optional[Tuple[int, int]] # (x, y) center point + layout_mode: str # "single", "dual_split", "grid" + + +class MediaPipeDetector: + """Face and pose detection using MediaPipe.""" + + def __init__(self, min_detection_confidence: float = 0.5, min_tracking_confidence: float = 0.5): + self.min_detection_confidence = min_detection_confidence + self.min_tracking_confidence = min_tracking_confidence + self.mp_face_detection = mp.solutions.face_detection + self.mp_face_mesh = mp.solutions.face_mesh + + self.face_detection = self.mp_face_detection.FaceDetection( + min_detection_confidence=min_detection_confidence, + model_selection=1 + ) + + self.face_mesh = self.mp_face_mesh.FaceMesh( + max_num_faces=5, + min_detection_confidence=min_detection_confidence, + min_tracking_confidence=min_tracking_confidence, + static_image_mode=False + ) + + logger.info("MediaPipe detector initialized") + + def detect_faces(self, frame: np.ndarray) -> List[FaceDetection]: + """ + Detect faces in a frame. 
+ + Args: + frame: RGB image array + + Returns: + List of detected faces + """ + height, width = frame.shape[:2] + + if len(frame.shape) == 2: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) + elif frame.shape[2] == 4: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGRA2RGB) + else: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + results = self.face_detection.process(frame_rgb) + + faces = [] + if results.detections: + for detection in results.detections: + bbox = detection.location_data.relative_bounding_box + + x = int(bbox.xmin * width) + y = int(bbox.ymin * height) + w = int(bbox.width * width) + h = int(bbox.height * height) + + x = max(0, min(x, width - 1)) + y = max(0, min(y, height - 1)) + w = min(w, width - x) + h = min(h, height - y) + + center_x = x + w // 2 + center_y = y + h // 2 + + confidence = detection.score[0] if detection.score else 0.0 + + faces.append(FaceDetection( + x=x, + y=y, + width=w, + height=h, + confidence=confidence, + center_x=center_x, + center_y=center_y + )) + + return faces + + def detect_face_landmarks(self, frame: np.ndarray) -> List[FaceDetection]: + """ + Detect faces with landmarks for lip sync detection. + + Args: + frame: RGB image array + + Returns: + List of detected faces with landmark information + """ + height, width = frame.shape[:2] + + if len(frame.shape) == 2: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB) + elif frame.shape[2] == 4: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGRA2RGB) + else: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + results = self.face_mesh.process(frame_rgb) + + faces = [] + if results.multi_face_landmarks: + for face_landmarks in results.multi_face_landmarks: + xs = [lm.x for lm in face_landmarks.landmark] + ys = [lm.y for lm in face_landmarks.landmark] + + x_min, x_max = min(xs), max(xs) + y_min, y_max = min(ys), max(ys) + + x = int(x_min * width) + y = int(y_min * height) + w = int((x_max - x_min) * width) + h = int((y_max - y_min) * height) + + center_x = x + w // 2 + center_y = y + h // 2 + + lip_landmarks = [] + for idx in [13, 14, 78, 308]: + lm = face_landmarks.landmark[idx] + lip_landmarks.append((int(lm.x * width), int(lm.y * height))) + + faces.append(FaceDetection( + x=x, + y=y, + width=w, + height=h, + confidence=1.0, + center_x=center_x, + center_y=center_y, + landmarks=lip_landmarks + )) + + return faces + + def close(self): + """Release MediaPipe resources.""" + self.face_detection.close() + self.face_mesh.close() + + +class AudioActivityDetector: + """Detects speech activity in audio.""" + + def __init__(self, sample_rate: int = 44100, frame_duration_ms: int = 30): + self.sample_rate = sample_rate + self.frame_duration_ms = frame_duration_ms + self.frame_size = int(sample_rate * frame_duration_ms / 1000) + + logger.info(f"Audio activity detector initialized (sr={sample_rate}, frame={frame_duration_ms}ms)") + + def detect_speaking_periods( + self, + audio_samples: np.ndarray, + threshold: float = 0.02, + min_speech_duration: float = 0.1 + ) -> List[Tuple[float, float]]: + """ + Detect periods of speech in audio. 
+ + Args: + audio_samples: Audio samples array + threshold: Energy threshold for speech detection + min_speech_duration: Minimum duration of speech in seconds + + Returns: + List of (start_time, end_time) tuples in seconds + """ + if audio_samples.ndim > 1: + audio_samples = audio_samples.mean(axis=1) + + energies = [] + for i in range(0, len(audio_samples), self.frame_size): + frame = audio_samples[i:i + self.frame_size] + if len(frame) > 0: + energy = np.sqrt(np.mean(frame ** 2)) + energies.append(energy) + + speaking_frames = [e > threshold for e in energies] + + periods = [] + start_frame = None + + for i, is_speaking in enumerate(speaking_frames): + if is_speaking and start_frame is None: + start_frame = i + elif not is_speaking and start_frame is not None: + start_time = start_frame * self.frame_duration_ms / 1000 + end_time = i * self.frame_duration_ms / 1000 + + if end_time - start_time >= min_speech_duration: + periods.append((start_time, end_time)) + + start_frame = None + + if start_frame is not None: + start_time = start_frame * self.frame_duration_ms / 1000 + end_time = len(speaking_frames) * self.frame_duration_ms / 1000 + if end_time - start_time >= min_speech_duration: + periods.append((start_time, end_time)) + + return periods + + def is_speaking_at_time(self, speaking_periods: List[Tuple[float, float]], time: float) -> bool: + """Check if there is speech activity at a given time.""" + for start, end in speaking_periods: + if start <= time <= end: + return True + return False + + +class ContextAnalyzer: + """Analyzes video context to determine focus and layout.""" + + def __init__(self): + self.detector = MediaPipeDetector() + self.audio_detector = AudioActivityDetector() + self.previous_faces: List[FaceDetection] = [] + + logger.info("Context analyzer initialized") + + def analyze_frame( + self, + frame: np.ndarray, + timestamp: float, + frame_number: int, + speaking_periods: Optional[List[Tuple[float, float]]] = None + ) -> FrameContext: + """ + Analyze a single frame to extract context information. 
+ + Args: + frame: Video frame (BGR format from OpenCV) + timestamp: Frame timestamp in seconds + frame_number: Frame index + speaking_periods: List of (start, end) times where speech is detected + + Returns: + FrameContext with detection results + """ + faces = self.detector.detect_face_landmarks(frame) + + if not faces: + faces = self.detector.detect_faces(frame) + + # Determine who is speaking + active_speakers = [] + for i, face in enumerate(faces): + is_speaking = False + + if speaking_periods and self.audio_detector.is_speaking_at_time(speaking_periods, timestamp): + is_speaking = True + + if face.landmarks and len(self.previous_faces) > i: + is_speaking = is_speaking or self._detect_lip_movement(face, self.previous_faces[i]) + + if is_speaking: + active_speakers.append(i) + + num_faces = len(faces) + num_speakers = len(active_speakers) + + if num_faces == 0: + layout_mode = "single" + elif num_faces == 1: + layout_mode = "single" + elif num_faces == 2: + layout_mode = "dual_split" + elif num_faces >= 3: + layout_mode = "dual_split" + else: + layout_mode = "single" + + primary_focus = self._calculate_focus_point(faces, active_speakers) + + self.previous_faces = faces + + return FrameContext( + frame_number=frame_number, + timestamp=timestamp, + detected_faces=faces, + active_speakers=active_speakers, + primary_focus=primary_focus, + layout_mode=layout_mode + ) + + def _detect_lip_movement(self, current_face: FaceDetection, previous_face: FaceDetection) -> bool: + """ + Detect lip movement by comparing landmarks between frames. + + Args: + current_face: Current frame face detection + previous_face: Previous frame face detection + + Returns: + True if significant lip movement detected + """ + if not current_face.landmarks or not previous_face.landmarks: + return False + + def lip_distance(landmarks): + if len(landmarks) < 4: + return 0 + + upper = np.array(landmarks[0:2]) + lower = np.array(landmarks[2:4]) + return np.linalg.norm(upper.mean(axis=0) - lower.mean(axis=0)) + + current_dist = lip_distance(current_face.landmarks) + previous_dist = lip_distance(previous_face.landmarks) + + threshold = 2.0 + return abs(current_dist - previous_dist) > threshold + + def _calculate_focus_point( + self, + faces: List[FaceDetection], + active_speakers: List[int] + ) -> Optional[Tuple[int, int]]: + """ + Calculate the primary focus point based on detected faces and speakers. + + IMPORTANT: This focuses on ONE person to avoid focusing on empty space (table). + When multiple people are present, we pick the most relevant person, not average positions. 
+ + Args: + faces: List of detected faces + active_speakers: Indices of faces that are speaking + + Returns: + (x, y) tuple of focus center, or None if no faces + """ + if not faces: + return None + + if active_speakers: + speaker_faces = [faces[i] for i in active_speakers if i < len(faces)] + if speaker_faces: + primary_speaker = max(speaker_faces, key=lambda f: f.confidence) + return (primary_speaker.center_x, primary_speaker.center_y) + + most_confident = max(faces, key=lambda f: f.confidence) + return (most_confident.center_x, most_confident.center_y) + + def close(self): + """Release resources.""" + self.detector.close() diff --git a/video_render/llm.py b/video_render/llm.py index 84d2d4f..1f2d798 100644 --- a/video_render/llm.py +++ b/video_render/llm.py @@ -2,11 +2,11 @@ from __future__ import annotations import json import logging +import time +import os from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Dict, List -from google import genai -from google.genai import types as genai_types import requests from video_render.config import BASE_DIR, Settings @@ -14,27 +14,24 @@ from video_render.transcription import TranscriptionResult logger = logging.getLogger(__name__) -OPENROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions" +OPENROUTER_ENDPOINT = os.environ.get("OPENROUTER_API_URL", "https://openrouter.ai/api/v1/chat/completions") -class GeminiHighlighter: +class OpenRouterCopywriter: def __init__(self, settings: Settings) -> None: - if not settings.gemini.api_key: - raise RuntimeError("GEMINI_API_KEY nao foi definido") - - prompt_path = Path(settings.gemini.prompt_path) + if not settings.openrouter.api_key: + raise RuntimeError("OPENROUTER_API_KEY nao foi definido") + self.settings = settings + prompt_path = Path(settings.openrouter.prompt_path) if not prompt_path.is_absolute(): prompt_path = BASE_DIR / prompt_path - if not prompt_path.exists(): - raise FileNotFoundError(f"Prompt do Gemini nao encontrado: {prompt_path}") - - self.prompt_template = prompt_path.read_text(encoding="utf-8") - self.settings = settings - self.client = genai.Client() + raise FileNotFoundError(f"Prompt nao encontrado: {prompt_path}") + self.highlights_prompt_template = prompt_path.read_text(encoding="utf-8") def generate_highlights(self, transcription: TranscriptionResult) -> List[Dict]: + """Generate video highlights using OpenRouter GPT-OSS with retry logic.""" payload = { "transcript": transcription.full_text, "segments": [ @@ -47,93 +44,139 @@ class GeminiHighlighter: ], } - try: - response = self._call_gemini(payload) - except Exception as exc: - logger.error("Gemini API request falhou: %s", exc) - raise RuntimeError("Gemini API request falhou") from exc - - raw_text = self._extract_response_text(response) - - parsed = self._extract_json(raw_text) - highlights = parsed.get("highlights") - if not isinstance(highlights, list): - raise ValueError("Resposta do Gemini invalida: campo 'highlights' ausente") - return highlights - - def _call_gemini(self, payload: Dict[str, Any]) -> Any: - contents = [ - { - "role": "user", - "parts": [ - {"text": self.prompt_template}, - {"text": json.dumps(payload, ensure_ascii=False)}, - ], - } - ] - - request_kwargs: Dict[str, Any] = { - "model": self.settings.gemini.model, - "contents": contents, + body = { + "model": self.settings.openrouter.model, + "temperature": self.settings.openrouter.temperature, + "messages": [ + {"role": "system", "content": self.highlights_prompt_template}, + { + "role": "user", + "content": 
json.dumps(payload, ensure_ascii=False), + }, + ], } - config = self._build_generation_config() - if config is not None: - request_kwargs["config"] = config + headers = { + "Authorization": f"Bearer {self.settings.openrouter.api_key}", + "Content-Type": "application/json", + "X-Title": "Video Render - Highlights Detection" + } - return self.client.models.generate_content(**request_kwargs) + logger.info(f"Calling OpenRouter with model: {self.settings.openrouter.model}") + logger.debug(f"Request payload keys: transcript_length={len(payload['transcript'])}, segments_count={len(payload['segments'])}") - def _build_generation_config(self) -> Optional[genai_types.GenerateContentConfig]: - config_kwargs: Dict[str, Any] = {} - if self.settings.gemini.temperature is not None: - config_kwargs["temperature"] = self.settings.gemini.temperature - if self.settings.gemini.top_p is not None: - config_kwargs["top_p"] = self.settings.gemini.top_p - if self.settings.gemini.top_k is not None: - config_kwargs["top_k"] = self.settings.gemini.top_k + # Retry configuration for rate limits (especially free tier) + max_retries = 5 + base_delay = 5 # Start with 5s delay - if not config_kwargs: - return None + for attempt in range(max_retries): + try: + response = requests.post( + url=OPENROUTER_ENDPOINT, + data=json.dumps(body), + headers=headers, + timeout=120, + ) + response.raise_for_status() + data = response.json() + break - return genai_types.GenerateContentConfig(**config_kwargs) + except requests.exceptions.HTTPError as exc: + if exc.response.status_code == 429: + if attempt < max_retries - 1: + # Exponential backoff: 5s, 10s, 20s, 40s, 80s + delay = base_delay * (2 ** attempt) + logger.warning(f"Rate limit atingido (429). Aguardando {delay}s antes de tentar novamente (tentativa {attempt + 1}/{max_retries})") + time.sleep(delay) + continue + else: + logger.error("Rate limit atingido apos todas as tentativas") + logger.error("Solucao: Use um modelo pago ou adicione creditos na OpenRouter") + raise RuntimeError("OpenRouter rate limit excedido") from exc + else: + logger.error(f"OpenRouter API request falhou com status {exc.response.status_code}: {exc}") + raise RuntimeError("OpenRouter API request falhou") from exc - @staticmethod - def _extract_response_text(response: Any) -> str: - text = getattr(response, "text", None) - if text: - return str(text).strip() + except Exception as exc: + logger.error("OpenRouter API request falhou: %s", exc) + raise RuntimeError("OpenRouter API request falhou") from exc - candidates = getattr(response, "candidates", None) or [] - for candidate in candidates: - content = getattr(candidate, "content", None) - if not content: + # Debug: log response structure + logger.info(f"OpenRouter response keys: {list(data.keys())}") + if "error" in data: + logger.error(f"OpenRouter API error: {data.get('error')}") + raise RuntimeError(f"OpenRouter API error: {data.get('error')}") + + choices = data.get("choices") or [] + if not choices: + logger.error(f"OpenRouter response completa: {json.dumps(data, indent=2)}") + raise RuntimeError("OpenRouter nao retornou escolhas") + + message = choices[0].get("message", {}).get("content") + if not message: + raise RuntimeError("Resposta do OpenRouter sem conteudo") + + parsed = self._extract_json(message) + highlights = parsed.get("highlights") + if not isinstance(highlights, list): + raise ValueError("Resposta do OpenRouter invalida: campo 'highlights' ausente") + + valid_highlights = [] + for highlight in highlights: + try: + start = 
float(highlight.get("start", 0)) + end = float(highlight.get("end", 0)) + summary = str(highlight.get("summary", "")).strip() + + if start < 0 or end < 0: + logger.warning(f"Highlight ignorado: timestamps negativos (start={start}, end={end})") + continue + + if end <= start: + logger.warning(f"Highlight ignorado: end <= start (start={start}, end={end})") + continue + + duration = end - start + if duration < 45: + logger.warning(f"Highlight ignorado: muito curto ({duration}s, minimo 45s)") + continue + + if duration > 120: + logger.warning(f"Highlight ignorado: muito longo ({duration}s, maximo 120s)") + continue + + if not summary: + logger.warning(f"Highlight ignorado: summary vazio") + continue + + valid_highlights.append({ + "start": start, + "end": end, + "summary": summary + }) + + except (TypeError, ValueError) as e: + logger.warning(f"Highlight invalido ignorado: {highlight} - {e}") continue - parts = getattr(content, "parts", None) or [] - for part in parts: - part_text = getattr(part, "text", None) - if part_text: - return str(part_text).strip() - raise RuntimeError("Resposta do Gemini sem texto") + if not valid_highlights: + logger.warning("Nenhum highlight valido retornado pelo OpenRouter") + total_duration = 75.0 + if transcription.segments: + total_duration = max(seg.end for seg in transcription.segments) - @staticmethod - def _extract_json(response_text: str) -> Dict: - try: - return json.loads(response_text) - except json.JSONDecodeError: - start = response_text.find("{") - end = response_text.rfind("}") - if start == -1 or end == -1: - raise - subset = response_text[start : end + 1] - return json.loads(subset) + fallback_end = min(75.0, total_duration) + if fallback_end < 60.0: + fallback_end = min(60.0, total_duration) + return [{ + "start": 0.0, + "end": fallback_end, + "summary": "Trecho inicial do video (fallback automatico)" + }] -class OpenRouterCopywriter: - def __init__(self, settings: Settings) -> None: - if not settings.openrouter.api_key: - raise RuntimeError("OPENROUTER_API_KEY nao foi definido") - self.settings = settings + logger.info(f"OpenRouter retornou {len(valid_highlights)} highlights validos") + return valid_highlights def generate_titles(self, highlights: List[Dict]) -> List[str]: if not highlights: diff --git a/video_render/media.py b/video_render/media.py index 7fb878e..d99a71d 100644 --- a/video_render/media.py +++ b/video_render/media.py @@ -35,11 +35,29 @@ class MediaPreparer: sanitized_name = sanitize_filename(Path(filename).stem) workspace_dir = ensure_workspace(self.settings.videos_dir, sanitized_name) + transcription_json = workspace_dir / "transcription.json" + transcription_txt = workspace_dir / "transcription.txt" + temp_transcription_json = None + temp_transcription_txt = None + + if transcription_json.exists(): + temp_transcription_json = workspace_dir.parent / f".{sanitized_name}_transcription.json.tmp" + shutil.copy2(transcription_json, temp_transcription_json) + if transcription_txt.exists(): + temp_transcription_txt = workspace_dir.parent / f".{sanitized_name}_transcription.txt.tmp" + shutil.copy2(transcription_txt, temp_transcription_txt) + existing_children = list(workspace_dir.iterdir()) if existing_children: logger.info("Limpando workspace existente para %s", sanitized_name) remove_paths(existing_children) + if temp_transcription_json and temp_transcription_json.exists(): + shutil.move(str(temp_transcription_json), str(transcription_json)) + logger.info("Transcrição preservada em %s", transcription_json) + if temp_transcription_txt 
and temp_transcription_txt.exists(): + shutil.move(str(temp_transcription_txt), str(transcription_txt)) + destination_name = f"{sanitized_name}{source_path.suffix.lower()}" working_video_path = workspace_dir / destination_name shutil.copy2(source_path, working_video_path) diff --git a/video_render/pipeline.py b/video_render/pipeline.py index 3c4f348..0357788 100644 --- a/video_render/pipeline.py +++ b/video_render/pipeline.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional from video_render.config import Settings -from video_render.llm import GeminiHighlighter, OpenRouterCopywriter +from video_render.llm import OpenRouterCopywriter from video_render.media import MediaPreparer, VideoWorkspace from video_render.transcription import TranscriptionResult, TranscriptionService from video_render.utils import remove_paths, sanitize_filename @@ -55,8 +55,7 @@ class VideoPipeline: self.settings = settings self.media_preparer = MediaPreparer(settings) self.transcriber = TranscriptionService(settings) - self.highlighter = GeminiHighlighter(settings) - self.copywriter = OpenRouterCopywriter(settings) + self.llm_service = OpenRouterCopywriter(settings) # Using OpenRouter for both highlights and titles self.renderer = VideoRenderer(settings) def process_message(self, message: Dict[str, Any]) -> Dict[str, Any]: @@ -65,12 +64,11 @@ class VideoPipeline: self._prepare_workspace(context) self._generate_transcription(context) self._determine_highlights(context) - self._generate_titles(context) self._render_clips(context) + return self._build_success_payload(context) except Exception as exc: logger.exception("Falha ao processar vídeo %s", context.job.filename) - # return self._handle_failure(context, exc) def _parse_job(self, message: Dict[str, Any]) -> JobMessage: filename = message.get("filename") @@ -102,7 +100,10 @@ class VideoPipeline: context.transcription = existing return - transcription = self.transcriber.transcribe(context.workspace.audio_path) + transcription = self.transcriber.transcribe( + context.workspace.audio_path, + output_dir=context.workspace.workspace_dir + ) TranscriptionService.persist(transcription, context.workspace.workspace_dir) context.transcription = transcription @@ -111,10 +112,10 @@ class VideoPipeline: raise RuntimeError("Transcricao nao disponivel") try: - highlights_raw = self.highlighter.generate_highlights(context.transcription) + highlights_raw = self.llm_service.generate_highlights(context.transcription) except Exception: logger.exception( - "Falha ao gerar destaques com Gemini; aplicando fallback padrao." + "Falha ao gerar destaques com OpenRouter; aplicando fallback padrao." ) context.highlight_windows = [self._build_fallback_highlight(context)] return @@ -130,11 +131,13 @@ class VideoPipeline: continue summary = str(item.get("summary", "")).strip() + title = str(item.get("title", summary[:60])).strip() + if end <= start: logger.debug("Highlight com intervalo invalido ignorado: %s", item) continue - windows.append(HighlightWindow(start=start, end=end, summary=summary)) + windows.append(HighlightWindow(start=start, end=end, summary=summary, title=title)) if not windows: windows.append(self._build_fallback_highlight(context)) @@ -142,17 +145,12 @@ class VideoPipeline: context.highlight_windows = windows def _generate_titles(self, context: PipelineContext) -> None: - if not context.highlight_windows: - return + """DEPRECATED: Titles are now generated together with highlights. 
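A minimal sketch of how a title now travels with each highlight item, assuming the model returns an optional "title" key (note the validator above only forwards start/end/summary, so in practice the summary[:60] fallback is what populates the window title); the helper name below is hypothetical:

    def to_window(item: dict) -> dict:
        # Mirrors the fallback used in _determine_highlights: title defaults
        # to the first 60 characters of the summary when absent.
        summary = str(item.get("summary", "")).strip()
        title = str(item.get("title", summary[:60])).strip()
        return {"start": float(item["start"]), "end": float(item["end"]),
                "summary": summary, "title": title}

    to_window({"start": 12.0, "end": 70.5, "summary": "Convidado explica o roadmap do produto"})
    # -> "title" falls back to the summary text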
- highlight_dicts = [ - {"start": window.start, "end": window.end, "summary": window.summary} - for window in context.highlight_windows - ] - titles = self.copywriter.generate_titles(highlight_dicts) - - for window, title in zip(context.highlight_windows, titles): - window.title = title.strip() + This method is kept for backwards compatibility but does nothing. + Titles are extracted from highlights in _determine_highlights(). + """ + pass def _build_fallback_highlight(self, context: PipelineContext) -> HighlightWindow: if not context.transcription: @@ -167,6 +165,7 @@ class VideoPipeline: start=0.0, end=max(last_end, 10.0), summary="Sem destaque identificado; fallback automatico.", + title="Confira este momento", ) def _render_clips(self, context: PipelineContext) -> None: diff --git a/video_render/rendering.py b/video_render/rendering.py index 1a80b9a..ae69813 100644 --- a/video_render/rendering.py +++ b/video_render/rendering.py @@ -15,6 +15,7 @@ from PIL import Image, ImageColor, ImageDraw, ImageFont from video_render.config import Settings from video_render.transcription import TranscriptionResult, WordTiming +from video_render.smart_framing import SmartFramer, extract_audio_samples logger = logging.getLogger(__name__) @@ -54,7 +55,41 @@ class CaptionBuilder: self.space_width = self.font.getbbox(" ")[2] - self.font.getbbox(" ")[0] def build(self, words: Sequence[WordTiming], clip_start: float) -> List[CaptionClipSet]: - grouped = self._group_words(words) + # Filter out empty, whitespace-only, or very short words (likely noise) + valid_words = [ + w for w in words + if w.word + and w.word.strip() + and len(w.word.strip()) >= 2 # At least 2 characters + and not w.word.strip() in ['...', '..', '.', ',', '-', 'hmm', 'hm', 'ah', 'eh', 'uh'] # Not just punctuation or filler + ] + + # Note: We don't filter out words based on gaps here + # Gap detection is handled in _group_words_with_gaps + # This ensures captions disappear during silence naturally + filtered_words = valid_words + + # Calculate speech density (words per second) + # If density is too low, it's likely just noise/silence being misinterpreted + if filtered_words: + first_word_time = filtered_words[0].start + last_word_time = filtered_words[-1].end + duration = last_word_time - first_word_time + + if duration > 0: + words_per_second = len(filtered_words) / duration + # Typical speech is 2-3 words per second + # If less than 0.5 words/second, it's probably silence/noise + if words_per_second < 0.5: + logger.debug(f"Captions suprimidas: densidade muito baixa ({words_per_second:.2f} palavras/seg)") + return [] + + # Only show captions if we have at least 3 valid words (reduced from 5 for 2-word groups) + # This prevents showing captions for noise/mumbling + if len(filtered_words) < 3: + return [] + + grouped = self._group_words_with_gaps(filtered_words) clip_sets: List[CaptionClipSet] = [] for group in grouped: @@ -101,6 +136,92 @@ class CaptionBuilder: if len(widths) > 1: total_width += self.space_width * (len(widths) - 1) + # Check if text needs to wrap to multiple lines + # If total width exceeds canvas width, break into 2 lines + needs_wrap = total_width > self.canvas_width + + if needs_wrap: + # Split into 2 lines - try to balance the lines + mid_point = len(texts) // 2 + line1_texts = texts[:mid_point] + line2_texts = texts[mid_point:] + line1_widths = widths[:mid_point] + line2_widths = widths[mid_point:] + + # Calculate widths for each line + line1_width = sum(line1_widths) + if len(line1_widths) > 1: + line1_width += 
self.space_width * (len(line1_widths) - 1) + + line2_width = sum(line2_widths) + if len(line2_widths) > 1: + line2_width += self.space_width * (len(line2_widths) - 1) + + # Double the canvas height for 2 lines + canvas_height = self.canvas_height * 2 + base_image = Image.new("RGBA", (self.canvas_width, canvas_height), (0, 0, 0, 0)) + base_draw = ImageDraw.Draw(base_image) + highlight_images: List[Image.Image] = [] + + # Stroke settings: 8px black stroke for better readability + stroke_width = 8 + stroke_color = (0, 0, 0, 255) # Black + + # Draw line 1 + x = max(0, (self.canvas_width - line1_width) // 2) + y = self.baseline + for i, (text, width) in enumerate(zip(line1_texts, line1_widths)): + base_draw.text( + (x, y), + text, + font=self.font, + fill=self.base_color, + stroke_width=stroke_width, + stroke_fill=stroke_color + ) + + highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0)) + highlight_draw = ImageDraw.Draw(highlight_image) + highlight_draw.text( + (x, y), + text, + font=self.font, + fill=self.highlight_color, + stroke_width=stroke_width, + stroke_fill=stroke_color + ) + highlight_images.append(highlight_image) + x += width + self.space_width + + # Draw line 2 + x = max(0, (self.canvas_width - line2_width) // 2) + y = self.baseline + self.text_height + 5 # 5px spacing between lines + for i, (text, width) in enumerate(zip(line2_texts, line2_widths)): + base_draw.text( + (x, y), + text, + font=self.font, + fill=self.base_color, + stroke_width=stroke_width, + stroke_fill=stroke_color + ) + + highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0)) + highlight_draw = ImageDraw.Draw(highlight_image) + highlight_draw.text( + (x, y), + text, + font=self.font, + fill=self.highlight_color, + stroke_width=stroke_width, + stroke_fill=stroke_color + ) + highlight_images.append(highlight_image) + x += width + self.space_width + + return base_image, highlight_images + + # Single line rendering (original code) start_x = max(0, (self.canvas_width - total_width) // 2) base_image = Image.new("RGBA", (self.canvas_width, self.canvas_height), (0, 0, 0, 0)) @@ -108,13 +229,31 @@ class CaptionBuilder: highlight_images: List[Image.Image] = [] x = start_x - for text, width in zip(texts, widths): - base_draw.text((x, self.baseline), text, font=self.font, fill=self.base_color) + # Stroke settings: 8px black stroke for better readability + stroke_width = 8 + stroke_color = (0, 0, 0, 255) # Black + for text, width in zip(texts, widths): + # Draw base text with stroke + base_draw.text( + (x, self.baseline), + text, + font=self.font, + fill=self.base_color, + stroke_width=stroke_width, + stroke_fill=stroke_color + ) + + # Draw highlight text with stroke highlight_image = Image.new("RGBA", base_image.size, (0, 0, 0, 0)) highlight_draw = ImageDraw.Draw(highlight_image) highlight_draw.text( - (x, self.baseline), text, font=self.font, fill=self.highlight_color + (x, self.baseline), + text, + font=self.font, + fill=self.highlight_color, + stroke_width=stroke_width, + stroke_fill=stroke_color ) highlight_images.append(highlight_image) @@ -153,6 +292,44 @@ class CaptionBuilder: return grouped + def _group_words_with_gaps(self, words: Sequence[WordTiming]) -> List[List[WordTiming]]: + """ + Group words into 2-word chunks, respecting silence gaps. 
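The grouping rule is easiest to see with concrete timings (a sketch; the words and timestamps are invented and stand in for WordTiming objects):

    from dataclasses import dataclass

    @dataclass
    class Word:                      # stand-in for WordTiming, illustration only
        word: str
        start: float
        end: float

    words = [Word("ola", 0.0, 0.3), Word("pessoal", 0.4, 0.8), Word("entao", 1.0, 1.3),
             Word("hoje", 4.0, 4.3), Word("vamos", 4.4, 4.8)]
    # Expected groups with 2-word chunks and a 1.5 s gap threshold:
    # [["ola", "pessoal"], ["entao"], ["hoje", "vamos"]]
    # The 2.7 s pause before "hoje" closes the single-word group ["entao"].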
+ Creates natural breaks where there are pauses > 1.5s + """ + if not words: + return [] + + grouped: List[List[WordTiming]] = [] + buffer: List[WordTiming] = [] + + for i, word in enumerate(words): + # Check if there's a long pause before this word + if i > 0: + gap = word.start - words[i-1].end + # If gap > 1.5s, finish current buffer and start new group + if gap > 1.5: + if buffer: + grouped.append(buffer) + buffer = [] + + buffer.append(word) + + # Group into 2 words maximum + if len(buffer) == 2: + grouped.append(buffer) + buffer = [] + + # Handle remaining words + if buffer: + if len(buffer) == 1 and grouped: + # Add single remaining word to last group + grouped[-1].append(buffer[0]) + else: + grouped.append(buffer) + + return [grp for grp in grouped if grp] + @staticmethod def _clean_word(text: str) -> str: text = text.strip() @@ -164,6 +341,12 @@ class VideoRenderer: def __init__(self, settings: Settings) -> None: self.settings = settings self.captions = CaptionBuilder(settings) + self.smart_framer = SmartFramer( + target_width=settings.rendering.frame_width, + target_height=settings.rendering.frame_height, + frame_skip=settings.rendering.smart_framing_frame_skip, + smoothing_window=settings.rendering.smart_framing_smoothing_window + ) def render( self, @@ -234,26 +417,100 @@ class VideoRenderer: duration = end - start frame_w = self.settings.rendering.frame_width frame_h = self.settings.rendering.frame_height - top_h = int(frame_h * 0.18) + # Removed top panel - no longer showing title bottom_h = int(frame_h * 0.20) - video_area_h = max(1, frame_h - top_h - bottom_h) - scale_factor = min( - frame_w / subclip.w, - video_area_h / subclip.h, - ) - resized_clip = subclip.resized(scale_factor) - video_y = top_h + (video_area_h - resized_clip.h) // 2 - video_clip = resized_clip.with_position( - ((frame_w - resized_clip.w) // 2, video_y) - ) + # Use smart framing to create intelligent 9:16 video (if enabled) + if self.settings.rendering.enable_smart_framing: + logger.info(f"Creating smart framing plan for clip {index} ({start:.2f}s - {end:.2f}s)") + + try: + # Extract audio for speech detection + audio_samples = extract_audio_samples(source_path, start, end) + + # Create framing plan + framing_plan = self.smart_framer.create_framing_plan( + video_path=source_path, + start_time=start, + end_time=end, + audio_samples=audio_samples + ) + + # Apply smart framing based on detected layout + use_split_screen = framing_plan.layout_mode in ["dual_split", "grid"] + video_clip = self.smart_framer.apply_framing( + video_clip=subclip, + framing_plan=framing_plan, + use_split_screen=use_split_screen + ) + + logger.info(f"Smart framing applied: layout={framing_plan.layout_mode}, " + f"faces_detected={len(framing_plan.frame_contexts[0].detected_faces) if framing_plan.frame_contexts else 0}") + + except Exception as exc: + logger.warning(f"Smart framing failed for clip {index}, falling back to center crop: {exc}", exc_info=True) + + # Fallback to center crop (maintains aspect ratio, crops to fit) + video_area_h = max(1, frame_h - bottom_h) + + # Use MAX to ensure video covers entire area (will crop excess) + scale_factor = max( + frame_w / subclip.w, + video_area_h / subclip.h, + ) + + # Resize to cover area + resized_clip = subclip.resized(scale_factor) + + # Calculate crop region (center crop) + crop_x1 = max(0, (resized_clip.w - frame_w) // 2) + crop_y1 = max(0, (resized_clip.h - video_area_h) // 2) + crop_x2 = crop_x1 + frame_w + crop_y2 = crop_y1 + video_area_h + + # Crop to fit target dimensions 
using MoviePy crop(x1, y1, x2, y2) + cropped_clip = resized_clip.cropped( + x1=crop_x1, + y1=crop_y1, + x2=crop_x2, + y2=crop_y2 + ) + + video_clip = cropped_clip.with_position((0, 0)) + resized_clip.close() + else: + # Use center crop (smart framing disabled) + logger.info(f"Using center crop for clip {index} (smart framing disabled)") + video_area_h = max(1, frame_h - bottom_h) + + # Use MAX to ensure video covers entire area (will crop excess) + scale_factor = max( + frame_w / subclip.w, + video_area_h / subclip.h, + ) + + # Resize to cover area + resized_clip = subclip.resized(scale_factor) + + # Calculate crop region (center crop) + crop_x1 = max(0, (resized_clip.w - frame_w) // 2) + crop_y1 = max(0, (resized_clip.h - video_area_h) // 2) + crop_x2 = crop_x1 + frame_w + crop_y2 = crop_y1 + video_area_h + + # Crop to fit target dimensions using MoviePy crop(x1, y1, x2, y2) + cropped_clip = resized_clip.cropped( + x1=crop_x1, + y1=crop_y1, + x2=crop_x2, + y2=crop_y2 + ) + + video_clip = cropped_clip.with_position((0, 0)) + resized_clip.close() background = ColorClip(size=(frame_w, frame_h), color=(0, 0, 0)).with_duration(duration) - top_panel = ( - ColorClip(size=(frame_w, top_h), color=(12, 12, 12)) - .with_duration(duration) - .with_opacity(0.85) - ) + # Removed top panel and title - no longer needed bottom_panel = ( ColorClip(size=(frame_w, bottom_h), color=(12, 12, 12)) .with_position((0, frame_h - bottom_h)) @@ -261,34 +518,42 @@ class VideoRenderer: .with_opacity(0.85) ) - title_clip = self._build_title_clip( - title=title, - summary=summary, - duration=duration, - frame_width=frame_w, - top_panel_height=top_h, - ) - title_clip = title_clip.with_position( - ((frame_w - title_clip.w) // 2, (top_h - title_clip.h) // 2) - ) - words = self._collect_words(transcription, start, end) - caption_sets = self.captions.build(words, clip_start=start) + + # Calculate speech coverage: how much of the clip has actual speech? 
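The coverage heuristic that follows reduces to a few lines; a standalone sketch using the same 30% threshold (the word objects are hypothetical stand-ins for WordTiming):

    def speech_coverage(words, clip_duration: float) -> float:
        # Fraction of the clip duration that actually contains spoken words.
        if clip_duration <= 0:
            return 0.0
        return sum(w.end - w.start for w in words) / clip_duration

    # e.g. 18 s of speech inside a 75 s clip gives 0.24, which is below the
    # 0.3 cutoff, so captions would be suppressed for that clip.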
+ # If less than 30% of the clip has speech, don't show captions + clip_duration = end - start + if words and clip_duration > 0: + # Calculate total time with speech + total_speech_time = sum(w.end - w.start for w in words) + speech_coverage = total_speech_time / clip_duration + + if speech_coverage < 0.3: # Less than 30% speech + logger.debug(f"Captions suprimidas: cobertura de fala baixa ({speech_coverage:.1%})") + words = [] # Clear words to prevent captions + + # Only build captions if there are actual words to display + # This prevents empty/placeholder captions from appearing + caption_sets = self.captions.build(words, clip_start=start) if words else [] caption_clips = [] caption_resources: List[ImageClip] = [] - caption_area_top = frame_h - bottom_h - caption_area_height = bottom_h + + # Position captions 120px below center (for 1920px height, center is 960px, so 1080px) + # This ensures they're visible, well-positioned, and don't interfere with faces + # Range: 100-150px as requested, using 120px for optimal positioning + center_y = frame_h // 2 + caption_y = center_y + 120 caption_margin = 20 - raw_caption_y = caption_area_top + (caption_area_height - self.captions.canvas_height) // 2 - min_caption_y = caption_area_top + caption_margin - max_caption_y = ( - caption_area_top + caption_area_height - self.captions.canvas_height - caption_margin - ) + + # Ensure captions stay within reasonable bounds (no top panel now) + min_caption_y = caption_margin + max_caption_y = frame_h - bottom_h - self.captions.canvas_height - caption_margin + if max_caption_y < min_caption_y: caption_y = min_caption_y else: - caption_y = min(max(raw_caption_y, min_caption_y), max_caption_y) + caption_y = min(max(caption_y, min_caption_y), max_caption_y) for clip_set in caption_sets: base_positioned = clip_set.base.with_position(("center", caption_y)) @@ -299,30 +564,20 @@ class VideoRenderer: caption_clips.append(positioned) caption_resources.append(highlight) - if not caption_clips: - fallback_text = self._wrap_text(summary or title, max_width=frame_w - 160) - caption_clips.append( - self._make_textclip( - text=fallback_text, - font_path=self.settings.rendering.font_path, - font_size=self.settings.rendering.subtitle_font_size, - color=self.settings.rendering.base_color, - size=(frame_w - 160, max(40, self.captions.canvas_height)), - ) - .with_duration(duration) - .with_position(("center", caption_y)) - ) + # No fallback captions - if there are no dynamic captions, show nothing + # This matches Opus Clip behavior where captions only appear when there's actual speech audio_clip, audio_needs_close = self._materialize_audio( source_path=source_path, start=start, end=end, duration=duration, - fallback_audio=video_clip.audio or resized_clip.audio or subclip.audio, + fallback_audio=video_clip.audio or subclip.audio, ) + # Composite with background, bottom panel, video, and captions only (no top panel or title) composite = CompositeVideoClip( - [background, top_panel, bottom_panel, video_clip, title_clip, *caption_clips], + [background, bottom_panel, video_clip, *caption_clips], size=(frame_w, frame_h), ) if audio_clip is not None: @@ -337,11 +592,8 @@ class VideoRenderer: ) composite.close() - resized_clip.close() video_clip.close() - title_clip.close() background.close() - top_panel.close() bottom_panel.close() for clip in caption_clips: clip.close() @@ -352,95 +604,6 @@ class VideoRenderer: return str(output_path) - def _build_title_clip( - self, - *, - title: str, - summary: str, - duration: float, - 
frame_width: int, - top_panel_height: int, - ) -> ImageClip: - text = (title or summary or "").strip() - if not text: - text = summary or "" - - max_width = max(200, frame_width - 160) - font_size = self.settings.rendering.title_font_size - min_font_size = max(28, int(font_size * 0.6)) - target_height = max(80, top_panel_height - 40) - title_color = ImageColor.getrgb(self.settings.rendering.base_color) - font_path = self.settings.rendering.font_path - - while True: - font = ImageFont.truetype(str(font_path), font_size) - lines = self._split_title_lines(text, font, max_width) - line_height = font.getbbox("Ay")[3] - font.getbbox("Ay")[1] - spacing = max(4, int(line_height * 0.25)) - text_height = self._measure_text_height(len(lines), line_height, spacing) - - if text_height <= target_height or font_size <= min_font_size: - break - - font_size = max(min_font_size, font_size - 6) - - # Recompute dimensions with final font size to ensure consistency - font = ImageFont.truetype(str(font_path), font_size) - lines = self._split_title_lines(text, font, max_width) - line_height = font.getbbox("Ay")[3] - font.getbbox("Ay")[1] - spacing = max(4, int(line_height * 0.25)) - text_height = self._measure_text_height(len(lines), line_height, spacing) - canvas_height = max(1, text_height) - - image = Image.new("RGBA", (max_width, canvas_height), (0, 0, 0, 0)) - draw = ImageDraw.Draw(image) - y = 0 - for idx, line in enumerate(lines): - bbox = font.getbbox(line) - line_width = bbox[2] - bbox[0] - x = max(0, (max_width - line_width) // 2) - draw.text((x, y - bbox[1]), line, font=font, fill=title_color) - y += line_height - if idx < len(lines) - 1: - y += spacing - - return ImageClip(np.array(image)).with_duration(duration) - - @staticmethod - def _measure_text_height(line_count: int, line_height: int, spacing: int) -> int: - if line_count <= 0: - return line_height - return line_count * line_height + max(0, line_count - 1) * spacing - - @staticmethod - def _split_title_lines( - text: str, font: ImageFont.FreeTypeFont, max_width: int - ) -> List[str]: - words = text.split() - if not words: - return [""] - - lines: List[str] = [] - current: List[str] = [] - for word in words: - test_line = " ".join(current + [word]) if current else word - bbox = font.getbbox(test_line) - line_width = bbox[2] - bbox[0] - if line_width <= max_width or not current: - current.append(word) - if line_width > max_width and not current[:-1]: - lines.append(" ".join(current)) - current = [] - continue - - lines.append(" ".join(current)) - current = [word] - - if current: - lines.append(" ".join(current)) - - return lines - def _materialize_audio( self, *, diff --git a/video_render/smart_framing.py b/video_render/smart_framing.py new file mode 100644 index 0000000..76087ba --- /dev/null +++ b/video_render/smart_framing.py @@ -0,0 +1,687 @@ +""" +Smart framing module for intelligent video cropping and composition. + +This module provides functionality to create 9:16 vertical videos with +intelligent framing that follows the action and speakers. 
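A rough sketch of the two-step flow the renderer drives (file path and timestamps are made up; SmartFramer and extract_audio_samples are the names defined in this module):

    from moviepy.video.io.VideoFileClip import VideoFileClip

    framer = SmartFramer(target_width=1080, target_height=1920)
    samples = extract_audio_samples("videos/episodio.mp4", 120.0, 185.0)
    plan = framer.create_framing_plan("videos/episodio.mp4", 120.0, 185.0, audio_samples=samples)
    with VideoFileClip("videos/episodio.mp4") as clip:
        vertical = framer.apply_framing(
            clip.subclipped(120.0, 185.0),
            plan,
            use_split_screen=plan.layout_mode in ("dual_split", "grid"),
        )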
+""" +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import List, Optional, Tuple + +import cv2 +import numpy as np +from moviepy.video.VideoClip import VideoClip +from moviepy.video.io.VideoFileClip import VideoFileClip +from scipy import signal + +from video_render.context_detection import ContextAnalyzer, FrameContext, FaceDetection + +logger = logging.getLogger(__name__) + + +@dataclass +class CropRegion: + """Defines a crop region for a frame.""" + x: int + y: int + width: int + height: int + + +@dataclass +class FramingPlan: + """Complete framing plan for a video segment.""" + frame_contexts: List[FrameContext] + crop_regions: List[CropRegion] + layout_mode: str + fps: float + + +class SmartFramer: + """Creates intelligent 9:16 framing for horizontal videos.""" + + def __init__( + self, + target_width: int = 1080, + target_height: int = 1920, + frame_skip: int = 2, + smoothing_window: int = 15 + ): + self.target_width = target_width + self.target_height = target_height + self.target_aspect = target_height / target_width + + # Performance parameters + self.frame_skip = frame_skip # Process every Nth frame (CPU optimization) + + # Smoothing parameters + self.smoothing_window = smoothing_window + self.max_velocity = 30 # pixels per frame (reduced for smoother transitions) + + logger.info(f"Smart framer initialized (target: {target_width}x{target_height}, frame_skip={frame_skip})") + + def create_framing_plan( + self, + video_path: str, + start_time: float, + end_time: float, + audio_samples: Optional[np.ndarray] = None + ) -> FramingPlan: + """ + Analyze video and create a complete framing plan. + + Args: + video_path: Path to video file + start_time: Start time in seconds + end_time: End time in seconds + audio_samples: Optional audio samples for speech detection + + Returns: + FramingPlan with all frame contexts and crop regions + """ + analyzer = ContextAnalyzer() + + # Detect speaking periods from audio if available + speaking_periods = None + if audio_samples is not None: + speaking_periods = analyzer.audio_detector.detect_speaking_periods(audio_samples) + + # Open video with error suppression for AV1 codec warnings + import os + os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'loglevel;quiet' + + cap = cv2.VideoCapture(video_path) + fps = cap.get(cv2.CAP_PROP_FPS) + + # Calculate frame range + start_frame = int(start_time * fps) + end_frame = int(end_time * fps) + + # Set to start frame + cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame) + + frame_contexts = [] + frame_number = start_frame + processed_count = 0 + + logger.info(f"Analyzing frames {start_frame} to {end_frame} (fps={fps}, skip={self.frame_skip})") + + while frame_number < end_frame: + ret, frame = cap.read() + if not ret: + break + + # Only process every Nth frame for performance (CPU optimization) + if processed_count % self.frame_skip == 0: + timestamp = frame_number / fps + context = analyzer.analyze_frame(frame, timestamp, frame_number, speaking_periods) + frame_contexts.append(context) + + frame_number += 1 + processed_count += 1 + + # Get video dimensions before releasing capture + source_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + source_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + cap.release() + analyzer.close() + + # Determine overall layout mode (most common) + layout_modes = [ctx.layout_mode for ctx in frame_contexts] + if layout_modes: + overall_layout = max(set(layout_modes), key=layout_modes.count) + else: + overall_layout = "single" + + # 
Calculate crop regions based on contexts + + crop_regions = self._calculate_crop_regions( + frame_contexts, + source_width, + source_height + ) + + return FramingPlan( + frame_contexts=frame_contexts, + crop_regions=crop_regions, + layout_mode=overall_layout, + fps=fps + ) + + def _calculate_crop_regions( + self, + contexts: List[FrameContext], + source_width: int, + source_height: int + ) -> List[CropRegion]: + """ + Calculate smooth crop regions for each frame. + + Args: + contexts: List of frame contexts + source_width: Source video width + source_height: Source video height + + Returns: + List of crop regions + """ + if not contexts: + return [] + + # Calculate ideal crop dimensions maintaining EXACT 9:16 aspect ratio + source_aspect = source_width / source_height + + if source_aspect > self.target_aspect: + # Source is wider - crop horizontally (use full height) + crop_height = source_height + crop_width = int(crop_height / self.target_aspect) + + # Ensure crop width fits within source + if crop_width > source_width: + crop_width = source_width + crop_height = int(crop_width * self.target_aspect) + else: + # Source is taller - crop vertically (use full width) + crop_width = source_width + crop_height = int(crop_width * self.target_aspect) + + # Ensure crop height fits within source + if crop_height > source_height: + crop_height = source_height + crop_width = int(crop_height / self.target_aspect) + + # Calculate center points for each frame + # Since we now always focus on ONE person directly (not averaging), + # we can use the focus point directly without complex validation + center_xs = [] + center_ys = [] + + for ctx in contexts: + if ctx.primary_focus: + # Primary focus is now always a single person's center, never averaged + # This means it will never be on the table/empty space + center_xs.append(ctx.primary_focus[0]) + center_ys.append(ctx.primary_focus[1]) + else: + # Default to center only if no faces detected at all + center_xs.append(source_width // 2) + center_ys.append(source_height // 2) + + # Smooth the center points + if len(center_xs) > self.smoothing_window: + kernel_size = min(self.smoothing_window, len(center_xs)) + if kernel_size % 2 == 0: + kernel_size -= 1 + + center_xs = signal.medfilt(center_xs, kernel_size=kernel_size).tolist() + center_ys = signal.medfilt(center_ys, kernel_size=kernel_size).tolist() + + # Limit velocity (prevent jarring movements) + center_xs = self._limit_velocity(center_xs, self.max_velocity) + center_ys = self._limit_velocity(center_ys, self.max_velocity) + + # Convert to crop regions + crop_regions = [] + for center_x, center_y in zip(center_xs, center_ys): + # Calculate top-left corner + x = int(center_x - crop_width // 2) + y = int(center_y - crop_height // 2) + + # Clamp to valid bounds + x = max(0, min(x, source_width - crop_width)) + y = max(0, min(y, source_height - crop_height)) + + crop_regions.append(CropRegion( + x=x, + y=y, + width=crop_width, + height=crop_height + )) + + return crop_regions + + def _limit_velocity(self, positions: List[float], max_velocity: float) -> List[float]: + """ + Limit the velocity of position changes. 
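A quick numeric check of the clamping implemented below, assuming the default max_velocity of 30 px per analysed frame:

    positions = [0, 100, 110]            # raw face-centre x positions
    limited = [positions[0]]
    for p in positions[1:]:
        delta = max(-30, min(30, p - limited[-1]))
        limited.append(limited[-1] + delta)
    # limited == [0, 30, 60]: the virtual camera never jumps more than
    # 30 px between analysed frames.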
+ + Args: + positions: List of positions + max_velocity: Maximum allowed change per frame + + Returns: + Smoothed positions + """ + if len(positions) <= 1: + return positions + + limited = [positions[0]] + + for i in range(1, len(positions)): + delta = positions[i] - limited[i - 1] + if abs(delta) > max_velocity: + delta = max_velocity if delta > 0 else -max_velocity + + limited.append(limited[i - 1] + delta) + + return limited + + def apply_framing( + self, + video_clip: VideoFileClip, + framing_plan: FramingPlan, + use_split_screen: bool = False + ) -> VideoClip: + """ + Apply smart framing to a video clip. + + Args: + video_clip: Source video clip + framing_plan: Framing plan to apply + use_split_screen: Whether to use split screen for multiple people + + Returns: + Reframed video clip + """ + # Handle different layout modes + if framing_plan.layout_mode in ["single", "single_speaker"]: + # Single person or single speaker - use focused single framing + return self._apply_single_framing(video_clip, framing_plan) + elif framing_plan.layout_mode == "dual_split" and use_split_screen: + # Two people in conversation - use split screen + return self._apply_split_screen(video_clip, framing_plan) + elif framing_plan.layout_mode == "grid" and use_split_screen: + # 3+ people - use grid layout + return self._apply_grid_layout(video_clip, framing_plan) + else: + # Fallback to single framing + return self._apply_single_framing(video_clip, framing_plan) + + def _apply_single_framing( + self, + video_clip: VideoFileClip, + framing_plan: FramingPlan + ) -> VideoClip: + """ + Apply single-focus framing (following one person or action). + + Args: + video_clip: Source video clip + framing_plan: Framing plan + + Returns: + Reframed video clip + """ + def make_frame(t): + # Get the original frame + frame = video_clip.get_frame(t) + + # Ensure we have valid crop regions + if not framing_plan.crop_regions: + # Fallback: return center crop + h, w = frame.shape[:2] + crop_h = int(w * self.target_aspect) + crop_w = w + if crop_h > h: + crop_h = h + crop_w = int(h / self.target_aspect) + y = (h - crop_h) // 2 + x = (w - crop_w) // 2 + cropped = frame[y:y + crop_h, x:x + crop_w] + else: + # Calculate exact frame index with decimal precision for interpolation + exact_frame_idx = (t * framing_plan.fps) / self.frame_skip + + # Get the two adjacent analyzed frames + idx_floor = int(exact_frame_idx) + idx_ceil = idx_floor + 1 + + # Interpolation factor (0.0 to 1.0) + alpha = exact_frame_idx - idx_floor + + # Clamp indices to valid range + idx_floor = max(0, min(idx_floor, len(framing_plan.crop_regions) - 1)) + idx_ceil = max(0, min(idx_ceil, len(framing_plan.crop_regions) - 1)) + + # Get crop regions + crop1 = framing_plan.crop_regions[idx_floor] + crop2 = framing_plan.crop_regions[idx_ceil] + + # Linear interpolation between crop regions + x = int(crop1.x * (1 - alpha) + crop2.x * alpha) + y = int(crop1.y * (1 - alpha) + crop2.y * alpha) + width = int(crop1.width * (1 - alpha) + crop2.width * alpha) + height = int(crop1.height * (1 - alpha) + crop2.height * alpha) + + # Ensure crop stays within frame bounds + h, w = frame.shape[:2] + x = max(0, min(x, w - width)) + y = max(0, min(y, h - height)) + width = min(width, w - x) + height = min(height, h - y) + + # Crop the frame + cropped = frame[y:y + height, x:x + width] + + # Resize to target dimensions + resized = cv2.resize( + cropped, + (self.target_width, self.target_height), + interpolation=cv2.INTER_LINEAR + ) + + return resized + + # MoviePy 2.x compatible way to 
create VideoClip + new_clip = VideoClip(duration=video_clip.duration) + new_clip.size = (self.target_width, self.target_height) + new_clip.frame_function = make_frame + return new_clip + + def _apply_split_screen( + self, + video_clip: VideoFileClip, + framing_plan: FramingPlan + ) -> VideoClip: + """ + Apply split screen for two people. + + Args: + video_clip: Source video clip + framing_plan: Framing plan + + Returns: + Split screen video clip + """ + def make_frame(t): + frame = video_clip.get_frame(t) + # Calculate exact frame index with decimal precision for smooth interpolation + exact_frame_idx = (t * framing_plan.fps) / self.frame_skip + frame_idx = int(exact_frame_idx) + + # Ensure we have valid contexts + if not framing_plan.frame_contexts: + # Fallback to simple center crop + h, w = frame.shape[:2] + crop_h = int(w * self.target_aspect) + crop_w = w + if crop_h > h: + crop_h = h + crop_w = int(h / self.target_aspect) + y = (h - crop_h) // 2 + x = (w - crop_w) // 2 + cropped = frame[y:y + crop_h, x:x + crop_w] + return cv2.resize(cropped, (self.target_width, self.target_height), interpolation=cv2.INTER_LINEAR) + + # Clamp index to valid range + frame_idx = max(0, min(frame_idx, len(framing_plan.frame_contexts) - 1)) + context = framing_plan.frame_contexts[frame_idx] + + # Create output frame + output = np.zeros((self.target_height, self.target_width, 3), dtype=np.uint8) + + if len(context.detected_faces) >= 2: + # Split vertically 50/50 (two columns) + half_width = self.target_width // 2 + + # Select the 2 most relevant faces + # Priority: ALWAYS show active speaker first + most confident other person + if context.active_speakers and len(context.active_speakers) >= 1: + # Get the PRIMARY speaker (most confident among active speakers) + speaker_faces = [context.detected_faces[i] for i in context.active_speakers + if i < len(context.detected_faces)] + + primary_speaker = max(speaker_faces, key=lambda f: f.confidence) + + # Get OTHER faces (not the primary speaker) + other_faces = [f for f in context.detected_faces if f != primary_speaker] + + if len(speaker_faces) >= 2: + # Multiple speakers: show primary + second most confident speaker + other_speakers = [f for f in speaker_faces if f != primary_speaker] + secondary_person = max(other_speakers, key=lambda f: f.confidence) + elif other_faces: + # One speaker: show speaker + most confident other person + secondary_person = max(other_faces, key=lambda f: f.confidence) + else: + # Fallback: only one person detected + secondary_person = primary_speaker + + selected_faces = [primary_speaker, secondary_person] + else: + # No speakers: take 2 most confident faces + selected_faces = sorted(context.detected_faces, key=lambda f: f.confidence, reverse=True)[:2] + + # Sort selected faces by horizontal position for consistent left/right placement + faces = sorted(selected_faces, key=lambda f: f.center_x) + left_face = faces[0] + right_face = faces[1] + + # Process each person's frame + for idx, face in enumerate([left_face, right_face]): + # Calculate crop region focused on this person + # Each person gets half the width, full target aspect ratio (9:16) + # This ensures NO distortion when resizing + + # For split screen: each side is half_width x full_height + # We need to maintain 9:16 aspect for each half + half_width = self.target_width // 2 + half_aspect = self.target_height / half_width # Aspect ratio for half + + # Determine crop size based on face with padding + face_width = max(face.width, frame.shape[1] // 4) # At least 1/4 of frame 
width + crop_width = int(face_width * 2.5) # Add padding around face + crop_height = int(crop_width * half_aspect) # Maintain correct aspect + + # Ensure crop fits in frame, maintaining aspect ratio + max_crop_width = frame.shape[1] // 2 # Half the source width + max_crop_height = frame.shape[0] # Full source height + + # If crop is too wide, scale down proportionally + if crop_width > max_crop_width: + crop_width = max_crop_width + crop_height = int(crop_width * half_aspect) + + # If crop is too tall, scale down proportionally + if crop_height > max_crop_height: + crop_height = max_crop_height + crop_width = int(crop_height / half_aspect) + + # Center crop on face + x = max(0, face.center_x - crop_width // 2) + y = max(0, face.center_y - crop_height // 2) + + # Clamp to frame boundaries + x = min(x, frame.shape[1] - crop_width) + y = min(y, frame.shape[0] - crop_height) + + # Extract and resize crop + cropped = frame[y:y + crop_height, x:x + crop_width] + resized = cv2.resize( + cropped, + (half_width, self.target_height), + interpolation=cv2.INTER_LINEAR + ) + + # Place in output at appropriate horizontal position + x_offset = idx * half_width + output[:, x_offset:x_offset + half_width] = resized + else: + # Fall back to single framing + if framing_plan.crop_regions: + crop_idx = max(0, min(frame_idx, len(framing_plan.crop_regions) - 1)) + crop = framing_plan.crop_regions[crop_idx] + cropped = frame[crop.y:crop.y + crop.height, crop.x:crop.x + crop.width] + else: + # Fallback to center crop if no crop regions available + h, w = frame.shape[:2] + crop_h = int(w * self.target_aspect) + crop_w = w + if crop_h > h: + crop_h = h + crop_w = int(h / self.target_aspect) + y = (h - crop_h) // 2 + x = (w - crop_w) // 2 + cropped = frame[y:y + crop_h, x:x + crop_w] + output = cv2.resize( + cropped, + (self.target_width, self.target_height), + interpolation=cv2.INTER_LINEAR + ) + + return output + + # MoviePy 2.x compatible way to create VideoClip + new_clip = VideoClip(duration=video_clip.duration) + new_clip.size = (self.target_width, self.target_height) + new_clip.frame_function = make_frame + return new_clip + + def _apply_grid_layout( + self, + video_clip: VideoFileClip, + framing_plan: FramingPlan + ) -> VideoClip: + """ + Apply grid layout for 3+ people. 
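With concrete numbers, the 2x2 cell bookkeeping used below looks like this for a 1080x1920 canvas (a sketch; face indices follow detection order):

    cell_w, cell_h = 1080 // 2, 1920 // 2          # 540 x 960 per cell
    for idx in range(4):
        row, col = idx // 2, idx % 2
        x_off, y_off = col * cell_w, row * cell_h
        # idx 0 -> (0, 0), idx 1 -> (540, 0), idx 2 -> (0, 960), idx 3 -> (540, 960)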
+ + Args: + video_clip: Source video clip + framing_plan: Framing plan + + Returns: + Grid layout video clip + """ + def make_frame(t): + frame = video_clip.get_frame(t) + # Calculate exact frame index with decimal precision for smooth interpolation + exact_frame_idx = (t * framing_plan.fps) / self.frame_skip + frame_idx = int(exact_frame_idx) + + # Ensure we have valid contexts + if not framing_plan.frame_contexts: + # Fallback to simple center crop + h, w = frame.shape[:2] + crop_h = int(w * self.target_aspect) + crop_w = w + if crop_h > h: + crop_h = h + crop_w = int(h / self.target_aspect) + y = (h - crop_h) // 2 + x = (w - crop_w) // 2 + cropped = frame[y:y + crop_h, x:x + crop_w] + return cv2.resize(cropped, (self.target_width, self.target_height), interpolation=cv2.INTER_LINEAR) + + # Clamp index to valid range + frame_idx = max(0, min(frame_idx, len(framing_plan.frame_contexts) - 1)) + context = framing_plan.frame_contexts[frame_idx] + + output = np.zeros((self.target_height, self.target_width, 3), dtype=np.uint8) + + num_faces = len(context.detected_faces) + + if num_faces >= 3: + # Create 2x2 grid + cell_width = self.target_width // 2 + cell_height = self.target_height // 2 + + for idx, face in enumerate(context.detected_faces[:4]): + # Calculate grid position + row = idx // 2 + col = idx % 2 + + # Each grid cell maintains aspect ratio (square in this case: cell_width = cell_height) + cell_aspect = cell_height / cell_width + + # Crop around face with correct aspect ratio + crop_width = frame.shape[1] // 2 + crop_height = int(crop_width * cell_aspect) + + # Ensure crop fits in frame, maintaining aspect + max_crop_width = frame.shape[1] // 2 + max_crop_height = frame.shape[0] // 2 + + if crop_width > max_crop_width: + crop_width = max_crop_width + crop_height = int(crop_width * cell_aspect) + + if crop_height > max_crop_height: + crop_height = max_crop_height + crop_width = int(crop_height / cell_aspect) + + # Center crop on face + x = max(0, face.center_x - crop_width // 2) + y = max(0, face.center_y - crop_height // 2) + + # Clamp to frame boundaries + x = min(x, frame.shape[1] - crop_width) + y = min(y, frame.shape[0] - crop_height) + + cropped = frame[y:y + crop_height, x:x + crop_width] + resized = cv2.resize( + cropped, + (cell_width, cell_height), + interpolation=cv2.INTER_LINEAR + ) + + # Place in grid + y_offset = row * cell_height + x_offset = col * cell_width + output[y_offset:y_offset + cell_height, x_offset:x_offset + cell_width] = resized + else: + # Fall back to single framing + if framing_plan.crop_regions: + crop_idx = max(0, min(frame_idx, len(framing_plan.crop_regions) - 1)) + crop = framing_plan.crop_regions[crop_idx] + cropped = frame[crop.y:crop.y + crop.height, crop.x:crop.x + crop.width] + else: + # Fallback to center crop if no crop regions available + h, w = frame.shape[:2] + crop_h = int(w * self.target_aspect) + crop_w = w + if crop_h > h: + crop_h = h + crop_w = int(h / self.target_aspect) + y = (h - crop_h) // 2 + x = (w - crop_w) // 2 + cropped = frame[y:y + crop_h, x:x + crop_w] + output = cv2.resize( + cropped, + (self.target_width, self.target_height), + interpolation=cv2.INTER_LINEAR + ) + + return output + + # MoviePy 2.x compatible way to create VideoClip + new_clip = VideoClip(duration=video_clip.duration) + new_clip.size = (self.target_width, self.target_height) + new_clip.frame_function = make_frame + return new_clip + + +def extract_audio_samples(video_path: str, start_time: float, end_time: float) -> Optional[np.ndarray]: + """ + Extract 
audio samples from video for speech detection. + + Args: + video_path: Path to video file + start_time: Start time in seconds + end_time: End time in seconds + + Returns: + Audio samples array or None if no audio + """ + try: + from moviepy.audio.io.AudioFileClip import AudioFileClip + + with AudioFileClip(video_path) as audio: + segment = audio.subclipped(start_time, end_time) + fps = getattr(segment, 'fps', 44100) + samples = segment.to_soundarray(fps=fps) + return samples + except Exception as exc: + logger.warning(f"Failed to extract audio: {exc}") + return None diff --git a/video_render/transcription.py b/video_render/transcription.py index a175659..5e748bf 100644 --- a/video_render/transcription.py +++ b/video_render/transcription.py @@ -56,7 +56,14 @@ class TranscriptionService: ) return self._model - def transcribe(self, audio_path: Path) -> TranscriptionResult: + def transcribe(self, audio_path: Path, output_dir: Optional[Path] = None) -> TranscriptionResult: + if output_dir is not None: + existing_transcription = self.load(output_dir) + if existing_transcription is not None: + logger.info("Transcrição já existe em %s, reutilizando...", output_dir) + return existing_transcription + + logger.info("Iniciando transcrição do áudio com FasterWhisper...") model = self._load_model() segments, _ = model.transcribe( str(audio_path),