Make several adjustments to improve tracking and video rendering
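The camera-path changes in the diff below combine three filters over the per-frame crop centers: a dead zone that ignores micro-movements, a per-frame velocity clamp, and a heavy exponential moving average (alpha = 0.002). A minimal standalone sketch of that idea, with the filter order simplified and helper names that are illustrative rather than the exact methods in the diff:

    from typing import List

    def apply_dead_zone(positions: List[float], threshold: float) -> List[float]:
        # Keep the previous position when the change is below the threshold.
        if len(positions) < 2:
            return positions
        out = [positions[0]]
        for p in positions[1:]:
            out.append(out[-1] if abs(p - out[-1]) < threshold else p)
        return out

    def limit_velocity(positions: List[float], max_velocity: float) -> List[float]:
        # Clamp per-frame movement to at most max_velocity pixels.
        if len(positions) < 2:
            return positions
        out = [positions[0]]
        for p in positions[1:]:
            step = max(-max_velocity, min(max_velocity, p - out[-1]))
            out.append(out[-1] + step)
        return out

    def ema(positions: List[float], alpha: float = 0.002) -> List[float]:
        # Very small alpha: the crop center drifts slowly toward the target.
        if len(positions) < 2:
            return positions
        out = [positions[0]]
        for p in positions[1:]:
            out.append(alpha * p + (1 - alpha) * out[-1])
        return out

    # Noisy x-trajectory becomes an almost static, slowly drifting one.
    xs = [960.0, 962.0, 1400.0, 1398.0, 1402.0, 965.0, 961.0]
    print(ema(limit_velocity(apply_dead_zone(xs, 5), 20)))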
@@ -46,21 +46,20 @@ class SmartFramer:
        self,
        target_width: int = 1080,
        target_height: int = 1920,
        frame_skip: int = 2,
        smoothing_window: int = 15
        frame_skip: int = 1,
        smoothing_window: int = 30,
        max_velocity: int = 20,
        person_switch_cooldown: int = 999999
    ):
        self.target_width = target_width
        self.target_height = target_height
        self.target_aspect = target_height / target_width

        # Performance parameters
        self.frame_skip = frame_skip  # Process every Nth frame (CPU optimization)

        # Smoothing parameters
        self.frame_skip = frame_skip
        self.smoothing_window = smoothing_window
        self.max_velocity = 30  # pixels per frame (reduced for smoother transitions)
        self.max_velocity = max_velocity
        self.person_switch_cooldown = person_switch_cooldown

        logger.info(f"Smart framer initialized (target: {target_width}x{target_height}, frame_skip={frame_skip})")
        logger.info(f"Smart framer initialized (target: {target_width}x{target_height}, frame_skip={frame_skip}, smoothing={smoothing_window}, velocity={max_velocity}, cooldown={person_switch_cooldown})")

    def create_framing_plan(
        self,
@@ -81,25 +80,21 @@ class SmartFramer:
        Returns:
            FramingPlan with all frame contexts and crop regions
        """
        analyzer = ContextAnalyzer()
        analyzer = ContextAnalyzer(person_switch_cooldown=self.person_switch_cooldown)

        # Detect speaking periods from audio if available
        speaking_periods = None
        if audio_samples is not None:
            speaking_periods = analyzer.audio_detector.detect_speaking_periods(audio_samples)

        # Open video with error suppression for AV1 codec warnings
        import os
        os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'loglevel;quiet'

        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Calculate frame range
        start_frame = int(start_time * fps)
        end_frame = int(end_time * fps)

        # Set to start frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        frame_contexts = []
@@ -113,7 +108,6 @@ class SmartFramer:
            if not ret:
                break

            # Only process every Nth frame for performance (CPU optimization)
            if processed_count % self.frame_skip == 0:
                timestamp = frame_number / fps
                context = analyzer.analyze_frame(frame, timestamp, frame_number, speaking_periods)
@@ -122,35 +116,36 @@ class SmartFramer:
            frame_number += 1
            processed_count += 1

        # Get video dimensions before releasing capture
        source_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        source_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        cap.release()
        analyzer.close()

        # Determine overall layout mode (most common)
        layout_modes = [ctx.layout_mode for ctx in frame_contexts]
        if layout_modes:
            overall_layout = max(set(layout_modes), key=layout_modes.count)
        else:
            overall_layout = "single"

        # Calculate crop regions based on contexts

        crop_regions = self._calculate_crop_regions(
            frame_contexts,
            source_width,
            source_height
        )

        return FramingPlan(
        framing_plan = FramingPlan(
            frame_contexts=frame_contexts,
            crop_regions=crop_regions,
            layout_mode=overall_layout,
            fps=fps
        )

        import gc
        gc.collect()

        return framing_plan

    def _calculate_crop_regions(
        self,
        contexts: List[FrameContext],
@@ -171,66 +166,122 @@ class SmartFramer:
        if not contexts:
            return []

        # Calculate ideal crop dimensions maintaining EXACT 9:16 aspect ratio
        source_aspect = source_width / source_height

        if source_aspect > self.target_aspect:
            # Source is wider - crop horizontally (use full height)
            crop_height = source_height
            crop_width = int(crop_height / self.target_aspect)

            # Ensure crop width fits within source
            if crop_width > source_width:
                crop_width = source_width
                crop_height = int(crop_width * self.target_aspect)
        else:
            # Source is taller - crop vertically (use full width)
            crop_width = source_width
            crop_height = int(crop_width * self.target_aspect)

            # Ensure crop height fits within source
            if crop_height > source_height:
                crop_height = source_height
                crop_width = int(crop_height / self.target_aspect)

        # Calculate center points for each frame
        # Since we now always focus on ONE person directly (not averaging),
        # we can use the focus point directly without complex validation
        center_xs = []
        center_ys = []
        safe_zone_margin_x = crop_width * 0.40
        safe_zone_margin_y = crop_height * 0.40

        for ctx in contexts:
            if ctx.primary_focus:
                # Primary focus is now always a single person's center, never averaged
                # This means it will never be on the table/empty space
                center_xs.append(ctx.primary_focus[0])
                center_ys.append(ctx.primary_focus[1])
        dead_zone_threshold = 100

        if contexts and contexts[0].primary_focus:
            current_crop_center_x = contexts[0].primary_focus[0]
            current_crop_center_y = contexts[0].primary_focus[1]
        else:
            current_crop_center_x = source_width // 2
            current_crop_center_y = source_height // 2

        center_xs = [current_crop_center_x]
        center_ys = [current_crop_center_y]

        for ctx in contexts[1:]:
            if ctx.primary_focus and ctx.selected_people and len(ctx.detected_faces) > 0:
                primary_person_idx = ctx.selected_people[0] if ctx.selected_people else 0
                if primary_person_idx < len(ctx.detected_faces):
                    face = ctx.detected_faces[primary_person_idx]

                    face_left = face.x
                    face_right = face.x + face.width
                    face_top = face.y
                    face_bottom = face.y + face.height

                    crop_left = current_crop_center_x - crop_width // 2
                    crop_right = current_crop_center_x + crop_width // 2
                    crop_top = current_crop_center_y - crop_height // 2
                    crop_bottom = current_crop_center_y + crop_height // 2

                    face_rel_left = face_left - crop_left
                    face_rel_right = face_right - crop_left
                    face_rel_top = face_top - crop_top
                    face_rel_bottom = face_bottom - crop_top

                    face_left_safe = face_rel_left >= safe_zone_margin_x
                    face_right_safe = face_rel_right <= (crop_width - safe_zone_margin_x)
                    face_top_safe = face_rel_top >= safe_zone_margin_y
                    face_bottom_safe = face_rel_bottom <= (crop_height - safe_zone_margin_y)

                    face_fully_visible = face_left_safe and face_right_safe and face_top_safe and face_bottom_safe

                    if face_fully_visible:
                        center_xs.append(current_crop_center_x)
                        center_ys.append(current_crop_center_y)
                    else:
                        shift_x = 0
                        shift_y = 0

                        if not face_left_safe:
                            shift_x = face_rel_left - safe_zone_margin_x
                        elif not face_right_safe:
                            shift_x = face_rel_right - (crop_width - safe_zone_margin_x)

                        if not face_top_safe:
                            shift_y = face_rel_top - safe_zone_margin_y
                        elif not face_bottom_safe:
                            shift_y = face_rel_bottom - (crop_height - safe_zone_margin_y)

                        if abs(shift_x) > dead_zone_threshold:
                            current_crop_center_x += shift_x
                        if abs(shift_y) > dead_zone_threshold:
                            current_crop_center_y += shift_y

                        center_xs.append(current_crop_center_x)
                        center_ys.append(current_crop_center_y)
                else:
                    center_xs.append(current_crop_center_x)
                    center_ys.append(current_crop_center_y)
            else:
                # Default to center only if no faces detected at all
                center_xs.append(source_width // 2)
                center_ys.append(source_height // 2)
                center_xs.append(current_crop_center_x)
                center_ys.append(current_crop_center_y)

        # Smooth the center points
        if len(center_xs) > self.smoothing_window:
            kernel_size = min(self.smoothing_window, len(center_xs))
            if kernel_size % 2 == 0:
                kernel_size -= 1
        if len(center_xs) > 1:
            alpha = 0.002
            smoothed_xs = [center_xs[0]]
            smoothed_ys = [center_ys[0]]
            for i in range(1, len(center_xs)):
                if center_xs[i] != center_xs[i-1] or center_ys[i] != center_ys[i-1]:
                    smoothed_xs.append(alpha * center_xs[i] + (1 - alpha) * smoothed_xs[i-1])
                    smoothed_ys.append(alpha * center_ys[i] + (1 - alpha) * smoothed_ys[i-1])
                else:
                    smoothed_xs.append(smoothed_xs[i-1])
                    smoothed_ys.append(smoothed_ys[i-1])
            center_xs = smoothed_xs
            center_ys = smoothed_ys

            center_xs = signal.medfilt(center_xs, kernel_size=kernel_size).tolist()
            center_ys = signal.medfilt(center_ys, kernel_size=kernel_size).tolist()
        center_xs = self._limit_velocity(center_xs, 2)
        center_ys = self._limit_velocity(center_ys, 2)

        # Limit velocity (prevent jarring movements)
        center_xs = self._limit_velocity(center_xs, self.max_velocity)
        center_ys = self._limit_velocity(center_ys, self.max_velocity)
        center_xs = self._apply_dead_zone(center_xs, 5)
        center_ys = self._apply_dead_zone(center_ys, 5)

        # Convert to crop regions
        crop_regions = []
        for center_x, center_y in zip(center_xs, center_ys):
            # Calculate top-left corner
            x = int(center_x - crop_width // 2)
            y = int(center_y - crop_height // 2)

            # Clamp to valid bounds
            x = max(0, min(x, source_width - crop_width))
            y = max(0, min(y, source_height - crop_height))

@@ -241,8 +292,37 @@ class SmartFramer:
                height=crop_height
            ))

        center_xs.clear()
        center_ys.clear()

        return crop_regions

    def _apply_dead_zone(self, positions: List[float], threshold: float) -> List[float]:
        """
        Apply dead zone to eliminate micro-movements.
        If change is smaller than threshold, keep previous position.

        Args:
            positions: List of positions
            threshold: Minimum change needed to move (pixels)

        Returns:
            Positions with dead zone applied
        """
        if len(positions) <= 1:
            return positions

        filtered = [positions[0]]

        for i in range(1, len(positions)):
            delta = abs(positions[i] - filtered[i - 1])
            if delta < threshold:
                filtered.append(filtered[i - 1])
            else:
                filtered.append(positions[i])

        return filtered

    def _limit_velocity(self, positions: List[float], max_velocity: float) -> List[float]:
        """
        Limit the velocity of position changes.
@@ -271,33 +351,20 @@ class SmartFramer:
    def apply_framing(
        self,
        video_clip: VideoFileClip,
        framing_plan: FramingPlan,
        use_split_screen: bool = False
        framing_plan: FramingPlan
    ) -> VideoClip:
        """
        Apply smart framing to a video clip.
        Always uses single-person focus (no split screen).

        Args:
            video_clip: Source video clip
            framing_plan: Framing plan to apply
            use_split_screen: Whether to use split screen for multiple people

        Returns:
            Reframed video clip
        """
        # Handle different layout modes
        if framing_plan.layout_mode in ["single", "single_speaker"]:
            # Single person or single speaker - use focused single framing
            return self._apply_single_framing(video_clip, framing_plan)
        elif framing_plan.layout_mode == "dual_split" and use_split_screen:
            # Two people in conversation - use split screen
            return self._apply_split_screen(video_clip, framing_plan)
        elif framing_plan.layout_mode == "grid" and use_split_screen:
            # 3+ people - use grid layout
            return self._apply_grid_layout(video_clip, framing_plan)
        else:
            # Fallback to single framing
            return self._apply_single_framing(video_clip, framing_plan)
        return self._apply_single_framing(video_clip, framing_plan)

    def _apply_single_framing(
        self,
@@ -315,12 +382,9 @@ class SmartFramer:
            Reframed video clip
        """
        def make_frame(t):
            # Get the original frame
            frame = video_clip.get_frame(t)

            # Ensure we have valid crop regions
            if not framing_plan.crop_regions:
                # Fallback: return center crop
                h, w = frame.shape[:2]
                crop_h = int(w * self.target_aspect)
                crop_w = w
@@ -331,41 +395,32 @@ class SmartFramer:
                x = (w - crop_w) // 2
                cropped = frame[y:y + crop_h, x:x + crop_w]
            else:
                # Calculate exact frame index with decimal precision for interpolation
                exact_frame_idx = (t * framing_plan.fps) / self.frame_skip

                # Get the two adjacent analyzed frames
                idx_floor = int(exact_frame_idx)
                idx_ceil = idx_floor + 1

                # Interpolation factor (0.0 to 1.0)
                alpha = exact_frame_idx - idx_floor

                # Clamp indices to valid range
                idx_floor = max(0, min(idx_floor, len(framing_plan.crop_regions) - 1))
                idx_ceil = max(0, min(idx_ceil, len(framing_plan.crop_regions) - 1))

                # Get crop regions
                crop1 = framing_plan.crop_regions[idx_floor]
                crop2 = framing_plan.crop_regions[idx_ceil]

                # Linear interpolation between crop regions
                x = int(crop1.x * (1 - alpha) + crop2.x * alpha)
                y = int(crop1.y * (1 - alpha) + crop2.y * alpha)
                width = int(crop1.width * (1 - alpha) + crop2.width * alpha)
                height = int(crop1.height * (1 - alpha) + crop2.height * alpha)

                # Ensure crop stays within frame bounds
                h, w = frame.shape[:2]
                x = max(0, min(x, w - width))
                y = max(0, min(y, h - height))
                width = min(width, w - x)
                height = min(height, h - y)

                # Crop the frame
                cropped = frame[y:y + height, x:x + width]

            # Resize to target dimensions
            resized = cv2.resize(
                cropped,
                (self.target_width, self.target_height),
@@ -374,7 +429,6 @@ class SmartFramer:

            return resized

        # MoviePy 2.x compatible way to create VideoClip
        new_clip = VideoClip(duration=video_clip.duration)
        new_clip.size = (self.target_width, self.target_height)
        new_clip.frame_function = make_frame
@@ -397,13 +451,10 @@ class SmartFramer:
        """
        def make_frame(t):
            frame = video_clip.get_frame(t)
            # Calculate exact frame index with decimal precision for smooth interpolation
            exact_frame_idx = (t * framing_plan.fps) / self.frame_skip
            frame_idx = int(exact_frame_idx)

            # Ensure we have valid contexts
            if not framing_plan.frame_contexts:
                # Fallback to simple center crop
                h, w = frame.shape[:2]
                crop_h = int(w * self.target_aspect)
                crop_w = w
@@ -415,107 +466,81 @@ class SmartFramer:
                cropped = frame[y:y + crop_h, x:x + crop_w]
                return cv2.resize(cropped, (self.target_width, self.target_height), interpolation=cv2.INTER_LINEAR)

            # Clamp index to valid range
            frame_idx = max(0, min(frame_idx, len(framing_plan.frame_contexts) - 1))
            context = framing_plan.frame_contexts[frame_idx]

            # Create output frame
            output = np.zeros((self.target_height, self.target_width, 3), dtype=np.uint8)

            if len(context.detected_faces) >= 2:
                # Split vertically 50/50 (two columns)
                half_width = self.target_width // 2
                if context.selected_people and len(context.selected_people) >= 2:
                    selected_faces = [context.detected_faces[i] for i in context.selected_people[:2]
                                      if i < len(context.detected_faces)]

                # Select the 2 most relevant faces
                # Priority: ALWAYS show active speaker first + most confident other person
                if context.active_speakers and len(context.active_speakers) >= 1:
                    # Get the PRIMARY speaker (most confident among active speakers)
                    speaker_faces = [context.detected_faces[i] for i in context.active_speakers
                                     if i < len(context.detected_faces)]
                    if len(selected_faces) >= 2:
                        faces = sorted(selected_faces, key=lambda f: f.center_x)
                        left_face = faces[0]
                        right_face = faces[1]

                    primary_speaker = max(speaker_faces, key=lambda f: f.confidence)
                        for idx, face in enumerate([left_face, right_face]):

                    # Get OTHER faces (not the primary speaker)
                    other_faces = [f for f in context.detected_faces if f != primary_speaker]
                            half_width = self.target_width // 2
                            half_aspect = self.target_height / half_width  # Aspect ratio for half

                    if len(speaker_faces) >= 2:
                        # Multiple speakers: show primary + second most confident speaker
                        other_speakers = [f for f in speaker_faces if f != primary_speaker]
                        secondary_person = max(other_speakers, key=lambda f: f.confidence)
                    elif other_faces:
                        # One speaker: show speaker + most confident other person
                        secondary_person = max(other_faces, key=lambda f: f.confidence)
                    else:
                        # Fallback: only one person detected
                        secondary_person = primary_speaker
                            face_width = max(face.width, frame.shape[1] // 4)  # At least 1/4 of frame width
                            crop_width = int(face_width * 2.5)  # Add padding around face
                            crop_height = int(crop_width * half_aspect)  # Maintain correct aspect

                    selected_faces = [primary_speaker, secondary_person]
                            max_crop_width = frame.shape[1] // 2  # Half the source width
                            max_crop_height = frame.shape[0]  # Full source height

                            if crop_width > max_crop_width:
                                crop_width = max_crop_width
                                crop_height = int(crop_width * half_aspect)

                            if crop_height > max_crop_height:
                                crop_height = max_crop_height
                                crop_width = int(crop_height / half_aspect)

                            x = max(0, face.center_x - crop_width // 2)
                            y = max(0, face.center_y - crop_height // 2)

                            x = min(x, frame.shape[1] - crop_width)
                            y = min(y, frame.shape[0] - crop_height)

                            cropped = frame[y:y + crop_height, x:x + crop_width]
                            resized = cv2.resize(
                                cropped,
                                (half_width, self.target_height),
                                interpolation=cv2.INTER_LINEAR
                            )

                            x_offset = idx * half_width
                            output[:, x_offset:x_offset + half_width] = resized
                else:
                    # No speakers: take 2 most confident faces
                    selected_faces = sorted(context.detected_faces, key=lambda f: f.confidence, reverse=True)[:2]

                # Sort selected faces by horizontal position for consistent left/right placement
                faces = sorted(selected_faces, key=lambda f: f.center_x)
                left_face = faces[0]
                right_face = faces[1]

                # Process each person's frame
                for idx, face in enumerate([left_face, right_face]):
                    # Calculate crop region focused on this person
                    # Each person gets half the width, full target aspect ratio (9:16)
                    # This ensures NO distortion when resizing

                    # For split screen: each side is half_width x full_height
                    # We need to maintain 9:16 aspect for each half
                    half_width = self.target_width // 2
                    half_aspect = self.target_height / half_width  # Aspect ratio for half

                    # Determine crop size based on face with padding
                    face_width = max(face.width, frame.shape[1] // 4)  # At least 1/4 of frame width
                    crop_width = int(face_width * 2.5)  # Add padding around face
                    crop_height = int(crop_width * half_aspect)  # Maintain correct aspect

                    # Ensure crop fits in frame, maintaining aspect ratio
                    max_crop_width = frame.shape[1] // 2  # Half the source width
                    max_crop_height = frame.shape[0]  # Full source height

                    # If crop is too wide, scale down proportionally
                    if crop_width > max_crop_width:
                        crop_width = max_crop_width
                        crop_height = int(crop_width * half_aspect)

                    # If crop is too tall, scale down proportionally
                    if crop_height > max_crop_height:
                        crop_height = max_crop_height
                        crop_width = int(crop_height / half_aspect)

                    # Center crop on face
                    x = max(0, face.center_x - crop_width // 2)
                    y = max(0, face.center_y - crop_height // 2)

                    # Clamp to frame boundaries
                    x = min(x, frame.shape[1] - crop_width)
                    y = min(y, frame.shape[0] - crop_height)

                    # Extract and resize crop
                    cropped = frame[y:y + crop_height, x:x + crop_width]
                    resized = cv2.resize(
                if framing_plan.crop_regions:
                    crop_idx = max(0, min(frame_idx, len(framing_plan.crop_regions) - 1))
                    crop = framing_plan.crop_regions[crop_idx]
                    cropped = frame[crop.y:crop.y + crop.height, crop.x:crop.x + crop.width]
                else:
                    h, w = frame.shape[:2]
                    crop_h = int(w * self.target_aspect)
                    crop_w = w
                    if crop_h > h:
                        crop_h = h
                        crop_w = int(h / self.target_aspect)
                    y = (h - crop_h) // 2
                    x = (w - crop_w) // 2
                    cropped = frame[y:y + crop_h, x:x + crop_w]
                output = cv2.resize(
                    cropped,
                    (half_width, self.target_height),
                    (self.target_width, self.target_height),
                    interpolation=cv2.INTER_LINEAR
                )

                    # Place in output at appropriate horizontal position
                    x_offset = idx * half_width
                    output[:, x_offset:x_offset + half_width] = resized
            else:
                # Fall back to single framing
                if framing_plan.crop_regions:
                    crop_idx = max(0, min(frame_idx, len(framing_plan.crop_regions) - 1))
                    crop = framing_plan.crop_regions[crop_idx]
                    cropped = frame[crop.y:crop.y + crop.height, crop.x:crop.x + crop.width]
                else:
                    # Fallback to center crop if no crop regions available
                    h, w = frame.shape[:2]
                    crop_h = int(w * self.target_aspect)
                    crop_w = w
@@ -533,7 +558,6 @@ class SmartFramer:

            return output

        # MoviePy 2.x compatible way to create VideoClip
        new_clip = VideoClip(duration=video_clip.duration)
        new_clip.size = (self.target_width, self.target_height)
        new_clip.frame_function = make_frame
@@ -556,13 +580,10 @@ class SmartFramer:
        """
        def make_frame(t):
            frame = video_clip.get_frame(t)
            # Calculate exact frame index with decimal precision for smooth interpolation
            exact_frame_idx = (t * framing_plan.fps) / self.frame_skip
            frame_idx = int(exact_frame_idx)

            # Ensure we have valid contexts
            if not framing_plan.frame_contexts:
                # Fallback to simple center crop
                h, w = frame.shape[:2]
                crop_h = int(w * self.target_aspect)
                crop_w = w
@@ -574,7 +595,6 @@ class SmartFramer:
                cropped = frame[y:y + crop_h, x:x + crop_w]
                return cv2.resize(cropped, (self.target_width, self.target_height), interpolation=cv2.INTER_LINEAR)

            # Clamp index to valid range
            frame_idx = max(0, min(frame_idx, len(framing_plan.frame_contexts) - 1))
            context = framing_plan.frame_contexts[frame_idx]

@@ -583,23 +603,18 @@ class SmartFramer:
            num_faces = len(context.detected_faces)

            if num_faces >= 3:
                # Create 2x2 grid
                cell_width = self.target_width // 2
                cell_height = self.target_height // 2

                for idx, face in enumerate(context.detected_faces[:4]):
                    # Calculate grid position
                    row = idx // 2
                    col = idx % 2

                    # Each grid cell maintains aspect ratio (square in this case: cell_width = cell_height)
                    cell_aspect = cell_height / cell_width

                    # Crop around face with correct aspect ratio
                    crop_width = frame.shape[1] // 2
                    crop_height = int(crop_width * cell_aspect)

                    # Ensure crop fits in frame, maintaining aspect
                    max_crop_width = frame.shape[1] // 2
                    max_crop_height = frame.shape[0] // 2

@@ -611,11 +626,9 @@ class SmartFramer:
                        crop_height = max_crop_height
                        crop_width = int(crop_height / cell_aspect)

                    # Center crop on face
                    x = max(0, face.center_x - crop_width // 2)
                    y = max(0, face.center_y - crop_height // 2)

                    # Clamp to frame boundaries
                    x = min(x, frame.shape[1] - crop_width)
                    y = min(y, frame.shape[0] - crop_height)

@@ -626,18 +639,15 @@ class SmartFramer:
                        interpolation=cv2.INTER_LINEAR
                    )

                    # Place in grid
                    y_offset = row * cell_height
                    x_offset = col * cell_width
                    output[y_offset:y_offset + cell_height, x_offset:x_offset + cell_width] = resized
            else:
                # Fall back to single framing
                if framing_plan.crop_regions:
                    crop_idx = max(0, min(frame_idx, len(framing_plan.crop_regions) - 1))
                    crop = framing_plan.crop_regions[crop_idx]
                    cropped = frame[crop.y:crop.y + crop.height, crop.x:crop.x + crop.width]
                else:
                    # Fallback to center crop if no crop regions available
                    h, w = frame.shape[:2]
                    crop_h = int(w * self.target_aspect)
                    crop_w = w
@@ -655,7 +665,6 @@ class SmartFramer:

            return output

        # MoviePy 2.x compatible way to create VideoClip
        new_clip = VideoClip(duration=video_clip.duration)
        new_clip.size = (self.target_width, self.target_height)
        new_clip.frame_function = make_frame
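Because only every `frame_skip`-th frame is analyzed, rendering maps each output time t back onto the sparser analysis timeline and linearly interpolates between the two nearest crop rectangles, as the `make_frame` code above does. A self-contained sketch of that index math, using a hypothetical `CropRegion` tuple in place of the project's own type:

    from collections import namedtuple

    CropRegion = namedtuple("CropRegion", "x y width height")

    def interpolated_crop(t, fps, frame_skip, crop_regions):
        # Fractional index into the analyzed-frame timeline.
        exact = (t * fps) / frame_skip
        i0 = max(0, min(int(exact), len(crop_regions) - 1))
        i1 = max(0, min(i0 + 1, len(crop_regions) - 1))
        a = exact - int(exact)  # interpolation factor in [0, 1)
        c0, c1 = crop_regions[i0], crop_regions[i1]
        return CropRegion(
            x=int(c0.x * (1 - a) + c1.x * a),
            y=int(c0.y * (1 - a) + c1.y * a),
            width=int(c0.width * (1 - a) + c1.width * a),
            height=int(c0.height * (1 - a) + c1.height * a),
        )

    # Example: halfway between analyzed frames 0 and 1 at 30 fps, frame_skip=1.
    regions = [CropRegion(0, 0, 607, 1080), CropRegion(40, 0, 607, 1080)]
    print(interpolated_crop(t=0.5 / 30, fps=30, frame_skip=1, crop_regions=regions))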