Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
| 9 |
- Memory management & cleanup
|
| 10 |
- SDXL / Playground / OpenAI backgrounds
|
| 11 |
- Gradio UI with "CHAPTER" dividers
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
# =============================================================================
|
|
@@ -902,7 +903,7 @@ def process_video_chunks(self, video_path: str, processor_func, **kwargs) -> str
|
|
| 902 |
return self._merge_chunks(processed, fps, width, height)
|
| 903 |
|
| 904 |
# =============================================================================
|
| 905 |
-
# CHAPTER 9: MAIN PIPELINE (SAM2 → MatAnyone → Composite)
|
| 906 |
# =============================================================================
|
| 907 |
def process_video_main(
|
| 908 |
video_path: str,
|
|
@@ -1003,52 +1004,130 @@ def process_video_main(
|
|
| 1003 |
grad = _make_vertical_gradient(w, h, (200, 205, 215), (160, 170, 190))
|
| 1004 |
bg_rgb = cv2.cvtColor(grad, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
|
| 1005 |
|
| 1006 |
-
#
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1014 |
def composite_frame(get_frame, t):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1015 |
frame = get_frame(t).astype(np.float32) / 255.0
|
| 1016 |
-
alpha_t = min(t, max(0.0, (alpha_clip.duration or 0) - 0.01)) if (alpha_clip.duration and alpha_clip.duration > 0) else 0.0
|
| 1017 |
-
a = alpha_clip.get_frame(alpha_t)
|
| 1018 |
-
if a.ndim == 3:
|
| 1019 |
-
a = a[:, :, 0]
|
| 1020 |
-
a = a.astype(np.float32) / 255.0
|
| 1021 |
-
|
| 1022 |
hh, ww = frame.shape[:2]
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1027 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1028 |
fg_canvas = np.zeros_like(frame, dtype=np.float32)
|
| 1029 |
a_canvas = np.zeros((hh, ww), dtype=np.float32)
|
| 1030 |
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1036 |
if xs1 <= xs0 or ys1 <= ys0:
|
|
|
|
|
|
|
| 1037 |
return (bg_rgb * 255).astype(np.uint8)
|
| 1038 |
|
| 1039 |
-
|
| 1040 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1041 |
|
| 1042 |
-
|
| 1043 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1044 |
|
|
|
|
| 1045 |
if feather_px > 0:
|
| 1046 |
-
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1050 |
comp = a3 * fg_canvas + (1.0 - a3) * bg_rgb
|
| 1051 |
-
|
|
|
|
|
|
|
| 1052 |
|
| 1053 |
progress(0.7, desc="Compositing")
|
| 1054 |
final_clip = original_clip.fl(composite_frame)
|
|
@@ -1085,6 +1164,7 @@ def composite_frame(get_frame, t):
|
|
| 1085 |
messages.append("✅ Done")
|
| 1086 |
stats = memory_manager.get_memory_stats()
|
| 1087 |
messages.append(f"📊 CPU {stats.cpu_memory_mb:.1f}MB, GPU {stats.gpu_memory_mb:.1f}MB")
|
|
|
|
| 1088 |
progress(1.0, desc="Done")
|
| 1089 |
return str(output_path), "\n".join(messages)
|
| 1090 |
|
|
|
|
| 9 |
- Memory management & cleanup
|
| 10 |
- SDXL / Playground / OpenAI backgrounds
|
| 11 |
- Gradio UI with "CHAPTER" dividers
|
| 12 |
+
- FIXED: Enhanced positioning with debug logging and coordinate precision
|
| 13 |
"""
|
| 14 |
|
| 15 |
# =============================================================================
|
|
|
|
| 903 |
return self._merge_chunks(processed, fps, width, height)
|
| 904 |
|
| 905 |
# =============================================================================
|
| 906 |
+
# CHAPTER 9: MAIN PIPELINE (SAM2 → MatAnyone → Composite) - FIXED VERSION
|
| 907 |
# =============================================================================
|
| 908 |
def process_video_main(
|
| 909 |
video_path: str,
|
|
|
|
| 1004 |
grad = _make_vertical_gradient(w, h, (200, 205, 215), (160, 170, 190))
|
| 1005 |
bg_rgb = cv2.cvtColor(grad, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
|
| 1006 |
|
| 1007 |
+
# FIXED: Enhanced placement parameters with validation and debugging
|
| 1008 |
+
placement = placement or {}
|
| 1009 |
+
px = max(0.0, min(1.0, float(placement.get("x", 0.5))))
|
| 1010 |
+
py = max(0.0, min(1.0, float(placement.get("y", 0.75))))
|
| 1011 |
+
ps = max(0.3, min(2.0, float(placement.get("scale", 1.0))))
|
| 1012 |
+
feather_px = max(0, min(50, int(placement.get("feather", 3))))
|
| 1013 |
+
|
| 1014 |
+
# Debug logging for placement parameters
|
| 1015 |
+
logger.info(f"POSITIONING DEBUG: px={px:.3f}, py={py:.3f}, ps={ps:.3f}, feather={feather_px}")
|
| 1016 |
+
logger.info(f"VIDEO DIMENSIONS: {w}x{h}")
|
| 1017 |
+
logger.info(f"TARGET CENTER: ({int(px * w)}, {int(py * h)})")
|
| 1018 |
+
|
| 1019 |
+
frame_count = 0
|
| 1020 |
def composite_frame(get_frame, t):
|
| 1021 |
+
nonlocal frame_count
|
| 1022 |
+
frame_count += 1
|
| 1023 |
+
|
| 1024 |
+
# Get original frame
|
| 1025 |
frame = get_frame(t).astype(np.float32) / 255.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1026 |
hh, ww = frame.shape[:2]
|
| 1027 |
+
|
| 1028 |
+
# FIXED: Better alpha temporal synchronization
|
| 1029 |
+
alpha_duration = getattr(alpha_clip, 'duration', None)
|
| 1030 |
+
if alpha_duration and alpha_duration > 0:
|
| 1031 |
+
# Ensure we don't go beyond alpha video duration
|
| 1032 |
+
alpha_t = min(t, alpha_duration - 0.01)
|
| 1033 |
+
alpha_t = max(0.0, alpha_t)
|
| 1034 |
+
else:
|
| 1035 |
+
alpha_t = 0.0
|
| 1036 |
+
|
| 1037 |
+
try:
|
| 1038 |
+
a = alpha_clip.get_frame(alpha_t)
|
| 1039 |
+
# Handle multi-channel alpha
|
| 1040 |
+
if a.ndim == 3:
|
| 1041 |
+
a = a[:, :, 0]
|
| 1042 |
+
a = a.astype(np.float32) / 255.0
|
| 1043 |
+
|
| 1044 |
+
# FIXED: Ensure alpha matches frame dimensions exactly
|
| 1045 |
+
if a.shape != (hh, ww):
|
| 1046 |
+
logger.warning(f"Alpha size mismatch: {a.shape} vs {(hh, ww)}, resizing...")
|
| 1047 |
+
a = cv2.resize(a, (ww, hh), interpolation=cv2.INTER_LINEAR)
|
| 1048 |
+
|
| 1049 |
+
except Exception as e:
|
| 1050 |
+
logger.error(f"Alpha frame error at t={t:.3f}: {e}")
|
| 1051 |
+
return (bg_rgb * 255).astype(np.uint8)
|
| 1052 |
|
| 1053 |
+
# FIXED: Calculate scaled dimensions with better rounding
|
| 1054 |
+
sw = max(1, round(ww * ps)) # Use round instead of int for better precision
|
| 1055 |
+
sh = max(1, round(hh * ps))
|
| 1056 |
+
|
| 1057 |
+
# FIXED: Scale both frame and alpha consistently
|
| 1058 |
+
try:
|
| 1059 |
+
fg_scaled = cv2.resize(frame, (sw, sh), interpolation=cv2.INTER_AREA if ps < 1.0 else cv2.INTER_LINEAR)
|
| 1060 |
+
a_scaled = cv2.resize(a, (sw, sh), interpolation=cv2.INTER_AREA if ps < 1.0 else cv2.INTER_LINEAR)
|
| 1061 |
+
except Exception as e:
|
| 1062 |
+
logger.error(f"Scaling error: {e}")
|
| 1063 |
+
return (bg_rgb * 255).astype(np.uint8)
|
| 1064 |
+
|
| 1065 |
+
# Create canvases
|
| 1066 |
fg_canvas = np.zeros_like(frame, dtype=np.float32)
|
| 1067 |
a_canvas = np.zeros((hh, ww), dtype=np.float32)
|
| 1068 |
|
| 1069 |
+
# FIXED: More precise center calculations
|
| 1070 |
+
cx = round(px * ww)
|
| 1071 |
+
cy = round(py * hh)
|
| 1072 |
+
|
| 1073 |
+
# FIXED: Use floor division for consistent centering
|
| 1074 |
+
x0 = cx - sw // 2
|
| 1075 |
+
y0 = cy - sh // 2
|
| 1076 |
+
|
| 1077 |
+
# Debug logging for first few frames
|
| 1078 |
+
if frame_count <= 3:
|
| 1079 |
+
logger.info(f"FRAME {frame_count}: scaled_size=({sw}, {sh}), center=({cx}, {cy}), top_left=({x0}, {y0})")
|
| 1080 |
+
|
| 1081 |
+
# FIXED: Robust bounds checking with edge case handling
|
| 1082 |
+
xs0 = max(0, x0)
|
| 1083 |
+
ys0 = max(0, y0)
|
| 1084 |
+
xs1 = min(ww, x0 + sw)
|
| 1085 |
+
ys1 = min(hh, y0 + sh)
|
| 1086 |
+
|
| 1087 |
+
# Check for valid placement region
|
| 1088 |
if xs1 <= xs0 or ys1 <= ys0:
|
| 1089 |
+
if frame_count <= 3:
|
| 1090 |
+
logger.warning(f"Subject outside bounds: dest=({xs0},{ys0})-({xs1},{ys1})")
|
| 1091 |
return (bg_rgb * 255).astype(np.uint8)
|
| 1092 |
|
| 1093 |
+
# FIXED: Calculate source region with bounds validation
|
| 1094 |
+
src_x0 = xs0 - x0 # Will be 0 if x0 >= 0, positive if x0 < 0
|
| 1095 |
+
src_y0 = ys0 - y0 # Will be 0 if y0 >= 0, positive if y0 < 0
|
| 1096 |
+
src_x1 = src_x0 + (xs1 - xs0)
|
| 1097 |
+
src_y1 = src_y0 + (ys1 - ys0)
|
| 1098 |
+
|
| 1099 |
+
# Validate source bounds
|
| 1100 |
+
if (src_x1 > sw or src_y1 > sh or src_x0 < 0 or src_y0 < 0 or
|
| 1101 |
+
src_x1 <= src_x0 or src_y1 <= src_y0):
|
| 1102 |
+
if frame_count <= 3:
|
| 1103 |
+
logger.error(f"Invalid source region: ({src_x0},{src_y0})-({src_x1},{src_y1}) for {sw}x{sh} scaled")
|
| 1104 |
+
return (bg_rgb * 255).astype(np.uint8)
|
| 1105 |
|
| 1106 |
+
# FIXED: Safe canvas placement with error handling
|
| 1107 |
+
try:
|
| 1108 |
+
fg_canvas[ys0:ys1, xs0:xs1, :] = fg_scaled[src_y0:src_y1, src_x0:src_x1, :]
|
| 1109 |
+
a_canvas[ys0:ys1, xs0:xs1] = a_scaled[src_y0:src_y1, src_x0:src_x1]
|
| 1110 |
+
except Exception as e:
|
| 1111 |
+
logger.error(f"Canvas placement failed: {e}")
|
| 1112 |
+
logger.error(f"Dest: [{ys0}:{ys1}, {xs0}:{xs1}], Src: [{src_y0}:{src_y1}, {src_x0}:{src_x1}]")
|
| 1113 |
+
return (bg_rgb * 255).astype(np.uint8)
|
| 1114 |
|
| 1115 |
+
# FIXED: Apply feathering with bounds checking
|
| 1116 |
if feather_px > 0:
|
| 1117 |
+
kernel_size = max(3, feather_px * 2 + 1)
|
| 1118 |
+
if kernel_size % 2 == 0:
|
| 1119 |
+
kernel_size += 1 # Ensure odd kernel size
|
| 1120 |
+
try:
|
| 1121 |
+
a_canvas = cv2.GaussianBlur(a_canvas, (kernel_size, kernel_size), feather_px / 3.0)
|
| 1122 |
+
except Exception as e:
|
| 1123 |
+
logger.warning(f"Feathering failed: {e}")
|
| 1124 |
+
|
| 1125 |
+
# FIXED: Composite with proper alpha handling
|
| 1126 |
+
a3 = np.expand_dims(a_canvas, axis=2) # More explicit than [:, :, None]
|
| 1127 |
comp = a3 * fg_canvas + (1.0 - a3) * bg_rgb
|
| 1128 |
+
result = np.clip(comp * 255, 0, 255).astype(np.uint8)
|
| 1129 |
+
|
| 1130 |
+
return result
|
| 1131 |
|
| 1132 |
progress(0.7, desc="Compositing")
|
| 1133 |
final_clip = original_clip.fl(composite_frame)
|
|
|
|
| 1164 |
messages.append("✅ Done")
|
| 1165 |
stats = memory_manager.get_memory_stats()
|
| 1166 |
messages.append(f"📊 CPU {stats.cpu_memory_mb:.1f}MB, GPU {stats.gpu_memory_mb:.1f}MB")
|
| 1167 |
+
messages.append(f"🎯 Processed {frame_count} frames with placement ({px:.2f}, {py:.2f}) @ {ps:.2f}x scale")
|
| 1168 |
progress(1.0, desc="Done")
|
| 1169 |
return str(output_path), "\n".join(messages)
|
| 1170 |
|