MogensR committed on
Commit
69ba459
·
verified ·
1 Parent(s): 8179154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -33
app.py CHANGED
@@ -9,6 +9,7 @@
9
  - Memory management & cleanup
10
  - SDXL / Playground / OpenAI backgrounds
11
  - Gradio UI with "CHAPTER" dividers
 
12
  """
13
 
14
  # =============================================================================
@@ -902,7 +903,7 @@ def process_video_chunks(self, video_path: str, processor_func, **kwargs) -> str
902
  return self._merge_chunks(processed, fps, width, height)
903
 
904
  # =============================================================================
905
- # CHAPTER 9: MAIN PIPELINE (SAM2 → MatAnyone → Composite)
906
  # =============================================================================
907
  def process_video_main(
908
  video_path: str,
@@ -1003,52 +1004,130 @@ def process_video_main(
1003
  grad = _make_vertical_gradient(w, h, (200, 205, 215), (160, 170, 190))
1004
  bg_rgb = cv2.cvtColor(grad, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
1005
 
1006
- # Placement defaults
1007
- px = float((placement or {}).get("x", 0.5))
1008
- py = float((placement or {}).get("y", 0.75))
1009
- ps = float((placement or {}).get("scale", 1.0))
1010
- feather_px = int((placement or {}).get("feather", 3))
1011
- px = max(0.0, min(1.0, px)); py = max(0.0, min(1.0, py))
1012
- ps = max(0.3, min(2.0, ps)); feather_px = max(0, min(50, feather_px))
1013
-
 
 
 
 
 
1014
  def composite_frame(get_frame, t):
 
 
 
 
1015
  frame = get_frame(t).astype(np.float32) / 255.0
1016
- alpha_t = min(t, max(0.0, (alpha_clip.duration or 0) - 0.01)) if (alpha_clip.duration and alpha_clip.duration > 0) else 0.0
1017
- a = alpha_clip.get_frame(alpha_t)
1018
- if a.ndim == 3:
1019
- a = a[:, :, 0]
1020
- a = a.astype(np.float32) / 255.0
1021
-
1022
  hh, ww = frame.shape[:2]
1023
- sw = max(1, int(ww * ps))
1024
- sh = max(1, int(hh * ps))
1025
- fg_scaled = cv2.resize(frame, (sw, sh), interpolation=cv2.INTER_LINEAR)
1026
- a_scaled = cv2.resize(a, (sw, sh), interpolation=cv2.INTER_LINEAR)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1028
  fg_canvas = np.zeros_like(frame, dtype=np.float32)
1029
  a_canvas = np.zeros((hh, ww), dtype=np.float32)
1030
 
1031
- cx = int(px * ww); cy = int(py * hh)
1032
- x0 = int(cx - sw // 2); y0 = int(cy - sh // 2)
1033
-
1034
- xs0, ys0 = max(0, x0), max(0, y0)
1035
- xs1, ys1 = min(ww, x0 + sw), min(hh, y0 + sh)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1036
  if xs1 <= xs0 or ys1 <= ys0:
 
 
1037
  return (bg_rgb * 255).astype(np.uint8)
1038
 
1039
- src_x0 = xs0 - x0; src_y0 = ys0 - y0
1040
- src_x1 = src_x0 + (xs1 - xs0); src_y1 = src_y0 + (ys1 - ys0)
 
 
 
 
 
 
 
 
 
 
1041
 
1042
- fg_canvas[ys0:ys1, xs0:xs1, :] = fg_scaled[src_y0:src_y1, src_x0:src_x1, :]
1043
- a_canvas[ys0:ys1, xs0:xs1] = a_scaled[src_y0:src_y1, src_x0:src_x1]
 
 
 
 
 
 
1044
 
 
1045
  if feather_px > 0:
1046
- k = (feather_px * 2 + 1)
1047
- a_canvas = cv2.GaussianBlur(a_canvas, (k, k), feather_px)
1048
-
1049
- a3 = a_canvas[:, :, None]
 
 
 
 
 
 
1050
  comp = a3 * fg_canvas + (1.0 - a3) * bg_rgb
1051
- return np.clip(comp * 255, 0, 255).astype(np.uint8)
 
 
1052
 
1053
  progress(0.7, desc="Compositing")
1054
  final_clip = original_clip.fl(composite_frame)
@@ -1085,6 +1164,7 @@ def composite_frame(get_frame, t):
1085
  messages.append("✅ Done")
1086
  stats = memory_manager.get_memory_stats()
1087
  messages.append(f"📊 CPU {stats.cpu_memory_mb:.1f}MB, GPU {stats.gpu_memory_mb:.1f}MB")
 
1088
  progress(1.0, desc="Done")
1089
  return str(output_path), "\n".join(messages)
1090
 
 
9
  - Memory management & cleanup
10
  - SDXL / Playground / OpenAI backgrounds
11
  - Gradio UI with "CHAPTER" dividers
12
+ - FIXED: Enhanced positioning with debug logging and coordinate precision
13
  """
14
 
15
  # =============================================================================
 
903
  return self._merge_chunks(processed, fps, width, height)
904
 
905
  # =============================================================================
906
+ # CHAPTER 9: MAIN PIPELINE (SAM2 → MatAnyone → Composite) - FIXED VERSION
907
  # =============================================================================
908
  def process_video_main(
909
  video_path: str,
 
1004
  grad = _make_vertical_gradient(w, h, (200, 205, 215), (160, 170, 190))
1005
  bg_rgb = cv2.cvtColor(grad, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
1006
 
1007
+ # FIXED: Enhanced placement parameters with validation and debugging
1008
+ placement = placement or {}
1009
+ px = max(0.0, min(1.0, float(placement.get("x", 0.5))))
1010
+ py = max(0.0, min(1.0, float(placement.get("y", 0.75))))
1011
+ ps = max(0.3, min(2.0, float(placement.get("scale", 1.0))))
1012
+ feather_px = max(0, min(50, int(placement.get("feather", 3))))
1013
+
1014
+ # Debug logging for placement parameters
1015
+ logger.info(f"POSITIONING DEBUG: px={px:.3f}, py={py:.3f}, ps={ps:.3f}, feather={feather_px}")
1016
+ logger.info(f"VIDEO DIMENSIONS: {w}x{h}")
1017
+ logger.info(f"TARGET CENTER: ({int(px * w)}, {int(py * h)})")
1018
+
1019
+ frame_count = 0
1020
  def composite_frame(get_frame, t):
1021
+ nonlocal frame_count
1022
+ frame_count += 1
1023
+
1024
+ # Get original frame
1025
  frame = get_frame(t).astype(np.float32) / 255.0
 
 
 
 
 
 
1026
  hh, ww = frame.shape[:2]
1027
+
1028
+ # FIXED: Better alpha temporal synchronization
1029
+ alpha_duration = getattr(alpha_clip, 'duration', None)
1030
+ if alpha_duration and alpha_duration > 0:
1031
+ # Ensure we don't go beyond alpha video duration
1032
+ alpha_t = min(t, alpha_duration - 0.01)
1033
+ alpha_t = max(0.0, alpha_t)
1034
+ else:
1035
+ alpha_t = 0.0
1036
+
1037
+ try:
1038
+ a = alpha_clip.get_frame(alpha_t)
1039
+ # Handle multi-channel alpha
1040
+ if a.ndim == 3:
1041
+ a = a[:, :, 0]
1042
+ a = a.astype(np.float32) / 255.0
1043
+
1044
+ # FIXED: Ensure alpha matches frame dimensions exactly
1045
+ if a.shape != (hh, ww):
1046
+ logger.warning(f"Alpha size mismatch: {a.shape} vs {(hh, ww)}, resizing...")
1047
+ a = cv2.resize(a, (ww, hh), interpolation=cv2.INTER_LINEAR)
1048
+
1049
+ except Exception as e:
1050
+ logger.error(f"Alpha frame error at t={t:.3f}: {e}")
1051
+ return (bg_rgb * 255).astype(np.uint8)
1052
 
1053
+ # FIXED: Calculate scaled dimensions with better rounding
1054
+ sw = max(1, round(ww * ps)) # Use round instead of int for better precision
1055
+ sh = max(1, round(hh * ps))
1056
+
1057
+ # FIXED: Scale both frame and alpha consistently
1058
+ try:
1059
+ fg_scaled = cv2.resize(frame, (sw, sh), interpolation=cv2.INTER_AREA if ps < 1.0 else cv2.INTER_LINEAR)
1060
+ a_scaled = cv2.resize(a, (sw, sh), interpolation=cv2.INTER_AREA if ps < 1.0 else cv2.INTER_LINEAR)
1061
+ except Exception as e:
1062
+ logger.error(f"Scaling error: {e}")
1063
+ return (bg_rgb * 255).astype(np.uint8)
1064
+
1065
+ # Create canvases
1066
  fg_canvas = np.zeros_like(frame, dtype=np.float32)
1067
  a_canvas = np.zeros((hh, ww), dtype=np.float32)
1068
 
1069
+ # FIXED: More precise center calculations
1070
+ cx = round(px * ww)
1071
+ cy = round(py * hh)
1072
+
1073
+ # FIXED: Use floor division for consistent centering
1074
+ x0 = cx - sw // 2
1075
+ y0 = cy - sh // 2
1076
+
1077
+ # Debug logging for first few frames
1078
+ if frame_count <= 3:
1079
+ logger.info(f"FRAME {frame_count}: scaled_size=({sw}, {sh}), center=({cx}, {cy}), top_left=({x0}, {y0})")
1080
+
1081
+ # FIXED: Robust bounds checking with edge case handling
1082
+ xs0 = max(0, x0)
1083
+ ys0 = max(0, y0)
1084
+ xs1 = min(ww, x0 + sw)
1085
+ ys1 = min(hh, y0 + sh)
1086
+
1087
+ # Check for valid placement region
1088
  if xs1 <= xs0 or ys1 <= ys0:
1089
+ if frame_count <= 3:
1090
+ logger.warning(f"Subject outside bounds: dest=({xs0},{ys0})-({xs1},{ys1})")
1091
  return (bg_rgb * 255).astype(np.uint8)
1092
 
1093
+ # FIXED: Calculate source region with bounds validation
1094
+ src_x0 = xs0 - x0 # Will be 0 if x0 >= 0, positive if x0 < 0
1095
+ src_y0 = ys0 - y0 # Will be 0 if y0 >= 0, positive if y0 < 0
1096
+ src_x1 = src_x0 + (xs1 - xs0)
1097
+ src_y1 = src_y0 + (ys1 - ys0)
1098
+
1099
+ # Validate source bounds
1100
+ if (src_x1 > sw or src_y1 > sh or src_x0 < 0 or src_y0 < 0 or
1101
+ src_x1 <= src_x0 or src_y1 <= src_y0):
1102
+ if frame_count <= 3:
1103
+ logger.error(f"Invalid source region: ({src_x0},{src_y0})-({src_x1},{src_y1}) for {sw}x{sh} scaled")
1104
+ return (bg_rgb * 255).astype(np.uint8)
1105
 
1106
+ # FIXED: Safe canvas placement with error handling
1107
+ try:
1108
+ fg_canvas[ys0:ys1, xs0:xs1, :] = fg_scaled[src_y0:src_y1, src_x0:src_x1, :]
1109
+ a_canvas[ys0:ys1, xs0:xs1] = a_scaled[src_y0:src_y1, src_x0:src_x1]
1110
+ except Exception as e:
1111
+ logger.error(f"Canvas placement failed: {e}")
1112
+ logger.error(f"Dest: [{ys0}:{ys1}, {xs0}:{xs1}], Src: [{src_y0}:{src_y1}, {src_x0}:{src_x1}]")
1113
+ return (bg_rgb * 255).astype(np.uint8)
1114
 
1115
+ # FIXED: Apply feathering with bounds checking
1116
  if feather_px > 0:
1117
+ kernel_size = max(3, feather_px * 2 + 1)
1118
+ if kernel_size % 2 == 0:
1119
+ kernel_size += 1 # Ensure odd kernel size
1120
+ try:
1121
+ a_canvas = cv2.GaussianBlur(a_canvas, (kernel_size, kernel_size), feather_px / 3.0)
1122
+ except Exception as e:
1123
+ logger.warning(f"Feathering failed: {e}")
1124
+
1125
+ # FIXED: Composite with proper alpha handling
1126
+ a3 = np.expand_dims(a_canvas, axis=2) # More explicit than [:, :, None]
1127
  comp = a3 * fg_canvas + (1.0 - a3) * bg_rgb
1128
+ result = np.clip(comp * 255, 0, 255).astype(np.uint8)
1129
+
1130
+ return result
1131
 
1132
  progress(0.7, desc="Compositing")
1133
  final_clip = original_clip.fl(composite_frame)
 
1164
  messages.append("✅ Done")
1165
  stats = memory_manager.get_memory_stats()
1166
  messages.append(f"📊 CPU {stats.cpu_memory_mb:.1f}MB, GPU {stats.gpu_memory_mb:.1f}MB")
1167
+ messages.append(f"🎯 Processed {frame_count} frames with placement ({px:.2f}, {py:.2f}) @ {ps:.2f}x scale")
1168
  progress(1.0, desc="Done")
1169
  return str(output_path), "\n".join(messages)
1170