Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Sep 12

Commit

69ba459

verified ·

1 Parent(s): 8179154

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -33

app.py CHANGED Viewed

@@ -9,6 +9,7 @@
 - Memory management & cleanup
 - SDXL / Playground / OpenAI backgrounds
 - Gradio UI with "CHAPTER" dividers
 """
 # =============================================================================
@@ -902,7 +903,7 @@ def process_video_chunks(self, video_path: str, processor_func, **kwargs) -> str
         return self._merge_chunks(processed, fps, width, height)
 # =============================================================================
-# CHAPTER 9: MAIN PIPELINE (SAM2 → MatAnyone → Composite)
 # =============================================================================
 def process_video_main(
     video_path: str,
@@ -1003,52 +1004,130 @@ def process_video_main(
                 grad = _make_vertical_gradient(w, h, (200, 205, 215), (160, 170, 190))
                 bg_rgb = cv2.cvtColor(grad, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
-            # Placement defaults
-            px = float((placement or {}).get("x", 0.5))
-            py = float((placement or {}).get("y", 0.75))
-            ps = float((placement or {}).get("scale", 1.0))
-            feather_px = int((placement or {}).get("feather", 3))
-            px = max(0.0, min(1.0, px)); py = max(0.0, min(1.0, py))
-            ps = max(0.3, min(2.0, ps)); feather_px = max(0, min(50, feather_px))
             def composite_frame(get_frame, t):
                 frame = get_frame(t).astype(np.float32) / 255.0
-                alpha_t = min(t, max(0.0, (alpha_clip.duration or 0) - 0.01)) if (alpha_clip.duration and alpha_clip.duration > 0) else 0.0
-                a = alpha_clip.get_frame(alpha_t)
-                if a.ndim == 3:
-                    a = a[:, :, 0]
-                a = a.astype(np.float32) / 255.0
                 hh, ww = frame.shape[:2]
-                sw = max(1, int(ww * ps))
-                sh = max(1, int(hh * ps))
-                fg_scaled = cv2.resize(frame, (sw, sh), interpolation=cv2.INTER_LINEAR)
-                a_scaled = cv2.resize(a, (sw, sh), interpolation=cv2.INTER_LINEAR)
                 fg_canvas = np.zeros_like(frame, dtype=np.float32)
                 a_canvas = np.zeros((hh, ww), dtype=np.float32)
-                cx = int(px * ww); cy = int(py * hh)
-                x0 = int(cx - sw // 2); y0 = int(cy - sh // 2)
-                xs0, ys0 = max(0, x0), max(0, y0)
-                xs1, ys1 = min(ww, x0 + sw), min(hh, y0 + sh)
                 if xs1 <= xs0 or ys1 <= ys0:
                     return (bg_rgb * 255).astype(np.uint8)
-                src_x0 = xs0 - x0; src_y0 = ys0 - y0
-                src_x1 = src_x0 + (xs1 - xs0); src_y1 = src_y0 + (ys1 - ys0)
-                fg_canvas[ys0:ys1, xs0:xs1, :] = fg_scaled[src_y0:src_y1, src_x0:src_x1, :]
-                a_canvas[ys0:ys1, xs0:xs1] = a_scaled[src_y0:src_y1, src_x0:src_x1]
                 if feather_px > 0:
-                    k = (feather_px * 2 + 1)
-                    a_canvas = cv2.GaussianBlur(a_canvas, (k, k), feather_px)
-                a3 = a_canvas[:, :, None]
                 comp = a3 * fg_canvas + (1.0 - a3) * bg_rgb
-                return np.clip(comp * 255, 0, 255).astype(np.uint8)
             progress(0.7, desc="Compositing")
             final_clip = original_clip.fl(composite_frame)
@@ -1085,6 +1164,7 @@ def composite_frame(get_frame, t):
             messages.append("✅ Done")
             stats = memory_manager.get_memory_stats()
             messages.append(f"📊 CPU {stats.cpu_memory_mb:.1f}MB, GPU {stats.gpu_memory_mb:.1f}MB")
             progress(1.0, desc="Done")
             return str(output_path), "\n".join(messages)

 - Memory management & cleanup
 - SDXL / Playground / OpenAI backgrounds
 - Gradio UI with "CHAPTER" dividers
+- FIXED: Enhanced positioning with debug logging and coordinate precision
 """
 # =============================================================================
         return self._merge_chunks(processed, fps, width, height)
 # =============================================================================
+# CHAPTER 9: MAIN PIPELINE (SAM2 → MatAnyone → Composite) - FIXED VERSION
 # =============================================================================
 def process_video_main(
     video_path: str,
                 grad = _make_vertical_gradient(w, h, (200, 205, 215), (160, 170, 190))
                 bg_rgb = cv2.cvtColor(grad, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+            # FIXED: Enhanced placement parameters with validation and debugging
+            placement = placement or {}
+            px = max(0.0, min(1.0, float(placement.get("x", 0.5))))
+            py = max(0.0, min(1.0, float(placement.get("y", 0.75))))
+            ps = max(0.3, min(2.0, float(placement.get("scale", 1.0))))
+            feather_px = max(0, min(50, int(placement.get("feather", 3))))
+            # Debug logging for placement parameters
+            logger.info(f"POSITIONING DEBUG: px={px:.3f}, py={py:.3f}, ps={ps:.3f}, feather={feather_px}")
+            logger.info(f"VIDEO DIMENSIONS: {w}x{h}")
+            logger.info(f"TARGET CENTER: ({int(px * w)}, {int(py * h)})")
+            frame_count = 0
             def composite_frame(get_frame, t):
+                nonlocal frame_count
+                frame_count += 1
+                # Get original frame
                 frame = get_frame(t).astype(np.float32) / 255.0
                 hh, ww = frame.shape[:2]
+                # FIXED: Better alpha temporal synchronization
+                alpha_duration = getattr(alpha_clip, 'duration', None)
+                if alpha_duration and alpha_duration > 0:
+                    # Ensure we don't go beyond alpha video duration
+                    alpha_t = min(t, alpha_duration - 0.01)
+                    alpha_t = max(0.0, alpha_t)
+                else:
+                    alpha_t = 0.0
+                try:
+                    a = alpha_clip.get_frame(alpha_t)
+                    # Handle multi-channel alpha
+                    if a.ndim == 3:
+                        a = a[:, :, 0]
+                    a = a.astype(np.float32) / 255.0
+                    # FIXED: Ensure alpha matches frame dimensions exactly
+                    if a.shape != (hh, ww):
+                        logger.warning(f"Alpha size mismatch: {a.shape} vs {(hh, ww)}, resizing...")
+                        a = cv2.resize(a, (ww, hh), interpolation=cv2.INTER_LINEAR)
+                except Exception as e:
+                    logger.error(f"Alpha frame error at t={t:.3f}: {e}")
+                    return (bg_rgb * 255).astype(np.uint8)
+                # FIXED: Calculate scaled dimensions with better rounding
+                sw = max(1, round(ww * ps))  # Use round instead of int for better precision
+                sh = max(1, round(hh * ps))
+                # FIXED: Scale both frame and alpha consistently
+                try:
+                    fg_scaled = cv2.resize(frame, (sw, sh), interpolation=cv2.INTER_AREA if ps < 1.0 else cv2.INTER_LINEAR)
+                    a_scaled = cv2.resize(a, (sw, sh), interpolation=cv2.INTER_AREA if ps < 1.0 else cv2.INTER_LINEAR)
+                except Exception as e:
+                    logger.error(f"Scaling error: {e}")
+                    return (bg_rgb * 255).astype(np.uint8)
+                # Create canvases
                 fg_canvas = np.zeros_like(frame, dtype=np.float32)
                 a_canvas = np.zeros((hh, ww), dtype=np.float32)
+                # FIXED: More precise center calculations
+                cx = round(px * ww)
+                cy = round(py * hh)
+                # FIXED: Use floor division for consistent centering
+                x0 = cx - sw // 2
+                y0 = cy - sh // 2
+                # Debug logging for first few frames
+                if frame_count <= 3:
+                    logger.info(f"FRAME {frame_count}: scaled_size=({sw}, {sh}), center=({cx}, {cy}), top_left=({x0}, {y0})")
+                # FIXED: Robust bounds checking with edge case handling
+                xs0 = max(0, x0)
+                ys0 = max(0, y0)
+                xs1 = min(ww, x0 + sw)
+                ys1 = min(hh, y0 + sh)
+                # Check for valid placement region
                 if xs1 <= xs0 or ys1 <= ys0:
+                    if frame_count <= 3:
+                        logger.warning(f"Subject outside bounds: dest=({xs0},{ys0})-({xs1},{ys1})")
                     return (bg_rgb * 255).astype(np.uint8)
+                # FIXED: Calculate source region with bounds validation
+                src_x0 = xs0 - x0  # Will be 0 if x0 >= 0, positive if x0 < 0
+                src_y0 = ys0 - y0  # Will be 0 if y0 >= 0, positive if y0 < 0
+                src_x1 = src_x0 + (xs1 - xs0)
+                src_y1 = src_y0 + (ys1 - ys0)
+                # Validate source bounds
+                if (src_x1 > sw or src_y1 > sh or src_x0 < 0 or src_y0 < 0 or
+                    src_x1 <= src_x0 or src_y1 <= src_y0):
+                    if frame_count <= 3:
+                        logger.error(f"Invalid source region: ({src_x0},{src_y0})-({src_x1},{src_y1}) for {sw}x{sh} scaled")
+                    return (bg_rgb * 255).astype(np.uint8)
+                # FIXED: Safe canvas placement with error handling
+                try:
+                    fg_canvas[ys0:ys1, xs0:xs1, :] = fg_scaled[src_y0:src_y1, src_x0:src_x1, :]
+                    a_canvas[ys0:ys1, xs0:xs1] = a_scaled[src_y0:src_y1, src_x0:src_x1]
+                except Exception as e:
+                    logger.error(f"Canvas placement failed: {e}")
+                    logger.error(f"Dest: [{ys0}:{ys1}, {xs0}:{xs1}], Src: [{src_y0}:{src_y1}, {src_x0}:{src_x1}]")
+                    return (bg_rgb * 255).astype(np.uint8)
+                # FIXED: Apply feathering with bounds checking
                 if feather_px > 0:
+                    kernel_size = max(3, feather_px * 2 + 1)
+                    if kernel_size % 2 == 0:
+                        kernel_size += 1  # Ensure odd kernel size
+                    try:
+                        a_canvas = cv2.GaussianBlur(a_canvas, (kernel_size, kernel_size), feather_px / 3.0)
+                    except Exception as e:
+                        logger.warning(f"Feathering failed: {e}")
+                # FIXED: Composite with proper alpha handling
+                a3 = np.expand_dims(a_canvas, axis=2)  # More explicit than [:, :, None]
                 comp = a3 * fg_canvas + (1.0 - a3) * bg_rgb
+                result = np.clip(comp * 255, 0, 255).astype(np.uint8)
+                return result
             progress(0.7, desc="Compositing")
             final_clip = original_clip.fl(composite_frame)
             messages.append("✅ Done")
             stats = memory_manager.get_memory_stats()
             messages.append(f"📊 CPU {stats.cpu_memory_mb:.1f}MB, GPU {stats.gpu_memory_mb:.1f}MB")
+            messages.append(f"🎯 Processed {frame_count} frames with placement ({px:.2f}, {py:.2f}) @ {ps:.2f}x scale")
             progress(1.0, desc="Done")
             return str(output_path), "\n".join(messages)