Spaces:

Digitaljoint
/

ProofCheck

Sleeping

App Files Community

Yaz Hobooti committed on Sep 27, 2025

Commit

87425a1

1 Parent(s): ad64d27

Fix barcode detection: remove HAS_BARCODE gate, fix DPI filtering, fix coordinate mapping, prefer ZXing-CPP

Browse files

Files changed (2)

app.py +125 -102
requirements.txt +3 -3

app.py CHANGED Viewed

@@ -961,27 +961,39 @@ def _decode_cv2_qr(pil: Image.Image) -> List[Dict[str,Any]]:
     return []
 def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
-    variants=[pil, ImageOps.grayscale(pil), _binarize(pil)]
-    # upsample small images with NEAREST to keep bars crisp
-    w,h=pil.size
-    if max(w,h)<1600:
-        up=pil.resize((w*2,h*2), resample=Image.NEAREST)
-        variants += [up, _binarize(up)]
-    for v in variants:
-        # ZXing first (broad coverage), then ZBar, then DMTX, then cv2 QR
-        res = _decode_zxing(v)
-        if res: return res
-        res = _decode_zbar(v)
-        if res: return res
-        res = _decode_dmtx(v)
-        if res: return res
-        res = _decode_cv2_qr(v)
-        if res: return res
-        # try rotations
-        for angle in (90,180,270):
-            r=v.rotate(angle, expand=True)
-            res = _decode_zxing(r) or _decode_zbar(r) or _decode_dmtx(r) or _decode_cv2_qr(r)
-            if res: return res
     return []
 def _pix_to_pil(pix) -> Image.Image:
@@ -1118,73 +1130,69 @@ def compare_pdfs(file_a, file_b):
         # Debug: Print spell check results
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
-        if HAS_BARCODE:
-            # Use new barcode detection from barcode_reader
-            try:
-                codes_a = read_barcodes_from_path(file_a.name, max_pages=8, raster_dpis=(400, 600, 900))
-                codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
-                # Convert to old format for compatibility
-                bar_a, info_a = [], []
-                bar_b, info_b = [], []
-                for code in codes_a:
-                    if "error" not in code:
-                        # Create a simple box for visualization (center of polygon)
-                        if "polygon" in code:
-                            pts = np.array(code["polygon"])
-                            x1, y1 = pts.min(axis=0)
-                            x2, y2 = pts.max(axis=0)
-                            box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
-                            bar_a.append(box)
-                            info_a.append({
-                                "type": code.get("type", ""),
-                                "data": code.get("text", ""),
-                                "left": int(x1),
-                                "top": int(y1),
-                                "width": int(x2-x1),
-                                "height": int(y2-y1),
-                                "valid": True,
-                                "page": code.get("page", 0) + 1,
-                                "source": code.get("source", ""),
-                                "engine": code.get("engine", "")
-                            })
-                for code in codes_b:
-                    if "error" not in code:
-                        # Create a simple box for visualization (center of polygon)
-                        if "polygon" in code:
-                            pts = np.array(code["polygon"])
-                            x1, y1 = pts.min(axis=0)
-                            x2, y2 = pts.max(axis=0)
-                            box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
-                            bar_b.append(box)
-                            info_b.append({
-                                "type": code.get("type", ""),
-                                "data": code.get("text", ""),
-                                "left": int(x1),
-                                "top": int(y1),
-                                "width": int(x2-x1),
-                                "height": int(y2-y1),
-                                "valid": True,
-                                "page": code.get("page", 0) + 1,
-                                "source": code.get("source", ""),
-                                "engine": code.get("engine", "")
-                            })
-                # Debug: Print barcode detection results
-                print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
-                print(f"Raw codes_a: {len(codes_a)} items")
-                print(f"Raw codes_b: {len(codes_b)} items")
-                if codes_a:
-                    print(f"Sample code_a: {codes_a[0] if codes_a else 'None'}")
-                if codes_b:
-                    print(f"Sample code_b: {codes_b[0] if codes_b else 'None'}")
-            except Exception as e:
-                print(f"Barcode detection error: {e}")
-                bar_a, info_a = [], []
-                bar_b, info_b = [], []
-        else:
             bar_a, info_a = [], []
             bar_b, info_b = [], []
@@ -1419,36 +1427,51 @@ def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tup
         y_offset = 0
         for page_idx in range(num_pages):
             page = doc[page_idx]
             # Compute scale so that rendered width matches target_width when provided
             if target_width:
-                page_width_pts = float(page.rect.width)  # points (72 dpi)
                 scale = max(1.0, target_width / page_width_pts)
             else:
-                # fallback dpi ~600
-                scale = 600.0 / 72.0
             try:
                 pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), colorspace=fitz.csGRAY, alpha=False)
             except TypeError:
                 pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
             pil = _pix_to_pil(pix)
             pw, ph = pil.size
             hits = _decode_variants(pil)
             for r in hits:
                 x1 = int(r.get("left", 0))
-                y1 = int(r.get("top", 0)) + y_offset
-                w = int(r.get("width", 0))
-                h = int(r.get("height", 0))
                 x2 = x1 + w
                 y2 = y1 + h
-                b = Box(y1, x1, y2, x2, w * h)
-                # Exclude bottom 115mm for combined image if we know full height; else per-page
-                if image_size and _is_in_excluded_bottom_area(b, image_size[1]):
-                    continue
-                if not image_size and _is_in_excluded_bottom_area(b, ph):
                     continue
-                boxes.append(b)
-                sym, payload = r.get("type", ""), r.get("data", "")
-                infos.append({**r, "valid": _validate(sym, payload), "page": page_idx + 1, "source": f"page@scale{scale:.2f}"})
             y_offset += ph
         doc.close()
     except Exception:

     return []
 def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
+    """
+    Try a few light variants. If we upscale, scale detections back to the original size.
+    We avoid rotations here to keep coordinates aligned with the original image.
+    """
+    variants = []
+    w, h = pil.size
+    # base variants @1.0x
+    variants.append(("orig", pil, 1.0))
+    variants.append(("gray", ImageOps.grayscale(pil).convert("RGB"), 1.0))
+    variants.append(("bin", _binarize(pil).convert("RGB"), 1.0))
+    # upsample small pages, then scale back coords
+    if max(w, h) < 1600:
+        up2 = pil.resize((w*2, h*2), resample=Image.NEAREST)
+        variants.append(("up2", up2, 2.0))
+        variants.append(("up2_bin", _binarize(up2).convert("RGB"), 2.0))
+    for tag, vimg, sc in variants:
+        # Prefer ZXing, then ZBar, then DMTX, then OpenCV-QR
+        res = _decode_zxing(vimg) or _decode_zbar(vimg) or _decode_dmtx(vimg) or _decode_cv2_qr(vimg)
+        if not res:
+            continue
+        # Scale results back to original size when needed
+        if sc != 1.0:
+            for r in res:
+                r["left"]   = int(round(r.get("left", 0) / sc))
+                r["top"]    = int(round(r.get("top",  0) / sc))
+                r["width"]  = int(round(r.get("width", 0) / sc))
+                r["height"] = int(round(r.get("height",0) / sc))
+        return res
     return []
 def _pix_to_pil(pix) -> Image.Image:
         # Debug: Print spell check results
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
+        # Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
+        try:
+            codes_a = read_barcodes_from_path(file_a.name, max_pages=8, raster_dpis=(400, 600, 900))
+            codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
+            # Convert to old format for compatibility
+            bar_a, info_a = [], []
+            bar_b, info_b = [], []
+            for code in codes_a:
+                if "error" not in code:
+                    # Create a simple box for visualization (center of polygon)
+                    if "polygon" in code:
+                        pts = np.array(code["polygon"])
+                        x1, y1 = pts.min(axis=0)
+                        x2, y2 = pts.max(axis=0)
+                        box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
+                        bar_a.append(box)
+                        info_a.append({
+                            "type": code.get("type", ""),
+                            "data": code.get("text", ""),
+                            "left": int(x1),
+                            "top": int(y1),
+                            "width": int(x2-x1),
+                            "height": int(y2-y1),
+                            "valid": True,
+                            "page": code.get("page", 0) + 1,
+                            "source": code.get("source", ""),
+                            "engine": code.get("engine", "")
+                        })
+            for code in codes_b:
+                if "error" not in code:
+                    # Create a simple box for visualization (center of polygon)
+                    if "polygon" in code:
+                        pts = np.array(code["polygon"])
+                        x1, y1 = pts.min(axis=0)
+                        x2, y2 = pts.max(axis=0)
+                        box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
+                        bar_b.append(box)
+                        info_b.append({
+                            "type": code.get("type", ""),
+                            "data": code.get("text", ""),
+                            "left": int(x1),
+                            "top": int(y1),
+                            "width": int(x2-x1),
+                            "height": int(y2-y1),
+                            "valid": True,
+                            "page": code.get("page", 0) + 1,
+                            "source": code.get("source", ""),
+                            "engine": code.get("engine", "")
+                        })
+            # Debug: Print barcode detection results
+            print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
+            print(f"Raw codes_a: {len(codes_a)} items")
+            print(f"Raw codes_b: {len(codes_b)} items")
+            if codes_a:
+                print(f"Sample code_a: {codes_a[0] if codes_a else 'None'}")
+            if codes_b:
+                print(f"Sample code_b: {codes_b[0] if codes_b else 'None'}")
+        except Exception as e:
+            print(f"Barcode detection error: {e}")
             bar_a, info_a = [], []
             bar_b, info_b = [], []
         y_offset = 0
         for page_idx in range(num_pages):
             page = doc[page_idx]
             # Compute scale so that rendered width matches target_width when provided
             if target_width:
+                page_width_pts = float(page.rect.width)  # 72 dpi units
                 scale = max(1.0, target_width / page_width_pts)
             else:
+                scale = 600.0 / 72.0  # ~600 dpi default
             try:
                 pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), colorspace=fitz.csGRAY, alpha=False)
             except TypeError:
                 pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
             pil = _pix_to_pil(pix)
             pw, ph = pil.size
+            effective_dpi = 72.0 * scale  # <-- this is the real DPI for this rendered page
             hits = _decode_variants(pil)
             for r in hits:
                 x1 = int(r.get("left", 0))
+                y1 = int(r.get("top", 0))
+                w  = int(r.get("width", 0))
+                h  = int(r.get("height", 0))
                 x2 = x1 + w
                 y2 = y1 + h
+                # Per-page box (before stacking)
+                per_page_box = Box(y1, x1, y2, x2, w*h)
+                # Exclude the bottom 115mm of THIS PAGE using the correct DPI
+                if _is_in_excluded_bottom_area(per_page_box, ph, excluded_height_mm=115.0, dpi=int(effective_dpi)):
                     continue
+                # Map to combined image by adding the current page's y-offset
+                combined_box = Box(y1 + y_offset, x1, y2 + y_offset, x2, w*h)
+                boxes.append(combined_box)
+                sym, payload = r.get("type",""), r.get("data","")
+                infos.append({
+                    **r,
+                    "valid": _validate(sym, payload),
+                    "page": page_idx + 1,
+                    "source": f"page@dpi{int(effective_dpi)}"
+                })
             y_offset += ph
         doc.close()
     except Exception:

requirements.txt CHANGED Viewed

@@ -1,8 +1,8 @@
-numpy==1.26.4
 pillow>=10.3
-pymupdf>=1.24.9
 opencv-contrib-python-headless==4.10.0.84
-zxing-cpp==2.2.0
 pdf2image
 gradio
 pytesseract

+zxing-cpp==2.2.0
+pymupdf>=1.24
 pillow>=10.3
+numpy==1.26.4
 opencv-contrib-python-headless==4.10.0.84
 pdf2image
 gradio
 pytesseract