Yaz Hobooti committed on
Commit
87425a1
·
1 Parent(s): ad64d27

Fix barcode detection: remove HAS_BARCODE gate, fix DPI filtering, fix coordinate mapping, prefer ZXing-CPP

Browse files
Files changed (2) hide show
  1. app.py +125 -102
  2. requirements.txt +3 -3
app.py CHANGED
@@ -961,27 +961,39 @@ def _decode_cv2_qr(pil: Image.Image) -> List[Dict[str,Any]]:
961
  return []
962
 
963
  def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
964
- variants=[pil, ImageOps.grayscale(pil), _binarize(pil)]
965
- # upsample small images with NEAREST to keep bars crisp
966
- w,h=pil.size
967
- if max(w,h)<1600:
968
- up=pil.resize((w*2,h*2), resample=Image.NEAREST)
969
- variants += [up, _binarize(up)]
970
- for v in variants:
971
- # ZXing first (broad coverage), then ZBar, then DMTX, then cv2 QR
972
- res = _decode_zxing(v)
973
- if res: return res
974
- res = _decode_zbar(v)
975
- if res: return res
976
- res = _decode_dmtx(v)
977
- if res: return res
978
- res = _decode_cv2_qr(v)
979
- if res: return res
980
- # try rotations
981
- for angle in (90,180,270):
982
- r=v.rotate(angle, expand=True)
983
- res = _decode_zxing(r) or _decode_zbar(r) or _decode_dmtx(r) or _decode_cv2_qr(r)
984
- if res: return res
 
 
 
 
 
 
 
 
 
 
 
 
985
  return []
986
 
987
  def _pix_to_pil(pix) -> Image.Image:
@@ -1118,73 +1130,69 @@ def compare_pdfs(file_a, file_b):
1118
  # Debug: Print spell check results
1119
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1120
 
1121
- if HAS_BARCODE:
1122
- # Use new barcode detection from barcode_reader
1123
- try:
1124
- codes_a = read_barcodes_from_path(file_a.name, max_pages=8, raster_dpis=(400, 600, 900))
1125
- codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
1126
-
1127
- # Convert to old format for compatibility
1128
- bar_a, info_a = [], []
1129
- bar_b, info_b = [], []
1130
-
1131
- for code in codes_a:
1132
- if "error" not in code:
1133
- # Create a simple box for visualization (center of polygon)
1134
- if "polygon" in code:
1135
- pts = np.array(code["polygon"])
1136
- x1, y1 = pts.min(axis=0)
1137
- x2, y2 = pts.max(axis=0)
1138
- box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
1139
- bar_a.append(box)
1140
- info_a.append({
1141
- "type": code.get("type", ""),
1142
- "data": code.get("text", ""),
1143
- "left": int(x1),
1144
- "top": int(y1),
1145
- "width": int(x2-x1),
1146
- "height": int(y2-y1),
1147
- "valid": True,
1148
- "page": code.get("page", 0) + 1,
1149
- "source": code.get("source", ""),
1150
- "engine": code.get("engine", "")
1151
- })
1152
-
1153
- for code in codes_b:
1154
- if "error" not in code:
1155
- # Create a simple box for visualization (center of polygon)
1156
- if "polygon" in code:
1157
- pts = np.array(code["polygon"])
1158
- x1, y1 = pts.min(axis=0)
1159
- x2, y2 = pts.max(axis=0)
1160
- box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
1161
- bar_b.append(box)
1162
- info_b.append({
1163
- "type": code.get("type", ""),
1164
- "data": code.get("text", ""),
1165
- "left": int(x1),
1166
- "top": int(y1),
1167
- "width": int(x2-x1),
1168
- "height": int(y2-y1),
1169
- "valid": True,
1170
- "page": code.get("page", 0) + 1,
1171
- "source": code.get("source", ""),
1172
- "engine": code.get("engine", "")
1173
- })
1174
-
1175
- # Debug: Print barcode detection results
1176
- print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
1177
- print(f"Raw codes_a: {len(codes_a)} items")
1178
- print(f"Raw codes_b: {len(codes_b)} items")
1179
- if codes_a:
1180
- print(f"Sample code_a: {codes_a[0] if codes_a else 'None'}")
1181
- if codes_b:
1182
- print(f"Sample code_b: {codes_b[0] if codes_b else 'None'}")
1183
- except Exception as e:
1184
- print(f"Barcode detection error: {e}")
1185
- bar_a, info_a = [], []
1186
- bar_b, info_b = [], []
1187
- else:
1188
  bar_a, info_a = [], []
1189
  bar_b, info_b = [], []
1190
 
@@ -1419,36 +1427,51 @@ def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tup
1419
  y_offset = 0
1420
  for page_idx in range(num_pages):
1421
  page = doc[page_idx]
 
1422
  # Compute scale so that rendered width matches target_width when provided
1423
  if target_width:
1424
- page_width_pts = float(page.rect.width) # points (72 dpi)
1425
  scale = max(1.0, target_width / page_width_pts)
1426
  else:
1427
- # fallback dpi ~600
1428
- scale = 600.0 / 72.0
1429
  try:
1430
  pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), colorspace=fitz.csGRAY, alpha=False)
1431
  except TypeError:
1432
  pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
 
1433
  pil = _pix_to_pil(pix)
1434
  pw, ph = pil.size
 
 
1435
  hits = _decode_variants(pil)
1436
  for r in hits:
1437
  x1 = int(r.get("left", 0))
1438
- y1 = int(r.get("top", 0)) + y_offset
1439
- w = int(r.get("width", 0))
1440
- h = int(r.get("height", 0))
1441
  x2 = x1 + w
1442
  y2 = y1 + h
1443
- b = Box(y1, x1, y2, x2, w * h)
1444
- # Exclude bottom 115mm for combined image if we know full height; else per-page
1445
- if image_size and _is_in_excluded_bottom_area(b, image_size[1]):
1446
- continue
1447
- if not image_size and _is_in_excluded_bottom_area(b, ph):
 
1448
  continue
1449
- boxes.append(b)
1450
- sym, payload = r.get("type", ""), r.get("data", "")
1451
- infos.append({**r, "valid": _validate(sym, payload), "page": page_idx + 1, "source": f"page@scale{scale:.2f}"})
 
 
 
 
 
 
 
 
 
 
1452
  y_offset += ph
1453
  doc.close()
1454
  except Exception:
 
961
  return []
962
 
963
  def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
964
+ """
965
+ Try a few light variants. If we upscale, scale detections back to the original size.
966
+ We avoid rotations here to keep coordinates aligned with the original image.
967
+ """
968
+ variants = []
969
+ w, h = pil.size
970
+
971
+ # base variants @1.0x
972
+ variants.append(("orig", pil, 1.0))
973
+ variants.append(("gray", ImageOps.grayscale(pil).convert("RGB"), 1.0))
974
+ variants.append(("bin", _binarize(pil).convert("RGB"), 1.0))
975
+
976
+ # upsample small pages, then scale back coords
977
+ if max(w, h) < 1600:
978
+ up2 = pil.resize((w*2, h*2), resample=Image.NEAREST)
979
+ variants.append(("up2", up2, 2.0))
980
+ variants.append(("up2_bin", _binarize(up2).convert("RGB"), 2.0))
981
+
982
+ for tag, vimg, sc in variants:
983
+ # Prefer ZXing, then ZBar, then DMTX, then OpenCV-QR
984
+ res = _decode_zxing(vimg) or _decode_zbar(vimg) or _decode_dmtx(vimg) or _decode_cv2_qr(vimg)
985
+ if not res:
986
+ continue
987
+
988
+ # Scale results back to original size when needed
989
+ if sc != 1.0:
990
+ for r in res:
991
+ r["left"] = int(round(r.get("left", 0) / sc))
992
+ r["top"] = int(round(r.get("top", 0) / sc))
993
+ r["width"] = int(round(r.get("width", 0) / sc))
994
+ r["height"] = int(round(r.get("height",0) / sc))
995
+ return res
996
+
997
  return []
998
 
999
  def _pix_to_pil(pix) -> Image.Image:
 
1130
  # Debug: Print spell check results
1131
  print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
1132
 
1133
+ # Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
1134
+ try:
1135
+ codes_a = read_barcodes_from_path(file_a.name, max_pages=8, raster_dpis=(400, 600, 900))
1136
+ codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
1137
+
1138
+ # Convert to old format for compatibility
1139
+ bar_a, info_a = [], []
1140
+ bar_b, info_b = [], []
1141
+
1142
+ for code in codes_a:
1143
+ if "error" not in code:
1144
+ # Create a simple box for visualization (center of polygon)
1145
+ if "polygon" in code:
1146
+ pts = np.array(code["polygon"])
1147
+ x1, y1 = pts.min(axis=0)
1148
+ x2, y2 = pts.max(axis=0)
1149
+ box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
1150
+ bar_a.append(box)
1151
+ info_a.append({
1152
+ "type": code.get("type", ""),
1153
+ "data": code.get("text", ""),
1154
+ "left": int(x1),
1155
+ "top": int(y1),
1156
+ "width": int(x2-x1),
1157
+ "height": int(y2-y1),
1158
+ "valid": True,
1159
+ "page": code.get("page", 0) + 1,
1160
+ "source": code.get("source", ""),
1161
+ "engine": code.get("engine", "")
1162
+ })
1163
+
1164
+ for code in codes_b:
1165
+ if "error" not in code:
1166
+ # Create a simple box for visualization (center of polygon)
1167
+ if "polygon" in code:
1168
+ pts = np.array(code["polygon"])
1169
+ x1, y1 = pts.min(axis=0)
1170
+ x2, y2 = pts.max(axis=0)
1171
+ box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
1172
+ bar_b.append(box)
1173
+ info_b.append({
1174
+ "type": code.get("type", ""),
1175
+ "data": code.get("text", ""),
1176
+ "left": int(x1),
1177
+ "top": int(y1),
1178
+ "width": int(x2-x1),
1179
+ "height": int(y2-y1),
1180
+ "valid": True,
1181
+ "page": code.get("page", 0) + 1,
1182
+ "source": code.get("source", ""),
1183
+ "engine": code.get("engine", "")
1184
+ })
1185
+
1186
+ # Debug: Print barcode detection results
1187
+ print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
1188
+ print(f"Raw codes_a: {len(codes_a)} items")
1189
+ print(f"Raw codes_b: {len(codes_b)} items")
1190
+ if codes_a:
1191
+ print(f"Sample code_a: {codes_a[0] if codes_a else 'None'}")
1192
+ if codes_b:
1193
+ print(f"Sample code_b: {codes_b[0] if codes_b else 'None'}")
1194
+ except Exception as e:
1195
+ print(f"Barcode detection error: {e}")
 
 
 
 
1196
  bar_a, info_a = [], []
1197
  bar_b, info_b = [], []
1198
 
 
1427
  y_offset = 0
1428
  for page_idx in range(num_pages):
1429
  page = doc[page_idx]
1430
+
1431
  # Compute scale so that rendered width matches target_width when provided
1432
  if target_width:
1433
+ page_width_pts = float(page.rect.width) # 72 dpi units
1434
  scale = max(1.0, target_width / page_width_pts)
1435
  else:
1436
+ scale = 600.0 / 72.0 # ~600 dpi default
1437
+
1438
  try:
1439
  pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), colorspace=fitz.csGRAY, alpha=False)
1440
  except TypeError:
1441
  pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
1442
+
1443
  pil = _pix_to_pil(pix)
1444
  pw, ph = pil.size
1445
+ effective_dpi = 72.0 * scale # <-- this is the real DPI for this rendered page
1446
+
1447
  hits = _decode_variants(pil)
1448
  for r in hits:
1449
  x1 = int(r.get("left", 0))
1450
+ y1 = int(r.get("top", 0))
1451
+ w = int(r.get("width", 0))
1452
+ h = int(r.get("height", 0))
1453
  x2 = x1 + w
1454
  y2 = y1 + h
1455
+
1456
+ # Per-page box (before stacking)
1457
+ per_page_box = Box(y1, x1, y2, x2, w*h)
1458
+
1459
+ # Exclude the bottom 115mm of THIS PAGE using the correct DPI
1460
+ if _is_in_excluded_bottom_area(per_page_box, ph, excluded_height_mm=115.0, dpi=int(effective_dpi)):
1461
  continue
1462
+
1463
+ # Map to combined image by adding the current page's y-offset
1464
+ combined_box = Box(y1 + y_offset, x1, y2 + y_offset, x2, w*h)
1465
+ boxes.append(combined_box)
1466
+
1467
+ sym, payload = r.get("type",""), r.get("data","")
1468
+ infos.append({
1469
+ **r,
1470
+ "valid": _validate(sym, payload),
1471
+ "page": page_idx + 1,
1472
+ "source": f"page@dpi{int(effective_dpi)}"
1473
+ })
1474
+
1475
  y_offset += ph
1476
  doc.close()
1477
  except Exception:
requirements.txt CHANGED
@@ -1,8 +1,8 @@
1
- numpy==1.26.4
 
2
  pillow>=10.3
3
- pymupdf>=1.24.9
4
  opencv-contrib-python-headless==4.10.0.84
5
- zxing-cpp==2.2.0
6
  pdf2image
7
  gradio
8
  pytesseract
 
1
+ zxing-cpp==2.2.0
2
+ pymupdf>=1.24
3
  pillow>=10.3
4
+ numpy==1.26.4
5
  opencv-contrib-python-headless==4.10.0.84
 
6
  pdf2image
7
  gradio
8
  pytesseract