Spaces:
Sleeping
Sleeping
Yaz Hobooti
committed on
Commit
·
87425a1
1
Parent(s):
ad64d27
Fix barcode detection: remove HAS_BARCODE gate, fix DPI filtering, fix coordinate mapping, prefer ZXing-CPP
Browse files- app.py +125 -102
- requirements.txt +3 -3
app.py
CHANGED
|
@@ -961,27 +961,39 @@ def _decode_cv2_qr(pil: Image.Image) -> List[Dict[str,Any]]:
|
|
| 961 |
return []
|
| 962 |
|
| 963 |
def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
| 970 |
-
|
| 971 |
-
|
| 972 |
-
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 985 |
return []
|
| 986 |
|
| 987 |
def _pix_to_pil(pix) -> Image.Image:
|
|
@@ -1118,73 +1130,69 @@ def compare_pdfs(file_a, file_b):
|
|
| 1118 |
# Debug: Print spell check results
|
| 1119 |
print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
|
| 1120 |
|
| 1121 |
-
if
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
|
| 1174 |
-
|
| 1175 |
-
|
| 1176 |
-
|
| 1177 |
-
|
| 1178 |
-
|
| 1179 |
-
if codes_a
|
| 1180 |
-
|
| 1181 |
-
if codes_b
|
| 1182 |
-
|
| 1183 |
-
|
| 1184 |
-
print(f"Barcode detection error: {e}")
|
| 1185 |
-
bar_a, info_a = [], []
|
| 1186 |
-
bar_b, info_b = [], []
|
| 1187 |
-
else:
|
| 1188 |
bar_a, info_a = [], []
|
| 1189 |
bar_b, info_b = [], []
|
| 1190 |
|
|
@@ -1419,36 +1427,51 @@ def find_barcode_boxes_and_info_from_pdf(pdf_path: str, image_size: Optional[Tup
|
|
| 1419 |
y_offset = 0
|
| 1420 |
for page_idx in range(num_pages):
|
| 1421 |
page = doc[page_idx]
|
|
|
|
| 1422 |
# Compute scale so that rendered width matches target_width when provided
|
| 1423 |
if target_width:
|
| 1424 |
-
page_width_pts = float(page.rect.width) #
|
| 1425 |
scale = max(1.0, target_width / page_width_pts)
|
| 1426 |
else:
|
| 1427 |
-
#
|
| 1428 |
-
|
| 1429 |
try:
|
| 1430 |
pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), colorspace=fitz.csGRAY, alpha=False)
|
| 1431 |
except TypeError:
|
| 1432 |
pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
|
|
|
|
| 1433 |
pil = _pix_to_pil(pix)
|
| 1434 |
pw, ph = pil.size
|
|
|
|
|
|
|
| 1435 |
hits = _decode_variants(pil)
|
| 1436 |
for r in hits:
|
| 1437 |
x1 = int(r.get("left", 0))
|
| 1438 |
-
y1 = int(r.get("top", 0))
|
| 1439 |
-
w
|
| 1440 |
-
h
|
| 1441 |
x2 = x1 + w
|
| 1442 |
y2 = y1 + h
|
| 1443 |
-
|
| 1444 |
-
#
|
| 1445 |
-
|
| 1446 |
-
|
| 1447 |
-
|
|
|
|
| 1448 |
continue
|
| 1449 |
-
|
| 1450 |
-
|
| 1451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1452 |
y_offset += ph
|
| 1453 |
doc.close()
|
| 1454 |
except Exception:
|
|
|
|
| 961 |
return []
|
| 962 |
|
| 963 |
def _decode_variants(pil: Image.Image) -> List[Dict[str,Any]]:
    """
    Decode barcodes from ``pil``, trying a few light image variants in turn.

    Variants are tried in this order: the original image, a grayscale copy,
    the output of ``_binarize`` and, only when the longest side is under
    1600 px, a 2x nearest-neighbour upscale plus its ``_binarize`` output.
    Each variant is built lazily, right before it is decoded, so the later
    ones are never computed once an earlier variant has produced a hit.

    No rotations are applied, which keeps coordinates aligned with the
    original image. Detections made on an upscaled variant have their
    ``left``/``top``/``width``/``height`` divided by the scale factor and
    rounded to int, so they are expressed in the original image's pixels.

    Returns:
        The detections of the first variant that decodes anything, or an
        empty list when every variant fails.
    """
    w, h = pil.size

    def _variants():
        # Base variants @1.0x.
        yield pil, 1.0
        yield ImageOps.grayscale(pil).convert("RGB"), 1.0
        yield _binarize(pil).convert("RGB"), 1.0

        # Upsample small pages only; detections are scaled back below.
        if max(w, h) < 1600:
            up2 = pil.resize((w * 2, h * 2), resample=Image.NEAREST)
            yield up2, 2.0
            yield _binarize(up2).convert("RGB"), 2.0

    for vimg, sc in _variants():
        # Prefer ZXing, then ZBar, then DMTX, then OpenCV-QR.
        res = (
            _decode_zxing(vimg)
            or _decode_zbar(vimg)
            or _decode_dmtx(vimg)
            or _decode_cv2_qr(vimg)
        )
        if not res:
            continue

        # Scale results back to the original size when the variant was upscaled.
        if sc != 1.0:
            for r in res:
                for key in ("left", "top", "width", "height"):
                    r[key] = int(round(r.get(key, 0) / sc))
        return res

    return []
|
| 998 |
|
| 999 |
def _pix_to_pil(pix) -> Image.Image:
|
|
|
|
| 1130 |
# Debug: Print spell check results
|
| 1131 |
print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
|
| 1132 |
|
| 1133 |
+
# Always attempt barcode scan. The PDF path uses ZXing-CPP / pyzbar / dmtx / cv2 if available.
|
| 1134 |
+
try:
|
| 1135 |
+
codes_a = read_barcodes_from_path(file_a.name, max_pages=8, raster_dpis=(400, 600, 900))
|
| 1136 |
+
codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
|
| 1137 |
+
|
| 1138 |
+
# Convert to old format for compatibility
|
| 1139 |
+
bar_a, info_a = [], []
|
| 1140 |
+
bar_b, info_b = [], []
|
| 1141 |
+
|
| 1142 |
+
for code in codes_a:
|
| 1143 |
+
if "error" not in code:
|
| 1144 |
+
# Create a simple box for visualization (center of polygon)
|
| 1145 |
+
if "polygon" in code:
|
| 1146 |
+
pts = np.array(code["polygon"])
|
| 1147 |
+
x1, y1 = pts.min(axis=0)
|
| 1148 |
+
x2, y2 = pts.max(axis=0)
|
| 1149 |
+
box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
|
| 1150 |
+
bar_a.append(box)
|
| 1151 |
+
info_a.append({
|
| 1152 |
+
"type": code.get("type", ""),
|
| 1153 |
+
"data": code.get("text", ""),
|
| 1154 |
+
"left": int(x1),
|
| 1155 |
+
"top": int(y1),
|
| 1156 |
+
"width": int(x2-x1),
|
| 1157 |
+
"height": int(y2-y1),
|
| 1158 |
+
"valid": True,
|
| 1159 |
+
"page": code.get("page", 0) + 1,
|
| 1160 |
+
"source": code.get("source", ""),
|
| 1161 |
+
"engine": code.get("engine", "")
|
| 1162 |
+
})
|
| 1163 |
+
|
| 1164 |
+
for code in codes_b:
|
| 1165 |
+
if "error" not in code:
|
| 1166 |
+
# Create a simple box for visualization (center of polygon)
|
| 1167 |
+
if "polygon" in code:
|
| 1168 |
+
pts = np.array(code["polygon"])
|
| 1169 |
+
x1, y1 = pts.min(axis=0)
|
| 1170 |
+
x2, y2 = pts.max(axis=0)
|
| 1171 |
+
box = Box(y1=int(y1), x1=int(x1), y2=int(y2), x2=int(x2), area=int((x2-x1)*(y2-y1)))
|
| 1172 |
+
bar_b.append(box)
|
| 1173 |
+
info_b.append({
|
| 1174 |
+
"type": code.get("type", ""),
|
| 1175 |
+
"data": code.get("text", ""),
|
| 1176 |
+
"left": int(x1),
|
| 1177 |
+
"top": int(y1),
|
| 1178 |
+
"width": int(x2-x1),
|
| 1179 |
+
"height": int(y2-y1),
|
| 1180 |
+
"valid": True,
|
| 1181 |
+
"page": code.get("page", 0) + 1,
|
| 1182 |
+
"source": code.get("source", ""),
|
| 1183 |
+
"engine": code.get("engine", "")
|
| 1184 |
+
})
|
| 1185 |
+
|
| 1186 |
+
# Debug: Print barcode detection results
|
| 1187 |
+
print(f"Barcode detection results - A: {len(bar_a)} codes, B: {len(bar_b)} codes")
|
| 1188 |
+
print(f"Raw codes_a: {len(codes_a)} items")
|
| 1189 |
+
print(f"Raw codes_b: {len(codes_b)} items")
|
| 1190 |
+
if codes_a:
|
| 1191 |
+
print(f"Sample code_a: {codes_a[0] if codes_a else 'None'}")
|
| 1192 |
+
if codes_b:
|
| 1193 |
+
print(f"Sample code_b: {codes_b[0] if codes_b else 'None'}")
|
| 1194 |
+
except Exception as e:
|
| 1195 |
+
print(f"Barcode detection error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
bar_a, info_a = [], []
|
| 1197 |
bar_b, info_b = [], []
|
| 1198 |
|
|
|
|
| 1427 |
y_offset = 0
|
| 1428 |
for page_idx in range(num_pages):
|
| 1429 |
page = doc[page_idx]
|
| 1430 |
+
|
| 1431 |
# Compute scale so that rendered width matches target_width when provided
|
| 1432 |
if target_width:
|
| 1433 |
+
page_width_pts = float(page.rect.width) # 72 dpi units
|
| 1434 |
scale = max(1.0, target_width / page_width_pts)
|
| 1435 |
else:
|
| 1436 |
+
scale = 600.0 / 72.0 # ~600 dpi default
|
| 1437 |
+
|
| 1438 |
try:
|
| 1439 |
pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), colorspace=fitz.csGRAY, alpha=False)
|
| 1440 |
except TypeError:
|
| 1441 |
pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
|
| 1442 |
+
|
| 1443 |
pil = _pix_to_pil(pix)
|
| 1444 |
pw, ph = pil.size
|
| 1445 |
+
effective_dpi = 72.0 * scale # <-- this is the real DPI for this rendered page
|
| 1446 |
+
|
| 1447 |
hits = _decode_variants(pil)
|
| 1448 |
for r in hits:
|
| 1449 |
x1 = int(r.get("left", 0))
|
| 1450 |
+
y1 = int(r.get("top", 0))
|
| 1451 |
+
w = int(r.get("width", 0))
|
| 1452 |
+
h = int(r.get("height", 0))
|
| 1453 |
x2 = x1 + w
|
| 1454 |
y2 = y1 + h
|
| 1455 |
+
|
| 1456 |
+
# Per-page box (before stacking)
|
| 1457 |
+
per_page_box = Box(y1, x1, y2, x2, w*h)
|
| 1458 |
+
|
| 1459 |
+
# Exclude the bottom 115mm of THIS PAGE using the correct DPI
|
| 1460 |
+
if _is_in_excluded_bottom_area(per_page_box, ph, excluded_height_mm=115.0, dpi=int(effective_dpi)):
|
| 1461 |
continue
|
| 1462 |
+
|
| 1463 |
+
# Map to combined image by adding the current page's y-offset
|
| 1464 |
+
combined_box = Box(y1 + y_offset, x1, y2 + y_offset, x2, w*h)
|
| 1465 |
+
boxes.append(combined_box)
|
| 1466 |
+
|
| 1467 |
+
sym, payload = r.get("type",""), r.get("data","")
|
| 1468 |
+
infos.append({
|
| 1469 |
+
**r,
|
| 1470 |
+
"valid": _validate(sym, payload),
|
| 1471 |
+
"page": page_idx + 1,
|
| 1472 |
+
"source": f"page@dpi{int(effective_dpi)}"
|
| 1473 |
+
})
|
| 1474 |
+
|
| 1475 |
y_offset += ph
|
| 1476 |
doc.close()
|
| 1477 |
except Exception:
|
requirements.txt
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
pillow>=10.3
|
| 3 |
-
|
| 4 |
opencv-contrib-python-headless==4.10.0.84
|
| 5 |
-
zxing-cpp==2.2.0
|
| 6 |
pdf2image
|
| 7 |
gradio
|
| 8 |
pytesseract
|
|
|
|
| 1 |
+
zxing-cpp==2.2.0
|
| 2 |
+
pymupdf>=1.24
|
| 3 |
pillow>=10.3
|
| 4 |
+
numpy==1.26.4
|
| 5 |
opencv-contrib-python-headless==4.10.0.84
|
|
|
|
| 6 |
pdf2image
|
| 7 |
gradio
|
| 8 |
pytesseract
|