Spaces:

Digitaljoint
/

ProofCheck

Sleeping

ProofCheck / barcode_reader.py

Yaz Hobooti

Upgrade barcode reader to v2: multi-scale, multi-rotate, tiling, and tight rotated boxes for better recall

80fae67 3 months ago

13.4 kB

	# barcode_reader.py — v2 (recall+tight boxes)
	from __future__ import annotations
	import io, os
	import logging
	from typing import Any, Dict, List, Tuple, Optional

	import numpy as np
	from PIL import Image
	import cv2

	# ---------- Optional decoding engines ----------
	# ZXing (zxing-cpp bindings): any import failure leaves ``zxingcpp`` as None.
	try:
	    import zxingcpp  # pip install zxing-cpp
	except Exception:
	    zxingcpp = None
	_HAS_ZXING = zxingcpp is not None

	# OpenCV barcode detector: usable only when this cv2 build exposes
	# ``cv2.barcode.BarcodeDetector``.
	_HAS_OCV_BARCODE = hasattr(getattr(cv2, "barcode", None), "BarcodeDetector")

	# ---------- PDF (PyMuPDF) ----------
	# Any import failure leaves ``fitz`` as None.
	try:
	    import fitz  # PyMuPDF
	except Exception:
	    fitz = None
	_HAS_PYMUPDF = fitz is not None


	# =========================
	# Utils
	# =========================

	def _to_bgr(img: Image.Image) -> np.ndarray:
	    """Convert a PIL image to a BGR-ordered numpy array (OpenCV channel order)."""
	    rgb_pixels = np.array(img.convert("RGB"))
	    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
	    return bgr_pixels

	def _as_gray(arr_bgr: np.ndarray) -> np.ndarray:
	    """Return the single-channel grayscale version of a BGR image."""
	    gray = cv2.cvtColor(arr_bgr, cv2.COLOR_BGR2GRAY)
	    return gray

	def _unrotate_points(pts: np.ndarray, rot: int, orig_w: int, orig_h: int) -> np.ndarray:
	    """
	    Map (x, y) points found in an ``np.rot90(img, k=rot)`` view back to the
	    coordinate frame of the un-rotated image.

	    ``pts`` is an N x 2 array of (x, y) rows; ``orig_w`` / ``orig_h`` are the
	    width and height of the un-rotated image. The input is never modified.
	    Any ``rot`` other than 1, 2 or 3 returns an unchanged copy.
	    """
	    restored = pts.copy()
	    if rot not in (1, 2, 3):
	        return restored

	    col_x, col_y = restored[:, 0], restored[:, 1]
	    if rot == 1:    # view was rotated 90 degrees CCW
	        mapped = (orig_w - col_y, col_x)
	    elif rot == 2:  # view was rotated 180 degrees
	        mapped = (orig_w - col_x, orig_h - col_y)
	    else:           # view was rotated 270 degrees CCW
	        mapped = (col_y, orig_h - col_x)
	    return np.stack(mapped, axis=1)

	def _norm_polygon(pts: Any, w: int, h: int) -> List[List[float]]:
	    """
	    Coerce ``pts`` into exactly four [x, y] corners.

	    - Four or more points: the first four are kept as-is.
	    - One to three points: the corners of their axis-aligned bounding box
	      (top-left, top-right, bottom-right, bottom-left).
	    - Anything that cannot be interpreted as (x, y) pairs, including an empty
	      input: the full ``w`` x ``h`` frame.
	    """
	    try:
	        arr = np.array(pts, dtype=np.float32).reshape(-1, 2)
	        if arr.shape[0] < 4:
	            # An empty array raises in min()/max() and lands in the fallback below.
	            lo_x, lo_y = arr.min(axis=0)
	            hi_x, hi_y = arr.max(axis=0)
	            quad = np.array(
	                [[lo_x, lo_y], [hi_x, lo_y], [hi_x, hi_y], [lo_x, hi_y]],
	                dtype=np.float32,
	            )
	        else:
	            quad = arr[:4]
	    except Exception:
	        quad = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=np.float32)
	    return quad.astype(float).tolist()

	def _tight_rotated_box(poly: List[List[float]]) -> np.ndarray:
	    """
	    Return the four corners (4 x 2, float32) of the minimum-area rotated
	    rectangle that encloses the points of ``poly``.
	    """
	    corners = np.array(poly, dtype=np.float32)
	    # minAreaRect yields (center, (w, h), angle); boxPoints expands it to corners.
	    return cv2.boxPoints(cv2.minAreaRect(corners)).astype(np.float32)

	def _dedupe(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
	    """
	    Drop repeated detections, keeping the first occurrence of each.

	    Two detections are duplicates when their "text" and "type" are equal and
	    the IoU of their polygons' axis-aligned bounding boxes exceeds 0.7.
	    The returned list holds the original dict objects in input order.
	    """

	    def _extent(det):
	        quad = np.array(det["polygon"], dtype=np.float32)
	        return quad.min(axis=0), quad.max(axis=0)

	    def _bbox_iou(first, second):
	        lo_a, hi_a = _extent(first)
	        lo_b, hi_b = _extent(second)
	        overlap_wh = np.maximum(np.minimum(hi_a, hi_b) - np.maximum(lo_a, lo_b), 0)
	        overlap = overlap_wh[0] * overlap_wh[1]
	        area_a = (hi_a - lo_a).prod()
	        area_b = (hi_b - lo_b).prod()
	        # The floor on the union avoids dividing by zero for degenerate boxes.
	        return float(overlap / max(area_a + area_b - overlap, 1e-6))

	    def _same_code(cand, kept):
	        return (
	            cand["text"] == kept["text"]
	            and cand["type"] == kept["type"]
	            and _bbox_iou(cand, kept) > 0.7
	        )

	    unique: List[Dict[str, Any]] = []
	    for cand in results:
	        if not any(_same_code(cand, kept) for kept in unique):
	            unique.append(cand)
	    return unique

	def _zxing_hints():
	    """
	    Build a ZXing hints object configured for this module, or None.

	    Returns None when zxing-cpp is not installed, and also when the installed
	    binding has no ``DecodeHints`` class. Not every zxing-cpp Python build
	    ships one; in that case reader options are passed to ``read_barcodes`` as
	    keyword arguments instead, and raising AttributeError here would abort
	    decoding altogether.
	    """
	    if not _HAS_ZXING:
	        return None
	    hints_cls = getattr(zxingcpp, "DecodeHints", None)
	    if hints_cls is None:
	        return None
	    hints = hints_cls()
	    hints.try_harder = True   # improves 1D at small sizes
	    hints.try_rotate = False  # we rotate ourselves explicitly
	    # Optionally restrict formats if you know your set, e.g. Code128|EAN_13|QR_CODE.
	    # Leaving hints.formats untouched keeps the library default.
	    return hints


	# =========================
	# Candidate generation (recall booster)
	# =========================

	def _candidate_views(bgr: np.ndarray) -> List[Tuple[np.ndarray, float, int, Tuple[int,int]]]:
	    """
	    Build the list of image variants the decoders try, in order.

	    Each entry is ``(image, scale, rot, (orig_w, orig_h))``:
	    - ``rot`` in {0, 1, 2, 3}: the image is ``np.rot90(bgr, k=rot)`` (CCW).
	    - ``scale`` in {1.0, 1.5}, plus 2.0 only when the longer side of ``bgr``
	      is below 1400 px (skip big upscales for large inputs).
	    - for every rot/scale pair three variants are appended: the raw image,
	      a sharpened copy, and a CLAHE-equalized grayscale converted back to
	      three channels.

	    The whole list is materialized eagerly; ``(orig_w, orig_h)`` always refers
	    to the un-rotated, un-scaled input.
	    """
	    img_h, img_w = bgr.shape[:2]
	    orig_wh = (img_w, img_h)
	    scales = [1.0, 1.5]
	    if max(img_h, img_w) < 1400:
	        scales.append(2.0)

	    # Loop-invariant helpers: built once instead of once per view.
	    sharpen_kernel = np.array([[0,-1,0],[-1,5,-1],[0,-1,0]], dtype=np.float32)
	    clahe = cv2.createCLAHE(2.5, (8,8))

	    views: List[Tuple[np.ndarray, float, int, Tuple[int,int]]] = []
	    for rot in (0, 1, 2, 3):
	        # rot90 returns a strided view; make it contiguous before handing it to decoders.
	        rotated = np.ascontiguousarray(np.rot90(bgr, k=rot)) if rot else bgr
	        for s in scales:
	            if s != 1.0:
	                scaled = cv2.resize(rotated, (0,0), fx=s, fy=s, interpolation=cv2.INTER_CUBIC)
	            else:
	                scaled = rotated

	            # Three light preprocess variants to help different symbologies.
	            views.append((scaled, s, rot, orig_wh))                      # raw

	            sharpened = cv2.filter2D(scaled, -1, sharpen_kernel)         # mild sharpen
	            views.append((sharpened, s, rot, orig_wh))

	            equalized = clahe.apply(_as_gray(scaled))                    # CLAHE on gray
	            views.append((cv2.cvtColor(equalized, cv2.COLOR_GRAY2BGR), s, rot, orig_wh))
	    return views


	def _tile_views(bgr: np.ndarray, grid: int = 2) -> List[Tuple[np.ndarray, Tuple[int,int]]]:
	    """
	    Split ``bgr`` into a ``grid`` x ``grid`` set of overlapping tiles
	    (helps tiny/many codes).

	    Returns a list of ``(tile_bgr, (x0, y0))`` in row-major order, where
	    ``(x0, y0)`` is the tile's top-left corner in ``bgr`` coordinates and
	    ``tile_bgr`` is a slice (view) of ``bgr``. Neighbouring tiles overlap by
	    one sixth of the tile step on each side.

	    The last tile of every row/column always reaches the image edge, so no
	    pixels are left uncovered when the size is not divisible by ``grid``.
	    Tiles that would be empty (image smaller than ``grid``) are skipped.
	    ``grid`` < 1 or an empty image yields an empty list.
	    """
	    img_h, img_w = bgr.shape[:2]
	    if grid < 1 or img_h == 0 or img_w == 0:
	        return []

	    # Clamp the step to 1 so images smaller than the grid still produce tiles.
	    step_x = max(img_w // grid, 1)
	    step_y = max(img_h // grid, 1)
	    overlap_x = step_x // 6
	    overlap_y = step_y // 6
	    last = grid - 1

	    tiles: List[Tuple[np.ndarray, Tuple[int,int]]] = []
	    for iy in range(grid):
	        y0 = max(iy * step_y - overlap_y, 0)
	        y1 = img_h if iy == last else min((iy + 1) * step_y + overlap_y, img_h)
	        if y1 <= y0:
	            continue
	        for ix in range(grid):
	            x0 = max(ix * step_x - overlap_x, 0)
	            x1 = img_w if ix == last else min((ix + 1) * step_x + overlap_x, img_w)
	            if x1 <= x0:
	                continue
	            tiles.append((bgr[y0:y1, x0:x1], (x0, y0)))
	    return tiles


	# =========================
	# Decoders
	# =========================

	def _decode_zxing_with_views(bgr: np.ndarray) -> List[Dict[str, Any]]:
	    """
	    Decode barcodes with zxing-cpp, trying each candidate view in turn.

	    Views come from ``_candidate_views``; the first view that yields at least
	    one non-empty result ends the search. Every hit is a dict with keys
	    "engine", "type", "text" and "polygon" (four [x, y] corners mapped back
	    into the coordinate frame of ``bgr``). Returns [] when zxing-cpp is not
	    installed or nothing decodes.

	    Binding differences are tolerated rather than assumed:
	    - if a hints object is unavailable or ``read_barcodes`` rejects the
	      ``hints=`` keyword, the call is retried with ``try_rotate=False``
	      (rotation is handled by the candidate views);
	    - corner points are read from the named ``top_left`` / ``top_right`` /
	      ``bottom_right`` / ``bottom_left`` attributes when the position object
	      has them, otherwise by iterating it.
	    """
	    if not _HAS_ZXING:
	        return []
	    log = logging.getLogger(__name__)

	    try:
	        hints = _zxing_hints()
	    except Exception:
	        # A binding without a hints class must not abort decoding.
	        log.debug("zxing hints unavailable; using keyword options", exc_info=True)
	        hints = None
	    use_hints = hints is not None

	    def _corner_points(result) -> np.ndarray:
	        """Return the result's corners as an N x 2 float32 array (N may be 0)."""
	        pos = getattr(result, "position", None)
	        if pos is None:
	            return np.empty((0,2), dtype=np.float32)
	        named = [getattr(pos, attr, None)
	                 for attr in ("top_left", "top_right", "bottom_right", "bottom_left")]
	        try:
	            corners = named if all(c is not None for c in named) else list(pos)
	            return np.array([[float(pt.x), float(pt.y)] for pt in corners], dtype=np.float32)
	        except Exception:
	            return np.empty((0,2), dtype=np.float32)

	    agg: List[Dict[str, Any]] = []
	    for img, scale, rot, (W, H) in _candidate_views(bgr):
	        try:
	            if use_hints:
	                try:
	                    res = zxingcpp.read_barcodes(img, hints=hints)
	                except TypeError:
	                    # This binding does not take hints=; stop trying it.
	                    use_hints = False
	                    res = zxingcpp.read_barcodes(img, try_rotate=False)
	            else:
	                res = zxingcpp.read_barcodes(img, try_rotate=False)
	        except Exception:
	            log.debug("zxing failed on view (scale=%s, rot=%s)", scale, rot, exc_info=True)
	            continue

	        for r in res or []:
	            if not (r and getattr(r, "text", None)):
	                continue
	            fmt_obj = getattr(r, "format", None)
	            fmt = getattr(fmt_obj, "name", None) or str(fmt_obj)

	            # Map back to original frame: undo the upscale first, then the rotation.
	            pts = _corner_points(r)
	            if pts.size:
	                pts = _unrotate_points(pts / float(scale), rot, W, H)
	                poly = _norm_polygon(pts, W, H)
	            else:
	                # No usable corners: fall back to the full-frame polygon.
	                poly = _norm_polygon([], W, H)

	            agg.append({
	                "engine": "zxingcpp",
	                "type": fmt,
	                "text": r.text or "",
	                "polygon": poly,
	            })

	        if agg:  # good enough
	            break
	    return agg


	def _decode_opencv_with_views(bgr: np.ndarray) -> List[Dict[str, Any]]:
	    """
	    Decode barcodes with OpenCV's ``cv2.barcode.BarcodeDetector``.

	    Views come from ``_candidate_views``; the first view that yields at least
	    one non-empty text ends the search. Every hit is a dict with keys
	    "engine", "type", "text" and "polygon" (four [x, y] corners mapped back
	    into the coordinate frame of ``bgr``). Returns [] when the detector is
	    not available or nothing decodes.

	    The (ok, texts, types, corners) result is taken from
	    ``detectAndDecodeWithType`` when the detector has it (OpenCV builds where
	    ``detectAndDecode`` returns a different tuple), otherwise from
	    ``detectAndDecode``. A view whose call fails or returns an unexpected
	    shape is skipped instead of aborting the whole pass.
	    """
	    if not _HAS_OCV_BARCODE:
	        return []
	    log = logging.getLogger(__name__)
	    det = cv2.barcode.BarcodeDetector()
	    decode = getattr(det, "detectAndDecodeWithType", None) or det.detectAndDecode
	    agg: List[Dict[str, Any]] = []

	    for img, scale, rot, (W, H) in _candidate_views(bgr):
	        try:
	            ok, infos, types, corners = decode(_as_gray(img))
	        except (cv2.error, ValueError):
	            # cv2.error: detector failure; ValueError: unexpected result arity.
	            log.debug("OpenCV barcode failed on view (scale=%s, rot=%s)", scale, rot, exc_info=True)
	            continue
	        if not ok or corners is None:
	            continue
	        for txt, typ, pts in zip(infos, types, corners):
	            if not txt:
	                continue
	            pts = np.array(pts, dtype=np.float32).reshape(-1,2)
	            # Map back to original frame: undo the upscale first, then the rotation.
	            pts = _unrotate_points(pts / float(scale), rot, W, H)
	            agg.append({
	                "engine": "opencv_barcode",
	                "type": typ,
	                "text": txt,
	                "polygon": _norm_polygon(pts, W, H),
	            })
	        if agg:
	            break
	    return agg


	def _decode_any(bgr: np.ndarray) -> List[Dict[str, Any]]:
	    """
	    Run the decoders in priority order and return the first non-empty result:
	    ZXing on the whole image, then OpenCV on the whole image, then (last
	    resort) both engines on each tile of a 2 x 2 tiling.

	    Polygons of tile hits are shifted by the tile origin so they are expressed
	    in the coordinate frame of ``bgr``.
	    """
	    whole_image_hits = _decode_zxing_with_views(bgr) or _decode_opencv_with_views(bgr)
	    if whole_image_hits:
	        return whole_image_hits

	    # Last-ditch: small tiles for tiny/overlapping codes.
	    found: List[Dict[str, Any]] = []
	    for tile, (off_x, off_y) in _tile_views(bgr, grid=2):
	        tile_hits = _decode_zxing_with_views(tile) or _decode_opencv_with_views(tile)
	        for hit in tile_hits:
	            shifted = np.array(hit["polygon"], dtype=np.float32)
	            shifted[:, 0] += off_x
	            shifted[:, 1] += off_y
	            hit["polygon"] = shifted.tolist()
	        found.extend(tile_hits)
	    return found


	# =========================
	# Image & PDF readers
	# =========================

	def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) -> List[Tuple[int, np.ndarray]]:
	    """
	    Extract the images referenced by PDF pages as BGR arrays.

	    Returns a list of ``(page_number, image_bgr)`` for every image listed by
	    ``page.get_images(full=True)``, over all pages or only ``page_index``
	    when given. Returns [] when PyMuPDF is not installed.

	    An image that cannot be converted is skipped (logged at debug level)
	    so one bad object does not discard the rest. The document is always
	    closed, including when an error propagates.
	    """
	    if not _HAS_PYMUPDF:
	        return []
	    log = logging.getLogger(__name__)
	    out: List[Tuple[int, np.ndarray]] = []
	    doc = fitz.open(path)
	    try:
	        pages = range(len(doc)) if page_index is None else [page_index]
	        for pno in pages:
	            for info in doc[pno].get_images(full=True):
	                xref = info[0]
	                try:
	                    pix = fitz.Pixmap(doc, xref)
	                    if pix.n >= 4:
	                        # Four or more components: convert to RGB before PNG export.
	                        pix = fitz.Pixmap(fitz.csRGB, pix)
	                    pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
	                except Exception:
	                    log.debug("Skipping unreadable PDF image xref=%s on page %s", xref, pno, exc_info=True)
	                    continue
	                out.append((pno, _to_bgr(pil)))
	    finally:
	        doc.close()
	    return out

	def _pdf_render_page(path: str, page: int, dpi: int) -> np.ndarray:
	    """
	    Rasterize one PDF page to a BGR array at ``dpi`` dots per inch.

	    The zoom factor is ``dpi / 72``; the page is rendered without an alpha
	    channel.

	    Raises:
	        RuntimeError: PyMuPDF is not installed.
	        ValueError: ``page`` is not below the document's page count.

	    The document is always closed, including when rendering fails.
	    """
	    if not _HAS_PYMUPDF:
	        raise RuntimeError("PyMuPDF not available; cannot rasterize PDF.")
	    doc = fitz.open(path)
	    try:
	        n = len(doc)
	        if page >= n:
	            raise ValueError(f"Page {page} out of range; PDF has {n} pages.")
	        scale = dpi / 72.0
	        pix = doc[page].get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
	        png_bytes = pix.tobytes("png")
	    finally:
	        doc.close()
	    pil = Image.open(io.BytesIO(png_bytes)).convert("RGB")
	    return _to_bgr(pil)

	def _decode_image_path(path: str) -> List[Dict[str, Any]]:
	    """
	    Decode every barcode found in the image file at ``path``.

	    Each hit is tagged with ``"source": "image"`` and ``"page": 0``; the
	    result is de-duplicated before being returned.
	    """
	    # The context manager releases the file handle as soon as pixels are copied.
	    with Image.open(path) as pil:
	        bgr = _to_bgr(pil)
	    hits = _decode_any(bgr)
	    for h in hits:
	        h.update({"source": "image", "page": 0})
	    return _dedupe(hits)

	def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
	    """
	    Decode barcodes from a PDF in two passes.

	    1. Embedded images: every image extracted from the first ``max_pages``
	       pages is decoded directly. Hits are tagged
	       ``"source": "pdf_xobject_image"``; their polygons are in the pixel
	       frame of the extracted image, not of the page. If this pass finds
	       anything, its de-duplicated hits are returned.
	    2. Rasterization: the first ``max_pages`` pages are rendered at each DPI
	       in ``raster_dpis`` in order, stopping after the first DPI that yields
	       any hit. Hits are tagged ``"source": "pdf_raster_<dpi>dpi"``.

	    A failure while extracting embedded images is logged at debug level and
	    treated as "no embedded images" so the raster pass still runs.
	    Returns [] when PyMuPDF is not installed.
	    """
	    log = logging.getLogger(__name__)
	    results: List[Dict[str, Any]] = []

	    try:
	        embedded = _pdf_extract_xobject_images(path)
	    except Exception:
	        log.debug("Embedded-image extraction failed for %s; falling back to raster", path, exc_info=True)
	        embedded = []

	    for pno, img_bgr in embedded:
	        if pno >= max_pages:
	            # Honour the page limit in this pass too, matching the raster pass.
	            continue
	        hits = _decode_any(img_bgr)
	        for h in hits:
	            h.update({"source": "pdf_xobject_image", "page": pno})
	        results.extend(hits)
	    if results:
	        return _dedupe(results)

	    if not _HAS_PYMUPDF:
	        return []
	    doc = fitz.open(path)
	    try:
	        n = min(len(doc), max_pages)
	    finally:
	        doc.close()

	    for dpi in raster_dpis:
	        for pno in range(n):
	            img_bgr = _pdf_render_page(path, pno, dpi=dpi)
	            hits = _decode_any(img_bgr)
	            for h in hits:
	                h.update({"source": f"pdf_raster_{dpi}dpi", "page": pno})
	            results.extend(hits)
	        if results:
	            # Lowest DPI that finds anything wins; higher DPIs are skipped.
	            break
	    return _dedupe(results)


	# =========================
	# Public API
	# =========================

	def read_barcodes_from_path(path: str,
	                            max_pages: int = 8,
	                            raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
	    """
	    Read barcodes from a file, choosing the reader by extension.

	    Paths ending in ".pdf" (case-insensitive) go through the PDF reader with
	    ``max_pages`` and ``raster_dpis``; every other path is opened as an image.
	    """
	    _, suffix = os.path.splitext(path.lower())
	    if suffix != ".pdf":
	        return _decode_image_path(path)
	    return _decode_pdf_path(path, max_pages=max_pages, raster_dpis=raster_dpis)


	# =========================
	# Drawing helpers (tight boxes)
	# =========================

	def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]], color=(0,255,0), thickness: int = 2) -> np.ndarray:
	    """
	    Draw a tight, rotated rectangle around each code (green by default),
	    plus a small "TYPE: TEXT" label (truncated to 48 characters) near the
	    first corner of the drawn box.

	    Works on a copy; ``bgr`` itself is left untouched. Detections whose
	    "polygon" is missing or has no points are skipped instead of raising.
	    """
	    out = bgr.copy()
	    for d in detections:
	        raw_poly = d.get("polygon")
	        if raw_poly is None:
	            continue
	        poly = np.array(raw_poly, dtype=np.float32).reshape(-1, 2)
	        if poly.shape[0] == 0:
	            continue
	        # A single point is drawn as-is; two or more get a min-area rectangle.
	        box = _tight_rotated_box(poly) if poly.shape[0] >= 2 else poly
	        box_i = box.astype(np.int32).reshape(-1, 1, 2)
	        cv2.polylines(out, [box_i], True, color, thickness, cv2.LINE_AA)
	        x, y = int(box[0,0]), int(box[0,1])
	        label = f'{d.get("type","")}: {d.get("text","")}'
	        # Keep the text baseline at least 14 px from the top so it stays visible.
	        cv2.putText(out, label[:48], (x, max(14, y-6)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,50,255), 1, cv2.LINE_AA)
	    return out

	def render_preview_bgr(path: str, page: int = 0, dpi: int = 220) -> np.ndarray:
	    """
	    Load a preview of ``path`` as a BGR array.

	    Paths ending in ".pdf" (case-insensitive) are rasterized: ``page`` is
	    rendered at ``dpi``. Any other path is opened as an image, in which case
	    ``page`` and ``dpi`` are ignored.
	    """
	    ext = os.path.splitext(path.lower())[1]
	    if ext == ".pdf":
	        return _pdf_render_page(path, page=page, dpi=dpi)
	    # The context manager releases the file handle as soon as pixels are copied.
	    with Image.open(path) as pil:
	        return _to_bgr(pil)