MogensR committed on
Commit
a705dd3
·
1 Parent(s): 76580be

chore: update Dockerfile pins, add .dockerignore, README, and update_pins.py

Browse files
Files changed (2) hide show
  1. Dockerfile.bak +142 -0
  2. update_pins.py +197 -0
Dockerfile.bak ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ===============================
# BackgroundFX Pro — Dockerfile
# Hugging Face Spaces Pro (GPU)
# ===============================

# CUDA base image (T4-friendly). Build stage has NO GPU access.
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04

# --- Build args (override in Space Settings → Build args) ---
# Pin external repos for reproducible builds.
# update_pins.py rewrites the two "ARG <NAME>_SHA=" lines below in place.
ARG SAM2_SHA=__PIN_ME__
ARG MATANYONE_SHA=__PIN_ME__

# Weights to pre-warm (public models only)
ARG SAM2_MODEL_ID=facebook/sam2
# Allowed SAM2_VARIANT values: sam2_hiera_small | sam2_hiera_base | sam2_hiera_large
# (kept on its own line: a "#" is only a comment at the start of a line,
#  so a trailing "# ..." would be parsed as part of the ARG instruction)
ARG SAM2_VARIANT=sam2_hiera_large
ARG MATANY_REPO_ID=PeiqingYang/MatAnyone
ARG MATANY_FILENAME=matanyone_v1.0.pth

# Optional: repo URLs (used for tarball fetch)
ARG SAM2_REPO=https://github.com/facebookresearch/segment-anything-2
ARG MATANYONE_REPO=https://github.com/pq-yang/MatAnyone

# --- Global env hygiene ---
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# --- Create non-root user (uid 1000 required by HF) ---
RUN useradd -m -u 1000 user
ENV HOME=/home/user
WORKDIR ${HOME}/app
# WORKDIR is created while root is still the active user; hand the directory
# to "user" so the later unprivileged RUN steps can write third_party/ into it.
RUN chown user:user ${HOME}/app

# --- System packages (incl. Python) ---
# NOTE: the base CUDA image has no Python/pip. Install them here.
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-pip python3-venv python3-dev python-is-python3 \
        build-essential \
        git git-lfs ca-certificates curl \
        ffmpeg libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1 \
    && git lfs install \
    && rm -rf /var/lib/apt/lists/*
USER user
ENV PATH=${HOME}/.local/bin:$PATH

# --- Python & CUDA wheels (Torch cu121) ---
RUN python3 -m pip install --upgrade pip setuptools wheel
# Use explicit cu121 index to match CUDA 12.x runtime
RUN python3 -m pip install --index-url https://download.pytorch.org/whl/cu121 \
        torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121

# Minimal deps needed BEFORE pre-warm (download weights at build time)
RUN python3 -m pip install huggingface_hub==0.24.6

# --- Fetch external repos (SAM2 & MatAnyone) via tarballs (robust, no git needed) ---
# curl -f makes HTTP errors (e.g. 404) fail the command instead of saving the
# error page, which is what lets the "main || master" fallback actually trigger.
# --strip-components=1 unpacks into a fixed directory regardless of what the
# archive's top-level directory is called.
RUN set -eux; \
    mkdir -p third_party/sam2 && cd third_party; \
    if [ "${SAM2_SHA}" != "__PIN_ME__" ]; then \
        echo "Fetching SAM2 commit ${SAM2_SHA} as archive"; \
        curl -fL "${SAM2_REPO}/archive/${SAM2_SHA}.tar.gz" -o sam2.tar.gz; \
    else \
        echo "Fetching SAM2 default branch as archive"; \
        curl -fL "${SAM2_REPO}/archive/refs/heads/main.tar.gz" -o sam2.tar.gz || \
        curl -fL "${SAM2_REPO}/archive/refs/heads/master.tar.gz" -o sam2.tar.gz; \
    fi; \
    tar -xzf sam2.tar.gz -C sam2 --strip-components=1; \
    rm sam2.tar.gz

RUN set -eux; \
    mkdir -p third_party/matanyone && cd third_party; \
    if [ "${MATANYONE_SHA}" != "__PIN_ME__" ]; then \
        echo "Fetching MatAnyone commit ${MATANYONE_SHA} as archive"; \
        curl -fL "${MATANYONE_REPO}/archive/${MATANYONE_SHA}.tar.gz" -o matany.tar.gz; \
    else \
        echo "Fetching MatAnyone default branch as archive"; \
        curl -fL "${MATANYONE_REPO}/archive/refs/heads/main.tar.gz" -o matany.tar.gz || \
        curl -fL "${MATANYONE_REPO}/archive/refs/heads/master.tar.gz" -o matany.tar.gz; \
    fi; \
    tar -xzf matany.tar.gz -C matanyone --strip-components=1; \
    rm matany.tar.gz

# --- Pre-warm model weights into image cache (public models only) ---
# NOTE: Build time has no access to private tokens on Spaces for gated models.
ENV HF_HOME_BUILD=${HOME}/.cache/huggingface
RUN python3 - <<'PY'
import os
from pathlib import Path
from huggingface_hub import hf_hub_download

SAM2_MODEL_ID = os.environ.get("SAM2_MODEL_ID", "facebook/sam2")
SAM2_VARIANT = os.environ.get("SAM2_VARIANT", "sam2_hiera_large")
MATANY_REPO_ID = os.environ.get("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
MATANY_FILENAME = os.environ.get("MATANY_FILENAME", "matanyone_v1.0.pth")

VARIANT_FILES = {
    "sam2_hiera_small": "sam2_hiera_small.pt",
    "sam2_hiera_base": "sam2_hiera_base.pt",
    "sam2_hiera_large": "sam2_hiera_large.pt",
}
# Unknown variants fall back to the large checkpoint.
ckpt_name = VARIANT_FILES.get(SAM2_VARIANT, VARIANT_FILES["sam2_hiera_large"])

cache_dir = os.environ.get("HF_HOME_BUILD", str(Path.home() / ".cache" / "huggingface"))
Path(cache_dir).mkdir(parents=True, exist_ok=True)

print(f"[PREWARM] SAM2: repo={SAM2_MODEL_ID}, file={ckpt_name}")
p1 = hf_hub_download(repo_id=SAM2_MODEL_ID, filename=ckpt_name, local_dir=cache_dir)
print(f"[PREWARM] -> {p1}")

print(f"[PREWARM] MatAnyone: repo={MATANY_REPO_ID}, file={MATANY_FILENAME}")
p2 = hf_hub_download(repo_id=MATANY_REPO_ID, filename=MATANY_FILENAME, local_dir=cache_dir)
print(f"[PREWARM] -> {p2}")

print("[PREWARM] Done.")
PY

# --- App Python deps ---
COPY --chown=user requirements.txt ./requirements.txt
RUN python3 -m pip install -r requirements.txt

# --- App code ---
COPY --chown=user . ${HOME}/app

# --- Runtime environment ---
# Caches in /data persist across Space restarts
ENV OMP_NUM_THREADS=2 \
    TOKENIZERS_PARALLELISM=false \
    HF_HOME=/data/.cache/huggingface \
    TORCH_HOME=/data/.cache/torch \
    MPLCONFIGDIR=/data/.cache/matplotlib \
    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
    PYTHONPATH="$PYTHONPATH:${HOME}/app/third_party/sam2:${HOME}/app/third_party/matanyone" \
    PORT=7860 \
    SAM2_MODEL_ID=${SAM2_MODEL_ID} \
    SAM2_VARIANT=${SAM2_VARIANT} \
    MATANY_REPO_ID=${MATANY_REPO_ID} \
    MATANY_FILENAME=${MATANY_FILENAME}

# --- Networking / Entrypoint ---
EXPOSE 7860
CMD ["python3", "app.py"]
update_pins.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ update_pins.py
4
+ - Fetch newest SHAs (release tag or default branch) for SAM2 + MatAnyone
5
+ - Update ARG lines in Dockerfile: SAM2_SHA / MATANYONE_SHA
6
+ - Supports dry-run and manual pins
7
+ - Uses GitHub API; set GITHUB_TOKEN to avoid rate limits (optional)
8
+ """
9
+
10
+ import os
11
+ import re
12
+ import sys
13
+ import json
14
+ import argparse
15
+ from urllib.parse import urlparse
16
+ import requests
17
+ from datetime import datetime, timezone
18
+ from shutil import copyfile
19
+
20
# Dockerfile edited when --dockerfile is not given.
DOCKERFILE_PATH = "Dockerfile"

# Default repos (must match your Dockerfile ARGs)
SAM2_REPO_URL = "https://github.com/facebookresearch/segment-anything-2"
MATANY_REPO_URL = "https://github.com/pq-yang/MatAnyone"

# One shared HTTP session for every GitHub API call in this script.
SESSION = requests.Session()
# The token is optional; when present it is sent as a Bearer credential.
if os.getenv("GITHUB_TOKEN"):
    SESSION.headers["Authorization"] = f"Bearer {os.environ['GITHUB_TOKEN']}"
SESSION.headers["Accept"] = "application/vnd.github+json"
SESSION.headers["User-Agent"] = "update-pins-script"
33
+
34
def gh_owner_repo(repo_url: str):
    """Split a GitHub repository URL into ``(owner, repo)``.

    Only the first two non-empty path segments are used, so URLs with a
    trailing slash or extra segments (``/tree/main``) are accepted. A trailing
    ``.git`` on the repository segment (clone-style URL) is removed.

    Raises:
        ValueError: if the URL path does not contain an owner and a repo name.
    """
    # Drop empty segments so leading/trailing/doubled slashes cannot produce
    # an empty owner or repo name.
    segments = [seg for seg in urlparse(repo_url).path.split("/") if seg]
    if len(segments) < 2:
        raise ValueError(f"Invalid repo URL: {repo_url}")

    owner, repo = segments[0], segments[1]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    if not repo:
        # The repo segment was literally ".git".
        raise ValueError(f"Invalid repo URL: {repo_url}")
    return owner, repo
40
+
41
def gh_api(path: str):
    """GET ``https://api.github.com`` + *path* and return the decoded JSON body.

    The request goes through the module-level ``SESSION`` with a 30 second
    timeout.

    Raises:
        RuntimeError: if the response status code is 400 or higher; the
            message carries the status code and the response text.
    """
    response = SESSION.get(f"https://api.github.com{path}", timeout=30)
    if response.status_code < 400:
        return response.json()
    raise RuntimeError(f"GitHub API error {response.status_code}: {response.text}")
47
+
48
def get_latest_release_sha(repo_url: str) -> tuple[str, str]:
    """Return ``(ref_desc, commit_sha)`` for the repo's latest GitHub release.

    ``ref_desc`` has the form ``"release:<tag>"``. The release's tag is looked
    up as a git ref; when the ref points at a tag object rather than directly
    at a commit, the tag object is fetched and the SHA it points to is used.

    Raises:
        ValueError: if *repo_url* cannot be split into owner/repo.
        RuntimeError: if any API call or payload lookup fails; the original
            exception is attached as ``__cause__``.
    """
    owner, repo = gh_owner_repo(repo_url)
    try:
        rel = gh_api(f"/repos/{owner}/{repo}/releases/latest")
        tag = rel["tag_name"]
        # Resolve tag to commit
        ref = gh_api(f"/repos/{owner}/{repo}/git/ref/tags/{tag}")
        obj = ref["object"]
        if obj["type"] == "tag":
            # The ref points at a tag object; follow it to its target SHA.
            tag_obj = gh_api(f"/repos/{owner}/{repo}/git/tags/{obj['sha']}")
            sha = tag_obj["object"]["sha"]
        else:
            sha = obj["sha"]
        return (f"release:{tag}", sha)
    except Exception as e:
        # Chain explicitly so the underlying API/lookup failure stays attached.
        raise RuntimeError(f"Could not get latest release for {repo}: {e}") from e
65
+
66
def get_latest_default_branch_sha(repo_url: str) -> tuple[str, str]:
    """Return ``(ref_desc, commit_sha)`` for the head of the default branch.

    The default branch name is read from the repository metadata, then that
    branch is fetched to obtain its commit SHA. ``ref_desc`` has the form
    ``"branch:<name>"``.
    """
    owner, repo = gh_owner_repo(repo_url)
    repo_path = f"/repos/{owner}/{repo}"
    branch_name = gh_api(repo_path)["default_branch"]
    head = gh_api(f"{repo_path}/branches/{branch_name}")
    return (f"branch:{branch_name}", head["commit"]["sha"])
74
+
75
def get_sha_for_ref(repo_url: str, ref: str) -> tuple[str, str]:
    """
    Resolve any Git ref (branch name, tag name, or commit SHA) to a commit SHA.

    Returns ``(ref_desc, commit_sha)`` where ``ref_desc`` is ``"branch:<ref>"``,
    ``"tag:<ref>"`` or ``"commit:<first 7 chars of ref>"``. A 40-character
    lowercase hex string is returned as-is without contacting the API.
    Otherwise the ref is tried as a branch, then a tag, then a commit; the
    first lookup that succeeds wins.

    Raises:
        ValueError: if *repo_url* cannot be split into owner/repo.
        RuntimeError: if none of the lookups succeeds; the error from the last
            attempt is attached as ``__cause__``.
    """
    owner, repo = gh_owner_repo(repo_url)
    # If it's already a full SHA, just return it
    if re.fullmatch(r"[0-9a-f]{40}", ref):
        return (f"commit:{ref[:7]}", ref)

    # Remember why the most recent attempt failed so the final error is not
    # stripped of its cause (rate limit, auth failure, network error, ...).
    last_error = None

    # Try branches/<ref>, then tags/<ref>, then commits/<ref>
    for kind, path in [
        ("branch", f"/repos/{owner}/{repo}/branches/{ref}"),
        ("tag", f"/repos/{owner}/{repo}/git/ref/tags/{ref}"),
        ("commit", f"/repos/{owner}/{repo}/commits/{ref}"),
    ]:
        try:
            data = gh_api(path)
            if kind == "branch":
                return (f"branch:{ref}", data["commit"]["sha"])
            if kind == "tag":
                obj = data["object"]
                if obj["type"] == "tag":
                    # The ref points at a tag object; follow it to its target SHA.
                    tag_obj = gh_api(f"/repos/{owner}/{repo}/git/tags/{obj['sha']}")
                    return (f"tag:{ref}", tag_obj["object"]["sha"])
                return (f"tag:{ref}", obj["sha"])
            if kind == "commit":
                return (f"commit:{ref[:7]}", data["sha"])
        except Exception as exc:
            # Best effort: a miss for one kind just means "try the next kind".
            last_error = exc
            continue
    raise RuntimeError(f"Could not resolve ref '{ref}' for {repo}") from last_error
105
+
106
def update_dockerfile_arg(dockerfile_text: str, arg_name: str, new_value: str) -> str:
    """
    Replace a line like:
        ARG SAM2_SHA=...
    with:
        ARG SAM2_SHA=<new_value>

    Every matching line is rewritten. Leading indentation and the original
    spelling of the instruction are preserved, the ``ARG`` keyword is matched
    case-insensitively, and a declaration without a default (``ARG SAM2_SHA``)
    gains ``=<new_value>``. Everything after ``=`` on a matching line is
    discarded. Names that merely start with *arg_name* are not touched.

    Raises:
        RuntimeError: if no ``ARG <arg_name>`` line exists in the text.
    """
    # [ \t] instead of \s so a match can never span a line break.
    pattern = (
        rf"^([ \t]*(?i:ARG)[ \t]+{re.escape(arg_name)})"
        rf"(?:=.*|[ \t]*)$"
    )

    # Use a callable replacement to avoid backreference ambiguity (e.g., \12)
    def repl(m: re.Match) -> str:
        return f"{m.group(1)}={new_value}"

    new_text, n = re.subn(pattern, repl, dockerfile_text, flags=re.MULTILINE)
    if n == 0:
        raise RuntimeError(f"ARG {arg_name}=… line not found in Dockerfile.")
    return new_text
123
+
124
def main():
    """Command-line entry point: resolve both pins and rewrite the Dockerfile.

    Steps, in order: parse arguments, resolve the SAM2 and MatAnyone SHAs
    (an explicit ref wins over ``--mode``), print the pins (as JSON with
    ``--json``), read the Dockerfile, substitute both ``ARG`` values, then
    either print a preview (``--dry-run``) or write the file back, creating a
    ``.bak`` copy first unless ``--no-backup`` was given.

    Raises:
        FileNotFoundError: if the Dockerfile path is not an existing file.
    """
    parser = argparse.ArgumentParser(description="Update pinned SHAs in Dockerfile.")
    parser.add_argument(
        "--mode",
        choices=["release", "default-branch"],
        default="release",
        help="Where to pull pins from (latest GitHub release tag or default branch head).",
    )
    parser.add_argument("--sam2-ref", help="Explicit ref for SAM2 (tag/branch/sha). Overrides --mode.")
    parser.add_argument("--matany-ref", help="Explicit ref for MatAnyone (tag/branch/sha). Overrides --mode.")
    parser.add_argument("--dockerfile", default=DOCKERFILE_PATH, help="Path to Dockerfile.")
    parser.add_argument("--dry-run", action="store_true", help="Show changes but do not write file.")
    parser.add_argument("--json", action="store_true", help="Print resulting pins as JSON.")
    parser.add_argument("--no-backup", action="store_true", help="Do not create a Dockerfile.bak backup.")
    args = parser.parse_args()

    def resolve(repo_url, explicit_ref):
        """Pick the resolver for one repo: explicit ref first, else --mode."""
        if explicit_ref:
            return get_sha_for_ref(repo_url, explicit_ref)
        if args.mode == "release":
            return get_latest_release_sha(repo_url)
        return get_latest_default_branch_sha(repo_url)

    # Resolve SHAs (SAM2 first, then MatAnyone)
    sam2_refdesc, sam2_sha = resolve(SAM2_REPO_URL, args.sam2_ref)
    mat_refdesc, mat_sha = resolve(MATANY_REPO_URL, args.matany_ref)

    # Show pins
    if args.json:
        result = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "mode": args.mode,
            "SAM2": {"repo": SAM2_REPO_URL, "ref": sam2_refdesc, "sha": sam2_sha},
            "MatAnyone": {"repo": MATANY_REPO_URL, "ref": mat_refdesc, "sha": mat_sha},
        }
        print(json.dumps(result, indent=2))
    else:
        print(f"[Pins] SAM2 -> {sam2_refdesc} -> {sam2_sha}")
        print(f"[Pins] MatAnyone -> {mat_refdesc} -> {mat_sha}")

    # Read Dockerfile
    dockerfile = args.dockerfile
    if not os.path.isfile(dockerfile):
        raise FileNotFoundError(f"Dockerfile not found at: {dockerfile}")
    with open(dockerfile, "r", encoding="utf-8") as src:
        text = src.read()

    # Update lines
    for arg_name, sha in (("SAM2_SHA", sam2_sha), ("MATANYONE_SHA", mat_sha)):
        text = update_dockerfile_arg(text, arg_name, sha)

    if args.dry_run:
        print("\n--- Dockerfile (preview) ---\n")
        print(text)
        return

    # Backup (copy of the file as it is on disk, before overwriting)
    if not args.no_backup:
        copyfile(dockerfile, dockerfile + ".bak")

    # Write
    with open(dockerfile, "w", encoding="utf-8") as dst:
        dst.write(text)

    print(f"\n✅ Updated {dockerfile} with new pins.")
191
+
192
if __name__ == "__main__":
    # Top-level boundary: report any failure on stderr and exit with status 1.
    exit_code = 0
    try:
        main()
    except Exception as err:
        print(f"\n❌ Error: {err}", file=sys.stderr)
        exit_code = 1
    if exit_code:
        sys.exit(exit_code)