Spaces:

Luigi
/

ZeroGPU-LLM-Inference

Running

Luigi committed on Oct 9

Commit

de64679

verified ·

1 Parent(s): 4418827

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -27,10 +27,10 @@ cancel_event = threading.Event()
 # ------------------------------
 MODELS = {
     # Models with 14B+ parameters
-    "Apriel-1.5-15b-Thinker": {
-        "repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker",
-        "description": "A 15B multimodal reasoning model from ServiceNow’s Apriel series. Achieves SOTA performance on text and image reasoning (52 on Artificial Analysis index, 68 on Tau2 Bench Telecom, 62 on IFBench) despite undergoing only text SFT—no image fine-tuning. Fits on a single GPU and competes with models 10× its size like Deepseek R1 and Gemini-Flash."
-    },
     "Qwen3-14B": {
         "repo_id": "Qwen/Qwen3-14B",
         "description": "Dense causal language model with 14.8 B total parameters (13.2 B non-embedding), 40 layers, 40 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), enhanced human preference alignment & advanced agent integration."

 # ------------------------------
 MODELS = {
     # Models with 14B+ parameters
+    # "Apriel-1.5-15b-Thinker": {
+    #     "repo_id": "ServiceNow-AI/Apriel-1.5-15b-Thinker",
+    #     "description": "A 15B multimodal reasoning model from ServiceNow’s Apriel series. Achieves SOTA performance on text and image reasoning (52 on Artificial Analysis index, 68 on Tau2 Bench Telecom, 62 on IFBench) despite undergoing only text SFT—no image fine-tuning. Fits on a single GPU and competes with models 10× its size like Deepseek R1 and Gemini-Flash."
+    # },
     "Qwen3-14B": {
         "repo_id": "Qwen/Qwen3-14B",
         "description": "Dense causal language model with 14.8 B total parameters (13.2 B non-embedding), 40 layers, 40 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), enhanced human preference alignment & advanced agent integration."