Spaces:

triflix
/

brainfuncall

Running

App Files Files Community

triflix committed 21 days ago

Commit

b3f0838

verified ·

1 Parent(s): d14886f

Update main.py

Browse files

Files changed (1) hide show

main.py +48 -75

main.py CHANGED Viewed

@@ -11,119 +11,92 @@ from huggingface_hub import login
 # ==========================================
 # 1. APP SETUP
 # ==========================================
-app = FastAPI(
-    title="FunctionGemma Brain API",
-    version="1.0.0",
-)
-# Global variables
 MODEL_ID = "google/functiongemma-270m-it"
 tokenizer = None
 model = None
 # ==========================================
-# 2. DATA MODELS
 # ==========================================
-class ChatRequest(BaseModel):
-    query: str = Field(..., min_length=1, max_length=4096)
-    tools: List[Dict[str, Any]]
-    include_date: bool = True
-class HealthResponse(BaseModel):
-    status: str
-    model: str
-    auth_status: str
 # ==========================================
-# 3. STARTUP (Auth + Load Model)
 # ==========================================
 @app.on_event("startup")
 async def startup():
     global tokenizer, model
-    # A. Authenticate using Environment Variable
-    print("🔐 Checking for HF_TOKEN...")
     hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        print("❌ Error: HF_TOKEN environment variable is missing.")
-        raise RuntimeError("HF_TOKEN environment variable is missing in Space Settings.")
-    try:
-        login(token=hf_token)
-        print("✅ Authentication successful.")
-    except Exception as e:
-        print(f"❌ Authentication Failed: {e}")
-        raise RuntimeError(f"Hugging Face login failed: {e}")
-    # B. Load Model
-    print(f"🧠 Loading Model: {MODEL_ID}...")
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            device_map="cpu",
-            torch_dtype=torch.float32,
-        )
-        print("✅ Model Loaded Successfully.")
-    except Exception as e:
-        print(f"❌ Model Load Failed: {e}")
-        raise RuntimeError(f"Model load failed: {e}")
 # ==========================================
 # 4. API ENDPOINT
 # ==========================================
 @app.post("/generate")
 async def generate_function_call(request: ChatRequest):
-    if model is None or tokenizer is None:
-        raise HTTPException(status_code=503, detail="Model not ready")
     try:
-        # System context
-        system_content = (
-            "You are a model that can do function calling with the following functions."
-        )
         if request.include_date:
             today = datetime.date.today().isoformat()
             system_content += f" Today is {today}."
-        messages = [
-            {"role": "system", "content": system_content},
-            {"role": "user", "content": request.query},
-        ]
         inputs = tokenizer.apply_chat_template(
             messages,
             tools=request.tools,
             add_generation_prompt=True,
-            return_tensors="pt",
             return_dict=True,
         )
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=128,
-            do_sample=False,  # deterministic
-        )
-        generated_text = tokenizer.decode(
-            outputs[0][len(inputs["input_ids"][0]):],
-            skip_special_tokens=True,
-        )
         return {"response": generated_text}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.get("/", response_model=HealthResponse)
-def health_check():
-    return {
-        "status": "running",
-        "model": MODEL_ID,
-        "auth_status": "secure_env",
-    }

 # ==========================================
 # 1. APP SETUP
 # ==========================================
 # FastAPI application object; the startup hook and the /generate route in
 # this file attach to it through the @app decorators.
+app = FastAPI(title="FunctionGemma Brain API", version="1.0.0")
 # Hugging Face Hub id passed to both from_pretrained() calls in startup().
 MODEL_ID = "google/functiongemma-270m-it"
 # Module-level handles: None at import time, assigned by startup(), read by
 # the /generate handler.
 tokenizer = None
 model = None
 # ==========================================
+# 2. FEW-SHOT EXAMPLES (The Teacher)
 # ==========================================
+# We teach the model the correct tool names here.
+# This list simulates a previous conversation so the model knows what to do.
 # The /generate handler inserts these turns between the system message and
 # the caller's query on every request.
 # NOTE(review): the tool names and argument keys below are hard-coded, while
 # the tool schemas themselves arrive per request in ChatRequest.tools --
 # presumably callers are expected to send tools with these names; confirm.
 # NOTE(review): the replies use role "model" rather than "assistant"; confirm
 # the tokenizer's chat template accepts that role name.
 # NOTE(review): string argument values are written bare (e.g. dam_name:Aadale);
 # verify against the model card whether string values need escape delimiters.
+FEW_SHOT_MESSAGES = [
+    # Example 1: Counting/Stats
+    {"role": "user", "content": "How many regions are there?"},
+    {"role": "model", "content": "<start_function_call>call:get_aggregate_stats{target_entity:revenue_region}<end_function_call>"},
+    # Example 2: Specific Search
+    {"role": "user", "content": "What is the water level in Aadale dam?"},
+    {"role": "model", "content": "<start_function_call>call:search_specific_dam{dam_name:Aadale}<end_function_call>"},
+    # Example 3: Filtering
+    {"role": "user", "content": "Show me Major dams in Pune."},
+    {"role": "model", "content": "<start_function_call>call:filter_dams{district:Pune,project_type:Major}<end_function_call>"},
+    # Example 4: Irrelevant Question (Teach it to NOT call functions for random stuff)
+    {"role": "user", "content": "What is the capital of France?"},
+    {"role": "model", "content": "I cannot answer that as it is not related to the dam database."}
+]
 # ==========================================
+# 3. STARTUP
 # ==========================================
 # Startup hook registered on the FastAPI "startup" event: logs in to the
 # Hugging Face Hub with the token from the environment, then loads the
 # tokenizer and model into the module-level globals.
 # Raises RuntimeError("HF_TOKEN missing") when HF_TOKEN is unset or empty.
 # Exceptions from login() or from_pretrained() are not caught here and
 # propagate unchanged.
 # NOTE(review): recent FastAPI releases document on_event as deprecated in
 # favour of a lifespan handler -- confirm against the pinned FastAPI version.
 @app.on_event("startup")
 async def startup():
     global tokenizer, model
     # The access token comes from the HF_TOKEN environment variable only.
     hf_token = os.getenv("HF_TOKEN")
+    if not hf_token: raise RuntimeError("HF_TOKEN missing")
+    login(token=hf_token)
+    print(f"🧠 Loading {MODEL_ID}...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
     # The model is requested on CPU with float32 weights.
+    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cpu", torch_dtype=torch.float32)
+    print("✅ Model Loaded.")
 # ==========================================
 # 4. API ENDPOINT
 # ==========================================
 # Request body accepted by POST /generate.
 # NOTE(review): the removed version of this model declared
 # query with Field(..., min_length=1, max_length=4096); here query is a plain
 # str, so empty and arbitrarily long queries pass validation -- confirm that
 # dropping the bounds was intended.
+class ChatRequest(BaseModel):
     # User question; becomes the final "user" message of the prompt.
+    query: str
     # Tool definitions passed as-is to tokenizer.apply_chat_template(tools=...).
+    tools: List[Dict[str, Any]]
     # When true, " Today is <ISO date>." is appended to the system prompt.
+    include_date: bool = True
 # POST /generate: builds a chat prompt (system message, few-shot turns, the
 # caller's query), runs generation and returns {"response": <decoded text>}
 # containing only the newly generated tokens.
 # Raises HTTPException 503 when `model` is falsy, and HTTPException 500 with
 # detail=str(e) for any exception raised inside the try body.
 @app.post("/generate")
 async def generate_function_call(request: ChatRequest):
     # NOTE(review): only `model` is tested here; the removed version of this
     # handler also required `tokenizer` to be non-None.
+    if not model: raise HTTPException(status_code=503, detail="Model loading")
     try:
+        # 1. System Prompt
+        system_content = "You are a model that can do function calling with the following functions."
         if request.include_date:
             today = datetime.date.today().isoformat()
             system_content += f" Today is {today}."
+        # 2. Construct History: System -> Examples -> Current User Query
+        messages = [{"role": "system", "content": system_content}]
+        # Inject the examples!
+        messages.extend(FEW_SHOT_MESSAGES)
+        # Add the actual user query
+        messages.append({"role": "user", "content": request.query})
+        # 3. Tokenize
         # The caller-supplied tool schemas go to the chat template together
         # with the messages; return_dict=True and return_tensors="pt" make
         # `inputs` a mapping of PyTorch tensors that is unpacked into
         # generate() below.
         inputs = tokenizer.apply_chat_template(
             messages,
             tools=request.tools,
             add_generation_prompt=True,
             return_dict=True,
+            return_tensors="pt",
         )
+        # 4. Generate
         # do_sample=False disables sampling; at most 128 new tokens are produced.
         # NOTE(review): generate() is called directly inside this async handler
         # (not awaited or offloaded) -- presumably it occupies the event loop
         # for the whole generation; confirm whether that is acceptable.
+        outputs = model.generate(**inputs, max_new_tokens=128, do_sample=False)
         # Slicing from the prompt length onward skips the prompt tokens, so
         # only the continuation is decoded.
+        generated_text = tokenizer.decode(outputs[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
         return {"response": generated_text}
     # Every failure in the try body is reported to the client as a 500 whose
     # detail is the exception message.
     except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))