Update mcp_servers.py
mcp_servers.py  CHANGED  (+82 -10)
@@ -1,4 +1,4 @@
-# mcp_servers.py (FIXED:
+# mcp_servers.py (FIXED: Prompt-Schema Alignment + Detailed Logging)
 import asyncio
 import json
 import re
@@ -12,10 +12,58 @@ from personas import PERSONAS_DATA
 
 EVALUATION_PROMPT_TEMPLATE = load_prompt(config.PROMPT_FILES["evaluator"])
 
+# --- DEFINING THE SCHEMA TO FORCE JUSTIFICATIONS ---
+# This forces the LLM to return a specific JSON structure with "score" and "justification"
+EVALUATION_SCHEMA = {
+    "type": "OBJECT",
+    "properties": {
+        "Novelty": {
+            "type": "OBJECT",
+            "properties": {
+                "score": {"type": "INTEGER"},
+                "justification": {"type": "STRING"}
+            },
+            "required": ["score", "justification"]
+        },
+        "Usefulness_Feasibility": {
+            "type": "OBJECT",
+            "properties": {
+                "score": {"type": "INTEGER"},
+                "justification": {"type": "STRING"}
+            },
+            "required": ["score", "justification"]
+        },
+        "Flexibility": {
+            "type": "OBJECT",
+            "properties": {
+                "score": {"type": "INTEGER"},
+                "justification": {"type": "STRING"}
+            },
+            "required": ["score", "justification"]
+        },
+        "Elaboration": {
+            "type": "OBJECT",
+            "properties": {
+                "score": {"type": "INTEGER"},
+                "justification": {"type": "STRING"}
+            },
+            "required": ["score", "justification"]
+        },
+        "Cultural_Appropriateness": {
+            "type": "OBJECT",
+            "properties": {
+                "score": {"type": "INTEGER"},
+                "justification": {"type": "STRING"}
+            },
+            "required": ["score", "justification"]
+        }
+    },
+    "required": ["Novelty", "Usefulness_Feasibility", "Flexibility", "Elaboration", "Cultural_Appropriateness"]
+}
+
 def extract_json(text: str) -> dict:
     """Robustly extracts JSON from text."""
     try:
-        # 1. Try simple block extraction first
         clean_text = text.strip()
         if "```json" in clean_text:
             clean_text = clean_text.split("```json")[1].split("```")[0].strip()
@@ -23,10 +71,7 @@ def extract_json(text: str) -> dict:
             clean_text = clean_text.split("```")[1].split("```")[0].strip()
         return json.loads(clean_text)
     except (json.JSONDecodeError, IndexError):
-        # 2. Fallback: Regex to find the outermost JSON object
         try:
-            # Looks for { ... } but matching closest brackets is hard with regex.
-            # This regex grabs from the first { to the last }
             match = re.search(r'(\{[\s\S]*\})', text)
             if match:
                 return json.loads(match.group(1))
@@ -44,13 +89,34 @@ class BusinessSolutionEvaluator:
 
     async def evaluate(self, problem: str, solution_text: str) -> dict:
         print(f"Evaluating solution (live): {solution_text[:50]}...")
-
+
+        # 1. Base Prompt from the clean text file
+        base_prompt = EVALUATION_PROMPT_TEMPLATE.format(problem=problem, solution_text=solution_text)
+
+        # 2. INJECT STRONG INSTRUCTION
+        # This prevents the model from regurgitating examples found in the prompt file
+        schema_instruction = """
+
+        [IMPORTANT SYSTEM INSTRUCTION]
+        Ignore any previous examples of JSON formatting in this prompt.
+        You MUST strictly follow the Output Schema provided below.
+
+        For EACH of the 5 metrics (Novelty, Usefulness_Feasibility, etc.), you must provide an object with TWO fields:
+        1. "score": An integer from 1 to 5.
+        2. "justification": A specific sentence explaining why you gave that score.
+
+        Do not output a list. Return a single JSON object describing the solution above.
+        """
+
+        final_prompt = base_prompt + schema_instruction
 
         try:
+            # --- ENFORCE SCHEMA ---
             response = await self.gemini_model.generate_content_async(
-
+                final_prompt,
                 generation_config=genai.types.GenerationConfig(
-                    response_mime_type="application/json"
+                    response_mime_type="application/json",
+                    response_schema=EVALUATION_SCHEMA
                 )
             )
 
@@ -80,9 +146,12 @@ class AgentCalibrator:
         self.sponsor_llms = list(self.api_clients.keys())
         print(f"AgentCalibrator initialized with enabled clients: {self.sponsor_llms}")
 
+    # --- UPDATED: Return detailed results for logging ---
    async def calibrate_team(self, problem: str) -> Tuple[Dict[str, Any], List[str], List[Dict[str, Any]]]:
         print(f"Running LIVE calibration test for specialist team on {self.sponsor_llms}...")
         error_log = []
+        detailed_results = [] # To capture full calibration data
+
         if not self.sponsor_llms:
             raise Exception("AgentCalibrator cannot run: No LLM clients are configured.")
 
@@ -108,7 +177,8 @@ class AgentCalibrator:
             tasks.append(self.run_calibration_test(problem, role, llm_name, persona, test_problem))
 
         results = await asyncio.gather(*tasks)
-
+        detailed_results = results # Store the full results here
+
         best_llms = {}
         role_metrics = config.CALIBRATION_CONFIG["role_metrics"]
 
@@ -152,7 +222,9 @@ class AgentCalibrator:
             "Monitor": {"persona": config.CALIBRATION_CONFIG["roles_to_test"]["Monitor"], "llm": best_llms["Monitor"]}
         }
         print(f"Calibration complete (live). Team plan: {team_plan}")
-
+
+        # Return 3 values: The plan, errors, and the full trace
+        return team_plan, error_log, detailed_results
 
     async def run_calibration_test(self, problem, role, llm_name, persona, test_problem):
         print(f"...Calibrating {role} on {llm_name}...")
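
Note on the retained extract_json fallback: when the model wraps its JSON in prose instead of a fenced block, the regex path grabs everything from the first "{" to the last "}". A quick sketch of what that handles (the wrapper text is illustrative only):

raw = 'Sure! Here is the evaluation:\n{"Novelty": {"score": 4, "justification": "ok"}}'
# Direct json.loads fails because of the leading prose, so the regex fallback kicks in.
print(extract_json(raw)["Novelty"]["score"])  # -> 4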
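With response_schema enforced, each of the five metrics should come back as a {score, justification} object rather than a bare number. A minimal sketch of the expected payload shape and of flattening it for scoring; the flatten_scores helper and the sample values are hypothetical, not part of this commit:

import json

# Illustrative only: a response body that satisfies EVALUATION_SCHEMA
# (scores and justifications are made-up placeholder values).
sample_response_text = """{
  "Novelty": {"score": 4, "justification": "Combines two known tactics in an uncommon way."},
  "Usefulness_Feasibility": {"score": 3, "justification": "Workable, but needs a partner integration."},
  "Flexibility": {"score": 4, "justification": "Applies across several customer segments."},
  "Elaboration": {"score": 2, "justification": "Implementation steps are only sketched."},
  "Cultural_Appropriateness": {"score": 5, "justification": "Fits local purchasing norms."}
}"""

def flatten_scores(payload_text: str) -> dict:
    """Hypothetical helper (not in mcp_servers.py): pull plain scores out of the nested payload."""
    data = json.loads(payload_text)  # extract_json(payload_text) would behave the same here
    return {metric: block["score"] for metric, block in data.items()}

print(flatten_scores(sample_response_text))
# -> {'Novelty': 4, 'Usefulness_Feasibility': 3, 'Flexibility': 4, 'Elaboration': 2, 'Cultural_Appropriateness': 5}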
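Because calibrate_team now returns three values (the team plan, the error log, and the full calibration trace), any caller written against the old return shape needs to unpack one more item. A minimal sketch of a call site under that assumption; the build_team function and the example problem string are hypothetical, not part of this commit:

import asyncio

async def build_team(calibrator, problem: str):
    # calibrator is an AgentCalibrator instance; the orchestration around it is assumed.
    team_plan, error_log, detailed_results = await calibrator.calibrate_team(problem)
    for entry in detailed_results:   # full per-role/per-LLM calibration trace
        print("calibration trace:", entry)
    for err in error_log:            # any failures captured during calibration
        print("calibration error:", err)
    return team_plan

# Example usage (hypothetical):
# team_plan = asyncio.run(build_team(AgentCalibrator(...), "How can a cafe grow weekday sales?"))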