youssefleb committed
Commit 85f50db · verified · 1 Parent(s): 19f5032

Update mcp_servers.py

Files changed (1)
  1. mcp_servers.py +76 -79
mcp_servers.py CHANGED
@@ -1,30 +1,11 @@
-# mcp_servers.py (Corrected for GOOGLE_API_KEY)
+# mcp_servers.py (New Secure & Robust Version)
 import asyncio
-import os
-import httpx
 import json
-import google.generativeai as genai
-import anthropic
-import openai
+from typing import Dict, Optional
 from personas import PERSONAS_DATA
-
-# --- 1. Load API Keys from Blaxel Secrets ---
-# --- THIS IS THE FIX ---
-GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY") # Use the secret name from your screenshot
-# ---
-ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
-SAMBANOVA_API_KEY = os.getenv("SAMBANOVA_API_KEY")
-SAMBANOVA_BASE_URL = os.getenv("SAMBANOVA_BASE_URL", "https://api.sambanova.ai/v1")
-
-# --- 2. Configure API Clients ---
-genai.configure(api_key=GEMINI_API_KEY)
-gemini_model = genai.GenerativeModel('gemini-1.5-pro-latest')
-anthropic_client = anthropic.AsyncAnthropic(api_key=ANTHROPIC_API_KEY)
-
-sambanova_client = openai.AsyncOpenAI(
-    api_key=SAMBANOVA_API_KEY,
-    base_url=SAMBANOVA_BASE_URL
-)
+import google.generativeai as genai
+from anthropic import AsyncAnthropic
+from openai import AsyncOpenAI
 
 # This is the prompt from your 'LLM judges prompt v3.0.docx'
 EVALUATION_PROMPT_TEMPLATE = """
@@ -63,26 +44,30 @@ You MUST return *only* a valid JSON object in the following format:
 class BusinessSolutionEvaluator:
     """Implements the "LLM-as-a-Judge" with a live call to Gemini."""
 
+    def __init__(self, gemini_client: Optional[genai.GenerativeModel]):
+        # 1. Get the pre-initialized client from the main agent
+        if not gemini_client:
+            raise ValueError("BusinessSolutionEvaluator requires a Google/Gemini client to function.")
+        self.gemini_model = gemini_client
+
     async def evaluate(self, problem: str, solution_text: str) -> dict:
         print(f"Evaluating solution (live): {solution_text[:50]}...")
-
         prompt = EVALUATION_PROMPT_TEMPLATE.format(problem=problem, solution_text=solution_text)
 
         try:
-            response = await gemini_model.generate_content_async(
+            response = await self.gemini_model.generate_content_async(
                 prompt,
                 generation_config=genai.types.GenerationConfig(
                     response_mime_type="application/json"
                 )
             )
-
             json_text = response.text.strip().replace("```json", "").replace("```", "")
             v_fitness = json.loads(json_text)
-
             print(f"Evaluation complete (live): {v_fitness}")
             return v_fitness
         except Exception as e:
             print(f"ERROR: BusinessSolutionEvaluator failed: {e}")
+            # Fallback in case of API error
             return {
                 "Novelty": {"score": 1, "justification": "Error during evaluation."},
                 "Usefulness_Feasibility": {"score": 1, "justification": "Error during evaluation."},
@@ -91,52 +76,29 @@ class BusinessSolutionEvaluator:
                 "Cultural_Appropriateness": {"score": 1, "justification": "Error during evaluation."}
             }
 
-# --- 3. Unified API Call Function ---
-async def get_llm_response(client_name: str, system_prompt: str, user_prompt: str) -> str:
-    """A single function to handle calling any of the three sponsor LLMs."""
-    try:
-        if client_name == "Gemini":
-            chat = gemini_model.start_chat(history=[
-                {'role': 'user', 'parts': [system_prompt]},
-                {'role': 'model', 'parts': ["Understood. I will act as this persona."]}
-            ])
-            response = await chat.send_message_async(user_prompt)
-            return response.text
-
-        elif client_name == "Anthropic":
-            response = await anthropic_client.messages.create(
-                model="claude-3-opus-20240229",
-                max_tokens=2048,
-                system=system_prompt,
-                messages=[{"role": "user", "content": user_prompt}]
-            )
-            return response.content[0].text
-
-        elif client_name == "SambaNova":
-            completion = await sambanova_client.chat.completions.create(
-                model="Meta-Llama-3.1-8B-Instruct",
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt}
-                ]
-            )
-            return completion.choices[0].message.content
-
-    except Exception as e:
-        print(f"ERROR: API call to {client_name} failed: {e}")
-        return f"Error generating response from {client_name}."
-
-
 class AgentCalibrator:
-    """Tests the sponsor LLMs with live API calls."""
+    """Tests the available sponsor LLMs to find the best one for each role."""
 
-    def __init__(self, evaluator: BusinessSolutionEvaluator):
+    def __init__(self, api_clients: dict, evaluator: BusinessSolutionEvaluator):
         self.evaluator = evaluator
-        self.sponsor_llms = ["Gemini", "Anthropic", "SambaNova"]
+        # 1. Get the dict of *live* clients
+        self.api_clients = {name: client for name, client in api_clients.items() if client}
+        self.sponsor_llms = list(self.api_clients.keys())
+        print(f"AgentCalibrator initialized with enabled clients: {self.sponsor_llms}")
 
     async def calibrate_team(self, problem: str) -> dict:
-        print("Running LIVE calibration test for specialist team...")
+        print(f"Running LIVE calibration test for specialist team on {self.sponsor_llms}...")
 
+        # If only one LLM is available, skip calibration and default to it
+        if len(self.sponsor_llms) == 1:
+            default_llm = self.sponsor_llms[0]
+            print("Only one LLM available. Skipping calibration.")
+            return {
+                "Plant": {"persona": "Culture_5", "llm": default_llm},
+                "Implementer": {"persona": "Culture_Expert", "llm": default_llm},
+                "Monitor": {"persona": "Culture_11", "llm": default_llm}
+            }
+
         roles_to_test = {
             "Plant": PERSONAS_DATA["Culture_5"]["description"],
             "Implementer": PERSONAS_DATA["Culture_Expert"]["description"],
@@ -147,8 +109,8 @@ class AgentCalibrator:
 
         tasks = []
         for role, persona in roles_to_test.items():
-            for llm in self.sponsor_llms:
-                tasks.append(self.run_calibration_test(problem, role, llm, persona, test_problem))
+            for llm_name in self.sponsor_llms:
+                tasks.append(self.run_calibration_test(problem, role, llm_name, persona, test_problem))
 
         results = await asyncio.gather(*tasks)
 
@@ -161,7 +123,7 @@
 
         for role in roles_to_test.keys():
             best_score = -1
-            best_llm = "None"
+            best_llm = self.sponsor_llms[0] # Default to first available
            for res in results:
                 if res["role"] == role:
                     metric = role_metrics[role]
@@ -180,16 +142,51 @@ class AgentCalibrator:
         print(f"Calibration complete (live). Team plan: {team_plan}")
         return team_plan
 
-    async def run_calibration_test(self, problem, role, llm, persona, test_problem):
+    async def run_calibration_test(self, problem, role, llm_name, persona, test_problem):
         """Helper to run a single test and evaluation."""
-        print(f"...Calibrating {role} on {llm}...")
-        solution = await get_llm_response(llm, persona, test_problem)
+        print(f"...Calibrating {role} on {llm_name}...")
+        client = self.api_clients[llm_name]
+        solution = await get_llm_response(llm_name, client, persona, test_problem)
+
         if "Error generating response" in solution:
-            return {"role": role, "llM": llm, "score": {
-                "Novelty": {"score": 0},
-                "Usefulness_Feasibility": {"score": 0},
-                "Cultural_Appropriateness": {"score": 0}
+            return {"role": role, "llm": llm_name, "score": {
+                "Novelty": {"score": 0}, "Usefulness_Feasibility": {"score": 0}, "Cultural_Appropriateness": {"score": 0}
             }}
 
         score = await self.evaluator.evaluate(problem, solution)
-        return {"role": role, "llm": llm, "score": score}
+        return {"role": role, "llm": llm_name, "score": score}
+
+# --- Unified API Call Function ---
+async def get_llm_response(client_name: str, client, system_prompt: str, user_prompt: str) -> str:
+    """A single function to handle calling any of the three sponsor LLMs."""
+    try:
+        if client_name == "Gemini":
+            chat = client.start_chat(history=[
+                {'role': 'user', 'parts': [system_prompt]},
+                {'role': 'model', 'parts': ["Understood. I will act as this persona."]}
+            ])
+            response = await chat.send_message_async(user_prompt)
+            return response.text
+
+        elif client_name == "Anthropic":
+            response = await client.messages.create(
+                model="claude-3-opus-20240229",
+                max_tokens=2048,
+                system=system_prompt,
+                messages=[{"role": "user", "content": user_prompt}]
+            )
+            return response.content[0].text
+
+        elif client_name == "SambaNova":
+            completion = await client.chat.completions.create(
+                model="Meta-Llama-3.1-8B-Instruct",
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt}
+                ]
+            )
+            return completion.choices[0].message.content
+
+    except Exception as e:
+        print(f"ERROR: API call to {client_name} failed: {e}")
+        return f"Error generating response from {client_name}."