Upload from GitHub Actions: minor caching change
- evals/main.py +12 -7
- evals/models.py +1 -1
evals/main.py
CHANGED
@@ -14,16 +14,21 @@ async def evaluate():
     single_model = os.environ.get("SINGLE_MODEL")  # Optional: run only one specific model
     test_mode = os.environ.get("TEST", "").lower() in ("1", "true", "yes")  # Optional: skip results loading/saving
 
-    models_df = pd.DataFrame(models)
-    languages_df = pd.DataFrame(languages)
+    # Keep original DataFrames for saving metadata
+    original_models_df = pd.DataFrame(models)
+    original_languages_df = pd.DataFrame(languages)
+
+    # Create working copies for single evaluation runs
+    models_df = original_models_df.copy()
+    languages_df = original_languages_df.copy()
     top_languages = languages.head(max_languages)
 
-    # Filter to single model if specified
+    # Filter to single model if specified (only affects evaluation, not saving)
     if single_model:
         models_df = models_df[models_df["id"] == single_model]
         if len(models_df) == 0:
             print(f"Error: Model '{single_model}' not found. Available models:")
-            for model_id in models_df["id"]:
+            for model_id in original_models_df["id"]:
                 print(f"  {model_id}")
             return pd.DataFrame()
 
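The point of the new copies is that the error path can still list every available model after the working frame has been filtered down to nothing. A minimal sketch of the copy-then-filter pattern, with hypothetical stand-in data (not the repo's actual model list):

import pandas as pd

# Filter a working copy; keep the original intact for error messages.
original = pd.DataFrame({"id": ["gpt-4o", "claude-3.5", "llama-3"]})
working = original.copy()

wanted = "mistral-7b"  # not in the list, so the fallback path triggers
working = working[working["id"] == wanted]

if len(working) == 0:
    print(f"Error: Model '{wanted}' not found. Available models:")
    for model_id in original["id"]:  # unfiltered, so all models are listed
        print(f"  {model_id}")

Note that pandas boolean indexing already returns a new frame, so the explicit .copy() mainly documents intent and guards against later in-place mutation of the originals.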
@@ -110,9 +115,9 @@
         results_df = results_df.sort_values(by=["model", "bcp_47", "task", "metric"])
         results_df.to_json("results.json", **args)
 
-        # Save model and language info
-        models_df.to_json("models.json", **args)
-        languages_df.to_json("languages.json", **args)
+        # Save model and language info (always save complete metadata, not filtered)
+        original_models_df.to_json("models.json", **args)
+        original_languages_df.to_json("languages.json", **args)
     else:
         print("TEST MODE: Skipping results saving")
 
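Saving original_models_df and original_languages_df rather than the filtered working copies means a SINGLE_MODEL run no longer overwrites models.json and languages.json with one-row files. The shared **args dict is defined earlier in evals/main.py and isn't shown in this hunk; the sketch below assumes a typical to_json option set:

import pandas as pd

# Hypothetical stand-ins for the unfiltered metadata frames.
original_models_df = pd.DataFrame({"id": ["gpt-4o", "claude-3.5"]})
original_languages_df = pd.DataFrame({"bcp_47": ["en", "de"]})

# Assumed shape of the shared serialization options; the real dict may differ.
args = dict(orient="records", indent=2, force_ascii=False)

original_models_df.to_json("models.json", **args)
original_languages_df.to_json("languages.json", **args)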
evals/models.py
CHANGED
@@ -345,7 +345,7 @@ def get_cost(row):
     return None
 
 
-
+@cache
 def load_models(date: date):
     popular_models = (
         get_historical_popular_models(date.today())[:20]