| | from huggingface_hub import list_models, model_info |
| | from datetime import datetime |
| | from datasets import Dataset, load_dataset |
| | import pandas as pd |
| | import os |
| | import globals |
| | from typing import List, Tuple |
| |
|
| |
|
def get_models_providers() -> List[Tuple[str, List[str]]]:
    """Fetch the most-liked text-generation models on the Hub together with
    the inference providers each one is served by.

    Returns:
        A list of (model_id, [provider, ...]) tuples, capped at
        globals.NUM_MODELS_RUN entries; models that expose no inference
        provider mapping are skipped.
    """
    popular = list_models(
        filter="text-generation",
        sort="likes",
        direction=-1,
        limit=globals.NUM_MODELS_RUN,
        expand="inferenceProviderMapping"
    )

    pairs: List[Tuple[str, List[str]]] = []
    for model in popular:
        mapping = getattr(model, 'inference_provider_mapping', None)
        if mapping:
            pairs.append((model.id, [entry.provider for entry in mapping]))
    return pairs
| |
|
| |
|
def initialize_models_providers_file(file_path: str = globals.LOCAL_CONFIG_FILE) -> Tuple[str, str]:
    """Initialize the models/providers config file with popular models and their providers.

    Fetches the currently popular text-generation models from the Hub and
    writes one "model_name provider_name" pair per line, preceded by a
    commented header with a generation timestamp.

    Args:
        file_path: Destination path of the config file.

    Returns:
        A (status_message, rendered_config) tuple. The annotation previously
        claimed ``-> str`` although a 2-tuple was always returned; fixed.
    """
    model_to_providers = get_models_providers()

    with open(file_path, 'w') as f:
        f.write("# Models and Providers Configuration\n")
        f.write("# Format: model_name provider_name\n")
        f.write(f"# Auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        count = 0
        for (model_id, providers) in model_to_providers:
            try:
                for provider in providers:
                    f.write(f"{model_id} {provider}\n")
                    count += 1
            except Exception as e:
                # Best-effort: skip a model whose entry cannot be written
                # instead of aborting the whole file.
                print(f"Error processing model {model_id}: {e}")
                continue

    print(f"Successfully wrote {count} model-provider combinations to {file_path}")

    # Bug fix: render the file that was just written — the original called
    # load_models_providers_str() with no argument, which re-read the
    # default config path even when a custom file_path was given.
    return f"Initialized {count} model-provider combinations", load_models_providers_str(file_path)
| |
|
| |
|
def load_models_providers_str(file_path: str = globals.LOCAL_CONFIG_FILE) -> str:
    """Render the configured (model, provider) pairs as a readable string.

    Each pair produced by load_models_providers() becomes one
    "model : provider" line.
    """
    rendered = (f"{model} : {provider}"
                for (model, provider) in load_models_providers(file_path))
    return "\n".join(rendered)
| |
|
| |
|
def load_models_providers(file_path: str = globals.LOCAL_CONFIG_FILE) -> List[Tuple[str, str]]:
    """Load models and providers from text file. Creates file if it doesn't exist.

    Blank lines and '#'-comment lines are skipped; every other line is
    split on whitespace and its first two tokens become a
    (model, provider) pair. On any error the (possibly partial) list
    collected so far is returned after logging a traceback.
    """
    pairs: List[Tuple[str, str]] = []
    try:
        # Bootstrap the config file on first use.
        if not os.path.exists(file_path):
            print(f"Config file {file_path} not found. Initializing...")
            initialize_models_providers_file(file_path)

        with open(file_path, 'r') as fh:
            for raw_line in fh:
                entry = raw_line.strip()
                if not entry or entry.startswith('#'):
                    continue
                tokens = entry.split()
                if len(tokens) >= 2:
                    pairs.append((tokens[0], tokens[1]))
    except Exception as e:
        print(f"Error loading model providers: {str(e)}")
        import traceback
        traceback.print_exc()
    return pairs
| |
|
| |
|
def save_results() -> None:
    """Persist job results to HuggingFace dataset.

    Converts the in-memory globals.job_results mapping to a pandas
    DataFrame and pushes it to the configured public dataset. Failures
    are logged and swallowed so callers are never interrupted.
    """
    try:
        if not globals.job_results:
            print("No results to save")
            return

        rows = list(globals.job_results.values())
        frame = pd.DataFrame(rows)

        Dataset.from_pandas(frame).push_to_hub(
            globals.RESULTS_DATASET_NAME,
            token=os.getenv("HF_TOKEN"),
            private=False
        )
        print(f"Saved {len(rows)} results to dataset")

    except Exception as e:
        print(f"Error saving results to dataset: {e}")
| |
|
| |
|
def load_results() -> None:
    """Load job results from HuggingFace dataset.

    Rebuilds globals.job_results from the remote dataset, keyed by
    globals.get_model_provider_key(model, provider). A missing dataset
    or any load error leaves the results empty and is only logged.
    """
    try:
        dataset = load_dataset(
            globals.RESULTS_DATASET_NAME,
            split="train",
            token=os.getenv("HF_TOKEN")
        )

        # Core fields are indexed directly (errors land in the except
        # below); later-added fields use .get() for older rows.
        core_fields = ("model", "provider", "last_run", "status",
                       "current_score", "previous_score", "job_id")
        for row in dataset:
            record = {field: row[field] for field in core_fields}
            record["start_time"] = row.get("start_time")
            record["duration"] = row.get("duration")
            record["completed_at"] = row.get("completed_at")
            record["runs"] = row.get("runs", [])
            record["score_variance"] = row.get("score_variance")

            key = globals.get_model_provider_key(row["model"], row["provider"])
            globals.job_results[key] = record

        print(f"Loaded {len(globals.job_results)} results from dataset")

    except Exception as e:
        print(f"No existing dataset found or error loading: {e}")
        print("Starting with empty results")
| |
|
def style_status(val):
    """Style function for status column.

    Maps a job status string to an inline CSS background color;
    unknown statuses get no styling.
    """
    status_colors = {
        "COMPLETED": 'background-color: green',
        "ERROR": 'background-color: red',
        "RUNNING": 'background-color: blue',
    }
    return status_colors.get(val, '')
| |
|
def get_summary_stats():
    """Get summary statistics of job results.

    Returns a one-line markdown summary of total / running / completed /
    failed job counts, or a placeholder when no jobs exist yet.
    """
    if not globals.job_results:
        return "📊 **Status:** No jobs yet"

    statuses = [info.get("status") for info in globals.job_results.values()]
    total = len(statuses)
    running = statuses.count("RUNNING")
    completed = statuses.count("COMPLETED")
    failed = sum(1 for s in statuses if s in ["ERROR", "FAILED"])

    return f"📊 **Total:** {total} | 🔵 **Running:** {running} | ✅ **Completed:** {completed} | ❌ **Failed:** {failed}"
| |
|
| |
|
# Column headers shared by the empty and populated tables so the Gradio
# DataFrame layout stays consistent. (Previously the empty table had 11
# columns ending in "Latest Job Id" while the populated one had 12 with
# "Job Id and Logs" and "Actions" — a schema mismatch.)
_RESULTS_COLUMNS = [
    "Model", "Provider", "Runs", "Last Run", "Status", "Mean Score",
    "Variance", "Previous Score", "Duration", "Completed At",
    "Job Id and Logs", "Actions",
]


def _format_metric(value, spec):
    """Format a numeric value with format spec *spec*; None/missing -> 'N/A',
    other non-numeric values pass through unchanged."""
    if isinstance(value, (int, float)):
        return format(value, spec)
    # Fix: a stored None used to be rendered literally as "None".
    return "N/A" if value is None else value


def _format_duration(duration):
    """Render a duration in seconds as 'Xm Ys'; non-numeric/None -> 'N/A'."""
    if isinstance(duration, (int, float)):
        return f"{int(duration // 60)}m {int(duration % 60)}s"
    return "N/A"


def get_results_table():
    """Return job results as a styled pandas DataFrame for Gradio DataFrame.

    One row per (model, provider) entry in globals.job_results, with
    scores/variance/duration pre-formatted as strings, an HTML link to
    the job logs, and the Status column colorized via style_status.
    """
    if not globals.job_results:
        return pd.DataFrame(columns=_RESULTS_COLUMNS)

    table_data = []
    for info in globals.job_results.values():
        # Completed-run counter, e.g. "2/3".
        runs = info.get("runs", [])
        completed_runs = sum(1 for run in runs if run.get("status") == "COMPLETED")
        runs_str = f"{completed_runs}/{len(runs)}" if runs else "0/0"

        # Link to the job page when a job id exists.
        job_id = info.get("job_id", "N/A")
        if job_id != "N/A":
            job_url = f"https://hf.co/jobs/{globals.NAMESPACE}/{job_id}"
            job_link = f'{job_id}: <a href="{job_url}" target="_blank">📄</a> '
        else:
            job_link = job_id

        table_data.append([
            info["model"],
            info["provider"],
            runs_str,
            info["last_run"],
            info["status"],
            _format_metric(info.get("current_score"), ".4f"),
            _format_metric(info.get("score_variance"), ".6f"),
            _format_metric(info.get("previous_score"), ".4f"),
            _format_duration(info.get("duration")),
            info.get("completed_at", "N/A"),
            job_link,
            '🔄 Relaunch',
        ])

    df = pd.DataFrame(table_data, columns=_RESULTS_COLUMNS)

    return df.style.map(style_status, subset=['Status'])
| |
|
| |
|