File size: 4,681 Bytes
359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 5adf0a6 359afe5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import pandas as pd
from src.utils.load_model import run_hubert_base, run_whisper, run_model, run_timit
from src.utils.audio_process import calculate_error_rate, load_audio
from src.utils.cmu_process import clean_cmu, cmu_to_ipa
def set_output(model, pre_pho, ref_pho, duration, per, score):
    """Package one model's benchmark measurements into a result dict.

    Keys match the column names consumed downstream by pandas
    (``model``, ``phonemes``, ``ref_phonemes``, ``duration``, ``PER``, ``score``).
    """
    record = dict(
        model=model,
        phonemes=pre_pho,
        ref_phonemes=ref_pho,
        duration=duration,
        PER=per,
        score=score,
    )
    return record
# Map model names to their runner functions
# Each runner takes a waveform and returns (predicted_phonemes, duration);
# see get_output, which unpacks the pair — TODO confirm against src.utils.load_model.
MODEL_RUNNERS = {
    "HuBERT-Base": run_hubert_base,
    # "Whisper": run_whisper,  # disabled; re-enable together with the call in benchmark_all
    "HuBERT fine-tuned": run_model,
    "Timit": run_timit
}
def get_output(model, wav, reference_phoneme):
    """
    Run the named model on *wav*, score it against *reference_phoneme*,
    and return the formatted result dict.

    Raises ValueError when *model* is not registered in MODEL_RUNNERS.
    """
    if model not in MODEL_RUNNERS:
        raise ValueError(f"Unknown model: {model}")
    runner = MODEL_RUNNERS[model]
    predicted, elapsed = runner(wav)
    error_rate, match_score = calculate_error_rate(reference_phoneme, predicted)
    return set_output(model, predicted, reference_phoneme, elapsed, error_rate, match_score)
def benchmark_all(example):
    """
    Benchmark every enabled model on a single dataset example.

    Returns a DataFrame with one row per model.
    """
    # Load waveform manually to avoid datasets' torchcodec dependency
    waveform = load_audio(example["audio"])
    reference = cmu_to_ipa(clean_cmu(example["phonetic"]))
    # Run all models
    model_names = [
        "HuBERT-Base",
        # "Whisper",
        "HuBERT fine-tuned",
        "Timit",
    ]
    rows = [get_output(name, waveform, reference) for name in model_names]
    return pd.DataFrame(rows)
def benchmark_dataset(dataset):
    """
    Run benchmark_all on each sample and compute average PER and duration per model.

    Parameters
    ----------
    dataset : iterable
        Iterable of dataset examples accepted by ``benchmark_all``.

    Returns
    -------
    tuple(pandas.DataFrame, pandas.DataFrame)
        ``(full_df, avg_stats)``: all per-sample rows, and the per-model
        averages with columns "Average PER" and "Average Duration (s)".

    Raises
    ------
    ValueError
        If *dataset* yields no samples (previously this surfaced as an
        obscure ``pd.concat`` "No objects to concatenate" error).
    """
    all_results = [benchmark_all(example) for example in dataset]
    if not all_results:
        raise ValueError("benchmark_dataset received an empty dataset")
    full_df = pd.concat(all_results, ignore_index=True)
    # Compute average PER and duration per model
    avg_stats = (
        full_df.groupby("model")[["PER", "duration"]]
        .mean()
        .reset_index()
        .rename(columns={"PER": "Average PER", "duration": "Average Duration (s)"})
    )
    return full_df, avg_stats
from datasets import load_dataset, Audio
def main():
    """Run the phoneme-ASR benchmark and save per-model leaderboard JSON files."""
    import json
    import os
    import time

    dataset = load_dataset("mirfan899/phoneme_asr", split="train")
    # Disable automatic audio decoding to avoid torchcodec requirement
    dataset = dataset.cast_column("audio", Audio(decode=False))
    field = "phonetic"
    # Use a set so the membership test in the filter below is O(1) per row
    # instead of a linear scan of a list.
    unique_texts = set(dataset.unique(field))
    print("Unique phonetic strings:", len(unique_texts))
    # NOTE(review): unique() returns every distinct value, so this filter keeps
    # all rows — it does NOT deduplicate the dataset. Kept to preserve behavior;
    # confirm whether true deduplication was intended.
    dataset_unique = dataset.filter(lambda x: x[field] in unique_texts)

    def is_valid(example):
        # Keep only samples with at least 10 phoneme tokens.
        return len(example[field].split()) >= 10

    dataset_filtered = dataset_unique.filter(is_valid)
    dataset_final = dataset_filtered.shuffle(seed=42).select(range(min(100, len(dataset_filtered))))
    print(dataset_final)
    print("Final size:", len(dataset_final))
    full_results, avg_stats = benchmark_dataset(dataset_final.select(range(10)))
    print("Average Statistic per model:")
    print(avg_stats)
    # Optional: inspect detailed results
    print(full_results.head())
    # Save results for leaderboard consumption (one JSON per model)
    results_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "eval-results")
    os.makedirs(results_dir, exist_ok=True)
    timestamp = int(time.time())
    for _, row in avg_stats.iterrows():
        model_name = str(row["model"]).replace(" ", "-")
        org_model = f"local/{model_name}"
        # pd.notna also rejects NaN, which `is not None` lets through and which
        # json.dump would serialize as invalid JSON ("NaN") in the leaderboard.
        per = float(row["Average PER"]) if pd.notna(row["Average PER"]) else None
        avg_dur = float(row["Average Duration (s)"]) if pd.notna(row["Average Duration (s)"]) else None
        payload = {
            "config": {
                "model_name": org_model,
                "model_dtype": "float32",
                "model_sha": ""
            },
            "results": {
                # Populate both keys expected by Tasks to avoid NaNs in the leaderboard
                "phoneme_dev": {"per": per, "avg_duration": avg_dur},
                "phoneme_test": {"per": per, "avg_duration": avg_dur}
            }
        }
        out_path = os.path.join(results_dir, f"results_{timestamp}_{model_name}.json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)
        print(f"Saved leaderboard result: {out_path}")
# Script entry point: run the full benchmark only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|