Spaces:

HaoVuong
/

MedicalASR

Sleeping

HaoVuong committed on Nov 27, 2025

Commit

31e8df9

1 Parent(s): 1889f8a

Add app and requirements

Files changed (2) hide show

app.py ADDED Viewed

+import torch
+import gradio as gr
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+device = "cpu"
+torch_dtype = torch.float32
+fine_tuned_model_id = "leduckhai/MultiMed-ST"
+fine_tuned_subfolder = "asr/whisper-small-english/checkpoint"
+print("Loading model on CPU... this may take a moment.")
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    fine_tuned_model_id,
+    subfolder=fine_tuned_subfolder,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True
+).to(device)
+processor = AutoProcessor.from_pretrained("openai/whisper-small")
+asr_pipeline = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=30,
+    batch_size=16,
+    return_timestamps=True,
+    torch_dtype=torch_dtype,
+    device=device
+)
+def transcribe_audio(audio_path):
+    if audio_path is None:
+        return "No audio found."
+    print(f"Transcribing: {audio_path}")
+    result = asr_pipeline(audio_path, generate_kwargs={"language": "en", "task": "transcribe"})
+    return result['text']
+demo = gr.Interface(
+    fn=transcribe_audio,
+    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+    outputs="text",
+    title="Medical Whisper ASR (CPU Mode)",
+    description="Running on CPU. Processing might take a few seconds."
+)
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

+torch
+transformers
+accelerate
+librosa
+scipy