HaoVuong committed on
Commit 31e8df9 · 1 Parent(s): 1889f8a

Add app and requirements

Files changed (2)
  1. app.py +53 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,53 @@
+ import torch
+ import gradio as gr
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+ device = "cpu"
+ torch_dtype = torch.float32
+
+ fine_tuned_model_id = "leduckhai/MultiMed-ST"
+ fine_tuned_subfolder = "asr/whisper-small-english/checkpoint"
+
+ print("Loading model on CPU... this may take a moment.")
+
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
+     fine_tuned_model_id,
+     subfolder=fine_tuned_subfolder,
+     torch_dtype=torch_dtype,
+     low_cpu_mem_usage=True,
+     use_safetensors=True
+ ).to(device)
+
+ processor = AutoProcessor.from_pretrained("openai/whisper-small")
+
+ asr_pipeline = pipeline(
+     "automatic-speech-recognition",
+     model=model,
+     tokenizer=processor.tokenizer,
+     feature_extractor=processor.feature_extractor,
+     max_new_tokens=128,
+     chunk_length_s=30,
+     batch_size=16,
+     return_timestamps=True,
+     torch_dtype=torch_dtype,
+     device=device
+ )
+
+ def transcribe_audio(audio_path):
+     if audio_path is None:
+         return "No audio found."
+
+     print(f"Transcribing: {audio_path}")
+     result = asr_pipeline(audio_path, generate_kwargs={"language": "en", "task": "transcribe"})
+     return result["text"]
+
+ demo = gr.Interface(
+     fn=transcribe_audio,
+     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
+     outputs="text",
+     title="Medical Whisper ASR (CPU Mode)",
+     description="Running on CPU. Processing might take a few seconds."
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch
+ transformers
+ accelerate
+ librosa
+ scipy
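Note that gradio itself is not listed: on a Hugging Face Space the Gradio version is supplied by the Space's SDK metadata rather than requirements.txt. accelerate is needed because app.py loads the model with low_cpu_mem_usage=True, while librosa and scipy are presumably there for audio handling. A quick environment sanity check, as an illustrative sketch:

    # Illustrative sanity check: confirm the stack imports and that the runtime
    # matches app.py's CPU assumptions.
    import torch, transformers, gradio

    print("torch", torch.__version__, "| transformers", transformers.__version__, "| gradio", gradio.__version__)
    print("CUDA available:", torch.cuda.is_available())  # expected: False on a CPU Space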