rohitptnk committed on
Commit
c5e3ece
·
1 Parent(s): beb715c

Refactor: Move translation and TTS code from notebook to separate scripts

Browse files
Voice2VoiceTranslation.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
__pycache__/my_translate.cpython-311.pyc ADDED
Binary file (1.65 kB). View file
 
__pycache__/my_tts.cpython-311.pyc ADDED
Binary file (1.86 kB). View file
 
transcribe.py → my_transcribe.py RENAMED
File without changes
my_translate.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argostranslate.package
2
+ import argostranslate.translate
3
+
4
def _ensure_language_package(from_lang, to_lang):
    """Install the Argos Translate package for a language pair if missing.

    Args:
        from_lang (str): Source language code.
        to_lang (str): Target language code.

    Raises:
        ValueError: If the package index has no package for the pair.
    """
    def matches(pkg):
        return pkg.from_code == from_lang and pkg.to_code == to_lang

    # Skip the index refresh and download when the pair is already installed;
    # doing that work on every call is slow and needs network access.
    if any(matches(pkg) for pkg in argostranslate.package.get_installed_packages()):
        return

    argostranslate.package.update_package_index()
    available_packages = argostranslate.package.get_available_packages()
    package = next((pkg for pkg in available_packages if matches(pkg)), None)
    if package is None:
        # A bare StopIteration from next() carries no message; raise a
        # descriptive error instead.
        raise ValueError(
            f"No Argos Translate package available for {from_lang!r} -> {to_lang!r}"
        )
    argostranslate.package.install_from_path(package.download())


def translate_text(text, from_lang="en", to_lang="hi"):
    """
    Translate text using Argos Translate.

    The language package for the requested pair is downloaded and installed
    only if it is not already installed.

    Args:
        text (str): Text to translate
        from_lang (str): Source language code (default: "en")
        to_lang (str): Target language code (default: "hi")

    Returns:
        str: Translated text. Empty input yields an empty string without
        touching the package index.

    Raises:
        ValueError: If no language package exists for the requested pair.
    """
    # Nothing to translate: avoid the package lookup and download entirely.
    if not text:
        return ""

    _ensure_language_package(from_lang, to_lang)
    return argostranslate.translate.translate(text, from_lang, to_lang)
my_tts.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import BarkModel, AutoProcessor
2
+ import torch
3
+
4
# Loaded (model, processor) pairs keyed by device string, so the pretrained
# weights are loaded once per process instead of on every call.
_BARK_CACHE = {}


def _load_bark(device):
    """Return the Bark model (moved to *device*) and processor, loading once."""
    if device not in _BARK_CACHE:
        model = BarkModel.from_pretrained("suno/bark-small").to(device)
        # NOTE(review): the processor comes from "suno/bark" while the model is
        # "suno/bark-small"; kept as in the original -- confirm this is intended.
        processor = AutoProcessor.from_pretrained("suno/bark")
        _BARK_CACHE[device] = (model, processor)
    return _BARK_CACHE[device]


def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
    """
    Convert text to speech using Bark model

    Args:
        text (str): Text to convert to speech
        voice_preset (str): Voice preset to use for the speech synthesis

    Returns:
        tuple: ``(speech_output, sampling_rate)`` where ``speech_output`` is
        the value returned by ``model.generate`` (presumably a torch.Tensor
        holding the audio) and ``sampling_rate`` is read from
        ``model.generation_config.sample_rate``.
    """
    # Use the first CUDA device when available, otherwise run on CPU
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    model, processor = _load_bark(device)

    # Prepare the inputs once and move every entry to the model's device
    inputs = processor(text=text, voice_preset=voice_preset)
    for key, value in inputs.items():
        inputs[key] = value.to(device)

    # Generate speech
    speech_output = model.generate(**inputs)
    sampling_rate = model.generation_config.sample_rate

    return speech_output, sampling_rate