Spaces:
Running
Running
Refactor: Move translation and TTS code from notebook to separate scripts
Browse files- Voice2VoiceTranslation.ipynb +0 -0
- __pycache__/my_translate.cpython-311.pyc +0 -0
- __pycache__/my_tts.cpython-311.pyc +0 -0
- transcribe.py → my_transcribe.py +0 -0
- my_translate.py +26 -0
- my_tts.py +38 -0
Voice2VoiceTranslation.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
__pycache__/my_translate.cpython-311.pyc
ADDED
|
Binary file (1.65 kB). View file
|
|
|
__pycache__/my_tts.cpython-311.pyc
ADDED
|
Binary file (1.86 kB). View file
|
|
|
transcribe.py → my_transcribe.py
RENAMED
|
File without changes
|
my_translate.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argostranslate.package
|
| 2 |
+
import argostranslate.translate
|
| 3 |
+
|
| 4 |
+
def translate_text(text, from_lang="en", to_lang="hi"):
    """
    Translate text between two languages using Argos Translate.

    Installs the required language pack on first use; already-installed
    packs are reused, so repeat calls avoid the network round-trip of
    re-fetching the package index and re-downloading the pack.

    Args:
        text (str): Text to translate.
        from_lang (str): Source language code (default: "en").
        to_lang (str): Target language code (default: "hi").

    Returns:
        str: Translated text.

    Raises:
        ValueError: If no Argos Translate package exists for the
            requested language pair.
    """
    # Only hit the network when this language pair is not installed yet.
    installed = argostranslate.package.get_installed_packages()
    if not any(p.from_code == from_lang and p.to_code == to_lang for p in installed):
        argostranslate.package.update_package_index()
        available_packages = argostranslate.package.get_available_packages()
        # Use a default instead of bare next() so a missing pair raises a
        # clear error rather than an opaque StopIteration.
        package = next(
            (
                p
                for p in available_packages
                if p.from_code == from_lang and p.to_code == to_lang
            ),
            None,
        )
        if package is None:
            raise ValueError(
                f"No Argos Translate package available for {from_lang!r} -> {to_lang!r}"
            )
        argostranslate.package.install_from_path(package.download())

    return argostranslate.translate.translate(text, from_lang, to_lang)
my_tts.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import BarkModel, AutoProcessor
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
    """
    Convert text to speech using the Bark model.

    Args:
        text (str): Text to convert to speech.
        voice_preset (str): Voice preset to use for the speech synthesis
            (default: "v2/hi_speaker_2").

    Returns:
        tuple: ``(speech_output, sampling_rate)`` where ``speech_output``
        is the generated audio tensor from ``model.generate`` and
        ``sampling_rate`` is its sample rate in Hz.
    """
    # Check if CUDA is available and set device accordingly.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # Load the model and processor, moving the model to the target device.
    # NOTE(review): the model checkpoint is "suno/bark-small" while the
    # processor is loaded from "suno/bark" — confirm the mismatch is
    # intentional; normally both should come from the same checkpoint.
    model = BarkModel.from_pretrained("suno/bark-small").to(device)
    processor = AutoProcessor.from_pretrained("suno/bark")

    # Prepare the inputs once and move every tensor to the device.
    # (The original built `inputs` twice — the first processor call and
    # device-move loop were discarded work; only the second was used.)
    inputs = processor(text, voice_preset=voice_preset)
    for key in inputs:
        inputs[key] = inputs[key].to(device)

    # Generate speech and report the model's configured sample rate.
    speech_output = model.generate(**inputs)
    sampling_rate = model.generation_config.sample_rate

    return speech_output, sampling_rate