|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoProcessor, MusicgenForConditionalGeneration |
|
|
import os |
|
|
from pathlib import Path |
|
|
import time |
|
|
import tempfile |
|
|
import numpy as np |
|
|
from scipy.io.wavfile import write |
|
|
|
|
|
|
|
|
# Visual theme for the app: the Soft base theme with a purple/indigo palette,
# the Inter Google font, and enlarged text and spacing. It is handed to
# demo.launch(theme=...) at the bottom of the file.
custom_theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
).set(
    # Per-variable overrides applied on top of the Soft defaults.
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)
|
|
|
|
|
|
|
|
# Hugging Face Hub identifier of the checkpoint loaded by load_model().
MODEL_NAME = "facebook/musicgen-small"

# Directory passed as cache_dir= when downloading the model and processor.
MODEL_CACHE_DIR = Path.home() / ".cache" / "huggingface" / "musicgen"

# NOTE(review): not referenced anywhere in the visible part of this file;
# generate_music() derives its own token budget from the requested duration.
MAX_NEW_TOKENS = 500

# Longest clip, in seconds, advertised in the "Model Info" panel.
AUDIO_DURATION = 240
|
|
|
|
|
|
|
|
def load_model():
    """Load the MusicGen model and its processor, caching files on disk.

    Both objects are fetched with ``from_pretrained`` using ``MODEL_NAME`` and
    ``MODEL_CACHE_DIR``. When CUDA is available the model is loaded in float16
    and moved to the GPU; otherwise it stays in float32 on the CPU.

    Returns:
        tuple: ``(model, processor)``, with the model switched to eval mode.
    """
    # exist_ok=True already tolerates an existing directory, so no separate
    # (race-prone) existence check is needed.
    os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

    print("Loading MusicGen model...")
    start_time = time.perf_counter()

    use_cuda = torch.cuda.is_available()

    processor = AutoProcessor.from_pretrained(
        MODEL_NAME,
        cache_dir=MODEL_CACHE_DIR,
    )

    # The model is placed with a single explicit .to("cuda") below. The
    # previous code also passed device_map="auto" and then moved the model
    # again, which was redundant.
    # NOTE(review): device_map additionally needs the optional `accelerate`
    # package, and accelerate-dispatched models are not meant to be moved
    # with .to() -- confirm against the installed transformers version.
    model = MusicgenForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        cache_dir=MODEL_CACHE_DIR,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    )

    if use_cuda:
        model = model.to("cuda")
    model.eval()

    load_time = time.perf_counter() - start_time
    print(f"Model loaded in {load_time:.2f} seconds")
    return model, processor
|
|
|
|
|
|
|
|
# Load once at import time; generate_music() reads these module-level objects
# on every request instead of reloading the checkpoint.
model, processor = load_model()
|
|
|
|
|
def _to_int16_stereo(audio_data, peak_level=0.9):
    """Turn a mono float waveform into peak-normalized 16-bit stereo PCM.

    Args:
        audio_data: 1-D array of samples, or a 2-D array with a single row.
        peak_level: Fraction of full scale the loudest sample is scaled to.

    Returns:
        ``int16`` array of shape ``(2, n_samples)`` (both channels identical).
    """
    # Work in float32: half-precision values lose resolution once they are
    # scaled up to the int16 range.
    samples = np.asarray(audio_data, dtype=np.float32)

    # Duplicate the mono channel so the file is written as stereo.
    if samples.ndim == 1:
        samples = np.stack([samples, samples], axis=0)
    elif samples.shape[0] == 1:
        samples = np.concatenate([samples, samples], axis=0)

    # Skip normalization for empty or all-zero audio; dividing by a zero peak
    # would fill the buffer with NaN before the int16 cast.
    peak = float(np.max(np.abs(samples))) if samples.size else 0.0
    if peak > 0:
        samples = samples / peak * peak_level

    return (samples * 32767).astype(np.int16)


def generate_music(prompt, duration, temperature, top_k):
    """Generate music from a text prompt with the module-level MusicGen model.

    Args:
        prompt: Text description of the music.
        duration: Requested duration in seconds (5-240).
        temperature: Sampling temperature ("creativity").
        top_k: Top-k sampling cutoff.

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If any step of generation or file writing fails.
    """
    try:
        # The token budget is derived from the requested length at 50 tokens
        # per second of audio.
        # NOTE(review): the Transformers MusicGen docs describe a roughly
        # 30 s (1503-token) generation limit; durations above that may
        # degrade or fail -- confirm before relying on the 240 s maximum.
        tokens_per_second = 50
        max_new_tokens = int(duration * tokens_per_second)

        inputs = processor(
            text=[prompt],
            padding=True,
            return_tensors="pt",
        ).to(model.device)

        audio_values = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=float(temperature),
            # UI sliders may deliver a float; top_k must be an integer.
            top_k=int(top_k),
            use_cache=True,
        )

        sampling_rate = processor.feature_extractor.sampling_rate

        # First batch item, first channel; upcast in case the model ran in
        # float16.
        waveform = audio_values[0, 0].cpu().float().numpy()
        pcm = _to_int16_stereo(waveform)

        # Reserve a unique path, then close the handle before scipy reopens
        # the file by name (an open NamedTemporaryFile cannot be reopened by
        # name on Windows).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            output_path = temp_file.name

        # scipy expects (n_samples, n_channels), hence the transpose.
        write(output_path, sampling_rate, pcm.T)
        return output_path

    except Exception as e:
        print(f"Error generating music: {e}")
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Failed to generate music: {str(e)}") from e
|
|
|
|
|
def music_maker_interface(prompt, duration, temperature, top_k, progress=gr.Progress()):
    """Validate the UI inputs and run music generation.

    Args:
        prompt: Text description of the music; must not be empty.
        duration: Requested length in seconds; must be within 5-240.
        temperature: Sampling temperature forwarded to ``generate_music``.
        top_k: Top-k value forwarded to ``generate_music``.
        progress: Gradio progress tracker. It is declared as a default
            argument so Gradio can bind it to the running event; it is not
            one of the click handler's ``inputs``.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If the prompt is empty or the duration is out of range
            (and for any failure raised by ``generate_music``).
    """
    # `not prompt` also covers None, on which .strip() would fail.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a music description")

    if not 5 <= duration <= 240:
        raise gr.Error("Duration must be between 5 and 240 seconds (4 minutes)")

    # Show the status once instead of sleeping through a simulated progress
    # bar, which only delayed the real work by two seconds.
    progress(0, desc=f"Generating {duration} second music...")

    audio_file = generate_music(prompt, duration, temperature, top_k)

    return audio_file
|
|
|
|
|
|
|
|
# Status line shown while the prompt box is empty.
_DEFAULT_STATUS = "Enter a description and click 'Generate Music' to create your track!"


def _status_for_prompt(prompt_text):
    """Return the status-line text for the current contents of the prompt box."""
    # Fall back to the hint rather than echoing an empty prompt.
    if not prompt_text or not prompt_text.strip():
        return _DEFAULT_STATUS
    return f"Ready to generate music from: '{prompt_text}'"


# UI layout: a header, then one row with an input column (prompt, duration,
# advanced sampling settings, generate button, examples) and an output column
# (audio player, status line, model info).
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🎵 AI Music Maker - Extended Edition

    Create original music from text descriptions using AI! Now with support for songs up to 4 minutes long.

    [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Music Description",
                placeholder="e.g., 'Happy electronic dance music with catchy beats'",
                lines=3,
            )

            duration = gr.Slider(
                minimum=5,
                maximum=240,
                value=30,
                step=5,
                label="Duration (seconds) - Up to 4 minutes!",
            )

            with gr.Accordion("Advanced Settings", open=False):
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Creativity (Temperature)",
                )

                top_k = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=50,
                    step=10,
                    label="Sampling Diversity (Top K)",
                )

            generate_btn = gr.Button("🎵 Generate Music", variant="primary", size="lg")

            # Each example fills only the prompt box.
            gr.Examples(
                examples=[
                    ["Happy electronic dance music with catchy beats and uplifting melodies"],
                    ["Calm piano music for meditation and relaxation"],
                    ["Epic orchestral soundtrack with dramatic strings and powerful brass"],
                    ["Jazz improvisation with saxophone and piano"],
                    ["Rock guitar solo with heavy distortion and fast tempo"],
                ],
                inputs=[prompt],
                label="Try these examples:",
            )

        with gr.Column():
            # type="filepath" matches the temp-file path returned by the
            # click handler.
            audio_output = gr.Audio(
                label="Generated Music",
                type="filepath",
                interactive=False,
                autoplay=True,
            )

            status = gr.Markdown(_DEFAULT_STATUS)
            model_info = gr.Markdown(f"""
            ### Model Info
            - **Model**: MusicGen Small
            - **Cache Location**: `{MODEL_CACHE_DIR}`
            - **Device**: {'CUDA' if torch.cuda.is_available() else 'CPU'}
            - **Max Duration**: {AUDIO_DURATION}s (4 minutes)
            - **Generation Speed**: Optimized for performance
            """)

    # Run generation when the button is clicked.
    generate_btn.click(
        fn=music_maker_interface,
        inputs=[prompt, duration, temperature, top_k],
        outputs=[audio_output],
        api_visibility="public",
    )

    # Keep the status line in sync with the prompt box.
    prompt.change(
        fn=_status_for_prompt,
        inputs=[prompt],
        outputs=[status],
    )
|
|
|
|
|
|
|
|
# Start the web server. The theme and footer links are supplied here at
# launch time; show_error=True surfaces handler errors in the UI, and
# share=True requests a public share link in addition to the local URL.
demo.launch(
    theme=custom_theme,
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "MusicGen Model", "url": "https://huggingface.co/facebook/musicgen-small"},
        {"label": "Gradio", "url": "https://gradio.app"},
    ],
    show_error=True,
    share=True,
)