# Music-gen / app.py
import os
import tempfile
import time
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from scipy.io.wavfile import write
from transformers import AutoProcessor, MusicgenForConditionalGeneration
# Custom theme for the music maker
custom_theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md"
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)
# Model configuration
MODEL_NAME = "facebook/musicgen-small"
MODEL_CACHE_DIR = Path.home() / ".cache" / "huggingface" / "musicgen"
TOKENS_PER_SECOND = 50  # MusicGen emits roughly 50 audio tokens per second of audio
MAX_NEW_TOKENS = 1500  # cap on generated tokens (~30 seconds of audio)
AUDIO_DURATION = 10  # default duration in seconds

# Initialize model and processor
def load_model():
    """Load the MusicGen model and processor, caching weights locally."""
    os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
    print("Loading MusicGen model...")
    start_time = time.time()

    # Load the processor (wraps the text tokenizer that conditions MusicGen)
    processor = AutoProcessor.from_pretrained(
        MODEL_NAME,
        cache_dir=MODEL_CACHE_DIR
    )

    # Load the text-to-audio model; use half precision when a GPU is available
    model = MusicgenForConditionalGeneration.from_pretrained(
        MODEL_NAME,
        cache_dir=MODEL_CACHE_DIR,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
    )
    if torch.cuda.is_available():
        model = model.to("cuda")

    load_time = time.time() - start_time
    print(f"Model loaded in {load_time:.2f} seconds")
    return model, processor

# Global model and processor, loaded once at startup
model, processor = load_model()

def generate_music(prompt, duration, temperature, top_k):
    """
    Generate music from a text prompt using the MusicGen model.

    Args:
        prompt: Text description of the music
        duration: Target duration in seconds
        temperature: Sampling temperature (higher = more varied output)
        top_k: Top-k sampling parameter

    Returns:
        Path to the generated WAV file
    """
    try:
        # Tokenize the text prompt for the conditioning text encoder
        inputs = processor(
            text=[prompt],
            padding=True,
            return_tensors="pt"
        ).to(model.device)

        # MusicGen emits roughly TOKENS_PER_SECOND audio tokens per second of
        # audio, so turn the requested duration into a (capped) token budget.
        max_new_tokens = min(int(duration) * TOKENS_PER_SECOND, MAX_NEW_TOKENS)

        # Generate the waveform; MusicGen decodes its audio tokens internally
        with torch.no_grad():
            audio_values = model.generate(
                **inputs,
                do_sample=True,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_k=int(top_k)
            )

        # audio_values has shape (batch, channels, samples); take the first clip
        sampling_rate = model.config.audio_encoder.sampling_rate
        audio_data = audio_values[0, 0].cpu().float().numpy()

        # Convert to 16-bit PCM for broad player compatibility
        audio_data = np.clip(audio_data, -1.0, 1.0)
        audio_data = (audio_data * 32767).astype(np.int16)

        # Write to a temporary WAV file that Gradio can serve
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            write(temp_file.name, sampling_rate, audio_data)
            return temp_file.name
    except Exception as e:
        print(f"Error generating music: {e}")
        raise gr.Error(f"Failed to generate music: {e}")
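
# Optional smoke test: setting the RUN_SMOKE_TEST environment variable (a
# convenience hook assumed here, not something the app requires) to "1" runs
# generate_music() once outside the UI and prints the resulting file path,
# which is handy for checking the model before wiring up the Gradio events.
if os.environ.get("RUN_SMOKE_TEST") == "1":
    _test_path = generate_music(
        "Calm piano music for relaxation", duration=5, temperature=0.7, top_k=50
    )
    print(f"Smoke test wrote audio to {_test_path}")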

def music_maker_interface(prompt, duration, temperature, top_k, progress=gr.Progress()):
    """
    Main interface function for music generation.
    """
    if not prompt.strip():
        raise gr.Error("Please enter a music description")
    if duration < 5 or duration > 30:
        raise gr.Error("Duration must be between 5 and 30 seconds")

    # Report progress while the model runs (Gradio injects `progress` automatically)
    progress(0, desc="Generating music...")
    audio_file = generate_music(prompt, duration, temperature, top_k)
    progress(1.0, desc="Done")
    return audio_file

# Create Gradio interface
with gr.Blocks(theme=custom_theme) as demo:
    gr.Markdown("""
    # 🎡 AI Music Maker
    Create original music from text descriptions using AI! Powered by Hugging Face MusicGen.

    [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
    """)
    with gr.Row():
        with gr.Column():
            # Input controls
            prompt = gr.Textbox(
                label="Music Description",
                placeholder="e.g., 'Happy electronic dance music with catchy beats'",
                lines=3
            )
            duration = gr.Slider(
                minimum=5,
                maximum=30,
                value=10,
                step=1,
                label="Duration (seconds)"
            )
            with gr.Accordion("Advanced Settings", open=False):
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Creativity (Temperature)"
                )
                top_k = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=50,
                    step=10,
                    label="Sampling Diversity (Top K)"
                )
            generate_btn = gr.Button("🎡 Generate Music", variant="primary", size="lg")

            # Examples
            gr.Examples(
                examples=[
                    ["Happy electronic dance music with catchy beats and uplifting melodies"],
                    ["Calm piano music for meditation and relaxation"],
                    ["Epic orchestral soundtrack with dramatic strings and powerful brass"],
                    ["Jazz improvisation with saxophone and piano"],
                    ["Rock guitar solo with heavy distortion and fast tempo"]
                ],
                inputs=[prompt],
                label="Try these examples:"
            )
        with gr.Column():
            # Output
            audio_output = gr.Audio(
                label="Generated Music",
                type="filepath",
                interactive=False,
                autoplay=True
            )

            # Status and info
            status = gr.Markdown("Enter a description and click 'Generate Music' to create your track!")
            model_info = gr.Markdown(f"""
            ### Model Info
            - **Model**: MusicGen Small
            - **Cache Location**: `{MODEL_CACHE_DIR}`
            - **Device**: {'CUDA' if torch.cuda.is_available() else 'CPU'}
            - **Default Duration**: {AUDIO_DURATION}s
            """)

    # Event handlers
    generate_btn.click(
        fn=music_maker_interface,
        inputs=[prompt, duration, temperature, top_k],
        outputs=[audio_output],
        api_name="generate_music"
    )

    # Update status when inputs change
    prompt.change(
        fn=lambda p: f"Ready to generate music from: '{p}'",
        inputs=[prompt],
        outputs=[status]
    )

    # Footer links
    gr.Markdown(
        "[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder) · "
        "[MusicGen Model](https://huggingface.co/facebook/musicgen-small) · "
        "[Gradio](https://gradio.app)"
    )
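
# Optional: enable Gradio's request queue so long-running generations are not cut
# off by request timeouts; the default queue settings are used here and can be
# tuned (e.g. concurrency) for the available hardware.
demo.queue()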
# Launch the app
demo.launch(
    show_error=True,
    share=True
)