What is AI Music Generation?
AI music generation uses machine learning models to create original music from text descriptions, continue melodies, or generate accompaniments. Modern models can produce high-quality music in a wide range of genres; note that licensing varies by model (MusicGen's pretrained weights, for example, are released for non-commercial use), so check the terms before treating output as royalty-free.
Key Capabilities:
- Text-to-Music: Generate music from descriptions
- Melody Continuation: Complete musical ideas
- Style Transfer: Convert music between genres
- Accompaniment: Generate harmony and rhythm
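All of the examples in this section use Meta's open-source AudioCraft library, which ships MusicGen. A minimal environment setup (a sketch; pin versions as needed for your platform):
pip install audiocraft  # installs MusicGen plus its torch/torchaudio dependencies
AudioCraft pulls in PyTorch; a CUDA-capable GPU is strongly recommended for the medium and large checkpoints, though short clips can be generated (slowly) on CPU.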
🎹 MusicGen by Meta
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import torch
# Load MusicGen model
# Available sizes: small (300M), medium (1.5B), large (3.3B)
model = MusicGen.get_pretrained('facebook/musicgen-medium')
print("MusicGen Model Loaded!")
print(f"Model size: medium (1.5B parameters)")
print(f"Sample rate: {model.sample_rate} Hz")
print(f"Max duration: 30 seconds per generation")
# Configure generation parameters
model.set_generation_params(
duration=10, # seconds
temperature=1.0, # creativity (0.1-1.5)
top_k=250, # sampling diversity
    top_p=0.0, # nucleus sampling (0.0 disables it; top_k is used instead)
cfg_coef=3.0, # classifier-free guidance strength
)
print("\nGeneration parameters configured!")
🎼 Text-to-Music Generation
def generate_music(prompt, duration=10, output_name="generated_music"):
"""
Generate music from text description
Args:
prompt: Text description of desired music
duration: Length in seconds
output_name: Output filename
"""
model.set_generation_params(duration=duration)
print(f"Generating: {prompt}")
print(f"Duration: {duration} seconds")
# Generate music
wav = model.generate([prompt])
# Save audio file
for idx, one_wav in enumerate(wav):
audio_write(
f'{output_name}_{idx}',
one_wav.cpu(),
model.sample_rate,
strategy="loudness",
loudness_compressor=True
)
print(f"Saved to: {output_name}_0.wav")
return wav
# Example generations
examples = [
"upbeat electronic dance music with energetic drums and synth melodies",
"calm acoustic guitar with soft piano, peaceful and relaxing",
"epic orchestral soundtrack with dramatic strings and powerful brass",
"lo-fi hip hop beat with mellow jazz piano and vinyl crackle",
"80s synthwave with retro drums and nostalgic synth pads",
]
# Generate from first prompt
music = generate_music(
examples[0],
duration=15,
output_name="edm_track"
)
print("\nā Music generated successfully!")
print("\nTry these other prompts:")
for i, prompt in enumerate(examples[1:], 1):
print(f" {i}. {prompt}")
🎸 Melody Conditioning
import torchaudio
# Load melody-conditioned model
melody_model = MusicGen.get_pretrained('facebook/musicgen-melody')
def generate_with_melody(prompt, melody_path, duration=10):
"""
Generate music that follows a melody
Args:
prompt: Text description for style
melody_path: Path to melody audio file
duration: Output duration
"""
# Load melody audio
melody, sr = torchaudio.load(melody_path)
# Resample if needed
if sr != melody_model.sample_rate:
resampler = torchaudio.transforms.Resample(sr, melody_model.sample_rate)
melody = resampler(melody)
# Generate conditioned on melody
melody_model.set_generation_params(duration=duration)
wav = melody_model.generate_with_chroma(
descriptions=[prompt],
melody_wavs=melody[None],
melody_sample_rate=melody_model.sample_rate,
progress=True
)
# Save output
audio_write(
'melody_conditioned',
wav[0].cpu(),
melody_model.sample_rate,
strategy="loudness"
)
print("Generated music following melody structure!")
return wav
# Example: Add accompaniment to melody
result = generate_with_melody(
prompt="classical piano with rich harmonies and gentle strings",
melody_path="simple_melody.wav",
duration=12
)
print("Melody conditioning allows you to guide musical structure!")
🎛️ Batch Generation
def generate_variations(base_prompt, num_variations=5, duration=10):
"""Generate multiple variations of a musical idea"""
prompts = [base_prompt] * num_variations
# Generate all variations in batch
wav = model.generate(prompts)
# Save each variation
for idx, one_wav in enumerate(wav):
audio_write(
f'variation_{idx+1}',
one_wav.cpu(),
model.sample_rate,
strategy="loudness"
)
print(f"Saved variation {idx+1}")
print(f"\nā Generated {num_variations} variations!")
return wav
# Generate variations
variations = generate_variations(
base_prompt="jazzy lounge music with smooth saxophone and gentle piano",
num_variations=3,
duration=8
)
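The batch above reuses identical settings, so the variations differ only by sampling noise. For more deliberately different takes, you can sweep the temperature (a sketch reusing the `model` loaded earlier; the range is illustrative):
def generate_temperature_sweep(prompt, temperatures=(0.7, 1.0, 1.3), duration=8):
    """Generate one take per temperature to explore the creativity range."""
    for temp in temperatures:
        model.set_generation_params(duration=duration, temperature=temp)
        wav = model.generate([prompt])
        audio_write(f'sweep_temp_{temp}', wav[0].cpu(), model.sample_rate,
                    strategy="loudness")
        print(f"Saved take at temperature={temp}")

generate_temperature_sweep("jazzy lounge music with smooth saxophone and gentle piano")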
def generate_music_library(prompts, output_dir="music_library"):
"""Generate a library of music tracks"""
import os
os.makedirs(output_dir, exist_ok=True)
results = []
for i, prompt in enumerate(prompts):
print(f"\n[{i+1}/{len(prompts)}] Generating: {prompt[:50]}...")
# Generate
wav = model.generate([prompt])
# Create filename from prompt
filename = f"track_{i+1:02d}"
filepath = os.path.join(output_dir, filename)
# Save
audio_write(
filepath,
wav[0].cpu(),
model.sample_rate,
strategy="loudness"
)
results.append({
'track_number': i + 1,
'prompt': prompt,
'filename': f"{filename}.wav",
            'duration': model.duration  # as set via set_generation_params (duration is stored as an attribute, not in generation_params)
})
# Save metadata
import json
with open(os.path.join(output_dir, 'library_metadata.json'), 'w') as f:
json.dump(results, f, indent=2)
print(f"\nā Generated {len(prompts)} tracks in '{output_dir}/'")
return results
# Create music library
library_prompts = [
"upbeat pop with catchy melody and energetic drums",
"dark ambient with atmospheric pads and deep bass",
"cheerful ukulele with whistling and hand claps",
"epic trailer music with dramatic percussion and horns",
]
library = generate_music_library(library_prompts)
print(f"\nMusic library ready with {len(library)} tracks!")
🎨 Style Transfer
def style_transfer_music(input_audio, target_style, duration=10):
"""
Transfer musical style while preserving structure
Uses melody conditioning with style prompt
"""
# Load input audio
audio, sr = torchaudio.load(input_audio)
# Resample if needed
if sr != melody_model.sample_rate:
resampler = torchaudio.transforms.Resample(sr, melody_model.sample_rate)
audio = resampler(audio)
# Generate with target style
wav = melody_model.generate_with_chroma(
descriptions=[target_style],
melody_wavs=audio[None],
melody_sample_rate=melody_model.sample_rate,
progress=True
)
# Save result
output_name = f"style_transfer_{target_style.split()[0]}"
audio_write(
output_name,
wav[0].cpu(),
melody_model.sample_rate,
strategy="loudness"
)
print(f"Style transferred to: {target_style}")
return wav
# Example: Convert to different styles
original_song = "pop_song.wav"
styles = [
"classical orchestra with strings and piano",
"heavy metal with distorted guitars and drums",
"reggae with offbeat rhythm and bass",
"baroque harpsichord with counterpoint",
]
for style in styles:
print(f"\nTransferring to: {style}")
style_transfer_music(original_song, style, duration=12)
print("\nā Created multiple style variations!")
🎵 Music Theory Integration
import pretty_midi
import numpy as np
class MusicTheoryHelper:
"""Helper for music theory-based generation"""
SCALES = {
'major': [0, 2, 4, 5, 7, 9, 11],
'minor': [0, 2, 3, 5, 7, 8, 10],
'pentatonic': [0, 2, 4, 7, 9],
'blues': [0, 3, 5, 6, 7, 10],
'chromatic': list(range(12)),
}
CHORD_PROGRESSIONS = {
'pop': ['I', 'V', 'vi', 'IV'], # Very common
        'jazz': ['ii', 'V', 'I', 'VI'], # ii-V-I with a VI7 turnaround back to ii
'blues': ['I', 'I', 'I', 'I', 'IV', 'IV', 'I', 'I', 'V', 'IV', 'I', 'V'],
'sad': ['vi', 'IV', 'I', 'V'],
}
@staticmethod
def create_midi_melody(notes, durations, tempo=120, output_file='melody.mid'):
"""Create MIDI file from notes"""
midi = pretty_midi.PrettyMIDI(initial_tempo=tempo)
instrument = pretty_midi.Instrument(program=0) # Piano
time = 0
for note, duration in zip(notes, durations):
midi_note = pretty_midi.Note(
velocity=100,
pitch=note,
start=time,
end=time + duration
)
instrument.notes.append(midi_note)
time += duration
midi.instruments.append(instrument)
midi.write(output_file)
print(f"Created MIDI: {output_file}")
return output_file
@staticmethod
def generate_prompt_with_theory(key='C', scale='major',
tempo='moderate', mood='happy',
instruments=None):
"""Generate detailed prompt using music theory"""
if instruments is None:
instruments = ['piano', 'strings']
tempo_desc = {
'slow': 'slow tempo, relaxed',
'moderate': 'moderate tempo',
'fast': 'fast tempo, energetic',
}
mood_desc = {
'happy': 'bright, cheerful, uplifting',
'sad': 'melancholic, emotional, gentle',
'energetic': 'powerful, driving, intense',
'calm': 'peaceful, serene, soothing',
}
prompt = f"{key} {scale} scale, {tempo_desc[tempo]}, "
prompt += f"{mood_desc[mood]}, "
prompt += f"featuring {', '.join(instruments)}"
return prompt
# Use music theory for generation
theory_helper = MusicTheoryHelper()
# Generate with music theory constraints
theory_prompt = theory_helper.generate_prompt_with_theory(
key='D',
scale='minor',
tempo='slow',
mood='sad',
instruments=['piano', 'cello', 'violin']
)
print(f"Theory-based prompt: {theory_prompt}")
music = generate_music(theory_prompt, duration=15, output_name="theory_music")
# Create and use MIDI melody
notes = [60, 62, 64, 65, 67, 69, 71, 72] # C major scale
durations = [0.5] * 8
midi_file = theory_helper.create_midi_melody(notes, durations)
print("\nMusic theory helps create more intentional compositions!")
🔧 Advanced Applications
class MusicGenerationApp:
"""Production-ready music generation application"""
def __init__(self, model_size='medium'):
self.model = MusicGen.get_pretrained(f'facebook/musicgen-{model_size}')
self.generated_tracks = []
def generate_for_video(self, video_description, duration, mood='neutral'):
"""Generate background music for video content"""
mood_modifiers = {
'neutral': '',
'upbeat': 'energetic, uplifting, positive',
'dramatic': 'epic, intense, powerful',
'calm': 'peaceful, ambient, relaxing',
'suspenseful': 'mysterious, tense, dark',
}
prompt = f"{video_description}, {mood_modifiers[mood]}"
self.model.set_generation_params(duration=duration)
wav = self.model.generate([prompt])
filename = f"video_bg_{mood}_{len(self.generated_tracks)}"
audio_write(filename, wav[0].cpu(), self.model.sample_rate)
self.generated_tracks.append({
'filename': f"{filename}.wav",
'prompt': prompt,
'duration': duration,
'mood': mood
})
return filename
    def generate_loop(self, prompt, loop_duration=8, num_loops=4):
        """Generate looping music by tiling one clip (see the crossfade sketch after the usage example below)"""
        # Generate base loop
        self.model.set_generation_params(duration=loop_duration)
        wav = self.model.generate([prompt + ", seamless loop"])
        # Tile along the time axis; note a plain repeat hard-cuts at each seam
        looped = wav.repeat(1, 1, num_loops)
filename = "looped_music"
audio_write(filename, looped[0].cpu(), self.model.sample_rate)
print(f"Created {num_loops} loops of {loop_duration}s")
return filename
def generate_with_fadeout(self, prompt, duration=20, fadeout_duration=5):
"""Generate music with smooth fadeout"""
self.model.set_generation_params(duration=duration)
wav = self.model.generate([prompt])
# Apply fadeout
fadeout_samples = int(fadeout_duration * self.model.sample_rate)
        fade_curve = torch.linspace(1, 0, fadeout_samples, device=wav.device)  # keep the fade on the same device as the audio
wav[0, :, -fadeout_samples:] *= fade_curve
filename = "music_with_fadeout"
audio_write(filename, wav[0].cpu(), self.model.sample_rate)
return filename
def export_metadata(self, output_file='music_metadata.json'):
"""Export generation metadata"""
import json
with open(output_file, 'w') as f:
json.dump(self.generated_tracks, f, indent=2)
print(f"Metadata saved to {output_file}")
# Example usage
app = MusicGenerationApp(model_size='medium')
# Generate for different video scenarios
video_tracks = [
("tech product showcase", 30, "upbeat"),
("nature documentary", 45, "calm"),
("action movie trailer", 30, "dramatic"),
]
for description, duration, mood in video_tracks:
track = app.generate_for_video(description, duration, mood)
print(f"ā Generated: {track}")
# Generate loop for game
app.generate_loop(
"medieval fantasy tavern music with lute and flute",
loop_duration=10,
num_loops=6
)
# Export metadata
app.export_metadata()
print("\nā Music generation application ready!")
🎼 Model Comparison
| Model | Parameters | Quality | Speed | Best For |
|---|---|---|---|---|
| MusicGen Small | 300M | Good | Fast | Quick iterations |
| MusicGen Medium | 1.5B | Very Good | Moderate | Balanced |
| MusicGen Large | 3.3B | Excellent | Slow | Production quality |
| MusicGen Melody | 1.5B | Very Good | Moderate | Melody conditioning |
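The speed labels above are qualitative. For numbers on your own hardware, a quick timing sketch (model names as published on the Hugging Face Hub; expect a one-time download per checkpoint):
import time

def time_generation(model_name, prompt="upbeat electronic music", duration=5):
    """Rough wall-clock timing for one short generation."""
    m = MusicGen.get_pretrained(model_name)
    m.set_generation_params(duration=duration)
    start = time.time()
    m.generate([prompt])
    print(f"{model_name}: {time.time() - start:.1f}s for {duration}s of audio")

for name in ['facebook/musicgen-small', 'facebook/musicgen-medium']:
    time_generation(name)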
🎯 Key Takeaways
- MusicGen generates high-quality music across many genres (check the checkpoint license before commercial use)
- Text-to-music enables creation from descriptions
- Melody conditioning guides musical structure
- Batch generation creates variations efficiently
- Style transfer reimagines music in different genres
- Production-ready for games, videos, and apps
- Customizable through temperature and guidance
- Ethical use: respect copyright, licensing, and attribution