🎵 AI Music Generation

Creating music with artificial intelligence

What is AI Music Generation?

AI music generation uses machine learning models to create original music from text descriptions, continue melodies, or generate accompaniments. Modern models produce high-quality audio across many genres, though licensing varies by model, so check the terms before commercial use.

Key Capabilities:

  • Text-to-Music: Generate music from descriptions
  • Melody Continuation: Complete musical ideas
  • Style Transfer: Convert music between genres
  • Accompaniment: Generate harmony and rhythm
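
The examples below use Meta's audiocraft library, which bundles the MusicGen models. It installs from PyPI; a recent PyTorch build and a GPU are strongly recommended for the medium and large checkpoints:

pip install -U audiocraft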

🎹 MusicGen by Meta

from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import torch

# Load MusicGen model
# Available sizes: small (300M), medium (1.5B), large (3.3B)
model = MusicGen.get_pretrained('facebook/musicgen-medium')

print("MusicGen Model Loaded!")
print(f"Model size: medium (1.5B parameters)")
print(f"Sample rate: {model.sample_rate} Hz")
print(f"Max duration: 30 seconds per generation")

# Configure generation parameters
model.set_generation_params(
    duration=10,      # seconds
    temperature=1.0,  # sampling temperature; higher values give more varied output
    top_k=250,        # sample from the 250 most likely tokens
    top_p=0.0,        # nucleus sampling threshold (0 disables it; top_k is used instead)
    cfg_coef=3.0,     # classifier-free guidance strength; higher follows the prompt more closely
)

print("\nGeneration parameters configured!")

🎼 Text-to-Music Generation

def generate_music(prompt, duration=10, output_name="generated_music"):
    """
    Generate music from text description
    
    Args:
        prompt: Text description of desired music
        duration: Length in seconds
        output_name: Output filename
    """
    
    model.set_generation_params(duration=duration)
    
    print(f"Generating: {prompt}")
    print(f"Duration: {duration} seconds")
    
    # Generate music
    wav = model.generate([prompt])
    
    # Save audio file
    for idx, one_wav in enumerate(wav):
        audio_write(
            f'{output_name}_{idx}',
            one_wav.cpu(),
            model.sample_rate,
            strategy="loudness",
            loudness_compressor=True
        )
    
    print(f"Saved to: {output_name}_0.wav")
    return wav

# Example generations
examples = [
    "upbeat electronic dance music with energetic drums and synth melodies",
    "calm acoustic guitar with soft piano, peaceful and relaxing",
    "epic orchestral soundtrack with dramatic strings and powerful brass",
    "lo-fi hip hop beat with mellow jazz piano and vinyl crackle",
    "80s synthwave with retro drums and nostalgic synth pads",
]

# Generate from first prompt
music = generate_music(
    examples[0],
    duration=15,
    output_name="edm_track"
)

print("\nāœ“ Music generated successfully!")
print("\nTry these other prompts:")
for i, prompt in enumerate(examples[1:], 1):
    print(f"  {i}. {prompt}")

🎸 Melody Conditioning

import torchaudio

# Load melody-conditioned model
melody_model = MusicGen.get_pretrained('facebook/musicgen-melody')

def generate_with_melody(prompt, melody_path, duration=10):
    """
    Generate music that follows a melody
    
    Args:
        prompt: Text description for style
        melody_path: Path to melody audio file
        duration: Output duration
    """
    
    # Load melody audio
    melody, sr = torchaudio.load(melody_path)
    
    # Resample to the model's rate if needed (generate_with_chroma can also
    # resample internally when given the melody's original sample rate)
    if sr != melody_model.sample_rate:
        resampler = torchaudio.transforms.Resample(sr, melody_model.sample_rate)
        melody = resampler(melody)
    
    # Generate conditioned on melody
    melody_model.set_generation_params(duration=duration)
    
    wav = melody_model.generate_with_chroma(
        descriptions=[prompt],
        melody_wavs=melody[None],
        melody_sample_rate=melody_model.sample_rate,
        progress=True
    )
    
    # Save output
    audio_write(
        'melody_conditioned',
        wav[0].cpu(),
        melody_model.sample_rate,
        strategy="loudness"
    )
    
    print("Generated music following melody structure!")
    return wav

# Example: Add accompaniment to melody
result = generate_with_melody(
    prompt="classical piano with rich harmonies and gentle strings",
    melody_path="simple_melody.wav",
    duration=12
)

print("Melody conditioning allows you to guide musical structure!")

šŸŽ›ļø Batch Generation

def generate_variations(base_prompt, num_variations=5, duration=10):
    """Generate multiple variations of a musical idea"""
    
    # Apply the requested duration (note: set_generation_params resets any
    # omitted sampling options to their defaults)
    model.set_generation_params(duration=duration)
    
    prompts = [base_prompt] * num_variations
    
    # Generate all variations in one batched call
    wav = model.generate(prompts)
    
    # Save each variation
    for idx, one_wav in enumerate(wav):
        audio_write(
            f'variation_{idx+1}',
            one_wav.cpu(),
            model.sample_rate,
            strategy="loudness"
        )
        print(f"Saved variation {idx+1}")
    
    print(f"\nāœ“ Generated {num_variations} variations!")
    return wav

# Generate variations
variations = generate_variations(
    base_prompt="jazzy lounge music with smooth saxophone and gentle piano",
    num_variations=3,
    duration=8
)
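
Because sampling is stochastic, identical prompts already yield different takes; a higher temperature spreads them further apart. One caveat: set_generation_params resets any argument you omit to its default, so pass every option you care about in one call (values below are illustrative):

# Wider variations via higher temperature; set all desired options at once,
# since omitted arguments fall back to their defaults
model.set_generation_params(duration=8, temperature=1.2, top_k=250, cfg_coef=3.0)
wilder = model.generate(
    ["jazzy lounge music with smooth saxophone and gentle piano"] * 3
)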

def generate_music_library(prompts, output_dir="music_library"):
    """Generate a library of music tracks"""
    
    import os
    os.makedirs(output_dir, exist_ok=True)
    
    results = []
    
    for i, prompt in enumerate(prompts):
        print(f"\n[{i+1}/{len(prompts)}] Generating: {prompt[:50]}...")
        
        # Generate
        wav = model.generate([prompt])
        
        # Create filename from prompt
        filename = f"track_{i+1:02d}"
        filepath = os.path.join(output_dir, filename)
        
        # Save
        audio_write(
            filepath,
            wav[0].cpu(),
            model.sample_rate,
            strategy="loudness"
        )
        
        results.append({
            'track_number': i + 1,
            'prompt': prompt,
            'filename': f"{filename}.wav",
            'duration': model.duration  # seconds; set_generation_params stores duration on the model
        })
    
    # Save metadata
    import json
    with open(os.path.join(output_dir, 'library_metadata.json'), 'w') as f:
        json.dump(results, f, indent=2)
    
    print(f"\nāœ“ Generated {len(prompts)} tracks in '{output_dir}/'")
    return results

# Create music library
library_prompts = [
    "upbeat pop with catchy melody and energetic drums",
    "dark ambient with atmospheric pads and deep bass",
    "cheerful ukulele with whistling and hand claps",
    "epic trailer music with dramatic percussion and horns",
]

library = generate_music_library(library_prompts)
print(f"\nMusic library ready with {len(library)} tracks!")

🎨 Style Transfer

def style_transfer_music(input_audio, target_style, duration=10):
    """
    Transfer musical style while preserving structure
    
    Uses melody conditioning with style prompt
    """
    
    # Load input audio
    audio, sr = torchaudio.load(input_audio)
    
    # Resample if needed
    if sr != melody_model.sample_rate:
        resampler = torchaudio.transforms.Resample(sr, melody_model.sample_rate)
        audio = resampler(audio)
    
    # Generate at the requested length with the target style
    melody_model.set_generation_params(duration=duration)
    wav = melody_model.generate_with_chroma(
        descriptions=[target_style],
        melody_wavs=audio[None],
        melody_sample_rate=melody_model.sample_rate,
        progress=True
    )
    
    # Save result
    output_name = f"style_transfer_{target_style.split()[0]}"
    audio_write(
        output_name,
        wav[0].cpu(),
        melody_model.sample_rate,
        strategy="loudness"
    )
    
    print(f"Style transferred to: {target_style}")
    return wav

# Example: Convert to different styles
original_song = "pop_song.wav"

styles = [
    "classical orchestra with strings and piano",
    "heavy metal with distorted guitars and drums",
    "reggae with offbeat rhythm and bass",
    "baroque harpsichord with counterpoint",
]

for style in styles:
    print(f"\nTransferring to: {style}")
    style_transfer_music(original_song, style, duration=12)

print("\nāœ“ Created multiple style variations!")

🎵 Music Theory Integration

import pretty_midi
import numpy as np

class MusicTheoryHelper:
    """Helper for music theory-based generation"""
    
    SCALES = {
        'major': [0, 2, 4, 5, 7, 9, 11],
        'minor': [0, 2, 3, 5, 7, 8, 10],
        'pentatonic': [0, 2, 4, 7, 9],
        'blues': [0, 3, 5, 6, 7, 10],
        'chromatic': list(range(12)),
    }
    
    CHORD_PROGRESSIONS = {
        'pop': ['I', 'V', 'vi', 'IV'],  # Very common
        'jazz': ['ii', 'V', 'I', 'VI'],
        'blues': ['I', 'I', 'I', 'I', 'IV', 'IV', 'I', 'I', 'V', 'IV', 'I', 'V'],
        'sad': ['vi', 'IV', 'I', 'V'],
    }
    
    @staticmethod
    def create_midi_melody(notes, durations, tempo=120, output_file='melody.mid'):
        """Create MIDI file from notes"""
        
        midi = pretty_midi.PrettyMIDI(initial_tempo=tempo)
        instrument = pretty_midi.Instrument(program=0)  # Piano
        
        time = 0
        for note, duration in zip(notes, durations):
            midi_note = pretty_midi.Note(
                velocity=100,
                pitch=note,
                start=time,
                end=time + duration
            )
            instrument.notes.append(midi_note)
            time += duration
        
        midi.instruments.append(instrument)
        midi.write(output_file)
        print(f"Created MIDI: {output_file}")
        
        return output_file
    
    @staticmethod
    def generate_prompt_with_theory(key='C', scale='major', 
                                     tempo='moderate', mood='happy',
                                     instruments=None):
        """Generate detailed prompt using music theory"""
        
        if instruments is None:
            instruments = ['piano', 'strings']
        
        tempo_desc = {
            'slow': 'slow tempo, relaxed',
            'moderate': 'moderate tempo',
            'fast': 'fast tempo, energetic',
        }
        
        mood_desc = {
            'happy': 'bright, cheerful, uplifting',
            'sad': 'melancholic, emotional, gentle',
            'energetic': 'powerful, driving, intense',
            'calm': 'peaceful, serene, soothing',
        }
        
        prompt = f"{key} {scale} scale, {tempo_desc[tempo]}, "
        prompt += f"{mood_desc[mood]}, "
        prompt += f"featuring {', '.join(instruments)}"
        
        return prompt

# Use music theory for generation
theory_helper = MusicTheoryHelper()

# Generate with music theory constraints
theory_prompt = theory_helper.generate_prompt_with_theory(
    key='D',
    scale='minor',
    tempo='slow',
    mood='sad',
    instruments=['piano', 'cello', 'violin']
)

print(f"Theory-based prompt: {theory_prompt}")

music = generate_music(theory_prompt, duration=15, output_name="theory_music")

# Create a MIDI melody (rendered to audio for conditioning in the sketch below)
notes = [60, 62, 64, 65, 67, 69, 71, 72]  # C major scale
durations = [0.5] * 8
midi_file = theory_helper.create_midi_melody(notes, durations)

print("\nMusic theory helps create more intentional compositions!")

🎧 Advanced Applications

class MusicGenerationApp:
    """Production-ready music generation application"""
    
    def __init__(self, model_size='medium'):
        self.model = MusicGen.get_pretrained(f'facebook/musicgen-{model_size}')
        self.generated_tracks = []
    
    def generate_for_video(self, video_description, duration, mood='neutral'):
        """Generate background music for video content"""
        
        mood_modifiers = {
            'neutral': '',
            'upbeat': 'energetic, uplifting, positive',
            'dramatic': 'epic, intense, powerful',
            'calm': 'peaceful, ambient, relaxing',
            'suspenseful': 'mysterious, tense, dark',
        }
        
        prompt = f"{video_description}, {mood_modifiers[mood]}"
        
        self.model.set_generation_params(duration=duration)
        wav = self.model.generate([prompt])
        
        filename = f"video_bg_{mood}_{len(self.generated_tracks)}"
        audio_write(filename, wav[0].cpu(), self.model.sample_rate)
        
        self.generated_tracks.append({
            'filename': f"{filename}.wav",
            'prompt': prompt,
            'duration': duration,
            'mood': mood
        })
        
        return filename
    
    def generate_loop(self, prompt, loop_duration=8, num_loops=4):
        """Generate seamless looping music"""
        
        # Generate base loop
        self.model.set_generation_params(duration=loop_duration)
        wav = self.model.generate([prompt + ", seamless loop"])
        
        # Tile the waveform end-to-end for the desired length (naive tiling
        # can click at the seams; see the crossfade sketch after this section)
        looped = wav.repeat(1, 1, num_loops)
        
        filename = "looped_music"
        audio_write(filename, looped[0].cpu(), self.model.sample_rate)
        
        print(f"Created {num_loops} loops of {loop_duration}s")
        return filename
    
    def generate_with_fadeout(self, prompt, duration=20, fadeout_duration=5):
        """Generate music with smooth fadeout"""
        
        self.model.set_generation_params(duration=duration)
        wav = self.model.generate([prompt])
        
        # Apply a linear fadeout; build the ramp on the same device as the audio
        fadeout_samples = int(fadeout_duration * self.model.sample_rate)
        fade_curve = torch.linspace(1, 0, fadeout_samples, device=wav.device)
        
        wav[0, :, -fadeout_samples:] *= fade_curve
        
        filename = "music_with_fadeout"
        audio_write(filename, wav[0].cpu(), self.model.sample_rate)
        
        return filename
    
    def export_metadata(self, output_file='music_metadata.json'):
        """Export generation metadata"""
        
        import json
        with open(output_file, 'w') as f:
            json.dump(self.generated_tracks, f, indent=2)
        
        print(f"Metadata saved to {output_file}")

# Example usage
app = MusicGenerationApp(model_size='medium')

# Generate for different video scenarios
video_tracks = [
    ("tech product showcase", 30, "upbeat"),
    ("nature documentary", 45, "calm"),
    ("action movie trailer", 30, "dramatic"),
]

for description, duration, mood in video_tracks:
    track = app.generate_for_video(description, duration, mood)
    print(f"āœ“ Generated: {track}")

# Generate loop for game
app.generate_loop(
    "medieval fantasy tavern music with lute and flute",
    loop_duration=10,
    num_loops=6
)

# Export metadata
app.export_metadata()

print("\nāœ“ Music generation application ready!")

🎼 Model Comparison

Model            Parameters   Quality     Speed      Best For
MusicGen Small   300M         Good        Fast       Quick iterations
MusicGen Medium  1.5B         Very good   Moderate   Balanced quality/speed
MusicGen Large   3.3B         Excellent   Slow       Production quality
MusicGen Melody  1.5B         Very good   Moderate   Melody conditioning

🎯 Key Takeaways