🚀 Model Deployment

From notebook to production

What is Model Deployment?

Model deployment is the process of making your trained model available for real-world use: turning notebooks into production APIs, web apps, or mobile apps.

Deployment Options:

  • REST API: Flask, FastAPI for web services
  • Cloud platforms: AWS, GCP, Azure
  • Containers: Docker for reproducibility
  • Serverless: AWS Lambda, Google Cloud Functions
  • Edge devices: Mobile, IoT devices

💾 Save & Load Models

import pickle
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Train a model
X, y = make_classification(n_samples=1000, n_features=20)
model = RandomForestClassifier(n_estimators=100)
model.fit(X, y)

# Save with pickle
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Load with pickle
with open('model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# Save with joblib (better for sklearn)
joblib.dump(model, 'model.joblib')
loaded_model = joblib.load('model.joblib')

# Test
prediction = loaded_model.predict(X[:5])
print(f"Predictions: {prediction}")

🌐 Flask API

# app.py
from flask import Flask, request, jsonify
import joblib
import numpy as np

app = Flask(__name__)

# Load model at startup
model = joblib.load('model.joblib')

@app.route('/predict', methods=['POST'])
def predict():
    try:
        # Get data from request
        data = request.get_json()
        features = np.array(data['features']).reshape(1, -1)
        
        # Make prediction
        prediction = model.predict(features)
        probability = model.predict_proba(features)
        
        return jsonify({
            'prediction': int(prediction[0]),
            'probability': probability[0].tolist(),
            'status': 'success'
        })
    
    except Exception as e:
        return jsonify({
            'error': str(e),
            'status': 'error'
        }), 400

@app.route('/health', methods=['GET'])
def health():
    return jsonify({'status': 'healthy'})

if __name__ == '__main__':
    # Development server only; use a WSGI server such as gunicorn in production
    app.run(debug=True, host='0.0.0.0', port=5000)

# Run: python app.py
# Test: curl -X POST http://localhost:5000/predict \
#       -H "Content-Type: application/json" \
#       -d '{"features": [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]}'

⚡ FastAPI (Modern Alternative)

# main.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np
from typing import List

app = FastAPI(title="ML Model API")

# Load model
model = joblib.load('model.joblib')

class PredictionInput(BaseModel):
    features: List[float]

class PredictionOutput(BaseModel):
    prediction: int
    probability: List[float]

@app.post("/predict", response_model=PredictionOutput)
async def predict(input_data: PredictionInput):
    try:
        features = np.array(input_data.features).reshape(1, -1)
        prediction = model.predict(features)
        probability = model.predict_proba(features)
        
        return PredictionOutput(
            prediction=int(prediction[0]),
            probability=probability[0].tolist()
        )
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

@app.get("/health")
async def health():
    return {"status": "healthy"}

# Run: uvicorn main:app --reload
# Docs: http://localhost:8000/docs (automatic interactive API docs!)
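
For automated testing, FastAPI ships a TestClient. A minimal test sketch, assuming the main.py above (recent FastAPI versions need the httpx package installed for TestClient):

# test_main.py
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)

def test_predict():
    # 20 feature values, matching the model's expected input width
    response = client.post('/predict', json={'features': [0.0] * 20})
    assert response.status_code == 200
    body = response.json()
    assert 'prediction' in body and 'probability' in body

def test_health():
    assert client.get('/health').json() == {'status': 'healthy'}

# Run: pytest test_main.py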

🐳 Docker Containerization

# Dockerfile
FROM python:3.9-slim

WORKDIR /app

# Copy requirements
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY app.py .
COPY model.joblib .

EXPOSE 5000

CMD ["python", "app.py"]

# Build: docker build -t ml-model-api .
# Run: docker run -p 5000:5000 ml-model-api

requirements.txt

flask==2.3.0
scikit-learn==1.3.0
joblib==1.3.0
numpy==1.24.0

☁️ AWS Deployment

1. AWS Lambda

# lambda_function.py
import json
import boto3
import joblib
import numpy as np

# Load model from S3
s3 = boto3.client('s3')
s3.download_file('my-bucket', 'model.joblib', '/tmp/model.joblib')
model = joblib.load('/tmp/model.joblib')

def lambda_handler(event, context):
    try:
        body = json.loads(event['body'])
        features = np.array(body['features']).reshape(1, -1)
        
        prediction = model.predict(features)
        probability = model.predict_proba(features)
        
        return {
            'statusCode': 200,
            'body': json.dumps({
                'prediction': int(prediction[0]),
                'probability': probability[0].tolist()
            })
        }
    except Exception as e:
        return {
            'statusCode': 400,
            'body': json.dumps({'error': str(e)})
        }
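
Note that scikit-learn, joblib, and numpy are not part of the default Lambda runtime, so they must be packaged as a Lambda layer or a container image. For a quick local smoke test before deploying, a sketch that can be appended to lambda_function.py (the event mirrors an API Gateway proxy request; the feature values are placeholders):

# Local smoke test with a sample API Gateway-style event
if __name__ == '__main__':
    sample_event = {'body': json.dumps({'features': [0.0] * 20})}
    print(lambda_handler(sample_event, None))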

2. AWS SageMaker

import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# Create SageMaker session
sagemaker_session = sagemaker.Session()
role = 'arn:aws:iam::YOUR_ACCOUNT:role/SageMakerRole'

# Create SKLearn estimator
sklearn = SKLearn(
    entry_point='train.py',
    role=role,
    instance_type='ml.m5.large',
    framework_version='1.0-1',
    py_version='py3'
)

# Train
sklearn.fit({'train': 's3://bucket/train.csv'})

# Deploy
predictor = sklearn.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium'
)

# Predict (data is a 2-D array of feature rows)
result = predictor.predict(data)
print(result)
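
A real-time endpoint is billed for as long as it is running, so tear it down when you are done experimenting:

# Clean up to stop incurring charges
predictor.delete_endpoint()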

🔥 Google Cloud Platform

# Deploy to Cloud Run
# 1. Build container
gcloud builds submit --tag gcr.io/PROJECT_ID/ml-model

# 2. Deploy
gcloud run deploy ml-model-api \
  --image gcr.io/PROJECT_ID/ml-model \
  --platform managed \
  --region us-central1 \
  --allow-unauthenticated

# 3. Test
curl -X POST https://ml-model-api-xxx.run.app/predict \
  -H "Content-Type: application/json" \
  -d '{"features": [...]}'

⚡ Streamlit Web App

# app.py
import streamlit as st
import joblib
import numpy as np

# Load model
@st.cache_resource
def load_model():
    return joblib.load('model.joblib')

model = load_model()

st.title('🤖 ML Model Predictor')
st.write('Enter features to get prediction')

# Input form
with st.form('prediction_form'):
    features = []
    cols = st.columns(4)
    
    for i in range(20):
        with cols[i % 4]:
            feature = st.number_input(f'Feature {i+1}', value=0.0)
            features.append(feature)
    
    submit = st.form_submit_button('Predict')

if submit:
    # Make prediction
    X = np.array(features).reshape(1, -1)
    prediction = model.predict(X)
    probability = model.predict_proba(X)
    
    # Display results
    st.success(f'Prediction: Class {prediction[0]}')
    st.write(f'Probability: {probability[0]}')
    
    # Visualize
    st.bar_chart(probability[0])

# Run: streamlit run app.py

📱 Model Optimization

Quantization

# Reduce model size for edge deployment
import os
import tensorflow as tf

# Load model
model = tf.keras.models.load_model('model.h5')

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

print(f"Original size: {os.path.getsize('model.h5') / 1024:.2f} KB")
print(f"Quantized size: {os.path.getsize('model.tflite') / 1024:.2f} KB")

ONNX Export

# Export to ONNX for cross-platform compatibility
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Define input type
initial_type = [('float_input', FloatTensorType([None, 20]))]

# Convert
onnx_model = convert_sklearn(model, initial_types=initial_type)

# Save
with open("model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())
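
The exported model can then be served with ONNX Runtime, with no scikit-learn dependency at inference time. A minimal sketch, assuming the onnxruntime package is installed and reusing X from the save/load example above:

import numpy as np
import onnxruntime as ort

# Load the exported model and run inference on a few rows
session = ort.InferenceSession('model.onnx')
input_name = session.get_inputs()[0].name

outputs = session.run(None, {input_name: X[:5].astype(np.float32)})
print(outputs[0])  # predicted labels (the classifier also exposes probabilities as a second output)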

🔐 Production Best Practices

📊 Monitoring

# Track predictions and performance (extends the Flask app from above)
import time
from prometheus_client import Counter, Histogram

# Metrics
prediction_counter = Counter('predictions_total', 'Total predictions')
prediction_latency = Histogram('prediction_latency_seconds', 'Prediction latency')

@app.route('/predict', methods=['POST'])
def predict():
    start_time = time.time()

    # Make prediction
    features = np.array(request.get_json()['features']).reshape(1, -1)
    result = model.predict(features)

    # Record metrics
    prediction_counter.inc()
    prediction_latency.observe(time.time() - start_time)

    return jsonify({'prediction': int(result[0])})
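
Prometheus collects these metrics by scraping an HTTP endpoint, so they also need to be exposed. A minimal sketch of a /metrics route on the same Flask app:

from flask import Response
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST

@app.route('/metrics', methods=['GET'])
def metrics():
    # Expose collected metrics in Prometheus text format for scraping
    return Response(generate_latest(), mimetype=CONTENT_TYPE_LATEST)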

💡 Deployment Checklist

  1. ✅ Save model in portable format (pickle, joblib, ONNX)
  2. ✅ Create API with Flask/FastAPI
  3. ✅ Add input validation and error handling
  4. ✅ Containerize with Docker
  5. ✅ Set up logging and monitoring
  6. ✅ Add authentication and rate limiting (see the sketch after this list)
  7. ✅ Test API thoroughly
  8. ✅ Deploy to cloud platform
  9. ✅ Set up CI/CD pipeline
  10. ✅ Monitor in production
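
For checklist item 6, a minimal API-key check for the Flask app above (the X-API-Key header name and API_KEY environment variable are assumptions; rate limiting is typically handled by a gateway or a library such as flask-limiter):

import os
from functools import wraps
from flask import request, jsonify

API_KEY = os.environ.get('API_KEY', 'change-me')

def require_api_key(view):
    @wraps(view)
    def wrapper(*args, **kwargs):
        # Reject requests that do not carry the expected key
        if request.headers.get('X-API-Key') != API_KEY:
            return jsonify({'error': 'unauthorized'}), 401
        return view(*args, **kwargs)
    return wrapper

# Usage: place below the route decorator
# @app.route('/predict', methods=['POST'])
# @require_api_key
# def predict(): ...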

🎯 Key Takeaways