🔥 TensorFlow & Keras

Deep learning framework essentials

What are TensorFlow & Keras?

TensorFlow is Google's open-source deep learning framework. Keras is the high-level API built into TensorFlow 2.x for building and training neural networks with minimal code.

Key Features:

  • Easy to use: Keras provides a simple, intuitive API
  • Powerful: the TensorFlow backend handles automatic differentiation and optimized execution
  • Flexible: Sequential, Functional, and Subclassing APIs
  • Production-ready: Deploy on mobile, web, servers

📦 Installation & Setup

# Install TensorFlow
# pip install tensorflow

import tensorflow as tf
import numpy as np
print(f"TensorFlow version: {tf.__version__}")

# Check GPU availability
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")

# Set memory growth (prevents TF from allocating all GPU memory)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

🔢 TensorFlow Basics

Tensors

# Create tensors
scalar = tf.constant(42)
vector = tf.constant([1, 2, 3])
matrix = tf.constant([[1, 2], [3, 4]])
tensor_3d = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

print(f"Scalar: {scalar}, shape: {scalar.shape}")
print(f"Vector: {vector}, shape: {vector.shape}")
print(f"Matrix:\n{matrix}\nShape: {matrix.shape}")

# Tensor operations
a = tf.constant([1, 2, 3])
b = tf.constant([4, 5, 6])

print(f"Add: {a + b}")
print(f"Multiply: {a * b}")
print(f"Dot product: {tf.reduce_sum(a * b)}")

# Convert to numpy
print(f"To numpy: {a.numpy()}")

Variables

# Variables are trainable parameters
weight = tf.Variable([[1.0, 2.0], [3.0, 4.0]])
bias = tf.Variable([0.5, 0.5])

print(f"Weight: {weight}")
print(f"Trainable: {weight.trainable}")

# Update variables
weight.assign([[5.0, 6.0], [7.0, 8.0]])
weight.assign_add([[1.0, 1.0], [1.0, 1.0]])  # Add to existing
print(f"Updated weight: {weight}")

🏗️ Sequential API

from tensorflow import keras
from tensorflow.keras import layers

# Simple sequential model
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10,)),
    layers.Dropout(0.3),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Model summary
model.summary()

# Compile model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Generate dummy data
X_train = np.random.randn(1000, 10)
y_train = np.random.randint(0, 2, 1000)

# Train
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)
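
Once trained, the model can be evaluated and used for prediction (the data here is random, so the numbers are meaningless; this just shows the calls):

# Evaluate and predict
X_test = np.random.randn(200, 10)
y_test = np.random.randint(0, 2, 200)

loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test loss: {loss:.4f}, accuracy: {accuracy:.4f}")

probs = model.predict(X_test[:5])      # sigmoid outputs in [0, 1]
preds = (probs > 0.5).astype(int)      # threshold to class labels
print(preds.ravel())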

🔗 Functional API

# More flexible than Sequential: supports multiple inputs/outputs, shared layers, and branching

# Define inputs
input_layer = keras.Input(shape=(10,))

# Define layers
x = layers.Dense(64, activation='relu')(input_layer)
x = layers.Dropout(0.3)(x)
x = layers.Dense(32, activation='relu')(x)
output = layers.Dense(1, activation='sigmoid')(x)

# Create model
model = keras.Model(inputs=input_layer, outputs=output)

model.summary()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Multi-Input Model

# Two inputs, one output
input1 = keras.Input(shape=(10,), name='input1')
input2 = keras.Input(shape=(5,), name='input2')

# Process input1
x1 = layers.Dense(32, activation='relu')(input1)

# Process input2
x2 = layers.Dense(16, activation='relu')(input2)

# Concatenate
combined = layers.concatenate([x1, x2])
x = layers.Dense(16, activation='relu')(combined)
output = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs=[input1, input2], outputs=output)

# Train with multiple inputs
X1_train = np.random.randn(1000, 10)
X2_train = np.random.randn(1000, 5)
y_train = np.random.randint(0, 2, 1000)

model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit([X1_train, X2_train], y_train, epochs=5)
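
Model Subclassing

The third option mentioned under Key Features is the Subclassing API: define layers in __init__ and the forward pass in call. A minimal sketch equivalent to the Sequential model above:

class MLP(keras.Model):
    def __init__(self):
        super().__init__()
        self.dense1 = layers.Dense(64, activation='relu')
        self.dropout = layers.Dropout(0.3)
        self.dense2 = layers.Dense(32, activation='relu')
        self.out = layers.Dense(1, activation='sigmoid')

    def call(self, inputs, training=False):
        x = self.dense1(inputs)
        x = self.dropout(x, training=training)  # dropout only active during training
        x = self.dense2(x)
        return self.out(x)

model = MLP()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])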

🎛️ Common Layers

# Dense (Fully Connected)
layers.Dense(64, activation='relu')

# Dropout (Regularization)
layers.Dropout(0.3)  # Randomly zero 30% of units during training

# Batch Normalization
layers.BatchNormalization()

# Convolutional (for images)
layers.Conv2D(32, kernel_size=(3, 3), activation='relu')
layers.MaxPooling2D(pool_size=(2, 2))

# Recurrent (for sequences)
layers.LSTM(64, return_sequences=True)
layers.GRU(64)

# Embedding (for text/categorical)
layers.Embedding(input_dim=10000, output_dim=128)

# Flatten
layers.Flatten()

# Reshape
layers.Reshape((7, 7, 32))
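
To show how several of these compose, here is a small CNN for 28x28 grayscale images (layer sizes and shapes are illustrative, not a tuned architecture):

cnn = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')   # e.g. 10 classes
])
cnn.summary()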

🎯 Optimizers

# SGD (Stochastic Gradient Descent)
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

# Adam (Adaptive Moment Estimation) - Most popular
optimizer = keras.optimizers.Adam(learning_rate=0.001)

# RMSprop (Root Mean Square Propagation)
optimizer = keras.optimizers.RMSprop(learning_rate=0.001)

# AdaGrad
optimizer = keras.optimizers.Adagrad(learning_rate=0.01)

# Use in model
model.compile(optimizer=optimizer, loss='mse')

# Learning rate schedule
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1,
    decay_steps=10000,
    decay_rate=0.96
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)

📉 Loss Functions

# Binary Classification
loss = 'binary_crossentropy'

# Multi-class Classification
loss = 'categorical_crossentropy'  # One-hot encoded labels
loss = 'sparse_categorical_crossentropy'  # Integer labels

# Regression
loss = 'mse'  # Mean Squared Error
loss = 'mae'  # Mean Absolute Error
loss = 'huber'  # Huber loss (robust to outliers)

# Custom loss
def custom_loss(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))

model.compile(optimizer='adam', loss=custom_loss)

📊 Callbacks

# Early Stopping
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# Model Checkpoint
checkpoint = keras.callbacks.ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True
)

# Reduce Learning Rate
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7
)

# TensorBoard
tensorboard = keras.callbacks.TensorBoard(
    log_dir='./logs',
    histogram_freq=1
)

# Custom callback
class CustomCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        print(f"Epoch {epoch}: loss={logs['loss']:.4f}")

# Use callbacks
history = model.fit(
    X_train, y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[early_stop, checkpoint, reduce_lr, tensorboard]
)

💾 Save & Load Models

# Save entire model
model.save('my_model.h5')   # HDF5 format
model.save('my_model')      # SavedModel format (recommended in TF 2.x)
# In newer Keras releases, the native format is preferred: model.save('my_model.keras')

# Load model
loaded_model = keras.models.load_model('my_model')

# Save weights only
model.save_weights('weights.h5')

# Load weights
model.load_weights('weights.h5')

# Save architecture only (as JSON)
json_config = model.to_json()
with open('model_architecture.json', 'w') as f:
    f.write(json_config)

# Load architecture
with open('model_architecture.json', 'r') as f:
    json_config = f.read()
new_model = keras.models.model_from_json(json_config)
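
If a saved model used a custom object (for example the custom_loss defined in the Loss Functions section), pass it to load_model via custom_objects so Keras can deserialize it:

# Load a model that was compiled with a custom loss
loaded_model = keras.models.load_model(
    'my_model',
    custom_objects={'custom_loss': custom_loss}
)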

📈 Training Visualization

import matplotlib.pyplot as plt

# Plot training history
def plot_history(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
    
    # Loss
    ax1.plot(history.history['loss'], label='Train Loss')
    ax1.plot(history.history['val_loss'], label='Val Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True)
    
    # Accuracy
    ax2.plot(history.history['accuracy'], label='Train Acc')
    ax2.plot(history.history['val_accuracy'], label='Val Acc')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.legend()
    ax2.grid(True)
    
    plt.tight_layout()
    plt.show()

plot_history(history)

🔧 Custom Training Loop

# For full control over training: define the loss and optimizer used below
loss_fn = keras.losses.BinaryCrossentropy()
optimizer = keras.optimizers.Adam(learning_rate=1e-3)

@tf.function
def train_step(X, y):
    with tf.GradientTape() as tape:
        predictions = model(X, training=True)
        loss = loss_fn(y, predictions)
    
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    return loss

# Training loop
epochs = 10
batch_size = 32

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    
    # Shuffle data
    indices = np.random.permutation(len(X_train))
    X_shuffled = X_train[indices]
    y_shuffled = y_train[indices]
    
    # Batch training
    for i in range(0, len(X_train), batch_size):
        X_batch = X_shuffled[i:i+batch_size]
        y_batch = y_shuffled[i:i+batch_size]
        
        loss = train_step(X_batch, y_batch)
    
    print(f"Loss: {loss:.4f}")

💡 Best Practices

🎯 Key Takeaways