📈 Time Series Analysis

Forecasting sequential data

What is Time Series?

Time series is data collected over time with temporal ordering. Forecasting future values is crucial for business, finance, weather, and more.

Components:

  • Trend: Long-term increase/decrease
  • Seasonality: Regular patterns (daily, weekly, yearly)
  • Cycles: Longer-term fluctuations with no fixed period (unlike seasonality)
  • Noise: Random variation

📊 Load & Visualize

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Synthetic daily series: rising linear trend + sinusoidal seasonality + Gaussian noise
np.random.seed(42)
n_obs = 365
dates = pd.date_range('2020-01-01', periods=n_obs, freq='D')
trend = np.linspace(100, 200, n_obs)
seasonality = 10 * np.sin(np.linspace(0, 4 * np.pi, n_obs))
noise = np.random.randn(n_obs) * 5
values = trend + seasonality + noise

# Index by date so time-series tooling (resampling, decomposition) works naturally
df = pd.DataFrame({'date': dates, 'value': values}).set_index('date')

# Quick visual sanity check of the generated data
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df.index, df['value'])
ax.set_title('Time Series Data')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.grid(True, alpha=0.3)
plt.show()

print(df.head())
print(f"Shape: {df.shape}")

🔍 Decomposition

from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose the series into trend + seasonal + residual (additive model).
# FIX: `period` must match the data's actual seasonal cycle. The series
# generated above completes two full sine cycles over 365 days, i.e. a
# period of ~182 days; the original period=30 forced a spurious "monthly"
# pattern onto data that has no monthly seasonality.
decomposition = seasonal_decompose(df['value'], model='additive', period=182)

# Plot the original series alongside each extracted component
fig, axes = plt.subplots(4, 1, figsize=(15, 10))

df['value'].plot(ax=axes[0], title='Original')
decomposition.trend.plot(ax=axes[1], title='Trend')
decomposition.seasonal.plot(ax=axes[2], title='Seasonality')
decomposition.resid.plot(ax=axes[3], title='Residuals')

plt.tight_layout()
plt.show()

📉 Stationarity Testing

from statsmodels.tsa.stattools import adfuller

# Stationarity check via the Augmented Dickey-Fuller unit-root test
def test_stationarity(timeseries):
    """Run the ADF test on `timeseries` and print a verdict.

    NaNs are dropped first (e.g. the leading NaN from differencing).
    Returns True when the p-value indicates stationarity (p <= 0.05).
    """
    adf_stat, p_value, _lags, _nobs, crit_values, *_ = adfuller(timeseries.dropna())

    print('ADF Statistic:', adf_stat)
    print('p-value:', p_value)
    print('Critical Values:')
    for level, threshold in crit_values.items():
        print(f'   {level}: {threshold}')

    stationary = p_value <= 0.05
    if stationary:
        print("\nSeries is stationary")
    else:
        print("\nSeries is non-stationary")

    return stationary

# The raw series has a linear trend, so expect a non-stationary verdict
is_stationary = test_stationarity(df['value'])

# First-order differencing removes a linear trend; re-test afterwards
df['diff'] = df['value'].diff()
print("\nAfter differencing:")
test_stationarity(df['diff'])

🔮 Moving Average

# Simple moving averages: equal weight over a trailing window
df['MA_7'] = df['value'].rolling(window=7).mean()
df['MA_30'] = df['value'].rolling(window=30).mean()

# Exponential moving average: recent observations weighted more heavily
df['EMA_7'] = df['value'].ewm(span=7, adjust=False).mean()

# Overlay the smoothed series on the original
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df.index, df['value'], label='Original', alpha=0.5)
for column, label in (('MA_7', '7-day MA'),
                      ('MA_30', '30-day MA'),
                      ('EMA_7', '7-day EMA')):
    ax.plot(df.index, df[column], label=label)
ax.legend()
ax.set_title('Moving Averages')
plt.show()

📊 ARIMA Model

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Chronological 80/20 split -- never shuffle time-series data
train_size = int(len(df) * 0.8)
train = df['value'].iloc[:train_size]
test = df['value'].iloc[train_size:]

# ARIMA(p, d, q):
#   p -- autoregressive lags
#   d -- differencing passes to reach stationarity
#   q -- moving-average lags
arima_fit = ARIMA(train, order=(5, 1, 2)).fit()

print(arima_fit.summary())

# Forecast one step per held-out observation
forecast = arima_fit.forecast(steps=len(test))

# Visual comparison: training data, held-out truth, model forecast
plt.figure(figsize=(15, 6))
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(test.index, forecast, label='Forecast', color='red')
plt.legend()
plt.title('ARIMA Forecast')
plt.show()

# Point-forecast accuracy on the hold-out window
mse = mean_squared_error(test, forecast)
mae = mean_absolute_error(test, forecast)
print(f"MSE: {mse:.2f}")
print(f"MAE: {mae:.2f}")

🔍 Auto ARIMA

# pip install pmdarima
from pmdarima import auto_arima

# Stepwise search over (p, d, q); with d=None the differencing order
# is inferred from unit-root tests instead of being fixed up front.
searched_model = auto_arima(
    train,
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    d=None,
    seasonal=False,
    stepwise=True,
    suppress_warnings=True,
    error_action='ignore'
)

print(searched_model.summary())
print(f"\nBest order: {searched_model.order}")

# Forecast the hold-out horizon and score it
auto_forecast = searched_model.predict(n_periods=len(test))
auto_mse = mean_squared_error(test, auto_forecast)
print(f"Auto ARIMA MSE: {auto_mse:.2f}")

📅 SARIMA (Seasonal ARIMA)

from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMA(p,d,q)(P,D,Q,s): the second tuple models the seasonal part,
# with s the season length (s=7 => weekly pattern on daily data).
seasonal_model = SARIMAX(
    train,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 7),
)

# disp=False silences the optimizer's iteration log
seasonal_fit = seasonal_model.fit(disp=False)
sarima_forecast = seasonal_fit.forecast(steps=len(test))

sarima_mse = mean_squared_error(test, sarima_forecast)
print(f"SARIMA MSE: {sarima_mse:.2f}")

🚀 Prophet (Facebook)

# pip install prophet
from prophet import Prophet

# Prophet expects exactly two columns named 'ds' (datestamp) and 'y'.
# BUG FIX: by this point df has gained extra columns (diff, MA_7, MA_30,
# EMA_7), so reset_index() produces six columns and the blanket rename
# `prophet_df.columns = ['ds', 'y']` would raise a length-mismatch
# ValueError. Select only the date index and the target column first.
prophet_df = df.reset_index()[['date', 'value']]
prophet_df.columns = ['ds', 'y']
prophet_train = prophet_df[:train_size]
prophet_test = prophet_df[train_size:]

# Fit with all three standard seasonal components enabled
prophet_model = Prophet(
    daily_seasonality=True,
    weekly_seasonality=True,
    yearly_seasonality=True
)
prophet_model.fit(prophet_train)

# Extend the training timeline far enough to cover the test window
future = prophet_model.make_future_dataframe(periods=len(test))
prophet_forecast = prophet_model.predict(future)

# Forecast with uncertainty interval
fig = prophet_model.plot(prophet_forecast)
plt.title('Prophet Forecast')
plt.show()

# Trend/seasonality breakdown learned by the model
fig2 = prophet_model.plot_components(prophet_forecast)
plt.show()

# Score the final len(test) predictions against the held-out values.
# .iloc makes the positional (not label-based) slice explicit.
prophet_pred = prophet_forecast['yhat'].iloc[-len(test):].values
prophet_mse = mean_squared_error(prophet_test['y'], prophet_pred)
print(f"Prophet MSE: {prophet_mse:.2f}")

🧠 LSTM for Time Series

from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler

# Scale to [0, 1] -- neural nets train poorly on raw-magnitude inputs;
# fit_transform on the single-column frame yields the (n, 1) array the LSTM needs
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[['value']])

# Build supervised-learning samples from a sequential array
def create_sequences(data, seq_length):
    """Turn `data` into overlapping (window, next-value) pairs.

    X[i] holds data[i : i + seq_length]; y[i] is data[i + seq_length].
    Returns empty arrays when `data` is shorter than seq_length + 1.
    """
    n_samples = len(data) - seq_length
    windows = [data[i:i + seq_length] for i in range(n_samples)]
    targets = [data[i + seq_length] for i in range(n_samples)]
    return np.array(windows), np.array(targets)

# 30 past observations predict the next one
seq_length = 30
X, y = create_sequences(scaled_data, seq_length)

# Chronological 80/20 split (no shuffling for time series)
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Two stacked LSTM layers with dropout, regressing the next scaled value
model = keras.Sequential()
model.add(layers.LSTM(50, activation='relu', return_sequences=True,
                      input_shape=(seq_length, 1)))
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(50, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(1))

model.compile(optimizer='adam', loss='mse')
history = model.fit(X_train, y_train, epochs=50, batch_size=32,
                    validation_split=0.1, verbose=0)

# Undo the MinMax scaling so predictions and errors are in original units
lstm_pred = scaler.inverse_transform(model.predict(X_test))
y_test_inv = scaler.inverse_transform(y_test)

lstm_mse = mean_squared_error(y_test_inv, lstm_pred)
print(f"LSTM MSE: {lstm_mse:.2f}")

# Actual vs. predicted over the hold-out window
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(y_test_inv, label='Actual')
ax.plot(lstm_pred, label='LSTM Prediction')
ax.legend()
ax.set_title('LSTM Time Series Forecast')
plt.show()

📊 Model Comparison

| Model          | Best For             | Pros                   | Cons            |
|----------------|----------------------|------------------------|-----------------|
| Moving Average | Simple smoothing     | Fast, interpretable    | No forecasting  |
| ARIMA          | Stationary data      | Statistical foundation | Requires tuning |
| SARIMA         | Seasonal patterns    | Handles seasonality    | Complex tuning  |
| Prophet        | Business forecasting | Easy, handles holidays | Less flexible   |
| LSTM           | Complex patterns     | Powerful, flexible     | Needs more data |

💡 Best Practices

🎯 Key Takeaways