What is a Time Series?
A time series is a sequence of data points ordered in time. Forecasting its future values is crucial in business, finance, weather, and many other domains.
Components:
- Trend: Long-term increase/decrease
- Seasonality: Regular patterns (daily, weekly, yearly)
- Cycles: Longer-term fluctuations with no fixed period
- Noise: Random variation
📊 Load & Visualize
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a synthetic daily series: linear trend + sine-wave seasonality
# + Gaussian noise, indexed by calendar date.
np.random.seed(42)
n_days = 365
date_index = pd.date_range('2020-01-01', periods=n_days, freq='D')
trend_part = np.linspace(100, 200, n_days)
seasonal_part = 10 * np.sin(np.linspace(0, 4 * np.pi, n_days))
noise_part = np.random.randn(n_days) * 5

df = pd.DataFrame(
    {'date': date_index, 'value': trend_part + seasonal_part + noise_part}
)
df.set_index('date', inplace=True)

# Visualize the raw series.
plt.figure(figsize=(15, 6))
plt.plot(df.index, df['value'])
plt.title('Time Series Data')
plt.xlabel('Date')
plt.ylabel('Value')
plt.grid(True, alpha=0.3)
plt.show()

print(df.head())
print(f"Shape: {df.shape}")
🔍 Decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition: value = trend + seasonal + residual,
# with a 30-day seasonal window.
decomp = seasonal_decompose(df['value'], model='additive', period=30)

# Stack the original series and each extracted component in one figure.
fig, axes = plt.subplots(4, 1, figsize=(15, 10))
panels = [
    (df['value'], 'Original'),
    (decomp.trend, 'Trend'),
    (decomp.seasonal, 'Seasonality'),
    (decomp.resid, 'Residuals'),
]
for ax, (series, label) in zip(axes, panels):
    series.plot(ax=ax, title=label)
plt.tight_layout()
plt.show()
📉 Stationarity Testing
from statsmodels.tsa.stattools import adfuller

def test_stationarity(timeseries, significance=0.05):
    """Run the Augmented Dickey-Fuller test and report the result.

    Parameters
    ----------
    timeseries : pd.Series
        Series to test; NaNs are dropped before testing (differencing
        introduces a leading NaN).
    significance : float, optional
        p-value threshold below which the unit-root null is rejected
        and the series is declared stationary (default 0.05).

    Returns
    -------
    bool
        True if the series is stationary at the given significance level.
    """
    # Unpack by position instead of opaque result[i] indexing; adfuller
    # returns (adf_stat, pvalue, usedlag, nobs, critical_values, icbest).
    adf_stat, p_value, _, _, critical_values, _ = adfuller(timeseries.dropna())
    print('ADF Statistic:', adf_stat)
    print('p-value:', p_value)
    print('Critical Values:')
    for key, value in critical_values.items():
        print(f' {key}: {value}')
    if p_value <= significance:
        print("\nSeries is stationary")
    else:
        print("\nSeries is non-stationary")
    return p_value <= significance

is_stationary = test_stationarity(df['value'])

# First-order differencing usually removes a linear trend.
df['diff'] = df['value'].diff()
print("\nAfter differencing:")
test_stationarity(df['diff'])
🔮 Moving Average
# Smoothing: simple rolling means plus an exponentially weighted mean
# (EMA reacts faster to recent changes than an equal-weight window).
for col, window in {'MA_7': 7, 'MA_30': 30}.items():
    df[col] = df['value'].rolling(window=window).mean()
df['EMA_7'] = df['value'].ewm(span=7, adjust=False).mean()

# Overlay the smoothed curves on the raw series.
plt.figure(figsize=(15, 6))
plt.plot(df.index, df['value'], label='Original', alpha=0.5)
for col, label in [('MA_7', '7-day MA'),
                   ('MA_30', '30-day MA'),
                   ('EMA_7', '7-day EMA')]:
    plt.plot(df.index, df[col], label=label)
plt.legend()
plt.title('Moving Averages')
plt.show()
📊 ARIMA Model
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Hold out the final 20% of the series for evaluation (chronological
# split — never shuffle a time series).
train_size = int(len(df) * 0.8)
train = df['value'][:train_size]
test = df['value'][train_size:]

# Fit ARIMA(p, d, q):
#   p = autoregressive order, d = differencing order,
#   q = moving-average order.
model = ARIMA(train, order=(5, 1, 2))
fitted_model = model.fit()
print(fitted_model.summary())

# Forecast over the full hold-out horizon.
forecast = fitted_model.forecast(steps=len(test))

# Compare forecast against the held-out data.
plt.figure(figsize=(15, 6))
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(test.index, forecast, label='Forecast', color='red')
plt.legend()
plt.title('ARIMA Forecast')
plt.show()

# Evaluate on the hold-out set.
mse = mean_squared_error(test, forecast)
mae = mean_absolute_error(test, forecast)
print(f"MSE: {mse:.2f}")
print(f"MAE: {mae:.2f}")
🔍 Auto ARIMA
# pip install pmdarima
from pmdarima import auto_arima

# Stepwise search over (p, d, q); the differencing order is estimated
# automatically when d=None.
search_kwargs = dict(
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    d=None,                  # let the search pick the differencing order
    seasonal=False,
    stepwise=True,           # Hyndman-Khandakar stepwise search
    suppress_warnings=True,
    error_action='ignore',
)
auto_model = auto_arima(train, **search_kwargs)
print(auto_model.summary())
print(f"\nBest order: {auto_model.order}")

# Forecast the hold-out horizon and score it.
auto_forecast = auto_model.predict(n_periods=len(test))
auto_mse = mean_squared_error(test, auto_forecast)
print(f"Auto ARIMA MSE: {auto_mse:.2f}")
📅 SARIMA (Seasonal ARIMA)
from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMA(p, d, q)(P, D, Q, s): the second tuple models the seasonal
# component; s is the seasonal period (7 => weekly pattern on daily data).
nonseasonal_order = (1, 1, 1)
weekly_seasonal_order = (1, 1, 1, 7)
sarima_model = SARIMAX(train,
                       order=nonseasonal_order,
                       seasonal_order=weekly_seasonal_order)
sarima_fitted = sarima_model.fit(disp=False)

# Forecast the hold-out horizon and score it.
sarima_forecast = sarima_fitted.forecast(steps=len(test))
sarima_mse = mean_squared_error(test, sarima_forecast)
print(f"SARIMA MSE: {sarima_mse:.2f}")
🚀 Prophet (Facebook)
# pip install prophet
from prophet import Prophet

# Prophet requires a frame with exactly two columns: 'ds' (datestamp)
# and 'y' (value).
# BUG FIX: by this point df has extra columns (diff, MA_7, MA_30, EMA_7),
# so df.reset_index() yields more than two columns and assigning
# ['ds', 'y'] would raise ValueError — select only the value column first.
prophet_df = df[['value']].reset_index()
prophet_df.columns = ['ds', 'y']
prophet_train = prophet_df[:train_size]
prophet_test = prophet_df[train_size:]

# Create and fit the model with all seasonal components enabled.
prophet_model = Prophet(
    daily_seasonality=True,
    weekly_seasonality=True,
    yearly_seasonality=True
)
prophet_model.fit(prophet_train)

# Extend the frame far enough into the future to cover the hold-out period.
future = prophet_model.make_future_dataframe(periods=len(prophet_test))
prophet_forecast = prophet_model.predict(future)

# Plot forecast with uncertainty interval.
fig = prophet_model.plot(prophet_forecast)
plt.title('Prophet Forecast')
plt.show()

# Plot trend/seasonality components.
fig2 = prophet_model.plot_components(prophet_forecast)
plt.show()

# The last len(prophet_test) rows of yhat align with the hold-out dates.
prophet_pred = prophet_forecast['yhat'].iloc[-len(prophet_test):].values
prophet_mse = mean_squared_error(prophet_test['y'], prophet_pred)
print(f"Prophet MSE: {prophet_mse:.2f}")
🧠 LSTM for Time Series
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler

# Neural networks train best on inputs scaled to [0, 1].
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df['value'].values.reshape(-1, 1))

def create_sequences(data, seq_length):
    """Slide a window of seq_length points over data; each window is an
    input sample whose target is the value immediately after it."""
    n_samples = len(data) - seq_length
    windows = [data[i:i + seq_length] for i in range(n_samples)]
    targets = [data[i + seq_length] for i in range(n_samples)]
    return np.array(windows), np.array(targets)

seq_length = 30
X, y = create_sequences(scaled_data, seq_length)

# Chronological 80/20 split (no shuffling for time series).
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Two stacked LSTM layers with dropout for regularization; the final
# Dense(1) emits the next-step prediction.
model = keras.Sequential([
    layers.LSTM(50, activation='relu', return_sequences=True,
                input_shape=(seq_length, 1)),
    layers.Dropout(0.2),
    layers.LSTM(50, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1)
])
model.compile(optimizer='adam', loss='mse')
history = model.fit(X_train, y_train, epochs=50, batch_size=32,
                    validation_split=0.1, verbose=0)

# Undo the scaling so errors are in the original units.
lstm_pred = scaler.inverse_transform(model.predict(X_test))
y_test_inv = scaler.inverse_transform(y_test)

lstm_mse = mean_squared_error(y_test_inv, lstm_pred)
print(f"LSTM MSE: {lstm_mse:.2f}")

# Actual vs. predicted on the hold-out window.
plt.figure(figsize=(15, 6))
plt.plot(y_test_inv, label='Actual')
plt.plot(lstm_pred, label='LSTM Prediction')
plt.legend()
plt.title('LSTM Time Series Forecast')
plt.show()
📊 Model Comparison
| Model | Best For | Pros | Cons |
|---|---|---|---|
| Moving Average | Simple smoothing | Fast, interpretable | No forecasting |
| ARIMA | Stationary data | Statistical foundation | Requires tuning |
| SARIMA | Seasonal patterns | Handles seasonality | Complex tuning |
| Prophet | Business forecasting | Easy, handles holidays | Less flexible |
| LSTM | Complex patterns | Powerful, flexible | Needs more data |
💡 Best Practices
- Check stationarity: Use ADF test
- Handle missing values: Forward fill or interpolate
- Scale data: Especially for neural networks
- Use walk-forward validation: Not random splits
- Try multiple models: Ensemble often best
- Consider seasonality: Use SARIMA or Prophet
- Start simple: Moving average baseline
- Monitor performance: Retrain regularly
🎯 Key Takeaways
- Decompose into trend, seasonal, residual
- Test stationarity with ADF test
- ARIMA for stationary data
- SARIMA for seasonal patterns
- Prophet easy and effective for business
- LSTM for complex non-linear patterns
- Use proper validation (time-based splits)