What is a Time Series?
A time series is a sequence of data points ordered in time. Forecasting its future values is crucial in business, finance, weather, and many other domains.
Components:
- Trend: Long-term increase/decrease
- Seasonality: Regular patterns (daily, weekly, yearly)
- Cycles: Longer-term fluctuations with no fixed period
- Noise: Random variation
📊 Load & Visualize
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a synthetic daily series: linear trend + sine-wave seasonality
# + Gaussian noise, indexed by calendar date.
np.random.seed(42)
n_days = 365
date_index = pd.date_range('2020-01-01', periods=n_days, freq='D')
trend_part = np.linspace(100, 200, n_days)
seasonal_part = 10 * np.sin(np.linspace(0, 4 * np.pi, n_days))
noise_part = np.random.randn(n_days) * 5

df = pd.DataFrame(
    {'date': date_index, 'value': trend_part + seasonal_part + noise_part}
)
df.set_index('date', inplace=True)

# Visualize the raw series.
plt.figure(figsize=(15, 6))
plt.plot(df.index, df['value'])
plt.title('Time Series Data')
plt.xlabel('Date')
plt.ylabel('Value')
plt.grid(True, alpha=0.3)
plt.show()

print(df.head())
print(f"Shape: {df.shape}")
🔍 Decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition: value = trend + seasonal + residual,
# with a 30-day seasonal window.
decomp = seasonal_decompose(df['value'], model='additive', period=30)

# Stack the original series and each extracted component in one figure.
fig, axes = plt.subplots(4, 1, figsize=(15, 10))
panels = [
    (df['value'], 'Original'),
    (decomp.trend, 'Trend'),
    (decomp.seasonal, 'Seasonality'),
    (decomp.resid, 'Residuals'),
]
for ax, (series, label) in zip(axes, panels):
    series.plot(ax=ax, title=label)
plt.tight_layout()
plt.show()
📉 Stationarity Testing
from statsmodels.tsa.stattools import adfuller

def test_stationarity(timeseries, significance=0.05):
    """Run the Augmented Dickey-Fuller test and report the result.

    Parameters
    ----------
    timeseries : pd.Series
        Series to test; NaNs are dropped before testing (differencing
        introduces a leading NaN).
    significance : float, optional
        p-value threshold below which the unit-root null is rejected
        and the series is declared stationary (default 0.05).

    Returns
    -------
    bool
        True if the series is stationary at the given significance level.
    """
    # Unpack by position instead of opaque result[i] indexing; adfuller
    # returns (adf_stat, pvalue, usedlag, nobs, critical_values, icbest).
    adf_stat, p_value, _, _, critical_values, _ = adfuller(timeseries.dropna())
    print('ADF Statistic:', adf_stat)
    print('p-value:', p_value)
    print('Critical Values:')
    for key, value in critical_values.items():
        print(f' {key}: {value}')
    if p_value <= significance:
        print("\nSeries is stationary")
    else:
        print("\nSeries is non-stationary")
    return p_value <= significance

is_stationary = test_stationarity(df['value'])

# First-order differencing usually removes a linear trend.
df['diff'] = df['value'].diff()
print("\nAfter differencing:")
test_stationarity(df['diff'])
🔮 Moving Average
# Smoothing: simple rolling means plus an exponentially weighted mean
# (EMA reacts faster to recent changes than an equal-weight window).
for col, window in {'MA_7': 7, 'MA_30': 30}.items():
    df[col] = df['value'].rolling(window=window).mean()
df['EMA_7'] = df['value'].ewm(span=7, adjust=False).mean()

# Overlay the smoothed curves on the raw series.
plt.figure(figsize=(15, 6))
plt.plot(df.index, df['value'], label='Original', alpha=0.5)
for col, label in [('MA_7', '7-day MA'),
                   ('MA_30', '30-day MA'),
                   ('EMA_7', '7-day EMA')]:
    plt.plot(df.index, df[col], label=label)
plt.legend()
plt.title('Moving Averages')
plt.show()
📊 ARIMA Model
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Hold out the final 20% of the series for evaluation (chronological
# split — never shuffle a time series).
train_size = int(len(df) * 0.8)
train = df['value'][:train_size]
test = df['value'][train_size:]

# Fit ARIMA(p, d, q):
#   p = autoregressive order, d = differencing order,
#   q = moving-average order.
model = ARIMA(train, order=(5, 1, 2))
fitted_model = model.fit()
print(fitted_model.summary())

# Forecast over the full hold-out horizon.
forecast = fitted_model.forecast(steps=len(test))

# Compare forecast against the held-out data.
plt.figure(figsize=(15, 6))
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(test.index, forecast, label='Forecast', color='red')
plt.legend()
plt.title('ARIMA Forecast')
plt.show()

# Evaluate on the hold-out set.
mse = mean_squared_error(test, forecast)
mae = mean_absolute_error(test, forecast)
print(f"MSE: {mse:.2f}")
print(f"MAE: {mae:.2f}")
🔍 Auto ARIMA
# pip install pmdarima
from pmdarima import auto_arima

# Stepwise search over (p, d, q); the differencing order is estimated
# automatically when d=None.
search_kwargs = dict(
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    d=None,                  # let the search pick the differencing order
    seasonal=False,
    stepwise=True,           # Hyndman-Khandakar stepwise search
    suppress_warnings=True,
    error_action='ignore',
)
auto_model = auto_arima(train, **search_kwargs)
print(auto_model.summary())
print(f"\nBest order: {auto_model.order}")

# Forecast the hold-out horizon and score it.
auto_forecast = auto_model.predict(n_periods=len(test))
auto_mse = mean_squared_error(test, auto_forecast)
print(f"Auto ARIMA MSE: {auto_mse:.2f}")
📅 SARIMA (Seasonal ARIMA)
from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMA(p, d, q)(P, D, Q, s): the second tuple models the seasonal
# component; s is the seasonal period (7 => weekly pattern on daily data).
nonseasonal_order = (1, 1, 1)
weekly_seasonal_order = (1, 1, 1, 7)
sarima_model = SARIMAX(train,
                       order=nonseasonal_order,
                       seasonal_order=weekly_seasonal_order)
sarima_fitted = sarima_model.fit(disp=False)

# Forecast the hold-out horizon and score it.
sarima_forecast = sarima_fitted.forecast(steps=len(test))
sarima_mse = mean_squared_error(test, sarima_forecast)
print(f"SARIMA MSE: {sarima_mse:.2f}")
🚀 Prophet (Facebook)
# pip install prophet
from prophet import Prophet

# Prophet requires a frame with exactly two columns: 'ds' (datestamp)
# and 'y' (value).
# BUG FIX: by this point df has extra columns (diff, MA_7, MA_30, EMA_7),
# so df.reset_index() yields more than two columns and assigning
# ['ds', 'y'] would raise ValueError — select only the value column first.
prophet_df = df[['value']].reset_index()
prophet_df.columns = ['ds', 'y']
prophet_train = prophet_df[:train_size]
prophet_test = prophet_df[train_size:]

# Create and fit the model with all seasonal components enabled.
prophet_model = Prophet(
    daily_seasonality=True,
    weekly_seasonality=True,
    yearly_seasonality=True
)
prophet_model.fit(prophet_train)

# Extend the frame far enough into the future to cover the hold-out period.
future = prophet_model.make_future_dataframe(periods=len(prophet_test))
prophet_forecast = prophet_model.predict(future)

# Plot forecast with uncertainty interval.
fig = prophet_model.plot(prophet_forecast)
plt.title('Prophet Forecast')
plt.show()

# Plot trend/seasonality components.
fig2 = prophet_model.plot_components(prophet_forecast)
plt.show()

# The last len(prophet_test) rows of yhat align with the hold-out dates.
prophet_pred = prophet_forecast['yhat'].iloc[-len(prophet_test):].values
prophet_mse = mean_squared_error(prophet_test['y'], prophet_pred)
print(f"Prophet MSE: {prophet_mse:.2f}")
🧠 LSTM for Time Series
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler

# Neural networks train best on inputs scaled to [0, 1].
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df['value'].values.reshape(-1, 1))

def create_sequences(data, seq_length):
    """Slide a window of seq_length points over data; each window is an
    input sample whose target is the value immediately after it."""
    n_samples = len(data) - seq_length
    windows = [data[i:i + seq_length] for i in range(n_samples)]
    targets = [data[i + seq_length] for i in range(n_samples)]
    return np.array(windows), np.array(targets)

seq_length = 30
X, y = create_sequences(scaled_data, seq_length)

# Chronological 80/20 split (no shuffling for time series).
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Two stacked LSTM layers with dropout for regularization; the final
# Dense(1) emits the next-step prediction.
model = keras.Sequential([
    layers.LSTM(50, activation='relu', return_sequences=True,
                input_shape=(seq_length, 1)),
    layers.Dropout(0.2),
    layers.LSTM(50, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1)
])
model.compile(optimizer='adam', loss='mse')
history = model.fit(X_train, y_train, epochs=50, batch_size=32,
                    validation_split=0.1, verbose=0)

# Undo the scaling so errors are in the original units.
lstm_pred = scaler.inverse_transform(model.predict(X_test))
y_test_inv = scaler.inverse_transform(y_test)

lstm_mse = mean_squared_error(y_test_inv, lstm_pred)
print(f"LSTM MSE: {lstm_mse:.2f}")

# Actual vs. predicted on the hold-out window.
plt.figure(figsize=(15, 6))
plt.plot(y_test_inv, label='Actual')
plt.plot(lstm_pred, label='LSTM Prediction')
plt.legend()
plt.title('LSTM Time Series Forecast')
plt.show()
📊 Model Comparison
| Model | Best For | Pros | Cons |
|---|---|---|---|
| Moving Average | Simple smoothing | Fast, interpretable | No forecasting |
| ARIMA | Stationary data | Statistical foundation | Requires tuning |
| SARIMA | Seasonal patterns | Handles seasonality | Complex tuning |
| Prophet | Business forecasting | Easy, handles holidays | Less flexible |
| LSTM | Complex patterns | Powerful, flexible | Needs more data |
💡 Best Practices
- Check stationarity: Use ADF test
- Handle missing values: Forward fill or interpolate
- Scale data: Especially for neural networks
- Use walk-forward validation: Not random splits
- Try multiple models: Ensemble often best
- Consider seasonality: Use SARIMA or Prophet
- Start simple: Moving average baseline
- Monitor performance: Retrain regularly
🎯 Key Takeaways
- Decompose into trend, seasonal, residual
- Test stationarity with ADF test
- ARIMA for stationary data
- SARIMA for seasonal patterns
- Prophet easy and effective for business
- LSTM for complex non-linear patterns
- Use proper validation (time-based splits)