# Python Code

# import libraries
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA
import matplotlib.pyplot as plt
# load the example time series data
data_raw = pd.read_csv("../posts/2024-10-02-ts-fundamentals-whats-a-time-series/example_ts_data.csv")

# keep only the columns used below, then parse Date as datetime and Revenue as
# numeric; each callable receives the already-subset frame
data_raw = data_raw[["Country", "Product", "Date", "Revenue"]].assign(
    Date=lambda df: pd.to_datetime(df["Date"]),
    Revenue=lambda df: pd.to_numeric(df["Revenue"]),
)

# preview the first few rows
print(data_raw.head())
# filter on specific series
# .copy() makes the subset an independent frame, so the column assignments
# below do not raise pandas' SettingWithCopyWarning
is_us_ice_cream = (data_raw["Country"] == "United States") & (data_raw["Product"] == "Ice Cream")
us_ic_raw = data_raw[is_us_ice_cream].copy()

# create unique id
us_ic_raw["unique_id"] = us_ic_raw["Country"] + "_" + us_ic_raw["Product"]

# make sure date is a datetime (a no-op when it was already parsed on load)
us_ic_raw["Date"] = pd.to_datetime(us_ic_raw["Date"])

# plot the data against the Date column so the x-axis matches its "Date" label
# (the frame's index is just the original row position, not a date)
plt.figure(figsize=(10, 6))
plt.plot(us_ic_raw["Date"], us_ic_raw["Revenue"], label="Ice Cream Revenue")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.title("Ice Cream Revenue in United States")
plt.legend()
# keep only the id, time, and target columns that the forecaster needs
forecast_cols = ["unique_id", "Date", "Revenue"]
us_ic_clean = us_ic_raw.loc[:, forecast_cols].copy()

# set up models to train: a single AutoARIMA with a 12-period season
# NOTE(review): 'ME' is the month-end alias; if the Date values in the data are
# month starts, 'MS' may be the intended frequency - confirm against the CSV
sf = StatsForecast(models=[AutoARIMA(season_length=12)], freq='ME')

# fit the model and forecast 12 periods ahead with a 95% interval;
# fitted=True keeps the in-sample predictions so they can be retrieved later
Y_hat_df = sf.forecast(
    df=us_ic_clean,
    h=12,
    level=[95],
    fitted=True,
    id_col="unique_id",
    time_col="Date",
    target_col="Revenue",
)

print(Y_hat_df.head())

# convert forecast dates to be the first of the month
forecast_months = Y_hat_df["Date"].dt.to_period("M")
Y_hat_df["Date"] = forecast_months.dt.to_timestamp()

# get fitted values of the historical data
# Note: the fitted values are the model's predictions for the training data
# (retrievable because the forecast was run with fitted=True)
fitted_df = sf.forecast_fitted_values()

print(fitted_df.head())

# stack the historical fitted values on top of the future forecasts
combined_df = pd.concat([fitted_df, Y_hat_df], axis=0)

# make date the index
combined_df = combined_df.set_index("Date")

print(combined_df.head())
# plot the combined data on a single axes
fig, ax = plt.subplots(figsize=(10, 6))
dates = combined_df.index

# actual revenue as a solid line, model output as a dotted line
ax.plot(dates, combined_df["Revenue"], label="Actual Revenue")
ax.plot(dates, combined_df["AutoARIMA"], label="Forecasted Revenue", linestyle='dotted')

# shade the band between the lower and upper 95% bounds
ax.fill_between(
    dates,
    combined_df["AutoARIMA-lo-95"],
    combined_df["AutoARIMA-hi-95"],
    color='gray',
    alpha=0.2,
    label='95% Prediction Interval',
)

# chart formatting
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.set_title("ARIMA Forecasting Results for US Ice Cream Revenue")
ax.legend()

# save the plot
# plt.savefig("chart1", dpi = 300, bbox_inches = "tight")