# import libraries
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA
import matplotlib.pyplot as plt
# Python code
# Read the raw example data set from disk.
data_raw = pd.read_csv(
    "../posts/2024-10-02-ts-fundamentals-whats-a-time-series/example_ts_data.csv"
)

# Keep only the columns used below and coerce them to proper dtypes:
# "Date" becomes a datetime column and "Revenue" becomes numeric.
data_raw = (
    # select columns
    data_raw[["Country", "Product", "Date", "Revenue"]]
    # change data types
    .assign(
        Date=pd.to_datetime(data_raw["Date"]),
        Revenue=pd.to_numeric(data_raw["Revenue"]),
    )
)

# print the first few rows
print(data_raw.head())
# Filter down to a single series: ice cream revenue in the United States.
# The explicit .copy() makes this an independent frame, so the column
# assignments below do not write into a slice of data_raw (which would
# trigger pandas' chained-assignment warning).
us_ic_raw = data_raw[
    (data_raw["Country"] == "United States") & (data_raw["Product"] == "Ice Cream")
].copy()

# create unique id by joining country and product, e.g. "United States_Ice Cream"
us_ic_raw["unique_id"] = us_ic_raw["Country"] + "_" + us_ic_raw["Product"]

# convert date to datetime
us_ic_raw["Date"] = pd.to_datetime(us_ic_raw["Date"])

# plot the data
plt.figure(figsize=(10, 6))
# Plot against the "Date" column rather than the frame's row index so the
# x-axis actually matches its "Date" label.
plt.plot(us_ic_raw["Date"], us_ic_raw["Revenue"], label="Ice Cream Revenue")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.title("Ice Cream Revenue in United States")
plt.legend()
# get final data for forecasting: one id column, one time column, one target
us_ic_clean = us_ic_raw[["unique_id", "Date", "Revenue"]].copy()

# set up models to train: a single AutoARIMA with a 12-period season,
# on a month-end ('ME') frequency
sf = StatsForecast(
    models=[AutoARIMA(season_length=12)],
    freq='ME',
)

# fit the model and forecast for 12 months ahead with a 95% interval.
# fitted=True asks the forecast call to retain the in-sample fitted values
# so they can be retrieved later with sf.forecast_fitted_values().
Y_hat_df = sf.forecast(
    df=us_ic_clean,
    time_col="Date",
    target_col="Revenue",
    id_col="unique_id",
    h=12,
    level=[95],
    fitted=True,
)

print(Y_hat_df.head())
# convert date to be first of the month (period start of each forecast date)
Y_hat_df["Date"] = Y_hat_df["Date"].dt.to_period("M").dt.to_timestamp()

# concat both df together: historical rows first, forecast rows appended below
future_fcst_df = pd.concat([us_ic_clean, Y_hat_df], axis=0)

# make date the index
future_fcst_df.set_index("Date", inplace=True)

print(future_fcst_df.tail())

# plot the future fcst data
plt.figure(figsize=(10, 6))

# plot the original revenue data as line and forecast as dotted line
plt.plot(future_fcst_df.index, future_fcst_df["Revenue"], label="Actual Revenue")
plt.plot(
    future_fcst_df.index,
    future_fcst_df["AutoARIMA"],
    label="ARIMA Forecast",
    linestyle='dotted',
)

# plot the prediction intervals as shaded areas
plt.fill_between(
    future_fcst_df.index,
    future_fcst_df["AutoARIMA-lo-95"],
    future_fcst_df["AutoARIMA-hi-95"],
    color='gray',
    alpha=0.2,
    label='95% Prediction Interval',
)

# chart formatting
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.title("Forecast Results for US Ice Cream Revenue")
plt.legend()

# save the plot
# plt.savefig("chart1", dpi = 300, bbox_inches = "tight")
# get fitted values of the historical data
# Note: The fitted values are the predicted values for the training data
residual_values = sf.forecast_fitted_values()

print(residual_values.head())

# plot the historical fitted values
plt.figure(figsize=(10, 6))

# NOTE(review): the x-axis uses the frame's index while the axis label says
# "Date" -- confirm whether residual_values is date-indexed or carries a
# separate "Date" column that should be plotted instead.

# plot the original revenue data as line and forecast as dotted line
plt.plot(residual_values.index, residual_values["Revenue"], label="Actual Revenue")
plt.plot(
    residual_values.index,
    residual_values["AutoARIMA"],
    label="Forecasted Revenue",
    linestyle='dotted',
)

# plot the prediction intervals as shaded areas
plt.fill_between(
    residual_values.index,
    residual_values["AutoARIMA-lo-95"],
    residual_values["AutoARIMA-hi-95"],
    color='gray',
    alpha=0.2,
    label='95% Prediction Interval',
)

# chart formatting
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.title("ARIMA Residuals for US Ice Cream Revenue")
plt.legend()

# save the plot
# plt.savefig("chart2", dpi = 300, bbox_inches = "tight")
# calculate residuals (actual minus fitted) and plot them directly as a line chart
residuals = residual_values["Revenue"] - residual_values["AutoARIMA"]

plt.figure(figsize=(10, 6))
plt.plot(residuals.index, residuals, label="Residuals", color='orange')
# zero reference line: points above it are under-predictions, below are over-predictions
plt.axhline(y=0, color='red', linestyle='--')
plt.xlabel("Date")
plt.ylabel("Residuals")
plt.title("Residuals Over Time")

# save the plot
# plt.savefig("chart3", dpi = 300, bbox_inches = "tight")
# create histogram of residuals
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=6, color='blue', alpha=0.7)
# vertical reference line at zero error
plt.axvline(x=0, color='red', linestyle='--')
plt.xlabel("Residuals")
plt.ylabel("Frequency")
plt.title("Histogram of Residuals")

# save the plot
# plt.savefig("chart4", dpi = 300, bbox_inches = "tight")
# create chart that puts the actual values on one side and the fitted values
# on the other as a scatter plot
plt.figure(figsize=(10, 6))

# plot the actual values (x) against the fitted values (y)
plt.scatter(
    residual_values["Revenue"],
    residual_values["AutoARIMA"],
    label="Fitted Values",
    alpha=0.5,
)

# plot the 45 degree line spanning the observed revenue range; points on
# this line are observations where fitted == actual
revenue_min = residual_values["Revenue"].min()
revenue_max = residual_values["Revenue"].max()
plt.plot(
    [revenue_min, revenue_max],
    [revenue_min, revenue_max],
    color='red',
    linestyle='--',
    label="45 Degree Line",
)

# chart formatting
plt.xlabel("Actual Revenue")
plt.ylabel("Fitted Revenue")
plt.title("Q-Q Plot of Actual vs Fitted Values")

# save the plot
# plt.savefig("chart5", dpi = 300, bbox_inches = "tight")