# import libraries
import pandas as pd
import matplotlib.pyplot as plt
# Python Code
# read data
data_raw = pd.read_csv("../posts/2024-10-02-ts-fundamentals-whats-a-time-series/example_ts_data.csv")

# keep only the columns used below and convert them to proper dtypes;
# the lambdas receive the already-selected frame, so the conversions do not
# depend on the outer variable still holding the unfiltered data
data_raw = (
    data_raw[["Country", "Product", "Date", "Revenue"]]
    .assign(
        Date=lambda df: pd.to_datetime(df["Date"]),
        Revenue=lambda df: pd.to_numeric(df["Revenue"]),
    )
)

# print the first few rows
print(data_raw.head())
# filter on specific series
# Build the date-indexed series as a new frame instead of calling
# set_index(..., inplace=True) on a boolean-filtered slice of data_raw;
# this avoids mutating a derived slice in place.
is_us_cookies = (data_raw["Country"] == "United States") & (data_raw["Product"] == "Cookies")
us_cookie_raw = data_raw.loc[is_us_cookies].set_index("Date")

print(us_cookie_raw.head())
# plot the data
plt.figure(figsize=(10, 6))
plt.plot(us_cookie_raw.index, us_cookie_raw["Revenue"], label="Cookies Revenue", color="blue")
plt.title("US Cookie Revenue")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.grid(True)
# calculate the 2x12 month moving average
us_cookie_ma = us_cookie_raw.copy()

# Step 1: 12-month mean. With an even window and center=True, pandas places
# the window over rows t-6 .. t+5, i.e. half a step to the left of t.
ma_12 = us_cookie_ma["Revenue"].rolling(window=12, center=True).mean()

# Step 2: average each 12-month mean with the NEXT one (rows t-5 .. t+6).
# rolling(2) pairs a value with its predecessor, so shift(-1) re-labels the
# pair (t, t+1) onto t. The result spans t-6 .. t+6 with half weight on the
# two end rows, which is symmetric around t. (Averaging with the previous
# value instead would leave the trend lagging by one row.)
us_cookie_ma["2x12_MA"] = ma_12.rolling(window=2).mean().shift(-1)

print(us_cookie_ma.head())
# plot the moving average
plt.figure(figsize=(10, 6))
plt.plot(us_cookie_ma.index, us_cookie_ma["Revenue"], label="Original Revenue", color="blue")
plt.plot(us_cookie_ma.index, us_cookie_ma["2x12_MA"], label="2x12 Moving Average", color="red")
plt.title("United States - Cookie Revenue Trend")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.grid(True)
plt.legend()

# hide the top and right borders of the plot area
ax = plt.gca()
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# save the plot
# plt.savefig("chart3", dpi = 300, bbox_inches = "tight")
# detrend the data
us_cookie_detrend = us_cookie_ma.copy()

# additive detrending: what remains after removing the moving-average trend
us_cookie_detrend["Detrended"] = us_cookie_detrend["Revenue"] - us_cookie_detrend["2x12_MA"]

print(us_cookie_detrend.head(20))
# plot the detrended data
# plt.figure(...) opens a new figure; assigning to plt.figsize would only set
# an attribute on the module and the line would be drawn on the previous chart
plt.figure(figsize=(10, 6))
plt.plot(us_cookie_detrend.index, us_cookie_detrend["Detrended"], label="Detrended Revenue", color="purple")
plt.title("United States - Cookie Revenue Detrended")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.grid(True)
plt.legend()

# hide the top and right borders of the plot area
ax = plt.gca()
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# make date easier to read
plt.gcf().autofmt_xdate()

# save
plt.savefig("chart4", dpi=300, bbox_inches="tight")
# calculate the seasonal component
us_cookie_seasonal = us_cookie_detrend.copy()

# group the detrended values by calendar month of the index
# (missing detrended values are skipped when the group means are taken)
month_of_row = us_cookie_seasonal.index.month
detrended_by_month = us_cookie_seasonal["Detrended"].groupby(month_of_row)

# subtract the mean across all months so the monthly effects sum to zero.
# The components here are additive (Detrended = Revenue - trend), so the
# adjustment must be a subtraction; dividing by a mean that is close to
# zero would blow the seasonal values up instead of centering them.
us_cookie_seasonal["Seasonal"] = (
    detrended_by_month.transform("mean") - detrended_by_month.mean().mean()
)
# plot the seasonal component
plt.figure(figsize=(10, 6))
plt.plot(us_cookie_seasonal.index, us_cookie_seasonal["Seasonal"], label="Seasonal Component", color="green")
plt.title("United States - Cookie Revenue Seasonality")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.grid(True)
plt.legend()

# hide the top and right borders of the plot area
ax = plt.gca()
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# save
# plt.savefig("chart5", dpi = 300, bbox_inches = "tight")
# calculate the residual component
us_cookie_residual = us_cookie_seasonal.copy()

# residual = what is left of the detrended series after removing seasonality
us_cookie_residual["Residual"] = us_cookie_residual["Detrended"] - us_cookie_residual["Seasonal"]
# plot the residual component
plt.figure(figsize=(10, 6))
plt.plot(us_cookie_residual.index, us_cookie_residual["Residual"], label="Residual Component", color="orange")
plt.title("United States - Cookie Revenue Residual")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.grid(True)
plt.legend()

# hide the top and right borders of the plot area
ax = plt.gca()
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# save
# plt.savefig("chart6", dpi = 300, bbox_inches = "tight")
# STL decomposition
from statsmodels.tsa.seasonal import STL

# period=12 is the length of one seasonal cycle in observations;
# seasonal=13 is the length of the seasonal smoother
stl = STL(us_cookie_raw["Revenue"], seasonal=13, period=12)
res = stl.fit()
# plot the decomposition in a stacked chart
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(10, 8))

# one panel per component: (axes, series, legend label, title, line color)
panels = [
    (ax1, res.observed, "Original", "Original Data", "blue"),  # original data
    (ax2, res.trend, "Trend", "Trend", "red"),  # trend
    (ax3, res.seasonal, "Seasonal", "Seasonal", "green"),  # seasonal
    (ax4, res.resid, "Residual", "Residual", "orange"),  # residual
]

for panel_ax, series, label, title, color in panels:
    panel_ax.plot(series, label=label, color=color)
    panel_ax.set_title(title)
    panel_ax.set_ylabel("Revenue")
    panel_ax.grid(True)
    panel_ax.legend()

# Formatting and final touches
# only the bottom panel carries the x-axis label
ax4.set_xlabel("Date")
plt.tight_layout()

# save
# plt.savefig("chart7", dpi = 300, bbox_inches = "tight")