# Python Code

# import libraries
import pandas as pd
import matplotlib.pyplot as plt
# load the example time series data from the companion post
csv_path = "../posts/2024-10-02-ts-fundamentals-whats-a-time-series/example_ts_data.csv"
data_raw = pd.read_csv(csv_path)

# keep only the columns used below, then convert Date to datetime and
# Revenue to a numeric dtype
keep_cols = ["Country", "Product", "Date", "Revenue"]
data_raw = data_raw[keep_cols].assign(
    Date=lambda df: pd.to_datetime(df["Date"]),
    Revenue=lambda df: pd.to_numeric(df["Revenue"]),
)

# preview the first few rows
print(data_raw.head())
# filter on specific series
is_us_cookies = (data_raw["Country"] == "United States") & (data_raw["Product"] == "Cookies")

# Build the indexed frame in one non-mutating chain: calling
# set_index(..., inplace=True) on a boolean-filtered subset mutates a derived
# frame and can trigger pandas' chained-assignment warning.
us_cookie_raw = data_raw.loc[is_us_cookies].set_index("Date")

print(us_cookie_raw.head())
# plot the data
plt.figure(figsize=(10, 6))
ax = plt.gca()

# revenue over time for the selected series
ax.plot(us_cookie_raw.index, us_cookie_raw["Revenue"], label="Cookies Revenue", color="blue")
ax.set_title("US Cookie Revenue")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.grid(True)
# calculate the 2x12 month moving average (centered trend estimate)
us_cookie_ma = us_cookie_raw.copy()

# A 12-row window has no middle row, so a single 12-row mean cannot be
# centered on a row. The 2x12 MA averages two adjacent 12-row means: the one
# covering t-6..t+5 and the one covering t-5..t+6, which together span
# t-6..t+6 with half weight on the two end rows.
#
# The alignment is built explicitly from trailing means, because
# rolling(2, center=True) pairs each value with the PREVIOUS one and would lag
# the trend by one row: the trailing mean labelled t+5 covers t-6..t+5, and
# the one labelled t+6 covers t-5..t+6.
# NOTE(review): assumes rows are consecutive months in chronological order.
trailing_12 = us_cookie_ma["Revenue"].rolling(window=12).mean()
us_cookie_ma["2x12_MA"] = (trailing_12.shift(-5) + trailing_12.shift(-6)) / 2

# the first and last six rows are NaN because the 13-row span is incomplete there
print(us_cookie_ma.head())
# plot the moving average
plt.figure(figsize=(10, 6))
ax = plt.gca()

# draw the original series first, then overlay the trend estimate
ax.plot(us_cookie_ma.index, us_cookie_ma["Revenue"], label="Original Revenue", color="blue")
ax.plot(us_cookie_ma.index, us_cookie_ma["2x12_MA"], label="2x12 Moving Average", color="red")

ax.set_title("United States - Cookie Revenue Trend")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.grid(True)
ax.legend()

# hide the top and right borders of the plot area
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

# save the plot
# plt.savefig("chart3", dpi = 300, bbox_inches = "tight")
# detrend the data
# subtract the moving-average trend from revenue; assign() returns a new
# frame, so us_cookie_ma itself is left untouched
us_cookie_detrend = us_cookie_ma.assign(
    Detrended=us_cookie_ma["Revenue"].sub(us_cookie_ma["2x12_MA"])
)

print(us_cookie_detrend.head(20))
# plot the detrended data
# Open a fresh figure for this chart. Assigning `plt.figsize = ...` only sets
# an attribute on the pyplot module and creates nothing, so the line below
# would otherwise be drawn onto whichever figure is currently active.
plt.figure(figsize=(10, 6))
plt.plot(us_cookie_detrend.index, us_cookie_detrend["Detrended"], label = "Detrended Revenue", color = "purple")
plt.title("United States - Cookie Revenue Detrended")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.grid(True)
plt.legend()

# hide the top and right borders of the plot area
ax = plt.gca()
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# make date easier to read
plt.gcf().autofmt_xdate()

# save the current figure to "chart4" (matplotlib picks the default image format)
plt.savefig("chart4", dpi = 300, bbox_inches = "tight")
# calculate the seasonal component
us_cookie_seasonal = us_cookie_detrend.copy()

# average the detrended values per calendar month and broadcast that average
# back onto every row of the same month (NaN rows are skipped by the mean)
month_key = us_cookie_seasonal.index.month
detrended_by_month = us_cookie_seasonal["Detrended"].groupby(month_key)
us_cookie_seasonal["Seasonal"] = detrended_by_month.transform("mean")

# The detrended series is additive (Revenue - trend), so the seasonal effects
# are centered by SUBTRACTING the mean of the monthly averages; the monthly
# effects then sum to zero. Dividing by the mean would not center them and
# would blow the values up whenever that mean is close to zero.
us_cookie_seasonal["Seasonal"] = us_cookie_seasonal["Seasonal"] - detrended_by_month.mean().mean()
# plot the seasonal component
plt.figure(figsize=(10, 6))
ax = plt.gca()

# the per-month seasonal value plotted against the date index
ax.plot(us_cookie_seasonal.index, us_cookie_seasonal["Seasonal"], label="Seasonal Component", color="green")
ax.set_title("United States - Cookie Revenue Seasonality")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.grid(True)
ax.legend()

# hide the top and right borders of the plot area
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

# save
# plt.savefig("chart5", dpi = 300, bbox_inches = "tight")
# calculate the residual component
# residual = detrended value minus the seasonal component; assign() returns a
# new frame, so us_cookie_seasonal is left untouched
us_cookie_residual = us_cookie_seasonal.assign(
    Residual=us_cookie_seasonal["Detrended"].sub(us_cookie_seasonal["Seasonal"])
)

# plot the residual component
plt.figure(figsize=(10, 6))
ax = plt.gca()

# what remains after removing trend and seasonality
ax.plot(us_cookie_residual.index, us_cookie_residual["Residual"], label="Residual Component", color="orange")
ax.set_title("United States - Cookie Revenue Residual")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.grid(True)
ax.legend()

# hide the top and right borders of the plot area
for side in ("top", "right"):
    ax.spines[side].set_visible(False)

# save
# plt.savefig("chart6", dpi = 300, bbox_inches = "tight")
# STL decomposition
from statsmodels.tsa.seasonal import STL

# fit STL on the revenue series with a period of 12 and a seasonal smoother
# length of 13
stl = STL(us_cookie_raw["Revenue"], seasonal=13, period=12)
res = stl.fit()

# plot the decomposition in a stacked chart: one panel per component
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(10, 8))

# (axes, series, legend label, panel title, line color) for each panel
panels = [
    (ax1, res.observed, "Original", "Original Data", "blue"),
    (ax2, res.trend, "Trend", "Trend", "red"),
    (ax3, res.seasonal, "Seasonal", "Seasonal", "green"),
    (ax4, res.resid, "Residual", "Residual", "orange"),
]

for panel_ax, component, legend_label, panel_title, line_color in panels:
    panel_ax.plot(component, label=legend_label, color=line_color)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel("Revenue")
    panel_ax.grid(True)
    panel_ax.legend()

# only the bottom panel carries the x-axis label
ax4.set_xlabel("Date")

# Formatting and final touches
plt.xlabel("Date")
plt.tight_layout()

# save
# plt.savefig("chart7", dpi = 300, bbox_inches = "tight")