# Python Code

# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import boxcox
from scipy.special import inv_boxcox
# Synthetic example: a perfectly linear series (slope 0.5, no noise) over
# 100 consecutive time steps.
# No random numbers are drawn in this section; the seed call is kept so the
# global NumPy RNG state is unchanged for any code that runs afterwards.
np.random.seed(0)
n = 100
x = np.arange(n)
y = 0.5 * x
df = pd.DataFrame(dict(x=x, y=y))

# draw the series on a 10x6 inch figure using the axes-level API
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['x'], df['y'])
ax.set_xlabel('Time')
ax.set_ylabel('Value')
ax.set_title('Simple Linear Time Series')

# save the plot
# plt.savefig("chart1", dpi = 300, bbox_inches = "tight")
# load the example data set from the companion post's folder
data_raw = pd.read_csv("../posts/2024-10-02-ts-fundamentals-whats-a-time-series/example_ts_data.csv")

# keep only the four columns used below, then coerce Date to datetime and
# Revenue to a numeric dtype; the callables receive the column-subset frame
data_raw = data_raw[["Country", "Product", "Date", "Revenue"]].assign(
    Date=lambda frame: pd.to_datetime(frame["Date"]),
    Revenue=lambda frame: pd.to_numeric(frame["Revenue"]),
)

# quick sanity check: show the first few rows
print(data_raw.head())
# Select the single series used in the rest of the examples
# (Country == "Canada", Product == "Ice Cream") and index it by Date.
# set_index is chained without inplace=True so the filtered frame is never
# mutated after the fact; the resulting frame is the same.
cd_ic_raw = (
    data_raw[(data_raw["Country"] == "Canada") & (data_raw["Product"] == "Ice Cream")]
    .set_index("Date")
)

print(cd_ic_raw.head())

# plot the data
plt.figure(figsize=(10, 6))
plt.plot(cd_ic_raw.index, cd_ic_raw["Revenue"], label="Ice Cream Revenue")
plt.xlabel("Date")
plt.ylabel("Revenue")
plt.title("Ice Cream Revenue in Canada")
plt.legend()

# save the plot
# plt.savefig("chart2", dpi = 300, bbox_inches = "tight")
# Build a transformed copy of the series with lambda fixed at 0; the raw
# frame is left untouched because assign() returns a new DataFrame.
# (SciPy documents lambda = 0 as the natural-log case of Box-Cox.)
cd_ic_bc = cd_ic_raw.assign(
    Revenue=boxcox(x=cd_ic_raw["Revenue"], lmbda=0.0)
)

# chart the transformed values against the date index
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cd_ic_bc.index, cd_ic_bc["Revenue"], label="Ice Cream Revenue")
ax.set_xlabel("Date")
ax.set_ylabel("Transformed Revenue")
ax.set_title("Box-Cox Transformed Data (Lambda = 0)")
ax.legend()

# save the plot
# plt.savefig("chart3", dpi = 300, bbox_inches = "tight")
# Build a transformed copy of the series with lambda fixed at 0.5; the raw
# frame is left untouched because assign() returns a new DataFrame.
cd_ic_bc = cd_ic_raw.assign(
    Revenue=boxcox(x=cd_ic_raw["Revenue"], lmbda=0.5)
)

# chart the transformed values against the date index
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cd_ic_bc.index, cd_ic_bc["Revenue"], label="Ice Cream Revenue")
ax.set_xlabel("Date")
ax.set_ylabel("Transformed Revenue")
ax.set_title("Box-Cox Transformed Data (Lambda = 0.5)")
ax.legend()

# save the plot
# plt.savefig("chart4", dpi = 300, bbox_inches = "tight")
# copy data so the raw series stays available for the other examples
cd_ic_bc = cd_ic_raw.copy()

# set random seed for reproducibility
# NOTE(review): nothing in this section draws random numbers explicitly;
# the seed is kept so global RNG state is unchanged — confirm whether needed.
np.random.seed(100)

# apply Box-Cox transformation with lambda = None: boxcox then returns both
# the transformed values and the lambda it estimated from the data
cd_ic_bc["Revenue"], lambda_ = boxcox(x = cd_ic_bc["Revenue"])
print(lambda_)

# plot the data
plt.figure(figsize=(10, 6))
plt.plot(cd_ic_bc.index, cd_ic_bc["Revenue"], label="Ice Cream Revenue")
plt.xlabel("Date")
plt.ylabel("Transformed Revenue")
# format the estimated lambda into the title instead of hard-coding it, so
# the chart label cannot drift from the value actually used
plt.title(f"Box-Cox Transformed Data (Estimated Lambda = {lambda_:.4f})")
plt.legend()

# save the plot
# plt.savefig("chart5", dpi = 300, bbox_inches = "tight")
# Undo the transformation: apply inv_boxcox with the same lambda_ that was
# used for the forward transform to map Revenue back to its original scale.
cd_ic_bc = cd_ic_bc.assign(
    Revenue=inv_boxcox(cd_ic_bc["Revenue"], lambda_)
)
print(cd_ic_bc.head())

# chart the back-transformed values against the date index
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cd_ic_bc.index, cd_ic_bc["Revenue"], label="Ice Cream Revenue")
ax.set_xlabel("Date")
ax.set_ylabel("Revenue")
ax.set_title("Ice Cream Revenue in Canada (Original Scale)")
ax.legend()