🎪 Come to PyCon Italy (Bologna, May 29-31)! 🍝
domains
where is the (business) value? 💰
make better decisions! 💡
time is the most important dimension
other dimensions (e.g. product, market, …) usually lead to forecasting multiple time series (or multivariate ones)
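in the Nixtla ecosystem, multiple series live in one "long" table, identified by a unique_id column; a minimal, made-up sketch (product names are invented):
import polars as pl
# one row per (series, timestamp): unique_id identifies the product, ds the date, y the value
sales = pl.DataFrame({
    "unique_id": ["product_A", "product_A", "product_B", "product_B"],
    "ds": ["2024-01-01", "2024-02-01", "2024-01-01", "2024-02-01"],
    "y": [120, 135, 40, 38],
}).with_columns(pl.col("ds").str.to_date())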
import polars as pl
url = "https://datasets-nixtla.s3.amazonaws.com/air-passengers.csv"
air = pl.read_csv(url).with_columns(pl.col("ds").str.to_date())
air.head(3)
from datetime import date
import plotly.express as px
cutoff = date(1959, 1, 1)
df = air.with_columns(pl.when(pl.col("ds") < cutoff).then(pl.lit("train"))
                        .otherwise(pl.lit("test")).alias("dataset"))
px.line(df, x="ds", y="y", color="dataset")
from statsforecast import StatsForecast
from statsforecast.models import Naive, HistoricAverage, WindowAverage, SeasonalNaive
models = [Naive(), HistoricAverage(), WindowAverage(window_size=12),
          SeasonalNaive(season_length=12)]
sf = StatsForecast(models=models, freq="MS")
train_df = df.filter(pl.col("dataset") == "train").drop("dataset")  # pre-1959 part of the series
sf.fit(train_df)
predict_df = sf.predict(h=24)
sf.plot(df, predict_df)
from statsforecast.models import AutoARIMA, AutoETS
sf = StatsForecast(
    models=[
        AutoARIMA(season_length=12),
        AutoETS(season_length=12),
    ],
    freq="MS",
)
sf.fit(train_df)
predict_df = sf.predict(h=24, level=[95])
sf.plot(df, predict_df, level=[95])
Note that Nixtla provides a probabilistic forecast for (almost) all of its models (via the level
keyword argument), either through model-specific estimates or through conformal prediction (model-agnostic)
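a minimal sketch of the model-agnostic route, assuming statsforecast's ConformalIntervals helper and the train_df from above:
from statsforecast.models import SeasonalNaive
from statsforecast.utils import ConformalIntervals
# wrap any point-forecast model with conformal prediction intervals
sf = StatsForecast(
    models=[SeasonalNaive(season_length=12,
                          prediction_intervals=ConformalIntervals(h=24, n_windows=2))],
    freq="MS",
)
sf.fit(train_df)
predict_df = sf.predict(h=24, level=[95])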
from utilsforecast.losses import rmse
cv_df = sf.cross_validation(df=df, h=24, step_size=24, n_windows=3)
rmse(cv_df, models=["AutoARIMA", "AutoETS"])
import lightgbm as lgbm
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.target_transforms import Differences
fcst = MLForecast(
    models=[lgbm.LGBMRegressor()],
    freq='D',
    lags=[7, 14],
    lag_transforms={
        1: [ExpandingMean()],
        7: [RollingMean(window_size=28)],
    },
    date_features=['dayofweek'],
    target_transforms=[Differences([1])],
)
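the object above only defines the feature pipeline; a minimal usage sketch, assuming a daily long-format DataFrame series_df with unique_id / ds / y columns (hypothetical here):
fcst.fit(series_df)            # fit LightGBM on the engineered lag/date features
preds_df = fcst.predict(h=14)  # 14 days ahead, one forecast per series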
from datasetsforecast.hierarchical import HierarchicalData
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.methods import BottomUp, TopDown, MiddleOut
# Create timeseries for all levels of the hierarchy
Y_df, S, tags = HierarchicalData.load('./data', 'TourismSmall')
# ...
Y_train_df, Y_test_df = ...
# Compute base predictions
fcst = StatsForecast(models=[AutoARIMA(season_length=4)], freq='QE')
Y_hat_df = fcst.forecast(df=Y_train_df, h=4)
# Reconcile the base predictions
reconcilers = [
BottomUp(),
TopDown(method='forecast_proportions'),
MiddleOut(middle_level='Country/Purpose/State',
top_down_method='forecast_proportions')
]
hrec = HierarchicalReconciliation(reconcilers=reconcilers)
Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, S=S, tags=tags)
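to compare base vs. reconciled forecasts on the held-out quarters, the losses from utilsforecast can be reused; a rough sketch (reconciled columns are named '<model>/<reconciler>', e.g. 'AutoARIMA/BottomUp'; depending on the library version unique_id may first need a reset_index()):
from utilsforecast.losses import rmse
eval_df = Y_rec_df.merge(Y_test_df, on=["unique_id", "ds"], how="left")
model_cols = [c for c in eval_df.columns if c.startswith("AutoARIMA")]
rmse(eval_df, models=model_cols)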
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNHITS, AutoLSTM
from neuralforecast.models import NBEATS, NHITS
from utilsforecast.plotting import plot_series
# ...
horizon = len(Y_test_df)
models = [NBEATS(input_size=2 * horizon, h=horizon, max_steps=100),
NHITS(input_size=2 * horizon, h=horizon, max_steps=100)]
nf = NeuralForecast(models=models, freq='ME')
nf.fit(df=Y_train_df)
Y_hat_df = nf.predict()
plot_series(Y_df, Y_hat_df)
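the Auto* classes imported above wrap the same architectures in a hyperparameter search; a minimal sketch (num_samples is the number of configurations tried):
auto_models = [AutoNHITS(h=horizon, num_samples=10),
               AutoLSTM(h=horizon, num_samples=10)]
nf_auto = NeuralForecast(models=auto_models, freq='ME')
nf_auto.fit(df=Y_train_df)
Y_hat_auto_df = nf_auto.predict()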
import pandas as pd
from nixtla import NixtlaClient
nixtla_client = NixtlaClient(api_key=nixtla_api_key)
df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/electricity-short.csv')
fcst_df = nixtla_client.forecast(df, h=24, level=[80, 90])
nixtla_client.plot(df, fcst_df, level=[80, 90])
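the client also mirrors the cross-validation workflow from earlier; a short sketch, assuming the same electricity DataFrame (the forecast column is named "TimeGPT"):
from utilsforecast.losses import rmse
cv_df = nixtla_client.cross_validation(df, h=24, n_windows=3)
rmse(cv_df, models=["TimeGPT"])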
PyCon Lithuania, Vilnius, 250424