I am trying to implement a forecasting model, and have followed this Medium guide. I have changed the code minimally, to get it working with the most recent version of skforecast (e.g. changing ForecasterAutoreg to ForecasterRecursive). When I use the dataset that they use, everything works, including the end with the exogenous features.
I want to use this implementation to train on my own data, specifically with exogenous features. When I use the code from the guide but swap the dataframe for one containing my own data (with the exact same date range), everything still works, except when I use exogenous features. Then I get the following error:
TypeError: unsupported operand type(s) for +: 'Timestamp' and 'NoneType'
.
The error is raised by the following code block:
# Fit a recursive forecaster with exogenous features, predict over the test
# window, and plot the prediction against the observed test data.
#
# NOTE: recent skforecast releases renamed ForecasterAutoreg to
# ForecasterRecursive; use whichever name the installed version provides.

# skforecast builds the expected start of the prediction index as
# `index[-1] + index.freq`, so the index used for training must carry an
# explicit frequency. A DatetimeIndex whose `freq` is None fails with
# "TypeError: unsupported operand type(s) for +: 'Timestamp' and 'NoneType'".
# The frequency must be set on df_exog itself (setting it on df alone is not
# enough); the .loc slices below inherit it. Daily data with no missing or
# duplicated dates is assumed, so asfreq only attaches the frequency.
df_exog = df_exog.asfreq('D')

forecaster_exog = ForecasterAutoreg(
    regressor=DecisionTreeRegressor(random_state=123),
    lags=30,
)

# Model Fit
forecaster_exog.fit(
    y=df_exog.loc[train_start:train_end, 'y'],
    exog=df_exog.loc[train_start:train_end, ['exog_1', 'exog_2']],
)

# Model Predict
# The exogenous frame must start one step after the end of the training
# window and cover every predicted step.
predicted_test_exog = forecaster_exog.predict(
    steps=len(df.loc[test_start:test_end]),
    exog=df_exog.loc[test_start:test_end, ['exog_1', 'exog_2']],
)

# Visualize
fig, ax = plt.subplots(figsize=(7, 3))
df.loc[test_start:test_end].plot(ax=ax, label="Test")
predicted_test_exog.plot(ax=ax, label='Predicted DT Exog')
ax.legend()
I have double checked that there are no dates missing in my data, both in the training and the test set. This is the case for the actual dates, the y value, and exog_1 and exog_2. The index is of type pandas.core.indexes.datetimes.DatetimeIndex
.
Does anyone know a feature of the data that might be the issue? Or an edit to the code as a workaround?
The full traceback of the error is below:
File <command-7029261884050460>, line 12
9 # Model Predict
10 # Set the dataset frequency to be (D)aily data
11 df = df.asfreq('D', method = 'bfill')
---> 12 predicted_test_exog = forecaster_exog.predict(steps = len(df.loc[test_start:test_end]) if not df.loc[test_start:test_end].empty else 0,
13 exog = df_exog.loc[test_start:test_end, ['exog_1', 'exog_2']])
15 # Visualize
16 fig, ax = plt.subplots(figsize=(7, 3))
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/skforecast/recursive/_forecaster_recursive.py:1437, in ForecasterRecursive.predict(self, steps, last_window, exog, check_inputs)
1398 def predict(
1399 self,
1400 steps: Union[int, str, pd.Timestamp],
(...)
1403 check_inputs: bool = True
1404 ) -> pd.Series:
1405 """
1406 Predict n steps ahead. It is an recursive process in which, each prediction,
1407 is used as a predictor for the next step.
(...)
1433
1434 """
1436 last_window_values, exog_values, prediction_index, steps = (
-> 1437 self._create_predict_inputs(
1438 steps=steps,
1439 last_window=last_window,
1440 exog=exog,
1441 check_inputs=check_inputs,
1442 )
1443 )
1445 with warnings.catch_warnings():
1446 warnings.filterwarnings(
1447 "ignore",
1448 message="X does not have valid feature names",
1449 category=UserWarning
1450 )
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/skforecast/recursive/_forecaster_recursive.py:1149, in ForecasterRecursive._create_predict_inputs(self, steps, last_window, exog, predict_boot, use_in_sample_residuals, use_binned_residuals, check_inputs)
1142 steps = date_to_index_position(
1143 index = last_window.index,
1144 date_input = steps,
1145 date_literal = 'steps'
1146 )
1148 if check_inputs:
-> 1149 check_predict_input(
1150 forecaster_name = type(self).__name__,
1151 steps = steps,
1152 is_fitted = self.is_fitted,
1153 exog_in_ = self.exog_in_,
1154 index_type_ = self.index_type_,
1155 index_freq_ = self.index_freq_,
1156 window_size = self.window_size,
1157 last_window = last_window,
1158 exog = exog,
1159 exog_type_in_ = self.exog_type_in_,
1160 exog_names_in_ = self.exog_names_in_,
1161 interval = None
1162 )
1164 if predict_boot and not use_in_sample_residuals:
1165 if not use_binned_residuals and self.out_sample_residuals_ is None:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/skforecast/utils/utils.py:1033, in check_predict_input(forecaster_name, steps, is_fitted, exog_in_, index_type_, index_freq_, window_size, last_window, last_window_exog, exog, exog_type_in_, exog_names_in_, interval, alpha, max_steps, levels, levels_forecaster, series_names_in_, encoding)
1027 raise TypeError(
1028 (f"Expected frequency of type {index_freq_} for {exog_name}. "
1029 f"Got {exog_index.freqstr}.")
1030 )
1032 # Check exog starts one step ahead of last_window end.
-> 1033 expected_index = expand_index(last_window.index, 1)[0]
1034 if expected_index != exog_to_check.index[0]:
1035 if forecaster_name in ['ForecasterRecursiveMultiSeries']:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.12/site-packages/skforecast/utils/utils.py:1585, in expand_index(index, steps)
1581 if isinstance(index, pd.Index):
1583 if isinstance(index, pd.DatetimeIndex):
1584 new_index = pd.date_range(
-> 1585 start = index[-1] + index.freq,
1586 periods = steps,
1587 freq = index.freq
1588 )
1589 elif isinstance(index, pd.RangeIndex):
1590 new_index = pd.RangeIndex(
1591 start = index[-1] + 1,
1592 stop = index[-1] + 1 + steps
1593 )