The model I have created is running smoothly and as expected. The model_output
dispenses a dataframe with the original column "DIFF_BY_DAY" and my predictions "prediction." The issue I am experiencing is when following the Model Evaluation demonstration from Palantir's website. I receive this error:
TypeError: Unable to represent an input/output object of type TransformOutput as a parameter. Please verify that your inputs/outputs conform to this model's API definition.
My goal is to utilize the Model Objective's interface to compute the model's evaluation metrics.
Below is the code in the model's repository (3 scripts):
model_adapter.py scipt:
import palantir_models as pm
from palantir_models_serializers import DillSerializer, JsonSerializer
import pandas as pd
import numpy as np
import datetime
class ExampleModelAdapter(pm.ModelAdapter):
@pm.auto_serialize(
model=DillSerializer(),
#config=JsonSerializer()
)
def __init__(self, model):
self.model = model
#self.config = config
@classmethod
def api(cls):
columns = [
('EVENT_DATE', datetime.datetime),
('SAS_DIFF_BY_DAY', np.float64)
]
# TODO: Edit this method to define the model API.
inputs = {
"df_in": pm.Pandas(columns = columns),
"param_in": pm.Parameter(type=int, default=12)
}
outputs = {
"df_out": pm.Pandas(columns=columns+[("prediction", np.float64)])
}
return inputs, outputs
def predict(self, df_in, param_in):
#forecast_period = 12
df_in = df_in[['EVENT_DATE', 'DIFF_BY_DAY']]
df_in['EVENT_DATE'] = pd.to_datetime(df_in['EVENT_DATE'])
df_in = df_in.set_index('EVENT_DATE')
df_in = df_in.resample('M').mean()
forecast = self.model.get_forecast(steps = int(param_in))#forecast_period
forecast_df = forecast.predicted_mean.to_frame(name = 'prediction')
#print(forecast.conf_int().__dict__)
forecast_df = pd.DataFrame({
'DATE': forecast_df.index,
'prediction': forecast.predicted_mean,
'lower_bound' : forecast.conf_int()['lower SAS_DIFF_BY_DAY'],
'upper_bound' : forecast.conf_int()['upper SAS_DIFF_BY_DAY']
})
forecast_df['DATE'] = pd.to_datetime(forecast_df['DATE'])
df_in = pd.merge(df_in, forecast_df, left_index=True, right_index=True, how = 'inner')
#df_in['DIFF_BY_DAY'] = df_in['DIFF_BY_DAY'].astype(np.float32)
#df_in['prediction'] = df_in['prediction'].astype(np.float32)
print(df_in.dtypes)
#return {"df_out": df_in.reset_index(drop = True)}
return df_in.reset_index(drop = True)
model_training.py script:
from transforms.api import transform, Input
from palantir_models.transforms import ModelOutput
from main.model_adapters.adapter import ExampleModelAdapter
@transform(
training_data_input=Input("/filepath/.../training_dataset"),
model_output=ModelOutput("/filepath/.../Model Name"),
)
def compute(training_data_input, model_output):
training_df = training_data_input.pandas() # Load a pandas dataframe from the TransformsInput
model = train_model(training_df) # Train the model
# Wrap the trained model in a ModelAdapter
foundry_model = ExampleModelAdapter(model) # Edit ExampleModelAdapter for your model
# Publish and write the trained model to Foundry
model_output.publish(
model_adapter=foundry_model
)
def train_model(training_df):
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
training_df = training_df[['DATE', 'DIFF_BY_DAY']]
training_df['DATE'] = pd.to_datetime(training_df['DATE'])
training_df = training_df.set_index('DATE')
training_df = training_df.resample('M').mean()
# Define SARIMA parameters
p, d, q = 1, 1, 1
P, D, Q, s = 1, 1, 1, 12 # Assuming monthly seasonality
# Fit the SARIMA model
model = sm.tsa.statespace.SARIMAX(training_df['DIFF_BY_DAY'], order = (p, d, q), seasonal_order = (P, D, Q, s))
results = model.fit()
return results
run_inference.py script:
from transforms.api import transform, Input, Output
from palantir_models.transforms import ModelInput, ModelOutput
import pandas as pd
@transform(
testing_data_input=Input("/filepath/.../testing_dataset),
model_input=ModelInput("/filepath/.../Model Name'),
predictions_output=ModelOutput("/filepath/.../model_output"),
)
def compute(testing_data_input, model_input, predictions_output):
inference_outputs = model_input.transform(testing_data_input)
print(inference_outputs)
predictions_output.write_pandas(inference_outputs.df_out)