I wrote the following FastAPI code to train a classification model. However, all hyperparameters are being received as strings, even when they should be integers, floats, or booleans.
FastAPI code:
class Parameter(BaseModel):
    """One hyperparameter entry of a model in the tuning request."""

    # Hyperparameter name, e.g. "n_estimators".
    key: str
    # Value chosen by the client; may arrive as a string, number or boolean.
    selectedValue: Union[str, int, float, bool]

    class Config:
        # Pydantic v1 tries Union members left to right and coerces to the
        # first one that accepts the input. With `str` listed first, the JSON
        # number 100 is stored as "100". smart_union makes validation prefer
        # the member whose type matches the input exactly, so 100 stays an int.
        # NOTE(review): assumes Pydantic v1 (>= 1.9). Pydantic v2 resolves
        # unions this way by default and no longer uses this key; confirm the
        # installed version.
        smart_union = True


class Model(BaseModel):
    """A model selected for tuning, with its optional hyperparameter list."""

    modelCategory: str
    modelName: str
    # None when the client sends no hyperparameters for this model.
    parameters: Optional[List[Parameter]] = None


class HyperparameterTuningRequest(BaseModel):
    """Request body accepted by the hyperparameter tuning endpoint."""

    models: List[Model]
@app.post("/tune_hyperparameters/")
def hyperparameter_tuning(request: HyperparameterTuningRequest):
    """Print the key, value and Python type of every submitted hyperparameter."""
    for entry in request.models:
        # Models sent without parameters (None or an empty list) print nothing.
        for hp in entry.parameters or ():
            print(f"Parameter Key: {hp.key}")
            print(f"Parameter Value: {hp.selectedValue}")
            # This is the line reported to always show <class 'str'>.
            print(f"Parameter Type: {type(hp.selectedValue)}")
classification code:
#Boolean conversion
def convert_to_bool(value):
    """Coerce a loosely typed hyperparameter value to a bool.

    Args:
        value: A bool, string or integer as received from the request.

    Returns:
        ``value`` itself if it is already a bool. For strings, True only when
        the text is "true" (case-insensitive, surrounding whitespace ignored).
        For integers, their truthiness. False for any other type.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # Strip first so padded input such as " True " is not read as False.
        return value.strip().lower() == 'true'
    if isinstance(value, int):
        return bool(value)
    return False
def convert_to_float(value, param_name, default_value):
    """Parse ``value`` as a float hyperparameter, falling back to a default.

    Args:
        value: Raw value (string or number) to convert.
        param_name: Hyperparameter name; ``'l1_ratio'`` is range-checked
            against [0, 1], every other name must be strictly positive.
        default_value: Returned whenever ``value`` cannot be used.

    Returns:
        The parsed float, or ``default_value`` if ``value`` is not numeric,
        is NaN or infinite, or falls outside the accepted range.
    """
    import math  # local import: keeps the function self-contained

    try:
        float_value = float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return default_value

    # float() accepts "nan" and "inf"; neither is a usable hyperparameter.
    if not math.isfinite(float_value):
        return default_value

    if param_name == 'l1_ratio':
        return float_value if 0 <= float_value <= 1 else default_value

    # NOTE(review): zero is rejected here. Callers passing a parameter for
    # which 0 is legal only get the right result when its default is also 0;
    # confirm that holds for every caller.
    if float_value <= 0:
        return default_value
    return float_value
elif modelCategory == "Classification" and "Random Forest" in modelName:
if trial is not None: # Optuna mode
# All suggestions must be independent
params = {
'n_estimators': trial.suggest_int('n_estimators', 50, 300),
'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
'max_depth': trial.suggest_int('max_depth', 1, 64),
'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
'oob_score': trial.suggest_categorical('oob_score', [True, False]),
'random_state': trial.suggest_int('random_state', 1, 1000),
'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),
'min_impurity_decrease': trial.suggest_float('min_impurity_decrease', 0.0, 0.5),
'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample', None]),
'n_jobs': -1 # Fixed value for parallelization
}
return params
else: # Manual mode
# Default hyperparameters
default_params = {
'n_estimators': 100,
'criterion': 'gini',
'max_depth': None,
'min_samples_split': 2,
'min_samples_leaf': 1,
'min_weight_fraction_leaf': 0.0,
'max_features': 'sqrt',
'max_leaf_nodes': None,
'min_impurity_decrease': 0.0,
'bootstrap': True,
'oob_score': False,
'random_state': 42,
'warm_start': False,
'class_weight': 'balanced',
'n_jobs': -1
}
hyperparams = default_params.copy()
if parameters:
for key, value in parameters.items():
if value is not None and value != "null":
param_value = value.get('value') if isinstance(value, dict) else value
# Special handling for null/None values
if param_value == "null" or param_value == "None":
hyperparams[key] = None
continue
if parameters:
for key, value in parameters.items():
if value is not None and value != "null":
param_value = value.get('value') if isinstance(value, dict) else value
# Special handling for null/None values
if param_value == "null":
hyperparams[key] = None
continue
if param_value is not None:
if key in ['bootstrap', 'oob_score', 'warm_start']:
hyperparams[key] = convert_to_bool(param_value)
elif key in ['n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_leaf_nodes', 'n_jobs']:
hyperparams[key] = convert_to_int(param_value) #if param_value != "null" else None
elif key in ['min_weight_fraction_leaf', 'min_impurity_decrease']:
hyperparams[key] = convert_to_float(param_value, key, default_params[key])
elif key == 'random_state':
hyperparams[key] = None if param_value == "null" else convert_to_int(param_value)
else:
hyperparams[key] = param_value
# Validate parameters
if hyperparams['n_estimators'] is not None and hyperparams['n_estimators'] < 1:
raise ValueError("n_estimators must be greater than 0")
if hyperparams['max_depth'] is not None and hyperparams['max_depth'] < 1:
raise ValueError("max_depth must be greater than 0")
if hyperparams['min_samples_split'] < 2:
raise ValueError("min_samples_split must be greater than or equal to 2")
if hyperparams['min_samples_leaf'] < 1:
raise ValueError("min_samples_leaf must be greater than or equal to 1")
if not 0.0 <= hyperparams['min_weight_fraction_leaf'] <= 0.5:
raise ValueError("min_weight_fraction_leaf must be between 0.0 and 0.5")
if hyperparams['criterion'] not in ['gini', 'entropy']:
raise ValueError("criterion must be 'gini' or 'entropy'")
if hyperparams['max_features'] not in [None, 'sqrt', 'log2'] and not isinstance(hyperparams['max_features'], (int, float)):
raise ValueError("max_features must be None, 'sqrt', 'log2', int or float")
# print("THE CHOSEN Random Forest HPs areeeee:", hyperparams)
return hyperparams
I'm explicitly converting each hyperparameter to its actual data type, but when I check the data types of the hyperparameters (HPs) received from the Postman payload, all the values show up as strings. Can someone help me figure out the issue?
Here's my payload:
"parameters":
{
"key": "n_estimators",
"selectedValue": 100,
"_id": "67a608e3a069a0fdf9096468"
},
{
"key": "criterion",
"selectedValue": "gini",
"_id": "67a608e3a069a0fdf9096469"
},
{
"key": "max_depth",
"selectedValue": "None",
"_id": "67a608e3a069a0fdf909646a"
},
{
"key": "min_samples_split",
"selectedValue": 2,
"_id": "67a608e3a069a0fdf909646b"
},
{
"key": "min_samples_leaf",
"selectedValue": 1,
"_id": "67a608e3a069a0fdf909646c"
},
{
"key": "min_weight_fraction_leaf",
"selectedValue": 0,
"_id": "67a608e3a069a0fdf909646d"
},
{
"key": "max_features",
"selectedValue": "sqrt",
"_id": "67a608e3a069a0fdf909646e"
},
{
"key": "max_leaf_nodes",
"selectedValue": "None",
"_id": "67a608e3a069a0fdf909646f"
},
{
"key": "min_impurity_decrease",
"selectedValue": 0,
"_id": "67a608e3a069a0fdf9096470"
},
{
"key": "bootstrap",
"selectedValue": "True",
"_id": "67a608e3a069a0fdf9096471"
},
{
"key": "oob_score",
"selectedValue": "False",
"_id": "67a608e3a069a0fdf9096472"
},
{
"key": "n_jobs",
"selectedValue": -1,
"_id": "67a608e3a069a0fdf9096473"
},
{
"key": "random_state",
"selectedValue": 42,
"_id": "67a608e3a069a0fdf9096474"
},
{
"key": "warm_start",
"selectedValue": "False",
"_id": "67a608e3a069a0fdf9096475"
},
{
"key": "class_weight",
"selectedValue": "balanced",
"_id": "67a608e3a069a0fdf9096476"
}
Output of the print statements:
model name isssssssssssssssssss Random Forest v1
Parameter Keyyyyyy: n_estimators
Parameter Valueeeee: 100
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: criterion
Parameter Valueeeee: gini
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: max_depth
Parameter Valueeeee: None
Parameter Value Typeeee: <class 'str'>...
I wrote the following FastAPI code to train a classification model. However, all hyperparameters are being received as strings, even when they should be integers, floats, or booleans.
FastAPI code:
class Parameter(BaseModel):
    """One hyperparameter entry of a model in the tuning request."""

    # Hyperparameter name, e.g. "n_estimators".
    key: str
    # Value chosen by the client; may arrive as a string, number or boolean.
    selectedValue: Union[str, int, float, bool]

    class Config:
        # Pydantic v1 tries Union members left to right and coerces to the
        # first one that accepts the input. With `str` listed first, the JSON
        # number 100 is stored as "100". smart_union makes validation prefer
        # the member whose type matches the input exactly, so 100 stays an int.
        # NOTE(review): assumes Pydantic v1 (>= 1.9). Pydantic v2 resolves
        # unions this way by default and no longer uses this key; confirm the
        # installed version.
        smart_union = True


class Model(BaseModel):
    """A model selected for tuning, with its optional hyperparameter list."""

    modelCategory: str
    modelName: str
    # None when the client sends no hyperparameters for this model.
    parameters: Optional[List[Parameter]] = None


class HyperparameterTuningRequest(BaseModel):
    """Request body accepted by the hyperparameter tuning endpoint."""

    models: List[Model]
@app.post("/tune_hyperparameters/")
def hyperparameter_tuning(request: HyperparameterTuningRequest):
    """Print the key, value and Python type of every submitted hyperparameter."""
    for entry in request.models:
        # Models sent without parameters (None or an empty list) print nothing.
        for hp in entry.parameters or ():
            print(f"Parameter Key: {hp.key}")
            print(f"Parameter Value: {hp.selectedValue}")
            # This is the line reported to always show <class 'str'>.
            print(f"Parameter Type: {type(hp.selectedValue)}")
classification code:
#Boolean conversion
def convert_to_bool(value):
    """Coerce a loosely typed hyperparameter value to a bool.

    Args:
        value: A bool, string or integer as received from the request.

    Returns:
        ``value`` itself if it is already a bool. For strings, True only when
        the text is "true" (case-insensitive, surrounding whitespace ignored).
        For integers, their truthiness. False for any other type.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # Strip first so padded input such as " True " is not read as False.
        return value.strip().lower() == 'true'
    if isinstance(value, int):
        return bool(value)
    return False
def convert_to_float(value, param_name, default_value):
    """Parse ``value`` as a float hyperparameter, falling back to a default.

    Args:
        value: Raw value (string or number) to convert.
        param_name: Hyperparameter name; ``'l1_ratio'`` is range-checked
            against [0, 1], every other name must be strictly positive.
        default_value: Returned whenever ``value`` cannot be used.

    Returns:
        The parsed float, or ``default_value`` if ``value`` is not numeric,
        is NaN or infinite, or falls outside the accepted range.
    """
    import math  # local import: keeps the function self-contained

    try:
        float_value = float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return default_value

    # float() accepts "nan" and "inf"; neither is a usable hyperparameter.
    if not math.isfinite(float_value):
        return default_value

    if param_name == 'l1_ratio':
        return float_value if 0 <= float_value <= 1 else default_value

    # NOTE(review): zero is rejected here. Callers passing a parameter for
    # which 0 is legal only get the right result when its default is also 0;
    # confirm that holds for every caller.
    if float_value <= 0:
        return default_value
    return float_value
elif modelCategory == "Classification" and "Random Forest" in modelName:
if trial is not None: # Optuna mode
# All suggestions must be independent
params = {
'n_estimators': trial.suggest_int('n_estimators', 50, 300),
'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
'max_depth': trial.suggest_int('max_depth', 1, 64),
'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
'oob_score': trial.suggest_categorical('oob_score', [True, False]),
'random_state': trial.suggest_int('random_state', 1, 1000),
'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),
'min_impurity_decrease': trial.suggest_float('min_impurity_decrease', 0.0, 0.5),
'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample', None]),
'n_jobs': -1 # Fixed value for parallelization
}
return params
else: # Manual mode
# Default hyperparameters
default_params = {
'n_estimators': 100,
'criterion': 'gini',
'max_depth': None,
'min_samples_split': 2,
'min_samples_leaf': 1,
'min_weight_fraction_leaf': 0.0,
'max_features': 'sqrt',
'max_leaf_nodes': None,
'min_impurity_decrease': 0.0,
'bootstrap': True,
'oob_score': False,
'random_state': 42,
'warm_start': False,
'class_weight': 'balanced',
'n_jobs': -1
}
hyperparams = default_params.copy()
if parameters:
for key, value in parameters.items():
if value is not None and value != "null":
param_value = value.get('value') if isinstance(value, dict) else value
# Special handling for null/None values
if param_value == "null" or param_value == "None":
hyperparams[key] = None
continue
if parameters:
for key, value in parameters.items():
if value is not None and value != "null":
param_value = value.get('value') if isinstance(value, dict) else value
# Special handling for null/None values
if param_value == "null":
hyperparams[key] = None
continue
if param_value is not None:
if key in ['bootstrap', 'oob_score', 'warm_start']:
hyperparams[key] = convert_to_bool(param_value)
elif key in ['n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_leaf_nodes', 'n_jobs']:
hyperparams[key] = convert_to_int(param_value) #if param_value != "null" else None
elif key in ['min_weight_fraction_leaf', 'min_impurity_decrease']:
hyperparams[key] = convert_to_float(param_value, key, default_params[key])
elif key == 'random_state':
hyperparams[key] = None if param_value == "null" else convert_to_int(param_value)
else:
hyperparams[key] = param_value
# Validate parameters
if hyperparams['n_estimators'] is not None and hyperparams['n_estimators'] < 1:
raise ValueError("n_estimators must be greater than 0")
if hyperparams['max_depth'] is not None and hyperparams['max_depth'] < 1:
raise ValueError("max_depth must be greater than 0")
if hyperparams['min_samples_split'] < 2:
raise ValueError("min_samples_split must be greater than or equal to 2")
if hyperparams['min_samples_leaf'] < 1:
raise ValueError("min_samples_leaf must be greater than or equal to 1")
if not 0.0 <= hyperparams['min_weight_fraction_leaf'] <= 0.5:
raise ValueError("min_weight_fraction_leaf must be between 0.0 and 0.5")
if hyperparams['criterion'] not in ['gini', 'entropy']:
raise ValueError("criterion must be 'gini' or 'entropy'")
if hyperparams['max_features'] not in [None, 'sqrt', 'log2'] and not isinstance(hyperparams['max_features'], (int, float)):
raise ValueError("max_features must be None, 'sqrt', 'log2', int or float")
# print("THE CHOSEN Random Forest HPs areeeee:", hyperparams)
return hyperparams
I'm explicitly converting each hyperparameter to its actual data type, but when I check the data types of the hyperparameters (HPs) received from the Postman payload, all the values show up as strings. Can someone help me figure out the issue?
Here's my payload:
"parameters":
{
"key": "n_estimators",
"selectedValue": 100,
"_id": "67a608e3a069a0fdf9096468"
},
{
"key": "criterion",
"selectedValue": "gini",
"_id": "67a608e3a069a0fdf9096469"
},
{
"key": "max_depth",
"selectedValue": "None",
"_id": "67a608e3a069a0fdf909646a"
},
{
"key": "min_samples_split",
"selectedValue": 2,
"_id": "67a608e3a069a0fdf909646b"
},
{
"key": "min_samples_leaf",
"selectedValue": 1,
"_id": "67a608e3a069a0fdf909646c"
},
{
"key": "min_weight_fraction_leaf",
"selectedValue": 0,
"_id": "67a608e3a069a0fdf909646d"
},
{
"key": "max_features",
"selectedValue": "sqrt",
"_id": "67a608e3a069a0fdf909646e"
},
{
"key": "max_leaf_nodes",
"selectedValue": "None",
"_id": "67a608e3a069a0fdf909646f"
},
{
"key": "min_impurity_decrease",
"selectedValue": 0,
"_id": "67a608e3a069a0fdf9096470"
},
{
"key": "bootstrap",
"selectedValue": "True",
"_id": "67a608e3a069a0fdf9096471"
},
{
"key": "oob_score",
"selectedValue": "False",
"_id": "67a608e3a069a0fdf9096472"
},
{
"key": "n_jobs",
"selectedValue": -1,
"_id": "67a608e3a069a0fdf9096473"
},
{
"key": "random_state",
"selectedValue": 42,
"_id": "67a608e3a069a0fdf9096474"
},
{
"key": "warm_start",
"selectedValue": "False",
"_id": "67a608e3a069a0fdf9096475"
},
{
"key": "class_weight",
"selectedValue": "balanced",
"_id": "67a608e3a069a0fdf9096476"
}
Output of the print statements:
model name isssssssssssssssssss Random Forest v1
Parameter Keyyyyyy: n_estimators
Parameter Valueeeee: 100
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: criterion
Parameter Valueeeee: gini
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: max_depth
Parameter Valueeeee: None
Parameter Value Typeeee: <class 'str'>...
Share
Improve this question
edited Feb 17 at 9:29
Apoorva
asked Feb 16 at 21:18
ApoorvaApoorva
1151 silver badge9 bronze badges
1
- I have been facing this issue with multiple models. So gave the output from one of those models. Updated my code for clarity. – Apoorva Commented Feb 17 at 9:32
1 Answer
Reset to default
0
I fixed the issue by explicitly converting the parameters in the FastAPI code:
# Convert selectedValue to the correct dtype
def convert_type(self):
    """Return ``self.selectedValue`` converted to the Python type it spells.

    Non-string values are returned unchanged. For strings, surrounding
    whitespace is ignored while parsing:

    * "true" / "false" (any case) become ``True`` / ``False``;
    * integer text becomes ``int``;
    * any other finite number, including scientific notation such as
      "1e-3", becomes ``float``;
    * everything else (e.g. "gini", "None", "nan") is returned as the
      original, unmodified string.
    """
    import math  # local import: keeps the method self-contained

    val = self.selectedValue
    if not isinstance(val, str):
        return val  # already typed; nothing to parse

    text = val.strip()
    lowered = text.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    # Try int first so "100" stays an int, then fall back to float. Looking
    # for a "." would miss exponent forms like "1e-3".
    try:
        return int(text)
    except ValueError:
        pass
    try:
        number = float(text)
    except ValueError:
        return val
    # "nan" / "inf" parse as floats but are not meaningful values here.
    return number if math.isfinite(number) else val
...
for model in request.models:
try:
for param in model.parameters:
param.selectedValue = param.convert_type() # Apply the explicit conversion
# Convert parameter values to correct dtypes
model_params = {
param.key: param.convert_type() for param in model.parameters
} if model.parameters else {}
print("model name isssssssssssssssssss",model.modelName)
if model.parameters:
for i in model.parameters:
print(f"Parameter Keyyyyyy: {i.key}")
print(f"Parameter Valueeeee: {i.selectedValue}")
print(f"Parameter Value Typeeee: {type(i.selectedValue)}")
output
model name isssssssssssssssssss Random Forest v1
Parameter Keyyyyyy: n_estimators
Parameter Valueeeee: 100
Parameter Value Typeeee: <class 'int'>
Parameter Keyyyyyy: criterion
Parameter Valueeeee: gini
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: max_depth
Parameter Valueeeee: None
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: min_samples_split
Parameter Valueeeee: 2
Parameter Value Typeeee: <class 'int'>
Parameter Keyyyyyy: min_samples_leaf
Parameter Valueeeee: 1
Parameter Value Typeeee: <class 'int'>
....