最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

python - All Hyperparameters being fetched Strings instead of their actual data types - Stack Overflow

programmeradmin3浏览0评论

I wrote the following FastAPI code to train a classification model. However, all hyperparameters are being received as strings, even when they should be integers, floats, or booleans.

Fast API Code:

class Parameter(BaseModel):
    # One hyperparameter sent by the client: its name and the chosen value.
    key: str
    # May legitimately be a string, an integer, a float or a boolean.
    selectedValue: Union[str, int, float, bool]

    class Config:
        # By default pydantic v1 tries Union members left to right and keeps
        # the first one that validates. `str` is listed first and accepts
        # numbers by stringifying them, so 100 arrived as "100".
        # smart_union makes pydantic prefer an exact type match before any
        # coercion, so JSON numbers and booleans keep their types.
        # NOTE(review): requires pydantic >= 1.9; pydantic v2 already uses
        # smart-mode unions by default and no longer accepts this key.
        smart_union = True

class Model(BaseModel):
    # One model entry inside a hyperparameter-tuning request.
    modelCategory: str  # category label, e.g. "Classification"
    modelName: str  # display name, e.g. "Random Forest v1"
    # Hyperparameters chosen by the client; None when the field is omitted.
    parameters: Optional[List[Parameter]] = None

class HyperparameterTuningRequest(BaseModel):
    # Request body of POST /tune_hyperparameters/: the models to process.
    models: List[Model]

@app.post("/tune_hyperparameters/")
def hyperparameter_tuning(request: HyperparameterTuningRequest):
    # Debug aid: print every received hyperparameter together with the Python
    # type it was parsed into. Models without parameters are skipped.
    for model in request.models:
        for param in model.parameters or []:
            received = param.selectedValue
            print(f"Parameter Key: {param.key}")
            print(f"Parameter Value: {received}")
            print(f"Parameter Type: {type(received)}")  # Always prints <class 'str'>

classification code:

#Boolean conversion

def convert_to_bool(value):
    """Coerce a loosely typed hyperparameter value to a ``bool``.

    Booleans are returned unchanged. A string is ``True`` only when it spells
    "true" (case-insensitive, surrounding whitespace ignored). Integers and
    floats follow normal truthiness. Any other type yields ``False``.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # Strip first so padded input such as " True " is not read as False.
        return value.strip().lower() == 'true'
    if isinstance(value, (int, float)):
        # Floats are included so that 1.0 is not silently treated as False.
        return bool(value)
    return False

def convert_to_float(value, param_name, default_value):
    """Parse ``value`` as a float, falling back to ``default_value``.

    Args:
        value: Raw value to convert (number or numeric string).
        param_name: Hyperparameter name; ``'l1_ratio'`` is range-checked
            against [0, 1], every other name must be strictly positive.
        default_value: Returned when ``value`` cannot be parsed, is NaN, or
            falls outside the accepted range.

    Returns:
        The parsed float, or ``default_value``.
    """
    import math  # imported locally so the helper is self-contained

    try:
        float_value = float(value)
    except (ValueError, TypeError):
        return default_value

    # "nan" parses successfully, and every comparison against NaN is False,
    # so without this guard it would slip past the `<= 0` check below.
    if math.isnan(float_value):
        return default_value

    if param_name == 'l1_ratio':
        return float_value if 0 <= float_value <= 1 else default_value

    # Zero and negative values are replaced by the default for all other names.
    if float_value <= 0:
        return default_value
    return float_value

elif modelCategory == "Classification" and "Random Forest" in modelName:
        if trial is not None:  # Optuna mode
            # All suggestions must be independent
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 50, 300),
                'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
                'max_depth': trial.suggest_int('max_depth', 1, 64),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
                'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
                'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
                'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
                'oob_score': trial.suggest_categorical('oob_score', [True, False]),
                'random_state': trial.suggest_int('random_state', 1, 1000),
                'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),
                'min_impurity_decrease': trial.suggest_float('min_impurity_decrease', 0.0, 0.5),
                'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample', None]),
                'n_jobs': -1  # Fixed value for parallelization
            }
            
            return params
        
        else:  # Manual mode
            # Default hyperparameters
            default_params = {
                'n_estimators': 100,
                'criterion': 'gini',
                'max_depth': None,
                'min_samples_split': 2,
                'min_samples_leaf': 1,
                'min_weight_fraction_leaf': 0.0,
                'max_features': 'sqrt',
                'max_leaf_nodes': None,
                'min_impurity_decrease': 0.0,
                'bootstrap': True,
                'oob_score': False,
                'random_state': 42,
                'warm_start': False,
                'class_weight': 'balanced',
                'n_jobs': -1
            }

            hyperparams = default_params.copy()
            
            if parameters:
                # Overlay caller-supplied values on the defaults, converting
                # each one according to its key.
                bool_keys = ('bootstrap', 'oob_score', 'warm_start')
                int_keys = ('n_estimators', 'max_depth', 'min_samples_split',
                            'min_samples_leaf', 'max_leaf_nodes', 'n_jobs',
                            'random_state')
                float_keys = ('min_weight_fraction_leaf', 'min_impurity_decrease')

                # A single pass is required. The previous version contained a
                # pasted copy of this loop nested inside itself; it rebound
                # key/value/param_value, so each outer iteration ended up
                # converting only the last entry of `parameters`.
                for key, value in parameters.items():
                    # A bare None / "null" entry leaves the default untouched.
                    if value is None or value == "null":
                        continue

                    # Entries are either bare values or wrapped as {'value': ...}.
                    param_value = value.get('value') if isinstance(value, dict) else value

                    # The strings "null" / "None" explicitly select Python None.
                    if param_value == "null" or param_value == "None":
                        hyperparams[key] = None
                        continue
                    if param_value is None:
                        continue

                    if key in bool_keys:
                        hyperparams[key] = convert_to_bool(param_value)
                    elif key in int_keys:
                        hyperparams[key] = convert_to_int(param_value)
                    elif key in float_keys:
                        hyperparams[key] = convert_to_float(param_value, key, default_params[key])
                    else:
                        # String-valued options (criterion, max_features, ...) pass through.
                        hyperparams[key] = param_value

            # Validate parameters
            if hyperparams['n_estimators'] is not None and hyperparams['n_estimators'] < 1:
                raise ValueError("n_estimators must be greater than 0")
            
            if hyperparams['max_depth'] is not None and hyperparams['max_depth'] < 1:
                raise ValueError("max_depth must be greater than 0")
            
            if hyperparams['min_samples_split'] < 2:
                raise ValueError("min_samples_split must be greater than or equal to 2")
            
            if hyperparams['min_samples_leaf'] < 1:
                raise ValueError("min_samples_leaf must be greater than or equal to 1")
            
            if not 0.0 <= hyperparams['min_weight_fraction_leaf'] <= 0.5:
                raise ValueError("min_weight_fraction_leaf must be between 0.0 and 0.5")
            
            if hyperparams['criterion'] not in ['gini', 'entropy']:
                raise ValueError("criterion must be 'gini' or 'entropy'")
            
            if hyperparams['max_features'] not in [None, 'sqrt', 'log2'] and not isinstance(hyperparams['max_features'], (int, float)):
                raise ValueError("max_features must be None, 'sqrt', 'log2', int or float")

            # print("THE CHOSEN Random Forest HPs areeeee:", hyperparams)
            return hyperparams

I'm explicitly converting each hyperparameter to its actual data type, but when I check the data types of the hyperparameters (HPs) fetched from the Postman payload, all of the values show up as strings. Can someone help me figure out the issue?

Here's my payload:

"parameters": 
{
                            "key": "n_estimators",
                            "selectedValue": 100,
                            "_id": "67a608e3a069a0fdf9096468"
                        },
                        {
                            "key": "criterion",
                            "selectedValue": "gini",
                            "_id": "67a608e3a069a0fdf9096469"
                        },
                        {
                            "key": "max_depth",
                            "selectedValue": "None",
                            "_id": "67a608e3a069a0fdf909646a"
                        },
                        {
                            "key": "min_samples_split",
                            "selectedValue": 2,
                            "_id": "67a608e3a069a0fdf909646b"
                        },
                        {
                            "key": "min_samples_leaf",
                            "selectedValue": 1,
                            "_id": "67a608e3a069a0fdf909646c"
                        },
                        {
                            "key": "min_weight_fraction_leaf",
                            "selectedValue": 0,
                            "_id": "67a608e3a069a0fdf909646d"
                        },
                        {
                            "key": "max_features",
                            "selectedValue": "sqrt",
                            "_id": "67a608e3a069a0fdf909646e"
                        },
                        {
                            "key": "max_leaf_nodes",
                            "selectedValue": "None",
                            "_id": "67a608e3a069a0fdf909646f"
                        },
                        {
                            "key": "min_impurity_decrease",
                            "selectedValue": 0,
                            "_id": "67a608e3a069a0fdf9096470"
                        },
                        {
                            "key": "bootstrap",
                            "selectedValue": "True",
                            "_id": "67a608e3a069a0fdf9096471"
                        },
                        {
                            "key": "oob_score",
                            "selectedValue": "False",
                            "_id": "67a608e3a069a0fdf9096472"
                        },
                        {
                            "key": "n_jobs",
                            "selectedValue": -1,
                            "_id": "67a608e3a069a0fdf9096473"
                        },
                        {
                            "key": "random_state",
                            "selectedValue": 42,
                            "_id": "67a608e3a069a0fdf9096474"
                        },
                        {
                            "key": "warm_start",
                            "selectedValue": "False",
                            "_id": "67a608e3a069a0fdf9096475"
                        },
                        {
                            "key": "class_weight",
                            "selectedValue": "balanced",
                            "_id": "67a608e3a069a0fdf9096476"
                        }

Output of the print statements:

model name isssssssssssssssssss Random Forest v1
Parameter Keyyyyyy: n_estimators
Parameter Valueeeee: 100
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: criterion
Parameter Valueeeee: gini
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: max_depth
Parameter Valueeeee: None
Parameter Value Typeeee: <class 'str'>...

I wrote the following FastAPI code to train a classification model. However, all hyperparameters are being received as strings, even when they should be integers, floats, or booleans.

Fast API Code:

class Parameter(BaseModel):
    # One hyperparameter sent by the client: its name and the chosen value.
    key: str
    # May legitimately be a string, an integer, a float or a boolean.
    selectedValue: Union[str, int, float, bool]

    class Config:
        # By default pydantic v1 tries Union members left to right and keeps
        # the first one that validates. `str` is listed first and accepts
        # numbers by stringifying them, so 100 arrived as "100".
        # smart_union makes pydantic prefer an exact type match before any
        # coercion, so JSON numbers and booleans keep their types.
        # NOTE(review): requires pydantic >= 1.9; pydantic v2 already uses
        # smart-mode unions by default and no longer accepts this key.
        smart_union = True

class Model(BaseModel):
    # One model entry inside a hyperparameter-tuning request.
    modelCategory: str  # category label, e.g. "Classification"
    modelName: str  # display name, e.g. "Random Forest v1"
    # Hyperparameters chosen by the client; None when the field is omitted.
    parameters: Optional[List[Parameter]] = None

class HyperparameterTuningRequest(BaseModel):
    # Request body of POST /tune_hyperparameters/: the models to process.
    models: List[Model]

@app.post("/tune_hyperparameters/")
def hyperparameter_tuning(request: HyperparameterTuningRequest):
    # Debug aid: print every received hyperparameter together with the Python
    # type it was parsed into. Models without parameters are skipped.
    for model in request.models:
        for param in model.parameters or []:
            received = param.selectedValue
            print(f"Parameter Key: {param.key}")
            print(f"Parameter Value: {received}")
            print(f"Parameter Type: {type(received)}")  # Always prints <class 'str'>

classification code:

#Boolean conversion

def convert_to_bool(value):
    """Coerce a loosely typed hyperparameter value to a ``bool``.

    Booleans are returned unchanged. A string is ``True`` only when it spells
    "true" (case-insensitive, surrounding whitespace ignored). Integers and
    floats follow normal truthiness. Any other type yields ``False``.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # Strip first so padded input such as " True " is not read as False.
        return value.strip().lower() == 'true'
    if isinstance(value, (int, float)):
        # Floats are included so that 1.0 is not silently treated as False.
        return bool(value)
    return False

def convert_to_float(value, param_name, default_value):
    """Parse ``value`` as a float, falling back to ``default_value``.

    Args:
        value: Raw value to convert (number or numeric string).
        param_name: Hyperparameter name; ``'l1_ratio'`` is range-checked
            against [0, 1], every other name must be strictly positive.
        default_value: Returned when ``value`` cannot be parsed, is NaN, or
            falls outside the accepted range.

    Returns:
        The parsed float, or ``default_value``.
    """
    import math  # imported locally so the helper is self-contained

    try:
        float_value = float(value)
    except (ValueError, TypeError):
        return default_value

    # "nan" parses successfully, and every comparison against NaN is False,
    # so without this guard it would slip past the `<= 0` check below.
    if math.isnan(float_value):
        return default_value

    if param_name == 'l1_ratio':
        return float_value if 0 <= float_value <= 1 else default_value

    # Zero and negative values are replaced by the default for all other names.
    if float_value <= 0:
        return default_value
    return float_value

elif modelCategory == "Classification" and "Random Forest" in modelName:
        if trial is not None:  # Optuna mode
            # All suggestions must be independent
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 50, 300),
                'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
                'max_depth': trial.suggest_int('max_depth', 1, 64),
                'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
                'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
                'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2']),
                'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
                'oob_score': trial.suggest_categorical('oob_score', [True, False]),
                'random_state': trial.suggest_int('random_state', 1, 1000),
                'min_weight_fraction_leaf': trial.suggest_float('min_weight_fraction_leaf', 0.0, 0.5),
                'min_impurity_decrease': trial.suggest_float('min_impurity_decrease', 0.0, 0.5),
                'class_weight': trial.suggest_categorical('class_weight', ['balanced', 'balanced_subsample', None]),
                'n_jobs': -1  # Fixed value for parallelization
            }
            
            return params
        
        else:  # Manual mode
            # Default hyperparameters
            default_params = {
                'n_estimators': 100,
                'criterion': 'gini',
                'max_depth': None,
                'min_samples_split': 2,
                'min_samples_leaf': 1,
                'min_weight_fraction_leaf': 0.0,
                'max_features': 'sqrt',
                'max_leaf_nodes': None,
                'min_impurity_decrease': 0.0,
                'bootstrap': True,
                'oob_score': False,
                'random_state': 42,
                'warm_start': False,
                'class_weight': 'balanced',
                'n_jobs': -1
            }

            hyperparams = default_params.copy()
            
            if parameters:
                # Overlay caller-supplied values on the defaults, converting
                # each one according to its key.
                bool_keys = ('bootstrap', 'oob_score', 'warm_start')
                int_keys = ('n_estimators', 'max_depth', 'min_samples_split',
                            'min_samples_leaf', 'max_leaf_nodes', 'n_jobs',
                            'random_state')
                float_keys = ('min_weight_fraction_leaf', 'min_impurity_decrease')

                # A single pass is required. The previous version contained a
                # pasted copy of this loop nested inside itself; it rebound
                # key/value/param_value, so each outer iteration ended up
                # converting only the last entry of `parameters`.
                for key, value in parameters.items():
                    # A bare None / "null" entry leaves the default untouched.
                    if value is None or value == "null":
                        continue

                    # Entries are either bare values or wrapped as {'value': ...}.
                    param_value = value.get('value') if isinstance(value, dict) else value

                    # The strings "null" / "None" explicitly select Python None.
                    if param_value == "null" or param_value == "None":
                        hyperparams[key] = None
                        continue
                    if param_value is None:
                        continue

                    if key in bool_keys:
                        hyperparams[key] = convert_to_bool(param_value)
                    elif key in int_keys:
                        hyperparams[key] = convert_to_int(param_value)
                    elif key in float_keys:
                        hyperparams[key] = convert_to_float(param_value, key, default_params[key])
                    else:
                        # String-valued options (criterion, max_features, ...) pass through.
                        hyperparams[key] = param_value

            # Validate parameters
            if hyperparams['n_estimators'] is not None and hyperparams['n_estimators'] < 1:
                raise ValueError("n_estimators must be greater than 0")
            
            if hyperparams['max_depth'] is not None and hyperparams['max_depth'] < 1:
                raise ValueError("max_depth must be greater than 0")
            
            if hyperparams['min_samples_split'] < 2:
                raise ValueError("min_samples_split must be greater than or equal to 2")
            
            if hyperparams['min_samples_leaf'] < 1:
                raise ValueError("min_samples_leaf must be greater than or equal to 1")
            
            if not 0.0 <= hyperparams['min_weight_fraction_leaf'] <= 0.5:
                raise ValueError("min_weight_fraction_leaf must be between 0.0 and 0.5")
            
            if hyperparams['criterion'] not in ['gini', 'entropy']:
                raise ValueError("criterion must be 'gini' or 'entropy'")
            
            if hyperparams['max_features'] not in [None, 'sqrt', 'log2'] and not isinstance(hyperparams['max_features'], (int, float)):
                raise ValueError("max_features must be None, 'sqrt', 'log2', int or float")

            # print("THE CHOSEN Random Forest HPs areeeee:", hyperparams)
            return hyperparams

I'm explicitly converting each hyperparameter to its actual data type, but when I check the data types of the hyperparameters (HPs) fetched from the Postman payload, all of the values show up as strings. Can someone help me figure out the issue?

Here's my payload:

"parameters": 
{
                            "key": "n_estimators",
                            "selectedValue": 100,
                            "_id": "67a608e3a069a0fdf9096468"
                        },
                        {
                            "key": "criterion",
                            "selectedValue": "gini",
                            "_id": "67a608e3a069a0fdf9096469"
                        },
                        {
                            "key": "max_depth",
                            "selectedValue": "None",
                            "_id": "67a608e3a069a0fdf909646a"
                        },
                        {
                            "key": "min_samples_split",
                            "selectedValue": 2,
                            "_id": "67a608e3a069a0fdf909646b"
                        },
                        {
                            "key": "min_samples_leaf",
                            "selectedValue": 1,
                            "_id": "67a608e3a069a0fdf909646c"
                        },
                        {
                            "key": "min_weight_fraction_leaf",
                            "selectedValue": 0,
                            "_id": "67a608e3a069a0fdf909646d"
                        },
                        {
                            "key": "max_features",
                            "selectedValue": "sqrt",
                            "_id": "67a608e3a069a0fdf909646e"
                        },
                        {
                            "key": "max_leaf_nodes",
                            "selectedValue": "None",
                            "_id": "67a608e3a069a0fdf909646f"
                        },
                        {
                            "key": "min_impurity_decrease",
                            "selectedValue": 0,
                            "_id": "67a608e3a069a0fdf9096470"
                        },
                        {
                            "key": "bootstrap",
                            "selectedValue": "True",
                            "_id": "67a608e3a069a0fdf9096471"
                        },
                        {
                            "key": "oob_score",
                            "selectedValue": "False",
                            "_id": "67a608e3a069a0fdf9096472"
                        },
                        {
                            "key": "n_jobs",
                            "selectedValue": -1,
                            "_id": "67a608e3a069a0fdf9096473"
                        },
                        {
                            "key": "random_state",
                            "selectedValue": 42,
                            "_id": "67a608e3a069a0fdf9096474"
                        },
                        {
                            "key": "warm_start",
                            "selectedValue": "False",
                            "_id": "67a608e3a069a0fdf9096475"
                        },
                        {
                            "key": "class_weight",
                            "selectedValue": "balanced",
                            "_id": "67a608e3a069a0fdf9096476"
                        }

Output of the print statements:

model name isssssssssssssssssss Random Forest v1
Parameter Keyyyyyy: n_estimators
Parameter Valueeeee: 100
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: criterion
Parameter Valueeeee: gini
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: max_depth
Parameter Valueeeee: None
Parameter Value Typeeee: <class 'str'>...
Share Improve this question edited Feb 17 at 9:29 Apoorva asked Feb 16 at 21:18 ApoorvaApoorva 1151 silver badge9 bronze badges 1
  • I have been facing this issue with multiple models. So gave the output from one of those models. Updated my code for clarity. – Apoorva Commented Feb 17 at 9:32
Add a comment  | 

1 Answer 1

Reset to default 0

I fixed the issue by explicitly converting the parameters in the FastAPI code:

# Convert selectedValue to the correct dtype
    def convert_type(self):
        """Return ``selectedValue`` coerced from a string to bool, int or float.

        Non-string values are returned unchanged. "true"/"false" (any case)
        become booleans. Numeric strings become ``int`` when they parse as
        one, otherwise ``float``. Anything else, including "None", "nan" and
        "inf", is returned as the original string.
        """
        import math  # imported locally so the method is self-contained

        val = self.selectedValue
        if not isinstance(val, str):
            return val  # already carries a concrete type

        lowered = val.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False

        # Try int first, then float. Choosing by the presence of "." alone
        # left exponent notation such as "1e-3" unconverted.
        try:
            return int(val)
        except ValueError:
            pass
        try:
            number = float(val)
        except ValueError:
            return val  # keep as string if it is not numeric

        # Words such as "nan" or "inf" also parse as floats; keep them as
        # strings rather than surprising the caller.
        return number if math.isfinite(number) else val

...

for model in request.models:
                    try:

                        for param in model.parameters:
                            param.selectedValue = param.convert_type()  # Apply the explicit conversion 


                        # Convert parameter values to correct dtypes 
                        model_params = {
                            param.key: param.convert_type() for param in model.parameters
                        } if model.parameters else {}

                        print("model name isssssssssssssssssss",model.modelName)
                        if model.parameters:
                                    for i in model.parameters:
                                        print(f"Parameter Keyyyyyy: {i.key}")
                                        print(f"Parameter Valueeeee: {i.selectedValue}")
                                        print(f"Parameter Value Typeeee: {type(i.selectedValue)}")

output

model name isssssssssssssssssss Random Forest v1
Parameter Keyyyyyy: n_estimators
Parameter Valueeeee: 100
Parameter Value Typeeee: <class 'int'>
Parameter Keyyyyyy: criterion
Parameter Valueeeee: gini
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: max_depth
Parameter Valueeeee: None
Parameter Value Typeeee: <class 'str'>
Parameter Keyyyyyy: min_samples_split
Parameter Valueeeee: 2
Parameter Value Typeeee: <class 'int'>
Parameter Keyyyyyy: min_samples_leaf
Parameter Valueeeee: 1
Parameter Value Typeeee: <class 'int'>
....
发布评论

评论列表(0)

  1. 暂无评论