def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=dict(n_splits=10, shuffle=True, random_state=32),
        runs=1,
    )

    optimizer = RandomForestOptimization(iterations=100, read_experiments=True)
    optimizer.set_experiment_guidelines(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type=Categorical(['gbdt', 'dart']),
            num_leaves=Integer(5, 20),
            max_depth=-1,
            min_child_samples=5,
            subsample=0.5,
        ),
    )
    optimizer.go()
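# A minimal sketch of the imports the example above presumes. The HyperparameterHunter
# module paths here are assumptions and may differ between versions.
from hyperparameter_hunter import Environment, Integer, Categorical
from hyperparameter_hunter import RandomForestOptimization  # assumed top-level export
from hyperparameter_hunter.utils.learning_utils import get_breast_cancer_data  # assumed module path
from sklearn.model_selection import StratifiedKFold
from lightgbm import LGBMClassifier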
def opt_xgb_0():
    optimizer = RandomForestOptimization(iterations=2, random_state=1337)
    optimizer.set_experiment_guidelines(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()
    yield optimizer
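# A minimal sketch of how the generator above might be driven. An active `Environment`
# is required before `optimizer.go()` can run; the dataset, settings, and the
# `get_breast_cancer_data` import path below are assumptions, not part of the original.
from hyperparameter_hunter import Environment
from hyperparameter_hunter.utils.learning_utils import get_breast_cancer_data  # assumed module path
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier  # required by `opt_xgb_0` above

env = Environment(
    train_dataset=get_breast_cancer_data(),
    root_results_path="HyperparameterHunterAssets",
    target_column="diagnosis",
    metrics_map=["roc_auc_score"],
    cross_validation_type=StratifiedKFold,
    cross_validation_params=dict(n_splits=3, shuffle=True, random_state=32),
)
optimizer = next(opt_xgb_0())  # executes the generator body, including `optimizer.go()`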
experiment_4 = CVExperiment(DecisionTreeClassifier, {})
experiment_5 = CVExperiment(RandomForestClassifier, {})
experiment_6 = CVExperiment(AdaBoostClassifier, {})
experiment_7 = CVExperiment(GradientBoostingClassifier, {})
experiment_8 = CVExperiment(GaussianNB, {})
experiment_9 = CVExperiment(LinearDiscriminantAnalysis, {})
experiment_10 = CVExperiment(QuadraticDiscriminantAnalysis, {})
experiment_11 = CVExperiment(MLPClassifier, {})

# Of course, SKLearn has many more algorithms than those shown here, but I think you get the idea.
# Notice that in all the above experiments, we gave `CVExperiment` `model_init_params={}`.
# Passing an empty dict tells it to use the default hyperparameters for the `model_initializer`, which it'll figure out on its own.

#################### 2. Hyperparameter Optimization ####################
# We're just going to do optimization on one of the algorithms used above (`AdaBoostClassifier`);
# ... although HyperparameterHunter can certainly do consecutive optimization rounds.
# Notice below that `optimizer` correctly identifies `experiment_6` as being the only saved
# ... experiment it can learn from, because it's optimizing `AdaBoostClassifier`.
optimizer = RandomForestOptimization(iterations=12, random_state=42)
optimizer.set_experiment_guidelines(
    model_initializer=AdaBoostClassifier,
    model_init_params=dict(
        n_estimators=Integer(25, 100),
        learning_rate=Real(0.5, 1.0),
        algorithm=Categorical(["SAMME", "SAMME.R"]),
    ),
)
optimizer.go()
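# For reference, the imports the snippet above relies on. The scikit-learn paths are
# standard; the HyperparameterHunter imports are assumed to be top-level exports.
from hyperparameter_hunter import CVExperiment, Real, Integer, Categorical
from hyperparameter_hunter import RandomForestOptimization  # assumed top-level export
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier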
    cross_validation_params=dict(n_splits=5, random_state=32),
)

# Now that HyperparameterHunter has an active `Environment`, we can do two things:

#################### 1. Perform Experiments ####################
experiment = CVExperiment(
    model_initializer=LGBMClassifier,
    model_init_params=dict(boosting_type="gbdt", num_leaves=31, max_depth=-1, subsample=0.5),
)

# And/or...
#################### 2. Hyperparameter Optimization ####################
optimizer = RandomForestOptimization(iterations=10, random_state=32)
optimizer.set_experiment_guidelines(
    model_initializer=LGBMClassifier,
    model_init_params=dict(
        boosting_type=Categorical(["gbdt", "dart"]),
        num_leaves=Integer(10, 40),
        max_depth=-1,
        subsample=Real(0.3, 0.7),
    ),
)
optimizer.go()

# Notice that `optimizer` recognizes that our earlier `experiment`'s hyperparameters fit inside the
# ... search space/guidelines set for `optimizer`.
# Then, when optimization starts, it automatically learns from `experiment`'s results.
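# The snippet above picks up partway through an `Environment` call. A hypothetical,
# self-contained sketch of the kind of setup it presumably continues from (the dataset,
# metrics, and CV type below are assumptions, not taken from the original):
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold
from hyperparameter_hunter import Environment  # assumed top-level export

data = load_breast_cancer()
train_df = pd.DataFrame(data.data, columns=data.feature_names)
train_df["target"] = data.target

env = Environment(
    train_dataset=train_df,
    root_results_path="HyperparameterHunterAssets",
    target_column="target",
    metrics_map=["roc_auc_score"],
    cross_validation_type=StratifiedKFold,
    cross_validation_params=dict(n_splits=5, random_state=32),
)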
# Now that HyperparameterHunter has an active `Environment`, we can do two things:

#################### 1. Perform Experiments ####################
experiment = CVExperiment(
    model_initializer=LGBMClassifier,
    model_init_params=dict(boosting_type="gbdt", num_leaves=31, max_depth=-1, subsample=0.5),
    model_extra_params=dict(
        fit=dict(
            feature_name=train_df.columns.values[:-1].tolist(),
            categorical_feature=train_df.columns.values[11:-1].tolist(),
        )
    ),
)

# And/or...
#################### 2. Hyperparameter Optimization ####################
optimizer = RandomForestOptimization(iterations=10, random_state=32)
optimizer.set_experiment_guidelines(
    model_initializer=LGBMClassifier,
    model_init_params=dict(
        boosting_type=Categorical(["gbdt", "dart"]),
        num_leaves=Integer(10, 40),
        max_depth=-1,
        subsample=Real(0.3, 0.7),
    ),
    model_extra_params=dict(
        fit=dict(
            feature_name=train_df.columns.values[:-1].tolist(),
            categorical_feature=train_df.columns.values[11:-1].tolist(),
        )
    ),
)
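# `train_df` is not defined in the snippet above. A hypothetical stand-in that matches the
# indexing pattern used there (the last column is the target, so `columns[:-1]` are the features;
# columns from index 11 onward are the ones passed to LightGBM as `categorical_feature`).
# Which columns the original example treated as categorical is an assumption.
import pandas as pd
from sklearn.datasets import load_breast_cancer

_data = load_breast_cancer()
train_df = pd.DataFrame(_data.data, columns=[name.replace(" ", "_") for name in _data.feature_names])
train_df["diagnosis"] = _data.target  # target as the final column

# As in the earlier snippets, optimization would presumably be started with `optimizer.go()`.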