Пример #1
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for a gradient-boosting regressor.

        ``dataset_properties`` is unused and kept only for interface
        compatibility.  ``alpha`` is conditional: it is active only when
        ``loss`` is 'huber' or 'quantile'.
        """
        cs = ConfigurationSpace()

        loss = CategoricalHyperparameter(
            name="loss", choices=["ls", "lad", "huber", "quantile"],
            default_value="ls")
        learning_rate = UniformFloatHyperparameter(
            name="learning_rate", lower=0.01, upper=1, default_value=0.1,
            log=True)
        n_estimators = UniformIntegerHyperparameter(
            name="n_estimators", lower=50, upper=500, default_value=100)
        max_depth = UniformIntegerHyperparameter(
            name="max_depth", lower=1, upper=10, default_value=3)
        min_samples_split = UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            name="min_weight_fraction_leaf", value=0.)
        subsample = UniformFloatHyperparameter(
            name="subsample", lower=0.01, upper=1.0, default_value=1.0)
        max_features = UniformFloatHyperparameter(
            name="max_features", lower=0.1, upper=1.0, default_value=1)
        # "None" is the literal string expected downstream, not Python None.
        max_leaf_nodes = UnParametrizedHyperparameter(
            name="max_leaf_nodes", value="None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.0)
        alpha = UniformFloatHyperparameter(
            name="alpha", lower=0.75, upper=0.99, default_value=0.9)

        cs.add_hyperparameters([loss, learning_rate, n_estimators, max_depth,
                                min_samples_split, min_samples_leaf,
                                min_weight_fraction_leaf, subsample,
                                max_features, max_leaf_nodes,
                                min_impurity_decrease, alpha])

        # alpha only applies to the robust losses.
        cs.add_condition(InCondition(alpha, loss, ['huber', 'quantile']))
        return cs
Пример #2
0
def get_cs():
    """Build and return the ConfigurationSpace for a forest classifier."""
    criterion = CategoricalHyperparameter(
        name="criterion", choices=["gini", "entropy"], default_value="gini")

    # max_features is an exponent: the estimator uses m**max_features of the
    # m input features.  The default 0.5 yields sqrt(m), matching Geurts'
    # heuristic.
    max_features = UniformFloatHyperparameter(
        name="max_features", lower=0., upper=1., default_value=0.5)

    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        name="min_weight_fraction_leaf", value=0.)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        name='min_impurity_decrease', value=0.0)
    # Boolean passed as a string; decoded by the estimator wrapper.
    bootstrap = CategoricalHyperparameter(
        name="bootstrap", choices=["True", "False"], default_value="True")

    cs = ConfigurationSpace()
    cs.add_hyperparameters([criterion, max_features,
                            max_depth, min_samples_split, min_samples_leaf,
                            min_weight_fraction_leaf, max_leaf_nodes,
                            bootstrap, min_impurity_decrease])
    return cs
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for an extra-trees style estimator.

        ``dataset_properties`` is unused; it is part of the shared
        interface.  ``n_estimators`` is fixed at 100.
        """
        cs = ConfigurationSpace()

        n_estimators = Constant("n_estimators", 100)
        criterion = CategoricalHyperparameter(
            name="criterion", choices=["gini", "entropy"],
            default_value="gini")
        # q=0.05 quantises the range onto a 0.05 grid.
        max_features = UniformFloatHyperparameter(
            name="max_features", lower=0, upper=1, default_value=0.5, q=0.05)

        max_depth = UnParametrizedHyperparameter(
            name="max_depth", value="None")
        max_leaf_nodes = UnParametrizedHyperparameter(
            name="max_leaf_nodes", value="None")

        min_samples_split = UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            'min_weight_fraction_leaf', 0.)
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.)
        bootstrap = CategoricalHyperparameter(
            name="bootstrap", choices=["True", "False"],
            default_value="False")

        cs.add_hyperparameters([n_estimators, criterion, max_features,
                                max_depth, max_leaf_nodes, min_samples_split,
                                min_samples_leaf, min_weight_fraction_leaf,
                                min_impurity_decrease, bootstrap])
        return cs
Пример #4
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for histogram gradient boosting.

        ``dataset_properties`` is unused.  Conditionals:
        ``n_iter_no_change`` is active only when ``early_stop`` is 'train'
        or 'valid'; ``validation_fraction`` only when it is 'valid'.
        """
        loss = Constant("loss", "auto")
        learning_rate = UniformFloatHyperparameter(
            name="learning_rate", lower=0.01, upper=1, default_value=0.1,
            log=True)
        max_iter = UniformIntegerHyperparameter(
            "max_iter", 32, 512, default_value=100)
        min_samples_leaf = UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=200, default_value=20,
            log=True)
        max_depth = UnParametrizedHyperparameter(
            name="max_depth", value="None")
        max_leaf_nodes = UniformIntegerHyperparameter(
            name="max_leaf_nodes", lower=3, upper=2047, default_value=31,
            log=True)
        max_bins = Constant("max_bins", 256)
        l2_regularization = UniformFloatHyperparameter(
            name="l2_regularization", lower=1E-10, upper=1,
            default_value=1E-10, log=True)
        early_stop = CategoricalHyperparameter(
            name="early_stop", choices=["off", "train", "valid"],
            default_value="off")
        tol = UnParametrizedHyperparameter(name="tol", value=1e-7)
        scoring = UnParametrizedHyperparameter(name="scoring", value="loss")
        n_iter_no_change = UniformIntegerHyperparameter(
            name="n_iter_no_change", lower=1, upper=20, default_value=10)
        validation_fraction = UniformFloatHyperparameter(
            name="validation_fraction", lower=0.01, upper=0.4,
            default_value=0.1)

        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            loss, learning_rate, max_iter, min_samples_leaf, max_depth,
            max_leaf_nodes, max_bins, l2_regularization, early_stop, tol,
            scoring, n_iter_no_change, validation_fraction
        ])

        # Early-stopping knobs only make sense when early stopping is on.
        cs.add_conditions([
            InCondition(n_iter_no_change, early_stop, ["valid", "train"]),
            EqualsCondition(validation_fraction, early_stop, "valid"),
        ])

        return cs
Пример #5
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for a regression-tree estimator.

        ``dataset_properties`` is unused.  ``criterion`` is declared without
        an explicit default; ConfigSpace chooses its own default in that
        case.
        """
        cs = ConfigurationSpace()

        criterion = CategoricalHyperparameter(
            "criterion", ['mse', 'friedman_mse', 'mae'])
        max_features = UniformFloatHyperparameter(
            "max_features", 0.1, 1.0, default_value=1)

        max_depth = UnParametrizedHyperparameter(
            name="max_depth", value="None")
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")

        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)

        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="False")

        cs.add_hyperparameters([
            criterion, max_features, max_depth, max_leaf_nodes,
            min_samples_split, min_samples_leaf, min_impurity_decrease,
            bootstrap
        ])
        return cs
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for a gradient-boosting classifier.

        ``dataset_properties`` is unused.  ``n_estimators`` is pinned to a
        constant 100 (it was previously tuned over 50-500).
        """
        cs = ConfigurationSpace()

        loss = Constant("loss", "deviance")
        learning_rate = UniformFloatHyperparameter(
            "learning_rate", 0.01, 1, default_value=0.1, log=True)
        n_estimators = Constant("n_estimators", 100)
        max_depth = UniformIntegerHyperparameter(
            "max_depth", 1, 8, default_value=3)
        criterion = CategoricalHyperparameter(
            'criterion', ['friedman_mse', 'mse'], default_value='mse')
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        subsample = UniformFloatHyperparameter(
            "subsample", 0.01, 1.0, default_value=1.0)
        max_features = UniformFloatHyperparameter(
            "max_features", 0.1, 1.0, default_value=1)
        max_leaf_nodes = UnParametrizedHyperparameter(
            name="max_leaf_nodes", value="None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.0)

        cs.add_hyperparameters([loss, learning_rate, n_estimators, max_depth,
                                criterion, min_samples_split, min_samples_leaf,
                                min_weight_fraction_leaf, subsample,
                                max_features, max_leaf_nodes,
                                min_impurity_decrease])

        return cs
Пример #7
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Return the ConfigurationSpace for a decision tree (SMAC only).

        ``dataset_properties`` is unused.  Only ``optimizer='smac'`` is
        supported; any other value falls through and returns None, exactly
        as the original implicit behaviour.
        """
        if optimizer != 'smac':
            # Only the SMAC space is defined here.
            return None

        cs = ConfigurationSpace()
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")
        max_depth_factor = UniformFloatHyperparameter(
            'max_depth_factor', 0., 2., default_value=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
        max_features = UnParametrizedHyperparameter('max_features', 1.0)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)

        cs.add_hyperparameters([
            criterion, max_features, max_depth_factor, min_samples_split,
            min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
            min_impurity_decrease
        ])
        return cs
Пример #8
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for a decision tree.

        ``dataset_properties`` is unused.  This variant deliberately
        restricts ``criterion`` to 'gini' (the 'entropy' option was
        disabled) and widens ``max_depth`` to (0, 10).
        """
        cs = ConfigurationSpace()

        # 'entropy' was intentionally removed from the choices.
        criterion = CategoricalHyperparameter(
            "criterion", ["gini"], default_value="gini")
        # Upper bound raised from 2.0 to 10.0; per the original (Chinese)
        # note, the previous default depth was too shallow.
        max_depth = UniformFloatHyperparameter(
            'max_depth', 0., 10., default_value=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
        max_features = UnParametrizedHyperparameter('max_features', 1.0)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)

        cs.add_hyperparameters([
            criterion, max_features, max_depth, min_samples_split,
            min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
            min_impurity_decrease
        ])

        return cs
Пример #9
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for an ARD-regression forecaster.

        Alongside the regression hyperparameters, the space carries
        time-series preprocessing knobs: sliding ``Window_size``,
        ``Difference`` and ``tsfresh_feature``.  ``dataset_properties`` is
        unused.
        """
        cs = ConfigurationSpace()

        Window_size = UniformIntegerHyperparameter(
            name="Window_size", lower=5, upper=50, default_value=20)
        Difference = CategoricalHyperparameter(
            name="Difference", choices=["True", "False"],
            default_value="True")
        tsfresh_feature = CategoricalHyperparameter(
            name="tsfresh_feature", choices=["True", "False"],
            default_value="True")

        n_iter = UnParametrizedHyperparameter("n_iter", value=50)
        tol = UniformFloatHyperparameter(
            "tol", 10**-5, 10**-1, default_value=10**-3, log=True)
        # NOTE(review): unlike alpha_2/lambda_1/lambda_2, alpha_1 is sampled
        # on a linear scale (no log=True) — possibly an oversight; confirm.
        alpha_1 = UniformFloatHyperparameter(
            name="alpha_1", lower=10**-10, upper=10**-3,
            default_value=10**-6)
        alpha_2 = UniformFloatHyperparameter(
            name="alpha_2", lower=10**-10, upper=10**-3,
            default_value=10**-6, log=True)
        lambda_1 = UniformFloatHyperparameter(
            name="lambda_1", lower=10**-10, upper=10**-3,
            default_value=10**-6, log=True)
        lambda_2 = UniformFloatHyperparameter(
            name="lambda_2", lower=10**-10, upper=10**-3,
            default_value=10**-6, log=True)
        threshold_lambda = UniformFloatHyperparameter(
            name="threshold_lambda", lower=10**3, upper=10**5,
            default_value=10**4, log=True)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")

        cs.add_hyperparameters([
            n_iter, tol, alpha_1, alpha_2, lambda_1, Difference, lambda_2,
            threshold_lambda, fit_intercept, Window_size, tsfresh_feature
        ])

        return cs
Пример #10
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for a tree model with a float
        depth parameter.

        ``dataset_properties`` is unused.  ``criterion`` carries no
        explicit default and ``max_features`` is pinned to 1.0.
        """
        cs = ConfigurationSpace()

        criterion = CategoricalHyperparameter(
            'criterion', ['mse', 'friedman_mse', 'mae'])
        max_features = Constant('max_features', 1.0)
        max_depth = UniformFloatHyperparameter(
            'max_depth', 0., 2., default_value=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)

        cs.add_hyperparameters([
            criterion, max_features, max_depth, min_samples_split,
            min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
            min_impurity_decrease
        ])

        return cs
Пример #11
0
def get_configspace():
    """Return the configuration space for the current benchmark.

    For the 'hpo' benchmark this is the classifier's own hyperparameter
    space plus a fixed "estimator" marker.  Otherwise, a Bayesian
    feature-engineering optimizer is built on the 'splice' dataset and its
    hyperparameter space is returned instead.  Depends on module-level
    globals (``benchmark``, ``algo_name``, ``_classifiers``, ``metric``,
    etc.).
    """
    if benchmark == 'hpo':
        space = _classifiers[algo_name].get_hyperparameter_search_space()
        space.add_hyperparameter(
            UnParametrizedHyperparameter("estimator", algo_name))
        return space

    # Feature-engineering benchmark: optimise transformations on 'splice'.
    train_data, _test_data = load_train_test_data('splice',
                                                  task_type=MULTICLASS_CLS)
    space = _classifiers[algo_name].get_hyperparameter_search_space()
    space.add_hyperparameter(
        UnParametrizedHyperparameter("estimator", algo_name))
    default_hpo_config = space.get_default_configuration()
    fe_evaluator = ClassificationEvaluator(default_hpo_config,
                                           scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)
    fe_optimizer = BayesianOptimizationOptimizer(task_type=CLASSIFICATION,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)
    return fe_optimizer.hyperparameter_space
Пример #12
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the ConfigurationSpace for a random-forest regressor.

        ``dataset_properties`` is unused; it is part of the shared
        interface.

        Fix: the constructors used the long-deprecated ``default=`` keyword,
        which current ConfigSpace releases no longer accept; they are
        changed to ``default_value=``, matching every other search space in
        this file.
        """
        cs = ConfigurationSpace()
        n_estimators = Constant("n_estimators", 100)
        criterion = Constant("criterion", "mse")
        max_features = UniformFloatHyperparameter(
            "max_features", 0.5, 5, default_value=1)
        max_depth = UnParametrizedHyperparameter("max_depth", "None")
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="True")

        cs.add_hyperparameters([
            n_estimators, criterion, max_features, max_depth,
            min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
            max_leaf_nodes, bootstrap
        ])

        return cs
Пример #13
0
    def evaluate(_config):
        """Evaluate a joint (classifier, feature-engineering) configuration.

        Extracts the chosen classifier arm and its hyperparameters from
        ``_config``, applies the arm's meta-learned feature engineering,
        then scores both the extracted configuration and the arm's stored
        default configuration.

        Returns a pair of tuples
        ``(arm, score, config, transformed_node, time_cost)`` — one for the
        extracted configuration, one for the default — or implicitly None
        when the chosen arm is not in ``first_bandit.arms``.  Relies on the
        enclosing scope's ``first_bandit`` and helpers.
        """
        _config = _config.get_dictionary()
        # print(_config)
        arm = None
        # Rebuild a flat space containing only the classifier's own
        # hyperparameters; keys look like "classifier:<part>:<param>".
        cs = ConfigurationSpace()
        for key in _config:
            key_str = key.split(":")
            if key_str[0] == 'classifier':
                if key_str[1] == '__choice__':
                    # The selected algorithm name identifies the bandit arm.
                    arm = _config[key]
                    cs.add_hyperparameter(UnParametrizedHyperparameter("estimator", _config[key]))
                else:
                    cs.add_hyperparameter(UnParametrizedHyperparameter(key_str[2], _config[key]))

        if arm in first_bandit.arms:
            # Apply the arm's meta-learned feature-engineering pipeline.
            transformed_node = apply_metalearning_fe(first_bandit.sub_bandits[arm].optimizer['fe'], _config)
            # All hyperparameters added above are unparametrized, so this
            # presumably materialises the fixed config deterministically.
            default_config = cs.sample_configuration(1)
            hpo_evaluator = ClassificationEvaluator(None,
                                                    data_node=transformed_node, name='hpo',
                                                    resampling_strategy=first_bandit.eval_type,
                                                    seed=first_bandit.seed)

            # Time and score the extracted configuration.  The 1 - value
            # suggests the evaluator returns a loss/error rate — verify.
            start_time = time.time()
            score1 = 1 - hpo_evaluator(default_config)
            time_cost1 = time.time() - start_time

            # Evaluate the default config
            start_time = time.time()
            score2 = 1 - hpo_evaluator(first_bandit.sub_bandits[arm].default_config)
            time_cost2 = time.time() - start_time
            # Record the better of the two scores on the transformed node.
            transformed_node.score2 = max(score1, score2)

            return (arm, score1, default_config, transformed_node, time_cost1), (
                arm, score2, first_bandit.sub_bandits[arm].default_config, transformed_node, time_cost2)
    def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
        """Return the search space for a balanced random forest.

        ``dataset_properties`` is unused.  ``optimizer='smac'`` yields a
        ConfigurationSpace; ``optimizer='tpe'`` yields a hyperopt space
        dict; any other value implicitly returns None.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            n_estimators = Constant("n_estimators", 100)
            criterion = CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini")
            # max_features is an exponent on the feature count m
            # (m**max_features); the 0.5 default gives sqrt(m), which
            # matches Geurts' heuristic.
            max_features = UniformFloatHyperparameter(
                "max_features", 0., 1., default_value=0.5)
            min_samples_split = UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2)
            min_samples_leaf = UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1)
            min_weight_fraction_leaf = UnParametrizedHyperparameter(
                "min_weight_fraction_leaf", 0.)
            min_impurity_decrease = UnParametrizedHyperparameter(
                'min_impurity_decrease', 0.0)
            bootstrap = CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="True")
            sampling_strategy = CategoricalHyperparameter(
                name="sampling_strategy",
                choices=["majority", "not minority", "not majority", "all"],
                default_value="not minority")
            replacement = CategoricalHyperparameter(
                "replacement", ["True", "False"], default_value="False")

            cs.add_hyperparameters([n_estimators, criterion, max_features,
                                    min_samples_split, min_samples_leaf,
                                    min_weight_fraction_leaf,
                                    bootstrap, min_impurity_decrease,
                                    sampling_strategy, replacement])
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            space = {'n_estimators': hp.choice('bal_rf_n_estimators', [100]),
                     'criterion': hp.choice('bal_rf_criterion', ["gini", "entropy"]),
                     'max_features': hp.uniform('bal_rf_max_features', 0, 1),
                     'min_samples_split': hp.randint('bal_rf_min_samples_split', 19) + 2,
                     'min_samples_leaf': hp.randint('bal_rf_min_samples_leaf', 20) + 1,
                     'min_weight_fraction_leaf': hp.choice('bal_rf_min_weight_fraction_leaf', [0]),
                     'min_impurity_decrease': hp.choice('bal_rf_min_impurity_decrease', [0]),
                     'bootstrap': hp.choice('bal_rf_bootstrap', ["True", "False"]),
                     'sampling_strategy': hp.choice('bal_rf_sampling_strategy',
                                                    ["majority", "not minority", "not majority", "all"]),
                     'replacement': hp.choice('bal_rf_replacement', ["True", "False"]),
                     }

            # NOTE(review): init_trial is built but never used or returned;
            # retained verbatim to preserve the original behaviour.
            init_trial = {'n_estimators': 100,
                          'criterion': "gini",
                          'max_features': 0.5,
                          'min_samples_split': 2,
                          'min_samples_leaf': 1,
                          'min_weight_fraction_leaf': 0,
                          'min_impurity_decrease': 0,
                          'bootstrap': "False",
                          'sampling_strategy': "not minority",
                          'replacement': "False"
                          }
            return space
Пример #15
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the fully fixed space for polynomial feature expansion.

        All three hyperparameters are unparametrized: degree 2,
        interaction-only terms, and no bias column.
        ``dataset_properties`` is unused.
        """
        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            UnParametrizedHyperparameter(name="degree", value=2),
            UnParametrizedHyperparameter("interaction_only", "True"),
            UnParametrizedHyperparameter("include_bias", "False"),
        ])
        return cs
Пример #16
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Return the search space for an extra-trees classifier.

        ``dataset_properties`` is unused.  ``optimizer='smac'`` yields a
        ConfigurationSpace; ``optimizer='tpe'`` yields a hyperopt dict;
        any other value implicitly returns None.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            n_estimators = Constant("n_estimators", 100)
            criterion = CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini")
            # q=0.05 quantises max_features onto a 0.05 grid.
            max_features = UniformFloatHyperparameter(
                "max_features", 0, 1, default_value=0.5, q=0.05)

            max_depth = UnParametrizedHyperparameter(
                name="max_depth", value="None")
            max_leaf_nodes = UnParametrizedHyperparameter(
                "max_leaf_nodes", "None")

            min_samples_split = UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2)
            min_samples_leaf = UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1)
            min_weight_fraction_leaf = UnParametrizedHyperparameter(
                'min_weight_fraction_leaf', 0.)
            min_impurity_decrease = UnParametrizedHyperparameter(
                'min_impurity_decrease', 0.)
            bootstrap = CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="False")

            cs.add_hyperparameters([
                n_estimators, criterion, max_features, max_depth,
                max_leaf_nodes, min_samples_split, min_samples_leaf,
                min_weight_fraction_leaf, min_impurity_decrease, bootstrap
            ])
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            # hp.randint(label, n) samples 0..n-1, hence the +1/+2 offsets.
            return {
                'n_estimators': 100,
                'criterion': hp.choice('ets_criterion', ['gini', 'entropy']),
                'max_features': hp.uniform('ets_max_features', 0, 1),
                'max_depth': "None",
                'max_leaf_nodes': "None",
                'min_samples_leaf': hp.randint('ets_samples_leaf', 20) + 1,
                'min_samples_split': hp.randint('ets_samples_split', 19) + 2,
                'min_weight_fraction_leaf': 0.,
                'min_impurity_decrease': 0.,
                'bootstrap': hp.choice('ets_bootstrap', ['True', 'False'])
            }
Пример #17
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the space for a forest-based time-series regressor.

        Combines forest hyperparameters with preprocessing knobs: sliding
        ``Window_size``, ``Difference`` and ``tsfresh_feature``.
        ``dataset_properties`` is unused.
        """
        cs = ConfigurationSpace()

        Window_size = UniformIntegerHyperparameter(
            name="Window_size", lower=5, upper=99, default_value=20)
        Difference = CategoricalHyperparameter(
            name="Difference", choices=["True", "False"],
            default_value="True")
        tsfresh_feature = CategoricalHyperparameter(
            name="tsfresh_feature", choices=["True", "False"],
            default_value="True")

        n_estimators = UniformIntegerHyperparameter(
            "n_estimators", 10, 200, default_value=100)
        criterion = CategoricalHyperparameter(
            "criterion", ['mse', 'friedman_mse', 'mae'], default_value='mse')
        max_features = UniformFloatHyperparameter(
            "max_features", 0.1, 1.0, default_value=1)

        max_depth = UnParametrizedHyperparameter(
            name="max_depth", value="None")
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")

        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="False")

        cs.add_hyperparameters([
            n_estimators, criterion, max_features, Difference, max_depth,
            max_leaf_nodes, min_samples_split, min_samples_leaf,
            min_impurity_decrease, bootstrap, Window_size, tsfresh_feature
        ])

        return cs
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return a fixed (single-configuration) forest search space.

        Every hyperparameter is constant/unparametrized, so the space
        contains exactly one configuration of sklearn-style defaults.
        """
        cs = ConfigurationSpace()
        fixed_hps = [
            Constant("n_estimators", 100),
            UnParametrizedHyperparameter("criterion", "gini"),
            UnParametrizedHyperparameter("max_features", "auto"),
            UnParametrizedHyperparameter("max_depth", "None"),
            UnParametrizedHyperparameter("min_samples_split", 2),
            UnParametrizedHyperparameter("min_samples_leaf", 1),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UnParametrizedHyperparameter("bootstrap", "True"),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.0),
        ]
        cs.add_hyperparameters(fixed_hps)
        return cs
    def get_hyperparameter_search_space(dataset_properties=None):
        """Gradient-boosting search space (legacy ConfigSpace API).

        Uses the old ``default=`` keyword and per-hyperparameter
        registration; ``alpha`` is only active for the robust losses.
        """
        cs = ConfigurationSpace()

        loss = CategoricalHyperparameter(
            "loss", ["ls", "lad", "huber", "quantile"], default="ls")
        learning_rate = UniformFloatHyperparameter(
            name="learning_rate", lower=0.01, upper=1, default=0.1, log=True)
        n_estimators = UniformIntegerHyperparameter(
            name="n_estimators", lower=50, upper=500, default=100)
        max_depth = UniformIntegerHyperparameter(
            name="max_depth", lower=1, upper=10, default=3)
        min_samples_split = UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default=2, log=False)
        min_samples_leaf = UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default=1, log=False)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        subsample = UniformFloatHyperparameter(
            name="subsample", lower=0.01, upper=1.0, default=1.0, log=False)
        max_features = UniformFloatHyperparameter(
            "max_features", 0.5, 5, default=1)
        max_leaf_nodes = UnParametrizedHyperparameter(
            name="max_leaf_nodes", value="None")
        alpha = UniformFloatHyperparameter(
            "alpha", lower=0.75, upper=0.99, default=0.9)

        # Register in the same order as the original code.
        for hp in (loss, learning_rate, n_estimators, max_depth,
                   min_samples_split, min_samples_leaf,
                   min_weight_fraction_leaf, subsample, max_features,
                   max_leaf_nodes, alpha):
            cs.add_hyperparameter(hp)

        # alpha is only meaningful for the robust loss functions.
        cs.add_condition(InCondition(alpha, loss, ['huber', 'quantile']))
        return cs
Пример #20
0
    def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
        """Gradient-boosting regressor search space.

        Returns a ConfigurationSpace for ``optimizer == 'smac'`` and a
        hyperopt space dict for ``optimizer == 'tpe'``.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            smac_hps = [
                CategoricalHyperparameter("loss", ['ls', 'lad'],
                                          default_value='ls'),
                UniformFloatHyperparameter(name="learning_rate", lower=0.01,
                                           upper=1, default_value=0.1,
                                           log=True),
                UniformIntegerHyperparameter("n_estimators", 50, 500,
                                             default_value=200),
                UniformIntegerHyperparameter(name="max_depth", lower=1,
                                             upper=10, default_value=3),
                CategoricalHyperparameter('criterion',
                                          ['friedman_mse', 'mse', 'mae'],
                                          default_value='friedman_mse'),
                UniformIntegerHyperparameter(name="min_samples_split",
                                             lower=2, upper=20,
                                             default_value=2),
                UniformIntegerHyperparameter(name="min_samples_leaf",
                                             lower=1, upper=20,
                                             default_value=1),
                UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
                UniformFloatHyperparameter(name="subsample", lower=0.1,
                                           upper=1.0, default_value=1.0),
                UniformFloatHyperparameter("max_features", 0.1, 1.0,
                                           default_value=1),
                UnParametrizedHyperparameter(name="max_leaf_nodes",
                                             value="None"),
                UnParametrizedHyperparameter(name='min_impurity_decrease',
                                             value=0.0),
            ]
            cs.add_hyperparameters(smac_hps)

            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            space = {
                'loss': hp.choice('gb_loss', ["ls", "lad"]),
                'learning_rate': hp.loguniform('gb_learning_rate',
                                               np.log(0.01), np.log(1)),
                # 'n_estimators': hp.randint('gb_n_estimators', 451) + 50,
                'n_estimators': hp.choice('gb_n_estimators', [100]),
                'max_depth': hp.randint('gb_max_depth', 8) + 1,
                'criterion': hp.choice('gb_criterion',
                                       ['friedman_mse', 'mse', 'mae']),
                'min_samples_split':
                    hp.randint('gb_min_samples_split', 19) + 2,
                'min_samples_leaf':
                    hp.randint('gb_min_samples_leaf', 20) + 1,
                'min_weight_fraction_leaf':
                    hp.choice('gb_min_weight_fraction_leaf', [0]),
                'subsample': hp.uniform('gb_subsample', 0.1, 1),
                'max_features': hp.uniform('gb_max_features', 0.1, 1),
                'max_leaf_nodes': hp.choice('gb_max_leaf_nodes', [None]),
                'min_impurity_decrease':
                    hp.choice('gb_min_impurity_decrease', [0]),
            }

            # NOTE(review): init_trial is built but never used or returned --
            # presumably intended as a warm-start configuration; confirm.
            init_trial = {'loss': "ls", 'learning_rate': 0.1,
                          'n_estimators': 100, 'max_depth': 3,
                          'criterion': "friedman_mse",
                          'min_samples_split': 2, 'min_samples_leaf': 1,
                          'min_weight_fraction_leaf': 0, 'subsample': 1,
                          'max_features': 1, 'max_leaf_nodes': None,
                          'min_impurity_decrease': 0}
            return space
Пример #21
0
def get_random_forest_default_search_space():
    """Pipeline-level search space for a random-forest classifier.

    Includes the classifier choice and imputation strategy alongside the
    prefixed random-forest hyperparameters.
    """
    prefix = 'classifier:random_forest:'

    cs = ConfigurationSpace()

    model_type = CategoricalHyperparameter('classifier:__choice__',
                                           ['random_forest'])
    imputation = CategoricalHyperparameter('imputation:strategy',
                                           ['mean', 'median', 'most_frequent'])

    # The maximum number of features used in the forest is calculated as
    # m^max_features, where m is the total number of features and
    # max_features is the hyperparameter below.  The default of 0.5 yields
    # sqrt(m) features in the estimator, matching Geurts' heuristic.
    forest_hps = [
        Constant(prefix + "n_estimators", 100),
        CategoricalHyperparameter(prefix + "criterion", ["gini", "entropy"],
                                  default_value="gini"),
        UniformFloatHyperparameter(prefix + "max_features", 0., 1.,
                                   default_value=0.5),
        UnParametrizedHyperparameter(prefix + "max_depth", "None"),
        UniformIntegerHyperparameter(prefix + "min_samples_split", 2, 20,
                                     default_value=2),
        UniformIntegerHyperparameter(prefix + "min_samples_leaf", 1, 20,
                                     default_value=1),
        UnParametrizedHyperparameter(prefix + "min_weight_fraction_leaf", 0.),
        UnParametrizedHyperparameter(prefix + "max_leaf_nodes", "None"),
        CategoricalHyperparameter(prefix + "bootstrap", ["True", "False"],
                                  default_value="True"),
        UnParametrizedHyperparameter(prefix + 'min_impurity_decrease', 0.0),
    ]
    cs.add_hyperparameters([model_type, imputation] + forest_hps)

    return cs
Пример #22
0
    def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
        """Random-forest regressor search space.

        Returns a ConfigurationSpace when ``optimizer == 'smac'`` and a
        hyperopt space dict when ``optimizer == 'tpe'``.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            criterion = CategoricalHyperparameter(
                "criterion", ["mse", "mae"], default_value="mse")

            # The maximum number of features used in the forest is calculated as m^max_features, where
            # m is the total number of features, and max_features is the hyperparameter specified below.
            # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
            # corresponds with Geurts' heuristic.
            max_features = UniformFloatHyperparameter(
                "max_features", 0., 1., default_value=0.5)

            max_depth = UnParametrizedHyperparameter("max_depth", "None")
            min_samples_split = UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2)
            min_samples_leaf = UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1)
            min_weight_fraction_leaf = UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.)
            max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
            min_impurity_decrease = UnParametrizedHyperparameter('min_impurity_decrease', 0.0)
            bootstrap = CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="True")
            cs.add_hyperparameters([criterion, max_features,
                                    max_depth, min_samples_split, min_samples_leaf,
                                    min_weight_fraction_leaf, max_leaf_nodes,
                                    bootstrap, min_impurity_decrease])
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            # hp.randint(label, n) samples [0, n); the "+ k" offsets shift the
            # samples into the same ranges as the SMAC space above.
            space = {'criterion': hp.choice('rf_criterion', ["mse", "mae"]),
                     'max_features': hp.uniform('rf_max_features', 0, 1),
                     'max_depth': hp.choice('rf_max_depth', [None]),
                     'min_samples_split': hp.randint('rf_min_samples_split', 19) + 2,
                     'min_samples_leaf': hp.randint('rf_min_samples_leaf', 20) + 1,
                     'min_weight_fraction_leaf': hp.choice('rf_min_weight_fraction_leaf', [0]),
                     'max_leaf_nodes': hp.choice('rf_max_leaf_nodes', [None]),
                     'min_impurity_decrease': hp.choice('rf_min_impurity_decrease', [0]),
                     'bootstrap': hp.choice('rf_bootstrap', ["True", "False"])}

            # NOTE(review): init_trial is built but never used or returned --
            # presumably intended as a warm-start configuration; confirm.
            init_trial = {'criterion': "mse",
                          'max_features': 0.5,
                          'max_depth': None,
                          'min_samples_split': 2,
                          'min_samples_leaf': 1,
                          'min_weight_fraction_leaf': 0,
                          'max_leaf_nodes': None,
                          'min_impurity_decrease': 0,
                          'bootstrap': "False"}

            return space
Пример #23
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Decision-tree search space (legacy ``default=`` ConfigSpace API).

        NOTE(review): max_depth is a float in [0, 2] -- presumably scaled
        to an integer depth elsewhere; confirm against the fit() code.
        """
        cs = ConfigurationSpace()

        fixed_hps = [
            Constant('criterion', 'mse'),
            Constant("splitter", "best"),
            Constant('max_features', 1.0),
        ]
        max_depth = UniformFloatHyperparameter('max_depth', 0., 2.,
                                               default=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default=1)
        min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")

        cs.add_hyperparameters(fixed_hps + [
            max_depth, min_samples_split, min_samples_leaf,
            min_weight_fraction_leaf, max_leaf_nodes
        ])

        return cs
Пример #24
0
def _construct_hyperparameter(hyperparameter: Dict) -> Hyperparameter:
    """Rebuild a ConfigSpace Hyperparameter from its serialized dict form.

    Parameters
    ----------
    hyperparameter : Dict
        Serialized hyperparameter with a 'type' discriminator plus the
        type-specific fields ('lower'/'upper', 'mu'/'sigma', 'choices',
        'sequence', 'value', ...).

    Returns
    -------
    Hyperparameter
        The reconstructed hyperparameter object.

    Raises
    ------
    ValueError
        If 'type' is not a supported discriminator.
    """
    hp_type = hyperparameter['type']
    name = hyperparameter['name']
    if hp_type == 'constant':
        return Constant(
            name=name,
            value=hyperparameter['value'],
        )
    elif hp_type == 'unparametrized':
        return UnParametrizedHyperparameter(
            name=name,
            value=hyperparameter['value'],
        )
    elif hp_type == 'uniform_float':
        return UniformFloatHyperparameter(
            name=name,
            log=hyperparameter['log'],
            lower=hyperparameter['lower'],
            upper=hyperparameter['upper'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'normal_float':
        return NormalFloatHyperparameter(
            name=name,
            log=hyperparameter['log'],
            mu=hyperparameter['mu'],
            sigma=hyperparameter['sigma'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'uniform_int':
        return UniformIntegerHyperparameter(
            name=name,
            log=hyperparameter['log'],
            lower=hyperparameter['lower'],
            upper=hyperparameter['upper'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'normal_int':
        # BUG FIX: normal-distributed integers are parameterized by
        # mu/sigma (mirroring the 'normal_float' branch above), not by
        # lower/upper; NormalIntegerHyperparameter requires mu and sigma,
        # so the previous lower/upper call raised a TypeError.
        return NormalIntegerHyperparameter(
            name=name,
            log=hyperparameter['log'],
            mu=hyperparameter['mu'],
            sigma=hyperparameter['sigma'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'categorical':
        return CategoricalHyperparameter(
            name=name,
            choices=hyperparameter['choices'],
            default_value=hyperparameter['default'],
            weights=hyperparameter.get('probabilities'),
        )
    elif hp_type == 'ordinal':
        return OrdinalHyperparameter(
            name=name,
            sequence=hyperparameter['sequence'],
            default_value=hyperparameter['default'],
        )
    else:
        raise ValueError(hp_type)
def create_configspace():
    """Configuration space for SGD-style training hyperparameters
    (batch size, learning-rate schedule, weight decay, momentum, nesterov).
    """
    cs = ConfigurationSpace()
    training_hps = [
        UniformIntegerHyperparameter("train_batch_size", 32, 256,
                                     default_value=64, q=8),
        UniformFloatHyperparameter('init_lr', lower=1e-3, upper=0.3,
                                   default_value=0.1, log=True),
        UnParametrizedHyperparameter('lr_decay_factor', 0.1),
        UniformFloatHyperparameter('weight_decay', lower=1e-5, upper=1e-2,
                                   default_value=0.0002, log=True),
        UniformFloatHyperparameter("momentum", 0.5, .99, default_value=0.9),
        CategoricalHyperparameter('nesterov', ['True', 'False'],
                                  default_value='True'),
    ]
    cs.add_hyperparameters(training_hps)
    return cs
Пример #26
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Search space combining time-series windowing options with
        regularized-regression hyperparameters (alpha, fit_intercept, tol).
        """
        cs = ConfigurationSpace()

        # Time-series preprocessing knobs.
        window_size_hp = UniformIntegerHyperparameter(
            name="Window_size", lower=5, upper=50, default_value=20)
        tsfresh_hp = CategoricalHyperparameter(
            name="tsfresh_feature", choices=["True", "False"],
            default_value="True")
        difference_hp = CategoricalHyperparameter(
            name="Difference", choices=["True", "False"],
            default_value="True")

        # Regression hyperparameters (log-scaled, quantized).
        alpha = UniformFloatHyperparameter(
            "alpha", 10**-5, 10., log=True, q=0.00001, default_value=1.)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-3, q=0.00001, log=True)

        cs.add_hyperparameters([
            alpha, fit_intercept, tol, window_size_hp, difference_hp,
            tsfresh_hp
        ])
        return cs
Пример #27
0
def get_hyperparameter_search_space(seed):
    """Search space for sklearn.svm.SVC with a pipeline imputation step.

    degree is only active for the poly kernel; coef0 for poly/sigmoid.
    """
    imputation = CategoricalHyperparameter(
        'imputation__strategy', ['mean', 'median', 'most_frequent'])

    C = UniformFloatHyperparameter(
        "classifier__C", 0.03125, 32768, log=True, default_value=1.0)
    # No linear kernel here, because we have liblinear
    kernel = CategoricalHyperparameter(
        name="classifier__kernel", choices=["rbf", "poly", "sigmoid"],
        default_value="rbf")
    degree = UniformIntegerHyperparameter(
        "classifier__degree", 2, 5, default_value=3)
    gamma = UniformFloatHyperparameter(
        "classifier__gamma", 3.0517578125e-05, 8, log=True, default_value=0.1)
    # TODO this is totally ad-hoc
    coef0 = UniformFloatHyperparameter(
        "classifier__coef0", -1, 1, default_value=0)
    # probability is no hyperparameter, but an argument to the SVM algo
    shrinking = CategoricalHyperparameter(
        "classifier__shrinking", [True, False], default_value=True)
    tol = UniformFloatHyperparameter(
        "classifier__tol", 1e-5, 1e-1, default_value=1e-3, log=True)
    # cache size is not a hyperparameter, but an argument to the program!
    max_iter = UnParametrizedHyperparameter("classifier__max_iter", -1)

    cs = ConfigurationSpace('sklearn.svm.SVC', seed)
    cs.add_hyperparameters(
        [imputation, C, kernel, degree, gamma, coef0, shrinking, tol,
         max_iter])

    # Kernel-dependent activation of degree and coef0.
    cs.add_condition(EqualsCondition(degree, kernel, "poly"))
    cs.add_condition(InCondition(coef0, kernel, ["poly", "sigmoid"]))

    return cs
    def get_hyperparameter_search_space(dataset_properties=None):
        """Forest search space using the legacy ConfigSpace API.

        Hyperparameters are registered one-by-one via the old
        ``add_hyperparameter`` interface with the ``default=`` keyword.
        """
        cs = ConfigurationSpace()

        # Same construction/registration order as the original code.
        for hp in (
                Constant("n_estimators", 100),
                CategoricalHyperparameter("criterion", ["gini", "entropy"],
                                          default="gini"),
                UniformFloatHyperparameter("max_features", 0.5, 5, default=1),
                UnParametrizedHyperparameter(name="max_depth", value="None"),
                UniformIntegerHyperparameter("min_samples_split", 2, 20,
                                             default=2),
                UniformIntegerHyperparameter("min_samples_leaf", 1, 20,
                                             default=1),
                Constant('min_weight_fraction_leaf', 0.),
                CategoricalHyperparameter("bootstrap", ["True", "False"],
                                          default="False")):
            cs.add_hyperparameter(hp)

        return cs
def evaluate_ml_algorithm(dataset, algo, obj_metric, seed=1, task_type=None):
    """Run SMAC-based HPO for one algorithm on one dataset, pickle results.

    NOTE(review): depends on module-level names (load_data, get_metric,
    _classifiers, ClassificationEvaluator, SMACOptimizer, args, save_dir,
    pickle) that must be defined/imported elsewhere in this module.
    """
    print('EVALUATE-%s-%s-%s' % (dataset, algo, obj_metric))
    train_data = load_data(dataset,
                           task_type=task_type,
                           datanode_returned=True)
    # Quick sanity check: print the set of labels in the training data.
    print(set(train_data.data[1]))
    metric = get_metric(obj_metric)

    # Build the algorithm's search space and pin the estimator choice.
    cs = _classifiers[algo].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()
    hpo_evaluator = ClassificationEvaluator(default_hpo_config,
                                            scorer=metric,
                                            data_node=train_data,
                                            name='hpo',
                                            resampling_strategy='holdout',
                                            seed=seed)
    hpo_optimizer = SMACOptimizer(evaluator=hpo_evaluator,
                                  config_space=cs,
                                  per_run_time_limit=600,
                                  per_run_mem_limit=5120,
                                  output_dir='./logs',
                                  trials_per_iter=args.iter)
    # Single optimization round; trials_per_iter controls how many configs run.
    hpo_optimizer.iterate()
    hpo_eval_dict = dict()
    # Re-key results by the second tuple element -- presumably the
    # configuration; confirm against SMACOptimizer.eval_dict's key layout.
    for key, value in hpo_optimizer.eval_dict.items():
        hpo_eval_dict[key[1]] = value

    save_path = save_dir + '%s-%s-%s-hpo.pkl' % (dataset, algo, obj_metric)
    with open(save_path, 'wb') as f:
        pickle.dump(hpo_eval_dict, f)
Пример #30
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """SVC search space; degree/coef0 are conditional on the kernel."""
        cs = ConfigurationSpace()

        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        # No linear kernel here, because we have liblinear
        kernel = CategoricalHyperparameter(
            name="kernel", choices=["rbf", "poly", "sigmoid"],
            default_value="rbf")
        degree = UniformIntegerHyperparameter("degree", 2, 5, default_value=3)
        gamma = UniformFloatHyperparameter(
            "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1)
        # TODO this is totally ad-hoc
        coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)
        # probability is no hyperparameter, but an argument to the SVM algo
        shrinking = CategoricalHyperparameter(
            "shrinking", ["True", "False"], default_value="True")
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-3, log=True)
        # cache size is not a hyperparameter, but an argument to the program!
        max_iter = UnParametrizedHyperparameter("max_iter", -1)

        cs.add_hyperparameters(
            [C, kernel, degree, gamma, coef0, shrinking, tol, max_iter])

        # Kernel-dependent activation of degree and coef0.
        cs.add_condition(EqualsCondition(degree, kernel, "poly"))
        cs.add_condition(InCondition(coef0, kernel, ["poly", "sigmoid"]))

        return cs