Example #1
    def __init__(self, data, device, iter_num, timer, n_class, **args):
        self.data = data.to(device)
        self.device = device
        self.timer = timer
        self.n_class = n_class
        self.iter_num = iter_num

        self.flag_end = False

        self.params = {
                'features_num': self.data.x.size()[1],
                'num_class': self.n_class,
                #'epoches': 150,
            }
        self.space = {
                'num_layers': scope.int(hp.choice('num_layers', [1, 2])),
                'agg': hp.choice('agg', ['concat', 'self']),
                'hidden': scope.int(hp.quniform('hidden', 4, 128, 1)),
                'hidden2': scope.int(hp.quniform('hidden2', 4, 64, 1)),
                'dropout': hp.uniform('dropout', 0.1, 0.9),
                'lr': hp.loguniform('lr', np.log(0.001), np.log(1.0)),
                'epoches': scope.int(hp.quniform('epoches', 100, 300, 10)),
                'weight_decay': hp.loguniform('weight_decay', np.log(1e-4), np.log(1e-2))
                }
        self.points = [{
                'num_layers': 2,
                'agg': 'concat',
                'hidden': 64,
                'hidden2': 32,
                'dropout': 0.5,
                'lr': 0.005,
                'epoches': 200,
                'weight_decay': 5e-3,
                },]
Example #2
def run(data_path, num_trials):

    X_train, y_train = load_pickle(os.path.join(data_path, "train.pkl"))
    X_valid, y_valid = load_pickle(os.path.join(data_path, "valid.pkl"))

    def objective(params):

        with mlflow.start_run():
            rf = RandomForestRegressor(**params)
            rf.fit(X_train, y_train)
            y_pred = rf.predict(X_valid)
            rmse = mean_squared_error(y_valid, y_pred, squared=False)
            mlflow.log_metric("rmse", rmse)
            
            return {'loss': rmse, 'status': STATUS_OK}

    search_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 1, 20, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 10, 50, 1)),
        'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1)),
        'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 4, 1)),
        'random_state': 42
    }

    rstate = np.random.default_rng(42)  # for reproducible results
    fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=num_trials,
        trials=Trials(),
        rstate=rstate
    )
Example #3
def get_xgboost_params(name="xgboost_common"):
    return scope.get_xgb_model(
        n_estimators=scope.int(
            hp.quniform(
                get_full_name(name, "n_estimators"),
                1, 200, 1,
            ),
        ),
        max_depth=scope.int(
            hp.quniform(
                get_full_name(name, 'max_depth'),
                1, 13, 1,
            ),
        ),
        min_child_weight=scope.int(
            hp.quniform(
                get_full_name(name, 'min_child_weight'),
                1, 6, 1,
            ),
        ),
        # subsample is a fraction in [0.5, 1]; an int cast would truncate
        # every draw to 0, so no scope.int wrapper here
        subsample=hp.uniform(
            get_full_name(name, 'subsample'),
            0.5, 1,
        ),
        gamma=hp.uniform(
            get_full_name(name, 'gamma'),
            0.5, 1,
        ),
        nthread=1,
        seed=RANDOM_STATE,
    )
Example #4
def CNN_Tuning(Tuning_function, features, labels, n_worker, name, n_cv,
               train_rate, max_eval, conv_key, gpu_key, dropout_key, n_fit):
    params = {
        'n_units1': scope.int(hp.quniform('n_units1', 100, 300, 100)),
        'n_units2': scope.int(hp.quniform('n_units2', 100, 300, 100)),
        'n_units3': scope.int(hp.quniform('n_units3', 100, 300, 100)),
        'n_units4': scope.int(hp.quniform('n_units4', 100, 300, 100)),
        'n_units5': scope.int(hp.quniform('n_units5', 100, 300, 100)),
        'n_units6': scope.int(hp.quniform('n_units6', 100, 300, 100)),
        'layer_num': scope.int(hp.quniform('layer_num', 2, 7, 1)),
        'activate': hp.choice('activate', ('relu', 'leaky_relu')),
        'epoch': scope.int(hp.quniform('epoch', 50, 110, 10)),
        'batch_size': scope.int(hp.quniform('batch_size', 40, 200, 40)),
        'c_out': scope.int(hp.quniform('c_out', 20, 40, 10)),
    }

    tuning_object = Tuning_Object(Tuning_function, features, labels, n_worker,
                                  name, n_cv, train_rate, conv_key, gpu_key,
                                  dropout_key, n_fit)
    best = fmin(tuning_object,
                params,
                algo=tpe.suggest,
                max_evals=max_eval,
                rstate=np.random.RandomState(0))
    best = hyperopt.space_eval(params, best)
    return best
Example #5
 def get_hyperopt_space(self, params={}, random_state=None):
     if random_state is None:
         random_state = self.random_state
     result = {
         'n_estimators': scope.int(hp.quniform('n_estimators', 100, 1000, 1)),
         'eta': hp.quniform('eta', 0.025, 0.5, 0.025),
         # hp.quniform casts max_depth to float instead of int,
         # hence the scope.int wrapper.
         'max_depth': scope.int(hp.quniform('max_depth', 1, 14, 1)),
         'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
         'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
         'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
         'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
         'eval_metric': 'auc',
         'objective': 'binary:logistic',
         # Increase this number if you have more cores. Otherwise, remove it and it will default
         # to the maximum number.
         # 'nthread': 4,
         'booster': 'gbtree',
         'tree_method': 'exact',
         'silent': 1,
         'seed': random_state
     }
     if params != {}:
         result.update(params)
     return result
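The comment in the example above points at the pitfall that motivates most of these snippets: hp.quniform rounds to a multiple of q but still returns a floating-point value, which estimators such as xgboost reject for integer parameters like max_depth. A minimal sketch (the labels are illustrative) that shows the difference by drawing from both expressions with hyperopt.pyll.stochastic.sample:

from hyperopt import hp
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample

raw = hp.quniform('max_depth_raw', 1, 14, 1)
wrapped = scope.int(hp.quniform('max_depth', 1, 14, 1))

print(type(sample(raw)))      # a float, despite the rounding
print(type(sample(wrapped)))  # a plain Python int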
Example #6
def get_xgboost_model(name="xgboost_common"):
    return scope.get_xgb_model(
        n_estimators=scope.int(
            hp.quniform(
                get_full_name(name, "n_estimators"),
                1, 200, 1,
            ),
        ),
        max_depth=scope.int(
            hp.quniform(
                get_full_name(name, 'max_depth'),
                1, 13, 1,
            ),
        ),
        min_child_weight=scope.int(
            hp.quniform(
                get_full_name(name, 'min_child_weight'),
                1, 6, 1,
            ),
        ),
        # subsample is a fraction in [0.5, 1]; an int cast would truncate
        # every draw to 0, so no scope.int wrapper here
        subsample=hp.uniform(
            get_full_name(name, 'subsample'),
            0.5, 1,
        ),
        gamma=hp.uniform(
            get_full_name(name, 'gamma'),
            0.5, 1,
        ),
        nthread=1,
        seed=RANDOM_STATE,
    )
Example #7
    def visitSearchSpaceNumber(self, space: SearchSpaceNumber, path: str, counter=None):
        label = self.mk_label(path, counter)

        if space.pgo is not None:
            return scope.pgo_sample(
                space.pgo, hp.quniform(label, 0, len(space.pgo) - 1, 1)
            )

        dist = "uniform"
        if space.distribution:
            dist = space.distribution

        if space.maximum is None:
            raise SearchSpaceError(
                path, f"maximum not specified for a number with distribution {dist}"
            )
        max = space.getInclusiveMax()
        # if the maximum is not None, the inclusive maximum should not be None
        assert max is not None

        # These distributions need only a maximum
        if dist == "integer":
            if not space.discrete:
                raise SearchSpaceError(
                    path,
                    "integer distribution specified for a non discrete numeric type",
                )
            return hp.randint(label, max)

        if space.minimum is None:
            raise SearchSpaceError(
                path, f"minimum not specified for a number with distribution {dist}"
            )
        min = space.getInclusiveMin()
        # if the minimum is not None, the inclusive minimum should not be None
        assert min is not None

        if dist == "uniform":
            if space.discrete:
                return scope.int(hp.quniform(label, min, max, 1))
            else:
                return hp.uniform(label, min, max)
        elif dist == "loguniform":
            # for log distributions, hyperopt requires that we provide the log of the min/max
            if min <= 0:
                raise SearchSpaceError(
                    path,
                    f"minimum of 0 specified with a {dist} distribution.  This is not allowed; please set it (possibly using minimumForOptimizer) to be positive",
                )
            if min > 0:
                min = math.log(min)
            if max > 0:
                max = math.log(max)
            if space.discrete:
                return scope.int(hp.qloguniform(label, min, max, 1))
            else:
                return hp.loguniform(label, min, max)

        else:
            raise SearchSpaceError(path, f"Unknown distribution type: {dist}")
Example #8
def bernoulli_rbm_hp_space(n_components=None,
                           learning_rate=None,
                           batch_size=None,
                           n_iter=None,
                           verbose=False,
                           random_state=None):

    rval = dict(
        n_components=scope.int(
            hp.qloguniform(
                'n_components', low=np.log(0.51), high=np.log(999.5), q=1.0))
        if n_components is None else n_components,
        learning_rate=hp.lognormal(
            'learning_rate',
            np.log(0.01),
            np.log(10),
        ) if learning_rate is None else learning_rate,
        batch_size=scope.int(
            hp.qloguniform(
                'batch_size',
                np.log(1),
                np.log(100),
                q=1,
            )) if batch_size is None else batch_size,
        n_iter=scope.int(
            hp.qloguniform(
                'n_iter',
                np.log(1),
                np.log(1000),  # -- max sweeps over the *whole* train set
                q=1,
            )) if n_iter is None else n_iter,
        verbose=verbose,
        random_state=_random_state('rstate', random_state),
    )
    return rval
Example #9
class DecisionTreeModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return DecisionTreeRegressor(random_state=RANDOM_STATE, presort=True, **args)

    hp_space = {
        "criterion": hp.choice("criterion", ["mse", "friedman_mse", "mae"]),
        "max_depth": hp.pchoice(
            "max_depth_enabled",
            [
                (0.7, None),
                (0.3, 1 + scope.int(hp.qlognormal("max_depth", np.log(30), 0.5, 3))),
            ],
        ),
        "splitter": hp.choice("splitter_str", ["best", "random"]),
        "max_features": hp.pchoice(
            "max_features_str",
            [
                (0.2, "sqrt"),  # most common choice.
                (0.1, "log2"),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform("max_features_str_frac", 0.0, 1.0)),
            ],
        ),
        "min_samples_split": scope.int(hp.quniform("min_samples_split_str", 2, 10, 1)),
        "min_samples_leaf": hp.choice(
            "min_samples_leaf_enabled",
            [
                1,
                scope.int(
                    hp.qloguniform("min_samples_leaf", np.log(1.5), np.log(50.5), 1)
                ),
            ],
        ),
    }
Example #10
    def optimize(cls, trials, score, evals_rounds, mon_cons, categorical):
        """
        This function specifies the hyperparameter search space and minimises the score function

        :param trials: hyperopt.Trials
            hyperopt trials object responsible for the hyperparameter search
        :param score: function
            the loss or score function to be minimised
        :param evals_rounds: int
            number of evaluation rounds for hyperparameter tuning
        :param mon_cons: str(tuple) for xgboost, tuple for lightgbm
            index of monotonic constraints
        :param categorical: list
            index of categorical feature for lightgbm
        :return best: dict
            the best hyperparameters
        """
        space = {
            "n_estimators": scope.int(hp.quniform("n_estimators", 10, 3000, 5)),
            "learning_rate": hp.quniform("learning_rate", 0.05, 0.3, 0.025),
            "max_depth": scope.int(hp.quniform("max_depth", 1, 20, 1)),
            "num_leaves": scope.int(hp.quniform("num_leaves", 2, 1024, 2)),
            "min_child_samples": scope.int(hp.quniform("min_child_samples", 2, 100, 1)),
            "subsample": hp.quniform("subsample", 0.6, 1, 0.05),  # bagging_fraction
            "colsample_bytree": hp.quniform("colsample_bytree", 0.4, 1, 0.1),  # feature_fraction
            "min_sum_hessian_in_leaf": hp.quniform("min_sum_hessian_in_leaf", 0.001, 0.9, 0.001),
            "reg_lambda": hp.quniform("reg_lambda", 0.01, 1, 0.01),
            "reg_alpha": hp.quniform("reg_alpha", 1, 10, 0.01),
            "monotone_constraints": mon_cons,
            # 'categorical_feature': categorical
        }

        best = fmin(score,
                    space,
                    algo=tpe.suggest,
                    trials=trials,
                    max_evals=evals_rounds)

        # Convert the relevant hyperparameters to int
        best["n_estimators"] = int(best["n_estimators"])
        best["max_depth"] = int(best["max_depth"])
        best["num_leaves"] = int(best["num_leaves"])
        best["min_child_samples"] = int(best["min_child_samples"])

        logger.info("BEST_PARAMETERS")
        logger.info(best)
        return best
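The manual int casts above are needed because fmin returns the raw values drawn for each label, not the values passed through scope.int. As a sketch of an alternative, reusing the score, space, trials and evals_rounds names from this example, hyperopt.space_eval re-evaluates the search space at the returned point so that every scope.int node yields a real int (Examples #4 and #20 use the same pattern):

from hyperopt import fmin, space_eval, tpe

best = fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=evals_rounds)
best = space_eval(space, best)  # scope.int nodes now produce ints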
Example #11
    def get_hyperopt(self, label):
        from hyperopt import hp
        from hyperopt.pyll import scope

        if self.log:
            return scope.int(
                hp.qloguniform(label, np.log(self.lower), np.log(self.upper),
                               1))
        else:
            return scope.int(hp.quniform(label, self.lower, self.upper, 1))
Example #12
class RandomForestConf(ModelConf):
    param_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 1, 20, 1)),
        'max_features': scope.int(hp.quniform('max_features', 1, 150, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 100, 500, 1)),
        'criterion': hp.choice('criterion', ["gini", "entropy"])
    }
    name = "random_forest"

    def instance(self, param):
        return RandomForestClassifier(**param)
Example #13
    def optimize(
            # trials,
            random_state=SEED):
        """
        This is the optimization function that given a space (space here) of
        hyperparameters and a scoring function (score here),
        finds the best hyperparameters.
        """

        space = {
            'max_depth': scope.int(hp.uniform('max_depth', 5, 15)),
            'subsample': hp.uniform('subsample', 0.03, 1),
            'learning_rate': hp.loguniform('learning_rate', np.log(0.005), np.log(0.5)) - 0.0001,
            'colsample_bytree': hp.uniform('colsample_bytree', 0.3, 1),
            'reg_alpha': hp.loguniform('reg_alpha', np.log(0.005), np.log(5)) - 0.0001,
            'reg_lambda': hp.loguniform('reg_lambda', np.log(1), np.log(5)),
            'bagging_freq': hp.choice('bagging_freq', [0, 1]),
            'num_leaves': scope.int(hp.uniform('num_leaves', 10, 128)),
            'n_estimators': 1000,
            'boosting': 'gbdt',
            'objective': 'multiclass',
            'num_class': 12,
            'metric': 'None',
            'is_unbalance': 'true',
            #         'min_data_per_group': 1000,
            'verbose': -1,
            'random_seed': 42,
        }

        # Use the fmin function from Hyperopt to find the best hyperparameters
        best = fmin(
            score_model,
            space,
            algo=tpe.suggest,
            # trials=trials,
            max_evals=hyperopt_niters)
        return best
Example #14
 def param_space(self) -> Dict[str, Any]:
     return {
         'batch_size': hp.choice('batch_size', options=[2 ** x for x in range(4, 6 + 1)]),
         'learning_rate': hp.loguniform('learning_rate', low=np.log(0.0001), high=np.log(1)),
         'num_blocks': scope.int(hp.quniform('num_blocks', low=2, high=6, q=1)),
         'block_size': scope.int(hp.quniform('block_size', low=1, high=3, q=1)),
         'fcl_num_layers': scope.int(hp.quniform('fcl_num_layers', low=1, high=4, q=1)),
         'fcl_layer_size': hp.choice('fcl_layer_size', options=[512, 768, 1024, 1536]),
         'fcl_dropout_rate': hp.quniform('fcl_dropout_rate', low=0.05, high=0.5, q=0.05),
         'activation': hp.choice('activation', options=['relu', 'selu', 'tanh']),
         'optimizer': hp.choice('optimizer', options=['adam', 'adamax', 'nadam', 'rms-prop'])
     }
Example #15
class GradientBoostingModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return GradientBoostingRegressor(random_state=RANDOM_STATE,
                                         presort=True,
                                         **args)

    loss_alpha = hp.choice('loss_alpha',
                           [('ls', 0.9), ('lad', 0.9),
                            ('huber', hp.uniform('gbr_alpha', 0.85, 0.95)),
                            ('quantile', 0.5)])

    hp_space = {
        'n_estimators': scope.int(
            hp.qloguniform('n_estimators', np.log(10.5), np.log(1000.5), 1)),
        'learning_rate': hp.lognormal('learning_rate', np.log(0.01), np.log(10.0)),
        'criterion': hp.choice('criterion', ['mse', 'friedman_mse', 'mae']),
        'max_depth': hp.pchoice('max_depth', [(0.2, 2), (0.5, 3), (0.2, 4), (0.1, 5)]),
        'min_samples_leaf': hp.choice(
            'min_samples_leaf_enabled',
            [
                1,  # most common choice.
                scope.int(
                    hp.qloguniform('min_samples_leaf', np.log(1.5), np.log(50.5), 1)),
            ]),
        'subsample': hp.pchoice(
            'subsample_enabled',
            [
                (0.2, 1.0),  # default choice.
                (0.8, hp.uniform('subsample', 0.5, 1.0)),  # stochastic grad boosting.
            ]),
        'max_features': hp.pchoice(
            'max_features_str',
            [
                (0.1, 'sqrt'),  # most common choice.
                (0.2, 'log2'),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform('max_features_str_frac', 0., 1.)),
            ]),
        'loss': loss_alpha[0],
        'alpha': loss_alpha[1],
    }
Example #16
class GradientBoostingModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return GradientBoostingRegressor(
            random_state=RANDOM_STATE, presort=True, **args
        )

    loss_alpha = hp.choice(
        "loss_alpha",
        [
            ("ls", 0.9),
            ("lad", 0.9),
            ("huber", hp.uniform("gbr_alpha", 0.85, 0.95)),
            ("quantile", 0.5),
        ],
    )

    hp_space = {
        "n_estimators": scope.int(
            hp.qloguniform("n_estimators", np.log(10.5), np.log(1000.5), 1)
        ),
        "learning_rate": hp.lognormal("learning_rate", np.log(0.01), np.log(10.0)),
        "criterion": hp.choice("criterion", ["mse", "friedman_mse", "mae"]),
        "max_depth": hp.pchoice("max_depth", [(0.2, 2), (0.5, 3), (0.2, 4), (0.1, 5)]),
        "min_samples_leaf": hp.choice(
            "min_samples_leaf_enabled",
            [
                1,  # most common choice.
                scope.int(
                    hp.qloguniform("min_samples_leaf", np.log(1.5), np.log(50.5), 1)
                ),
            ],
        ),
        "subsample": hp.pchoice(
            "subsample_enabled",
            [
                (0.2, 1.0),  # default choice.
                (0.8, hp.uniform("subsample", 0.5, 1.0)),  # stochastic grad boosting.
            ],
        ),
        "max_features": hp.pchoice(
            "max_features_str",
            [
                (0.1, "sqrt"),  # most common choice.
                (0.2, "log2"),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform("max_features_str_frac", 0.0, 1.0)),
            ],
        ),
        "loss": loss_alpha[0],
        "alpha": loss_alpha[1],
    }
Example #17
def hyperopt(X_train, X_test, y_train, y_test, param_space, num_eval):
    
    ##Setting Hyperparameter Grid
    param_hyperopt={
        'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(1)),
        'max_depth': scope.int(hp.quniform('max_depth', 3, 15, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 5, 100, 1)),
        'num_leaves': scope.int(hp.quniform('num_leaves', 5, 50, 1)),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.6, 1.0),
        'boosting_type': 'gbdt',
    }


    ##Defining Objective Function for Tuning
    def objective_function(params):

        #Evaluating LightGBM Classification Model on Tuning Parameters
        clf=lgb.LGBMClassifier(**params)

        evaluation = [(X_train, y_train), (X_test, y_test)]
            
        #Training Model
        clf.fit(X_train, y_train,
                eval_set=evaluation, eval_metric='auc',
                early_stopping_rounds=10, verbose=False)

        #Score Model on Validation to Obtain Predicted Probabilities
        preds=clf.predict_proba(X_test)
        preds=preds[:,1]

        #Adjusting Intercept of Predictions to Account for Oversampling Bias
        #Change to fit Target Proportion in Training and Oversampled Training
        newpreds=(preds * 0.8 * 0.02)/((1-preds) * 0.2 * 0.98 + preds * 0.8 * 0.02)

    #Evaluate Model and Adjust Hyperparameters to Maximize AUC
        auc=roc_auc_score(y_test, newpreds)

        print('Score:', auc)
        return {'loss': -auc, 'status': STATUS_OK}

    trials=Trials()

    #Parameter Tuning
    best_param=fmin(objective_function,
                    param_hyperopt,  # use the grid defined above
                    algo=tpe.suggest,
                    max_evals=num_eval,
                    trials=trials,
                    rstate=np.random.RandomState(1))

    return best_param
Example #18
class SvmConf(ModelConf):
    param_space = {
        'C': hp.uniform('C', 0.1, 2.0),
        'kernel': hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
        'degree': scope.int(hp.quniform('degree', 2, 5, 1)),
        'gamma': hp.choice('gamma', ['auto', 'scale']),
        'tol': hp.loguniform('tol', np.log(1e-5), np.log(1e-2)),
        'max_iter': scope.int(hp.quniform('max_iter', -1, 100, 1))
    }
    name = "svm_classifier"

    def instance(self, param):
        return SVC(**param)
Example #19
def hyperopt_constructor(loader: yaml.BaseLoader, suffix: str,
                         node: yaml.Node):
    if suffix not in [
            "choice",
            "pchoice",
            "normal",
            "qnormal",
            "lognormal",
            "qlognormal",
            "uniform",
            "quniform",
            "shiftedquniform",
            "loguniform",
            "qloguniform",
            "randint",
    ]:
        raise ValueError(f"{suffix} is not a valid function")

    from hyperopt import hp
    from hyperopt.pyll import scope
    loader.hp_label_inc = getattr(loader, "hp_label_inc", -1) + 1
    label = f"label{loader.hp_label_inc}"
    func = getattr(hp, suffix, None)
    if suffix == "choice" or suffix == "pchoice":
        return func(label, loader.construct_sequence(node, deep=True))
    if suffix == "shiftedquniform":
        # shift hp.quniform so that low is 0 and shift back after rounding
        kwargs = _construct_hyperopt_params(loader, node, ["low", "high", "q"])
        low = kwargs["low"]
        kwargs["low"] = 0
        kwargs["high"] -= low
        apply = getattr(hp, "quniform")(label, **kwargs) + low
        if isinstance(kwargs["q"], int) and isinstance(low, int):
            # convert to int if low and q are ints
            apply = scope.int(apply)
        return apply
    if suffix[0] == "q":
        if suffix == "quniform" or suffix == "qloguniform":
            args_order = ["low", "high", "q"]
        else:
            args_order = ["mu", "sigma", "q"]
        kwargs = _construct_hyperopt_params(loader, node, args_order)
        if isinstance(kwargs["q"], int):
            # convert to int if q is an int
            return scope.int(func(label, **kwargs))
        return func(label, **kwargs)
    if isinstance(node, yaml.SequenceNode):
        return func(label, *loader.construct_sequence(node, deep=True))
    else:
        return func(label, **loader.construct_mapping(node, deep=True))
Example #20
def find_hyperopt(df_train: pd.DataFrame, folds: pd.DataFrame) -> Dict:
    log = logging.getLogger(__name__)
    cols_all, col_target = get_cols(df_train)

    results = {}
    space = {
        'num_leaves': scope.int(hp.quniform('num_leaves', 3, 100, 1)),
        'max_depth': scope.int(hp.quniform('max_depth', 10, 70, 1)),
        'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 5, 150, 1)),
        'feature_fraction': hp.uniform('feature_fraction', 0.85, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.85, 1.0),
        'min_sum_hessian_in_leaf': hp.loguniform('min_sum_hessian_in_leaf', 0, 2.3),
        'lambda_l1': hp.uniform('lambda_l1', 1e-4, 2),
        'lambda_l2': hp.uniform('lambda_l2', 1e-4, 2),
        'seed': random_state,
        'feature_fraction_seed': random_state,
        'bagging_seed': random_state,
        'drop_seed': random_state,
        'data_random_seed': random_state,
        'verbose': -1,
        'bagging_freq': 5,
        'max_bin': 255,
        'learning_rate': 0.001,
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'auc',
    }
    for col in col_target:
        cols_all, _ = get_cols(df_train, col)

        def score(params):
            cv_score = CV_score(params=params,
                                cols_all=cols_all,
                                col_target=col,
                                num_boost_round=99999999,
                                early_stopping_rounds=50,
                                valid=True)
            return cv_score.fit(df=df_train, folds=folds)

        trials = Trials()
        best = fmin(fn=score,
                    space=space,
                    algo=tpe.suggest,
                    trials=trials,
                    max_evals=max_evals)
        results[col] = space_eval(space, best)
    return results
Example #21
def xgboost_classifier_bayesian_space():
    return {
        "max_depth": scope.int(hp.quniform("x_max_depth", 1, 3, 1)),
        "n_estimators": scope.int(hp.quniform("x_n_estimators", 100, 1000, 1)),
        "min_child_weight": scope.int(hp.quniform("x_min_child", 1, 10, 1)),
        "subsample": hp.uniform("x_subsample", 0.5, 0.9),
        "gamma": hp.uniform("x_gamma", 0.0, 0.2),
        "colsample_bytree": hp.uniform("x_colsample_bytree", 0.5, 1.),
        "colsample_bylevel": hp.uniform("x_colsample_bylevel", 0.5, 1.),
        "colsample_bynode": hp.uniform("x_colsample_bynode", 0.5, 1.),
        #"max_delta_step": scope.int(hp.quniform("x_max_delta_step", 0, 8, 1)),
        "reg_lambda": hp.uniform("x_reg_lambda", 0, 1),
        "reg_alpha": hp.uniform("x_reg_alpha", 0, 1),
        "learning_rate": hp.uniform("x_learning_rate", 0.01, 0.5)
    }
Example #22
def lgbm_hp_space(**kwargs):
    space = {
        'n_estimators': scope.int(hp.quniform('n_estimators', 10, 700, 1)),
        'num_leaves': scope.int(hp.quniform('num_leaves', 10, 200, 1)),
        'feature_fraction': hp.uniform('feature_fraction', 0.75, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.75, 1.0),
        'learning_rate': hp.loguniform('learning_rate', -5.0, -2.3),
        'max_bin': scope.int(hp.quniform('max_bin', 64, 512, 1)),
        'bagging_freq': scope.int(hp.quniform('bagging_freq', 1, 5, 1)),
        'lambda_l1': hp.uniform('lambda_l1', 0, 10),
        'lambda_l2': hp.uniform('lambda_l2', 0, 10),
        **kwargs
       }

    return space
Example #23
 def get_hyperopt_space(self, params={}, random_state=None):
     if random_state is None:
         random_state = self.random_state
     result = {
         'num_leaves': scope.int(hp.quniform('num_leaves', 100, 500, 1)),
         'max_depth': scope.int(hp.quniform('max_depth', 10, 70, 1)),
         'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 10, 150, 1)),
         'feature_fraction': hp.uniform('feature_fraction', 0.75, 1.0),
         'bagging_fraction': hp.uniform('bagging_fraction', 0.75, 1.0),
         'min_sum_hessian_in_leaf': hp.loguniform('min_sum_hessian_in_leaf', 0, 2.3),
         'lambda_l1': hp.uniform('lambda_l1', 1e-4, 2),
         'lambda_l2': hp.uniform('lambda_l2', 1e-4, 2),
         'seed': random_state,
         'feature_fraction_seed': random_state,
         'bagging_seed': random_state,
         'drop_seed': random_state,
         'data_random_seed': random_state,
         'verbose': -1,
         'bagging_freq': 5,
         'max_bin': 255,
         'learning_rate': 0.03,
         'boosting_type': 'gbdt',
         'objective': 'binary',
         'metric': 'auc',
     }
     if params != {}:
         result.update(params)
     return result
Example #24
    def test_preproc(self):
        """
        As a domain expert, I have a particular pre-processing that I believe
        reveals important patterns in my data.  I would like to know how good
        a classifier can be built on top of my preprocessing algorithm.
        """

        # -- for testing purpose, suppose that the RBM is our "domain-specific
        #    pre-processing"

        algo = SklearnClassifier(
            partial(
                hyperopt_estimator,
                preprocessing=hp.choice('pp',
                    [
                        # -- VQ (alone)
                        [
                            hpc.colkmeans('vq0',
                                n_init=1),
                        ],
                        # -- VQ -> RBM
                        [
                            hpc.colkmeans('vq1',
                                n_clusters=scope.int(
                                    hp.quniform(
                                        'vq1.n_clusters', 1, 5, q=1)),
                                n_init=1),
                            hpc.rbm(name='rbm:alone',
                                verbose=0)
                        ],
                        # -- VQ -> RBM -> PCA
                        [
                            hpc.colkmeans('vq2',
                                n_clusters=scope.int(
                                    hp.quniform(
                                        'vq2.n_clusters', 1, 5, q=1)),
                                n_init=1),
                            hpc.rbm(name='rbm:pre-pca',
                                verbose=0),
                            hpc.pca('pca')
                        ],
                    ]),
                classifier=hpc.any_classifier('classif'),
                algo=tpe.suggest,
                max_evals=10,
                ))
        mean_test_error = self.view.protocol(algo)
        print('mean test error:', mean_test_error)
Example #25
def ts_lagselector(name, lower_lags=1, upper_lags=1):
    rval = scope.ts_LagSelector(
        lag_size=scope.int(
            hp.quniform(name + '.lags', 
                        lower_lags - .5, upper_lags + .5, 1))
    )
    return rval
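The half-unit offsets around lower_lags and upper_lags make hp.quniform's rounding give every integer lag in [lower_lags, upper_lags] the same probability; with plain quniform(lower_lags, upper_lags, 1) the two endpoints would receive only half the mass of the interior values. A quick check of the trick in isolation, drawing from hyperopt's stochastic sampler:

from collections import Counter
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

expr = hp.quniform('lags', 1 - .5, 5 + .5, 1)  # lower_lags=1, upper_lags=5
print(Counter(int(sample(expr)) for _ in range(10000)))  # roughly uniform over 1..5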
Example #26
def linear_discriminant_analysis(name,
    solver=None,
    shrinkage=None,
    priors=None,
    n_components=None,
    store_covariance=False,
    tol=0.00001):

    def _name(msg):
        return '%s.%s_%s' % (name, 'lda', msg)

    solver_shrinkage = hp.choice(_name('solver_shrinkage_dual'),
                                     [('svd', None),
                                      ('lsqr', None),
                                      ('lsqr', 'auto'),
                                      ('eigen', None),
                                      ('eigen', 'auto')])

    rval = scope.sklearn_LinearDiscriminantAnalysis(
        solver=solver_shrinkage[0] if solver is None else solver,
        shrinkage=solver_shrinkage[1] if shrinkage is None else shrinkage,
        priors=priors,
        n_components=4 * scope.int(
            hp.qloguniform(
                _name('n_components'),
                low=np.log(0.51),
                high=np.log(30.5),
                q=1.0)) if n_components is None else n_components,
        store_covariance=store_covariance,
        tol=tol
        )
    return rval
Example #27
def ts_lagselector(name, lower_lags=1, upper_lags=1):
    rval = scope.ts_LagSelector(
        lag_size=scope.int(
            hp.quniform(name + '.lags', 
                        lower_lags - .5, upper_lags + .5, 1))
    )
    return rval
Example #28
    def test_sparse_random_projection(self):
        # restrict n_components to be less than or equal to data dimension
        # to prevent sklearn warnings from printing during tests
        n_components = scope.int(hp.quniform(
            'preprocessing.n_components', low=1, high=8, q=1
        ))
        model = hyperopt_estimator(
            classifier=components.gaussian_nb('classifier'),
            preprocessing=[
                components.sparse_random_projection(
                    'preprocessing',
                    n_components=n_components,
                )
            ],
            algo=rand.suggest,
            trial_timeout=5.0,
            max_evals=5,
        )

        X_train = np.random.randn(1000, 8)
        Y_train = (X_train[:, 0] > 0).astype('int')
        X_test = np.random.randn(1000, 8)
        Y_test = (X_test[:, 0] > 0).astype('int')

        model.fit(X_train, Y_train)
        model.score(X_test, Y_test)
Example #29
class ExplainableBoostingMachineModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        feature_names = [f"featur_{i}" for i in range(train_data[0].shape[1])]
        return ExplainableBoostingClassifier(random_state=RANDOM_STATE,
                                             feature_names=feature_names,
                                             **args)

    hp_space = {
        "learning_rate": hp.loguniform("learning_rate", np.log(0.0001), np.log(1.0)),
        "max_bins": scope.int(hp.quniform("max_bins", 20, 400, 3)),
        "max_leaves": scope.int(hp.loguniform("max_leaves", np.log(2), np.log(100))),
    }
Example #30
def passive_aggressive(name,
    loss=None,
    C=None,
    fit_intercept=False,
    n_iter=None,
    n_jobs=1,
    shuffle=True,
    random_state=None,
    verbose=False):

    def _name(msg):
        return '%s.%s_%s' % (name, 'sgd', msg)

    rval = scope.sklearn_PassiveAggressiveClassifier(
        loss=hp.choice(
            _name('loss'),
            ['hinge', 'squared_hinge']) if loss is None else loss,
        C=hp.lognormal(
            _name('learning_rate'),
            np.log(0.01),
            np.log(10),
            ) if C is None else C,
        fit_intercept=fit_intercept,
        n_iter=scope.int(
            hp.qloguniform(
                _name('n_iter'),
                np.log(1),
                np.log(1000),
                q=1,
                )) if n_iter is None else n_iter,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose
        )
    return rval
Example #31
def linear_discriminant_analysis(name,
    solver=None,
    shrinkage=None,
    priors=None,
    n_components=None,
    store_covariance=False,
    tol=0.00001):

    def _name(msg):
        return '%s.%s_%s' % (name, 'lda', msg)

    solver_shrinkage = hp.choice(_name('solver_shrinkage_dual'),
                                     [('svd', None),
                                      ('lsqr', None),
                                      ('lsqr', 'auto'),
                                      ('eigen', None),
                                      ('eigen', 'auto')])

    rval = scope.sklearn_LinearDiscriminantAnalysis(
        solver=solver_shrinkage[0] if solver is None else solver,
        shrinkage=solver_shrinkage[1] if shrinkage is None else shrinkage,
        priors=priors,
        n_components=4 * scope.int(
            hp.qloguniform(
                _name('n_components'),
                low=np.log(0.51),
                high=np.log(30.5),
                q=1.0)) if n_components is None else n_components,
        store_covariance=store_covariance,
        tol=tol
        )
    return rval
Example #32
    def test_sparse_random_projection(self):
        # restrict n_components to be less than or equal to data dimension
        # to prevent sklearn warnings from printing during tests
        n_components = scope.int(
            hp.quniform('preprocessing.n_components', low=1, high=8, q=1))
        model = hyperopt_estimator(
            classifier=components.gaussian_nb('classifier'),
            preprocessing=[
                components.sparse_random_projection(
                    'preprocessing',
                    n_components=n_components,
                )
            ],
            algo=rand.suggest,
            trial_timeout=5.0,
            max_evals=5,
        )

        X_train = np.random.randn(1000, 8)
        Y_train = (X_train[:, 0] > 0).astype('int')
        X_test = np.random.randn(1000, 8)
        Y_test = (X_test[:, 0] > 0).astype('int')

        model.fit(X_train, Y_train)
        model.score(X_test, Y_test)
Example #33
def tfidf(
    name,
    analyzer=None,
    ngram_range=None,
    stop_words=None,
    lowercase=None,
    max_df=1.0,
    min_df=1,
    max_features=None,
    binary=None,
    norm=None,
    use_idf=False,
    smooth_idf=False,
    sublinear_tf=False,
):
    def _name(msg):
        return '%s.%s_%s' % (name, 'tfidf', msg)

    max_ngram = scope.int(hp.quniform(_name('max_ngram'), 1, 4, 1))

    rval = scope.sklearn_Tfidf(
        stop_words=hp.choice(_name('stop_words'), ['english', None])
        if stop_words is None else stop_words,
        lowercase=hp_bool(_name('lowercase'), )
        if lowercase is None else lowercase,
        max_df=max_df,
        min_df=min_df,
        binary=hp_bool(_name('binary'), ) if binary is None else binary,
        ngram_range=(1, max_ngram) if ngram_range is None else ngram_range,
        norm=norm,
        use_idf=use_idf,
        smooth_idf=smooth_idf,
        sublinear_tf=sublinear_tf,
    )
    return rval
Example #34
def passive_aggressive(name,
    loss=None,
    C=None,
    fit_intercept=False,
    n_iter=None,
    n_jobs=1,
    shuffle=True,
    random_state=None,
    verbose=False):

    def _name(msg):
        return '%s.%s_%s' % (name, 'sgd', msg)

    rval = scope.sklearn_PassiveAggressiveClassifier(
        loss=hp.choice(
            _name('loss'),
            ['hinge', 'squared_hinge']) if loss is None else loss,
        C=hp.lognormal(
            _name('learning_rate'),
            np.log(0.01),
            np.log(10),
            ) if C is None else C,
        fit_intercept=fit_intercept,
        n_iter=scope.int(
            hp.qloguniform(
                _name('n_iter'),
                np.log(1),
                np.log(1000),
                q=1,
                )) if n_iter is None else n_iter,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose
        )
    return rval
Example #35
def _svc_max_iter(name):
    return scope.patience_param(
        scope.int(
            hp.loguniform(
                name + '.max_iter',
                np.log(1e7),
                np.log(1e9))))
Example #36
class KnnConf(ModelConf):
    param_space = {
        "n_neighbors": scope.int(hp.quniform("n_neighbors", 1, 50, 1))
    }
    name = "kneibors_classifier"

    def instance(self, param):
        return KNeighborsClassifier(**param)
Example #37
def colkmeans(name,
              n_clusters=None,
              init=None,
              n_init=None,
              max_iter=None,
              tol=None,
              precompute_distances=True,
              verbose=0,
              random_state=None,
              copy_x=True,
              n_jobs=1):
    rval = scope.sklearn_ColumnKMeans(
        n_clusters=scope.int(
            hp.qloguniform(
                name + '.n_clusters',
                low=np.log(1.51),
                high=np.log(19.5),
                q=1.0)) if n_clusters is None else n_clusters,
        init=hp.choice(
            name + '.init',
            ['k-means++', 'random'],
            ) if init is None else init,
        n_init=hp.choice(
            name + '.n_init',
            [1, 2, 10, 20],
            ) if n_init is None else n_init,
        max_iter=scope.int(
            hp.qlognormal(
                name + '.max_iter',
                np.log(300),
                np.log(10),
                q=1,
                )) if max_iter is None else max_iter,
        tol=hp.lognormal(
            name + '.tol',
            np.log(0.0001),
            np.log(10),
            ) if tol is None else tol,
        precompute_distances=precompute_distances,
        verbose=verbose,
        random_state=random_state,
        copy_x=copy_x,
        n_jobs=n_jobs,
        )
    return rval
Example #38
def random_forest(name,
                  n_estimators=None,
                  criterion=None,
                  max_features=None,
                  max_depth=None,
                  min_samples_split=None,
                  min_samples_leaf=None,
                  bootstrap=None,
                  oob_score=None,
                  n_jobs=1,
                  random_state=None,
                  verbose=False):

    def _name(msg):
        return '%s.%s_%s' % (name, 'random_forest', msg)

    """
    Out of bag estimation only available if bootstrap=True
    """

    bootstrap_oob = hp.choice(_name('bootstrap_oob'),
                              [(True, True),
                               (True, False),
                               (False, False)])

    rval = scope.sklearn_RandomForestClassifier(
        n_estimators=scope.int(hp.quniform(
            _name('n_estimators'),
            1, 50, 1)) if n_estimators is None else n_estimators,
        criterion=hp.choice(
            _name('criterion'),
            ['gini', 'entropy']) if criterion is None else criterion,
        max_features=hp.choice(
            _name('max_features'),
            ['sqrt', 'log2',
             None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        bootstrap=bootstrap_oob[0] if bootstrap is None else bootstrap,
        oob_score=bootstrap_oob[1] if oob_score is None else oob_score,
        #bootstrap=hp.choice(
        #    _name('bootstrap'),
        #    [ True, False ] ) if bootstrap is None else bootstrap,
        #oob_score=hp.choice(
        #    _name('oob_score'),
        #    [ True, False ] ) if oob_score is None else oob_score,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose,
        )
    return rval
Example #39
def rbm(name,
        n_components=None,
        learning_rate=None,
        batch_size=None,
        n_iter=None,
        verbose=False,
        random_state=None):

    def _name(msg):
        return '%s.%s_%s' % (name, 'rbm', msg)

    rval = scope.sklearn_BernoulliRBM(
        n_components=scope.int(
            hp.qloguniform(
                name + '.n_components',
                low=np.log(0.51),
                high=np.log(999.5),
                q=1.0)) if n_components is None else n_components,
        learning_rate=hp.lognormal(
            name + '.learning_rate',
            np.log(0.01),
            np.log(10),
        ) if learning_rate is None else learning_rate,
        batch_size=scope.int(
            hp.qloguniform(
                name + '.batch_size',
                np.log(1),
                np.log(100),
                q=1,
            )) if batch_size is None else batch_size,
        n_iter=scope.int(
            hp.qloguniform(
                name + '.n_iter',
                np.log(1),
                np.log(1000),  # -- max sweeps over the *whole* train set
                q=1,
            )) if n_iter is None else n_iter,
        verbose=verbose,
        random_state=_random_state(_name('rstate'), random_state),
    )
    return rval
Example #40
def knn_regression(name,
        sparse_data=False,
        n_neighbors=None,
        weights=None,
        leaf_size=None,
        metric=None,
        p=None,
        **kwargs):

    def _name(msg):
        return '%s.%s_%s' % (name, 'knn_regression', msg)

    if sparse_data:
      metric_args = { 'metric':'euclidean' }
    else:
      metric_args = hp.pchoice(_name('metric'), [
        (0.05, { 'metric':'euclidean' }),
        (0.10, { 'metric':'manhattan' }),
        (0.10, { 'metric':'chebyshev' }),
        (0.10, { 'metric':'minkowski',
          'p':scope.int(hp.quniform(_name('minkowski_p'), 1, 5, 1))}),
        #(0.05, { 'metric':'wminkowski',
        #  'p':scope.int(hp.quniform(_name('wminkowski_p'), 1, 5, 1)),
        #  'w':hp.uniform( _name('wminkowski_w'), 0, 100 ) }),
      ] )

    rval = scope.sklearn_KNeighborsRegressor(
        n_neighbors=scope.int(hp.quniform(
            _name('n_neighbors'),
            0.5, 50, 1)) if n_neighbors is None else n_neighbors,
        weights=hp.choice(
            _name('weights'),
            ['uniform', 'distance']) if weights is None else weights,
        leaf_size=scope.int(hp.quniform(
            _name('leaf_size'),
            0.51, 100, 1)) if leaf_size is None else leaf_size,
        starstar_kwargs=metric_args
        )
    return rval
Example #41
def pca(name, n_components=None, whiten=None, copy=True):
    rval = scope.sklearn_PCA(
        # -- qloguniform is missing a "scale" parameter so we
        #    lower the "high" parameter and multiply by 4 out front
        n_components=4 * scope.int(
            hp.qloguniform(
                name + '.n_components',
                low=np.log(0.51),
                high=np.log(30.5),
                q=1.0)) if n_components is None else n_components,
        whiten=hp_bool(
            name + '.whiten',
            ) if whiten is None else whiten,
        copy=copy,
        )
    return rval
Example #42
def extra_trees_regressor(name,
                n_estimators=None,
                criterion=None,
                max_features=None,
                max_depth=None,
                min_samples_split=None,
                min_samples_leaf=None,
                bootstrap=None,
                oob_score=None,
                n_jobs=1,
                random_state=None,
                verbose=False):

    def _name(msg):
        return '%s.%s_%s' % (name, 'extra_trees', msg)

    bootstrap_oob = hp.choice(_name('bootstrap_oob'),
                              [(True, True),
                               (True, False),
                               (False, False)])

    rval = scope.sklearn_ExtraTreesRegressor(
        n_estimators=scope.int(hp.quniform(
            _name('n_estimators'),
            1, 50, 1)) if n_estimators is None else n_estimators,
        criterion=hp.choice(
            _name('criterion'),
            ['mse']) if criterion is None else criterion,
        max_features=hp.choice(
            _name('max_features'),
            ['auto', 'sqrt', 'log2',
             None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        bootstrap=bootstrap_oob[0] if bootstrap is None else bootstrap,
        oob_score=bootstrap_oob[1] if oob_score is None else oob_score,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose,
        )
    return rval
Example #43
def tfidf(name,
    analyzer=None,
    ngram_range=None,
    stop_words=None,
    lowercase=None,
    max_df=1.0,
    min_df=1,
    max_features=None,
    binary=None,
    norm=None,
    use_idf=False,
    smooth_idf=False,
    sublinear_tf=False,
    ):
    
    def _name(msg):
      return '%s.%s_%s' % (name, 'tfidf', msg)
    
    max_ngram=scope.int( hp.quniform(
        _name('max_ngram'),
        1, 4, 1 ) )

    rval = scope.sklearn_Tfidf(
        stop_words=hp.choice(
            _name('stop_words'),
            [ 'english', None ] ) if stop_words is None else stop_words,
        lowercase=hp_bool(
            _name('lowercase'),
            ) if lowercase is None else lowercase,
        max_df=max_df,
        min_df=min_df,
        binary=hp_bool(
            _name('binary'),
            ) if binary is None else binary,
        ngram_range=(1,max_ngram) if ngram_range is None else ngram_range,
        norm=norm,
        use_idf=use_idf,
        smooth_idf=smooth_idf,
        sublinear_tf=sublinear_tf,
        )
    return rval
Example #44
def nystrom(name, n_components=None, kernel=None, max_components=np.Inf, copy=True):
    
    def _name(msg):
      return '%s.%s_%s' % (name, 'nystrom', msg)
  
    rval = scope.sklearn_Nystrom(
            n_components=4 * scope.int(
            hp.qloguniform(
                name + '.n_components',
                low=np.log(0.51),
                high=np.log(min(max_components / 4, 30.5)),
                q=1.0)) if n_components is None else n_components,
            kernel=hp.pchoice(
            _name('kernel'),
            [ (0.35, 'sigmoid'),
              (0.35, 'rbf'),
              (0.30, 'poly')]) if kernel is None else kernel,
            gamma=_svc_gamma('gamma'),
            coef0=hp.uniform(_name('coef0'), 0.0, 1.0)
        )
    return rval
Example #45
def knn(name,
        n_neighbors=None,
        weights=None,
        algorithm=None,
        leaf_size=None,
        metric=None,
        p=None,
        **kwargs):

    def _name(msg):
        return '%s.%s_%s' % (name, 'knn', msg)

    """
    metric_arg = hp.choice( _name('metric'), [
      ('euclidean', None, None, None ),
      ('manhattan', None, None, None ),
      ('chebyshev', None, None, None ),
      ('minkowski', hp.quniform(_name('minkowski_p'), 1, 5, 1 ), None, None),
      ('wminkowski', hp.quniform(_name('wminkowski_p'), 1, 5, 1 ),
                      hp.uniform(_name('wminkowski_w'), 0, 100 ), None ),
      ('seuclidean', None, None, hp.uniform(_name('seuclidean_V'), 0, 100)),
      ('mahalanobis', None, None, hp.uniform(_name('mahalanobis_V'), 0, 100)),
    ])
    """
    """
    metric_args = hp.choice(_name('metric'), [
      { 'metric':'euclidean' },
      { 'metric':'manhattan' },
      { 'metric':'chebyshev' },
      { 'metric':'minkowski',
        'p':scope.int(hp.quniform(_name('minkowski_p'), 1, 5, 1))},
      { 'metric':'wminkowski',
        'p':scope.int(hp.quniform(_name('wminkowski_p'), 1, 5, 1)),
        'w':hp.uniform( _name('wminkowski_w'), 0, 100 ) },
      { 'metric':'seuclidean',
        'V':hp.uniform( _name('seuclidean_V'), 0, 100 ) },
      { 'metric':'mahalanobis',
        'V':hp.uniform( _name('mahalanobis_V'), 0, 100 ) },
    ] )
    """

    rval = scope.sklearn_KNeighborsClassifier(
        n_neighbors=scope.int(hp.quniform(
            _name('n_neighbors'),
            0.5, 50, 1)) if n_neighbors is None else n_neighbors,
        weights=hp.choice(
            _name('weights'),
            ['uniform', 'distance']) if weights is None else weights,
        algorithm=hp.choice(
            _name('algorithm'),
            ['ball_tree', 'kd_tree',
             'brute', 'auto']) if algorithm is None else algorithm,
        leaf_size=scope.int(hp.quniform(
            _name('leaf_size'),
            0.51, 100, 1)) if leaf_size is None else leaf_size,
        #TODO: more metrics available
        ###metric_args,
        ##metric=metric_arg[0] if metric is None else metric,
        ##p=metric_arg[1],
        ##w=metric_arg[2],
        ##V=metric_arg[3],
        #metric=hp.choice(
        #    _name('metric'),
        #    [ 'euclidean', 'manhattan', 'chebyshev',
        #      'minkowski' ] ) if metric is None else metric,
        #p=hp.quniform(
        #    _name('p'),
        #    1, 5, 1 ) if p is None else p,
        )
    return rval
Example #46
from hyperopt import hp
from hyperopt.pyll import scope

space = {'lrate': scope.int(hp.quniform('lrate', -0.50001, 10.49999, 1)),
         'l2_reg': scope.int(hp.quniform('l2_reg', -0.50001, 5.49999, 1)),
         'batchsize': scope.int(hp.quniform('batchsize', -0.50001, 7.49999, 1)),
         'n_epochs': scope.int(hp.quniform('n_epochs', -0.50001, 9.49999, 1))}
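This space samples lookup indices rather than hyperparameter values: hp.quniform(-0.50001, 10.49999, 1) rounds a uniform draw to each integer 0..10 with practically equal probability (the same endpoint trick as in Examples #25 and #27), and scope.int makes the result usable as a list index. A minimal consumer sketch; the lookup tables and the toy objective are hypothetical:

from hyperopt import fmin, tpe

lrates = [10.0 ** -i for i in range(11)]  # hypothetical table indexed by 'lrate' (0..10)
l2_regs = [10.0 ** -i for i in range(6)]  # hypothetical table indexed by 'l2_reg' (0..5)

def objective(params):
    # toy loss; a real objective would train a model with the decoded settings
    return lrates[params['lrate']] + l2_regs[params['l2_reg']]

best = fmin(objective, space, algo=tpe.suggest, max_evals=20)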
Example #47
def _knn_neighbors(name):
    return scope.int(hp.qloguniform(name, np.log(0.5), np.log(50.5), 1))
Example #48
def _trees_n_estimators(name):
    return scope.int(hp.qloguniform(name, np.log(9.5), np.log(3000.5), 1))
Example #49
def convnet_space_to_tpe(convnet_space):
    """
        Convert a search space defined as ConvNetSearchSpace
        to the TPE format.

        returns: search space in the TPE format.
    """
    assert(isinstance(convnet_space, ConvNetSearchSpace))
    params = []
    #params = {}

    params.append({"format": "tpe"})

    preprocessing_params = convnet_space.get_preprocessing_parameter_subspace()
    params.append(subspace_to_tpe("preprocessing", preprocessing_params))
    #add_to_dict(params, subspace_to_tpe("preprocessing", preprocessing_params))

    network_params = convnet_space.get_network_parameter_subspace()
    if isinstance(network_params["num_conv_layers"], Parameter):
        assert network_params["num_conv_layers"].min_val == 0
    if isinstance(network_params["num_fc_layers"], Parameter):
        assert network_params["num_fc_layers"].min_val == 1

    #in hyperopt we will represent the number of conv layers as a choice object
    #that's why we can strip them here:
    #num_conv_layers = network_params.pop("num_conv_layers")
    #num_fc_layers = network_params.pop("num_fc_layers")

    network_param_subspace = subspace_to_tpe("network", network_params)
    params.append(network_param_subspace)
    #add_to_dict(params, network_param_subspace)

    #Convolutional layers:
    conv_layer_subspaces = []

    for layer_id in range(1, convnet_space.max_conv_layers+1):
        conv_layer_params = convnet_space.get_conv_layer_subspace(layer_id)
        label = "conv-layer-%d" % (layer_id)
        conv_layer_subspace = subspace_to_tpe(label,
                                              conv_layer_params)
        conv_layer_subspaces.append(conv_layer_subspace)


    #to stay consistent with the fc layers we reverse the order, see below
    conv_layer_subspaces.reverse()

    conv_layers_combinations = get_stacked_layers_subspace(conv_layer_subspaces)

#    conv_layers_combinations.insert(0, []) #no conv layers
#    if isinstance(num_conv_layers, int):
#        #fixed number of layers
#        conv_layers_space = conv_layers_combinations[num_conv_layers]
#    else:
#        conv_layers_space = hp.choice('num_conv_layers', conv_layers_combinations)

    #Unfortunately scope.switch is not supported by the converter!
    conv_layers_space = scope.switch(scope.int(network_param_subspace["network/num_conv_layers"]),
                                     [],#no conv layers
                                     *conv_layers_combinations)


    params.append(conv_layers_space)
    #add_to_dict(params, {"conv-layers": conv_layers_space})

    #Fully connected layers
    fc_layer_subspaces = []

    for layer_id in range(1, convnet_space.max_fc_layers+1):
        fc_layer_params = convnet_space.get_fc_layer_subspace(layer_id)
        label = "fc-layer-%d" % (layer_id)
        fc_layer_subspace = subspace_to_tpe(label, fc_layer_params)
        fc_layer_subspaces.append(fc_layer_subspace)

    """
        We always want the last layer to show up, because it has special parameters.
        [[fc3], [fc2, fc3], [fc1, fc2, fc3]]
    """
    fc_layer_subspaces.reverse()

    fc_layers_combinations = get_stacked_layers_subspace(fc_layer_subspaces)

#    if isinstance(num_fc_layers, int):
#        #fixed number of layers
#        fc_layers_space = fc_layers_combinations[num_fc_layers]
#    else:
#        fc_layers_space = hp.choice("num_fc_layers",
#                                    fc_layers_combinations)

    fc_layers_space = scope.switch(scope.int(network_param_subspace["network/num_fc_layers"]),
                                   None,  # no fc layers
                                   *fc_layers_combinations)

    params.append(fc_layers_space)
    #add_to_dict(params, {"fc-layers": fc_layers_space})

    return params
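The `scope.switch` node this relies on uses its first argument to index into the remaining ones, and only the selected branch is evaluated, which is what makes the per-layer subspaces conditional on the sampled depth. A standalone sketch of that behavior (the labels and branch values here are illustrative):

from hyperopt import hp
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample

num_conv_layers = scope.int(hp.quniform('num_conv_layers', 0, 2, 1))  # 0, 1 or 2
conv_layers = scope.switch(num_conv_layers,
                           [],                       # 0 conv layers
                           ['conv-1'],               # 1 conv layer
                           ['conv-1', 'conv-2'])     # 2 conv layers
print(sample(conv_layers))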
Example #50
def _trees_min_samples_leaf(name):
    return hp.choice(name, [
        1,  # most common choice.
        scope.int(hp.qloguniform(name + '.gt1', np.log(1.5), np.log(50.5), 1))
    ])
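Nesting a distribution inside `hp.choice` like this puts half the prior mass on the common default of 1 and spreads the rest log-uniformly over 2..50; note the inner parameter needs its own distinct label (`name + '.gt1'`). A rough empirical check under those assumptions (again relying on the snippet's numpy/hyperopt imports):

from hyperopt.pyll.stochastic import sample

leaf = _trees_min_samples_leaf('rf.min_samples_leaf')
draws = [sample(leaf) for _ in range(1000)]
print(sum(d == 1 for d in draws) / len(draws))  # roughly 0.5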
Example #51
weight_norm_0 = hp.uniform("weight_norm_0", 0.25, 8)
weight_norm_1 = hp.uniform("weight_norm_1", 0.25, 8)
weight_norm_2 = hp.uniform("weight_norm_2", 0.25, 8)
weight_norm_3 = hp.uniform("weight_norm_3", 0.25, 8)
weight_norm_4 = hp.uniform("weight_norm_4", 0.25, 8)
weight_norm_5 = hp.uniform("weight_norm_5", 0.25, 8)

dropout_0 = hp.uniform("dropout_0", 0, 0.8)
dropout_1 = hp.uniform("dropout_1", 0, 0.8)
dropout_2 = hp.uniform("dropout_2", 0, 0.8)
dropout_3 = hp.uniform("dropout_3", 0, 0.8)
dropout_4 = hp.uniform("dropout_4", 0, 0.8)
dropout_5 = hp.uniform("dropout_5", 0, 0.8)

space = scope.switch(
    scope.int(depth),
    {"depth": 0, "log_base_epsilon_0": log_base_epsilon_0, "weight_norm_0": weight_norm_0, "dropout_0": dropout_0},
    {
        "depth": 1,
        "log_base_epsilon_0": log_base_epsilon_0,
        "weight_norm_0": weight_norm_0,
        "dropout_0": dropout_0,
        "log_base_epsilon_1": log_base_epsilon_1,
        "weight_norm_1": weight_norm_1,
        "dropout_1": dropout_1,
        "num_units_1": num_units_1,
    },
    {
        "depth": 2,
        "log_base_epsilon_0": log_base_epsilon_0,
        "weight_norm_0": weight_norm_0,
        "dropout_0": dropout_0,
        "log_base_epsilon_1": log_base_epsilon_1,
        "weight_norm_1": weight_norm_1,
        "dropout_1": dropout_1,
        "num_units_1": num_units_1,
        "log_base_epsilon_2": log_base_epsilon_2,
        "weight_norm_2": weight_norm_2,
        "dropout_2": dropout_2,
        "num_units_2": num_units_2,
    },
    # the source cuts off here; branches for depths 3-5 would follow the same pattern
)
import copy
from collections import OrderedDict
import numpy as np

try:
    from hyperopt.pyll import scope
except ImportError:
    print('Trying standalone pyll')
    from pyll import scope
from hyperopt.pyll_utils import hp_uniform, hp_loguniform, hp_quniform, hp_qloguniform
from hyperopt.pyll_utils import hp_normal, hp_lognormal, hp_qnormal, hp_qlognormal
from hyperopt.pyll_utils import hp_choice


num_filters1 = scope.int(hp_qloguniform('num_filters1', np.log(16), np.log(96), q=16))
filter1_size = scope.int(hp_quniform('filter1_shape', 2, 12, 1))

num_filters2 = scope.int(hp_qloguniform('num_filters2', np.log(16), np.log(96), q=16))
filter2_size = scope.int(hp_quniform('filter2_shape', 2, 12, 1))

num_filters3 = scope.int(hp_qloguniform('num_filters3', np.log(16), np.log(96), q=16))
filter3_size = scope.int(hp_quniform('filter3_shape', 2, 9, 1))

num_filters4 = scope.int(hp_qloguniform('num_filters4', np.log(16), np.log(64), q=16))
filter4_size = scope.int(hp_quniform('filter4_shape', 2, 9, 1))

pool1_sizex = scope.int(hp_quniform('pool1_sizex', 2, 5, 1))
pool1_type = hp_choice('pool1_type', ['max', 'avg', hp_uniform('pool_order_1', 1, 12)])

pool2_sizex = scope.int(hp_quniform('pool2_sizex', 2, 5, 1))
pool2_type = hp_choice('pool2_type', ['max', 'avg', hp_uniform('pool_order_2', 1, 4)])
Example #53
def _boosting_n_estimators(name):
    return scope.int(hp.qloguniform(name, np.log(10.5), np.log(1000.5), 1))
Example #54
# reconstructed opening: the source snippet begins mid-expression; the variable
# name `space` and the 'featurizer' choice label are assumptions
space = [
 hp.choice('featurizer', [
     {'_class': DihedralFeaturizer,
      '_factory': modelFactory,
      'types' : ['phi', 'psi'],
      'sincos': True},
     {'_class': DihedralFeaturizer,
      '_factory': modelFactory,
      'types': ['phi', 'psi', 'chi1'],
      'sincos': True},
     {'_class': DihedralFeaturizer,
      '_factory': modelFactory,
      'types': ['phi', 'psi', 'chi1', 'chi2'],
      'sincos': True},
 ]),
 hp.choice('preprocessing', [
     {'_class': PCA,
      '_factory': modelFactory,
      'n_components': scope.int(hp.quniform('pca_n_components', 2, 20, 1)),
      'copy': False},
     {'_class': tICA,
      '_factory': modelFactory,
      'n_components': scope.int(hp.quniform('tica_n_components', 2, 20, 1)),
      'gamma': hp.choice('tica_gamma', [0, 1e-7, 1e-5, 1e-3, 1e-1]),
      'weighted_transform': hp.choice('tica_weighted_transform', [True, False])
    }
 ]),
 hp.choice('cluster', [
     {'_class': MiniBatchKMeans,
      '_factory': modelFactory,
      'n_clusters': scope.int(hp.quniform('kmeans_n_clusters', 10, 1000, 10)),
      'batch_size': 10000,
      'n_init': 1,
      },
 ]),  # further cluster options are cut off in the source
]
Example #55
import copy
from collections import OrderedDict
import numpy as np

try:
    from hyperopt.pyll import scope
except ImportError:
    print('Trying standalone pyll')
    from pyll import scope
from hyperopt.pyll_utils import hp_uniform, hp_loguniform, hp_quniform, hp_qloguniform
from hyperopt.pyll_utils import hp_normal, hp_lognormal, hp_qnormal, hp_qlognormal
from hyperopt.pyll_utils import hp_choice


num_filters1 = scope.int(hp_quniform('num_filters1', 32, 128, 16))
filter1_size = scope.int(hp_quniform('filter1_shape', 5, 12, 1))

num_filters2 = scope.int(hp_quniform('num_filters2', 64, 400, 16))
filter2_size = scope.int(hp_quniform('filter2_shape', 4, 7, 1))

num_filters3 = scope.int(hp_quniform('num_filters3', 64, 400, 16))
filter3_size = scope.int(hp_quniform('filter3_shape', 3, 5, 1))

num_filters4 = scope.int(hp_quniform('num_filters4', 64, 400, 16))
filter4_size = scope.int(hp_quniform('filter4_shape', 3, 4, 1))

num_filters5 = scope.int(hp_quniform('num_filters5', 64, 400, 16))
filter5_size = scope.int(hp_quniform('filter5_shape', 2, 3, 1))

pool1_sizex = scope.int(hp_quniform('pool1_sizex', 2, 4, 1))
pool1_type = hp_choice('pool1_type', ['max', 'avg', hp_uniform('pool_order_1', 1, 4)])
Example #56
from hyperopt import hp
from hyperopt.pyll import scope


space = {"lrate" : hp.uniform("lrate", 0, 10),
         "l2_reg" : hp.uniform("l2_reg", 0, 1),
         "batchsize" : scope.int(hp.quniform("batchsize", 20, 2000, 1)),
         "n_epochs" : scope.int(hp.quniform("n_epochs", 5, 2000, 1))}
from hyperopt import hp
from hyperopt.pyll import scope

pca = {'preprocessing': 'pca', 'pca:keep_variance': scope.int(
    hp.quniform('pca:keep_variance', 0, 1, 1))}

penalty_and_loss = hp.choice('penalty_and_loss',
                             [{'liblinear:penalty': 'l1', 'liblinear:loss': 'l2'},
                              {'liblinear:penalty': 'l2', 'liblinear:loss': 'l1'},
                              {'liblinear:penalty': 'l2', 'liblinear:loss': 'l2'}])
liblinear_LOG2_C = scope.int(hp.quniform('liblinear:LOG2_C', -5, 15, 1))
liblinear = {'classifier': 'liblinear', 'liblinear:penalty_and_loss': penalty_and_loss, 'liblinear:LOG2_C': liblinear_LOG2_C}

libsvm_LOG2_C = scope.int(hp.quniform('libsvm_svc:LOG2_C', -5, 15, 1))
libsvm_LOG2_gamma = scope.int(hp.quniform('libsvm_svc:LOG2_gamma', -15, 3, 1))
libsvm_svc = {'classifier': 'libsvm_svc', 'libsvm_svc:LOG2_C': libsvm_LOG2_C, 'libsvm_svc:LOG2_gamma': libsvm_LOG2_gamma}
criterion = hp.choice('random_forest:criterion', ['gini', 'entropy'])
max_features = scope.int(hp.quniform('random_forest:max_features', 1, 10, 1))
min_samples_split = scope.int(hp.quniform('random_forest:min_samples_split', 0, 4, 1))
random_forest = {'classifier': 'random_forest', 'random_forest:criterion': criterion, 'random_forest:max_features': max_features, 'random_forest:min_samples_split': min_samples_split}

preprocessors = {'None': 'None', 'pca': pca}
classifiers = {'libsvm_svc': libsvm_svc,
               'liblinear': liblinear,
               'random_forest': random_forest}

# list() so the options are an indexable sequence under Python 3
space = {'classifier': hp.choice('classifier', list(classifiers.values())),
         'preprocessing': hp.choice('preprocessing', list(preprocessors.values()))}
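Because each option of the two top-level choices is itself a dict of parameter expressions, a sample of the space contains only the sub-parameters of the branches that were drawn. A quick check, assuming the definitions above:

from hyperopt.pyll.stochastic import sample

# e.g. {'classifier': {'classifier': 'liblinear', ...}, 'preprocessing': 'None'}
print(sample(space))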
Example #58
def quniform_int(label, *args, **kwargs):
    return scope.int(
        scope.hyperopt_param(label,
                             scope.quniform(*args, **kwargs)))
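A possible use of the helper, assuming a hyperopt version that exposes `scope.hyperopt_param` and `scope.quniform` (this is the expansion `hp.quniform` performs internally, with an integer cast added on top):

# hypothetical usage: integer draws over 10, 20, ..., 100
n_estimators = quniform_int('n_estimators', 10, 100, 10)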