def __init__(self, data, device, iter_num, timer, n_class, **args):
    self.data = data.to(device)
    self.device = device
    self.timer = timer
    self.n_class = n_class
    self.iter_num = iter_num
    self.flag_end = False
    self.params = {
        'features_num': self.data.x.size()[1],
        'num_class': self.n_class,
        # 'epoches': 150,
    }
    self.space = {
        'num_layers': scope.int(hp.choice('num_layers', [1, 2])),
        'agg': hp.choice('agg', ['concat', 'self']),
        'hidden': scope.int(hp.quniform('hidden', 4, 128, 1)),
        'hidden2': scope.int(hp.quniform('hidden2', 4, 64, 1)),
        'dropout': hp.uniform('dropout', 0.1, 0.9),
        'lr': hp.loguniform('lr', np.log(0.001), np.log(1.0)),
        'epoches': scope.int(hp.quniform('epoches', 100, 300, 10)),
        'weight_decay': hp.loguniform('weight_decay', np.log(1e-4), np.log(1e-2)),
    }
    self.points = [{
        'num_layers': 2,
        'agg': 'concat',
        'hidden': 64,
        'hidden2': 32,
        'dropout': 0.5,
        'lr': 0.005,
        'epoches': 200,
        'weight_decay': 5e-3,
    }]
def run(data_path, num_trials):
    X_train, y_train = load_pickle(os.path.join(data_path, "train.pkl"))
    X_valid, y_valid = load_pickle(os.path.join(data_path, "valid.pkl"))

    def objective(params):
        with mlflow.start_run():
            rf = RandomForestRegressor(**params)
            rf.fit(X_train, y_train)
            y_pred = rf.predict(X_valid)
            rmse = mean_squared_error(y_valid, y_pred, squared=False)
            mlflow.log_metric("rmse", rmse)
        return {'loss': rmse, 'status': STATUS_OK}

    search_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 1, 20, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 10, 50, 1)),
        'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1)),
        'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 4, 1)),
        'random_state': 42
    }

    rstate = np.random.default_rng(42)  # for reproducible results
    fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=num_trials,
        trials=Trials(),
        rstate=rstate
    )
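# Why scope.int appears throughout these snippets: hp.quniform and the other q*
# distributions return floats even with q=1, while estimators such as
# RandomForestRegressor require Python ints for parameters like max_depth.
# A minimal sketch of the cast; the 'max_depth_raw' / 'max_depth_int' labels are
# placeholders for illustration only, not part of the code above.
from hyperopt import hp
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample

raw = hp.quniform('max_depth_raw', 1, 20, 1)              # draws floats such as 7.0
cast = scope.int(hp.quniform('max_depth_int', 1, 20, 1))  # wraps the draw in int()
print(type(sample(raw)), type(sample(cast)))              # float-like vs <class 'int'>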
def get_xgboost_params(name="xgboost_common"):
    return scope.get_xgb_model(
        n_estimators=scope.int(
            hp.quniform(
                get_full_name(name, "n_estimators"),
                1, 200, 1,
            ),
        ),
        max_depth=scope.int(
            hp.quniform(
                get_full_name(name, 'max_depth'),
                1, 13, 1,
            ),
        ),
        min_child_weight=scope.int(
            hp.quniform(
                get_full_name(name, 'min_child_weight'),
                1, 6, 1,
            ),
        ),
        # subsample is a fraction in (0.5, 1), so it stays a float (no int cast)
        subsample=hp.uniform(
            get_full_name(name, 'subsample'),
            0.5, 1,
        ),
        gamma=hp.uniform(
            get_full_name(name, 'gamma'),
            0.5, 1,
        ),
        nthread=1,
        seed=RANDOM_STATE,
    )
def CNN_Tuning(Tuning_function, features, labels, n_worker, name, n_cv, train_rate,
               max_eval, conv_key, gpu_key, dropout_key, n_fit):
    params = {
        'n_units1': scope.int(hp.quniform('n_units1', 100, 300, 100)),
        'n_units2': scope.int(hp.quniform('n_units2', 100, 300, 100)),
        'n_units3': scope.int(hp.quniform('n_units3', 100, 300, 100)),
        'n_units4': scope.int(hp.quniform('n_units4', 100, 300, 100)),
        'n_units5': scope.int(hp.quniform('n_units5', 100, 300, 100)),
        'n_units6': scope.int(hp.quniform('n_units6', 100, 300, 100)),
        'layer_num': scope.int(hp.quniform('layer_num', 2, 7, 1)),
        'activate': hp.choice('activate', ('relu', 'leaky_relu')),
        'epoch': scope.int(hp.quniform('epoch', 50, 110, 10)),
        'batch_size': scope.int(hp.quniform('batch_size', 40, 200, 40)),
        'c_out': scope.int(hp.quniform('c_out', 20, 40, 10)),
    }
    tuning_object = Tuning_Object(Tuning_function, features, labels, n_worker, name,
                                  n_cv, train_rate, conv_key, gpu_key, dropout_key, n_fit)
    best = fmin(tuning_object, params, algo=tpe.suggest, max_evals=max_eval,
                rstate=np.random.RandomState(0))
    best = hyperopt.space_eval(params, best)
    return best
def get_hyperopt_space(self, params={}, random_state=None):
    if random_state is None:
        random_state = self.random_state
    result = {
        'n_estimators': scope.int(hp.quniform('n_estimators', 100, 1000, 1)),
        'eta': hp.quniform('eta', 0.025, 0.5, 0.025),
        # hp.quniform returns max_depth as a float, so wrap it in scope.int
        # to get the integer that xgboost expects.
        'max_depth': scope.int(hp.quniform('max_depth', 1, 14, 1)),
        'min_child_weight': hp.quniform('min_child_weight', 1, 6, 1),
        'subsample': hp.quniform('subsample', 0.5, 1, 0.05),
        'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.5, 1, 0.05),
        'eval_metric': 'auc',
        'objective': 'binary:logistic',
        # Increase this number if you have more cores; otherwise remove it
        # and it will default to the maximum number.
        # 'nthread': 4,
        'booster': 'gbtree',
        'tree_method': 'exact',
        'silent': 1,
        'seed': random_state
    }
    if params != {}:
        result.update(params)
    return result
def get_xgboost_model(name="xgboost_common"):
    return scope.get_xgb_model(
        n_estimators=scope.int(
            hp.quniform(
                get_full_name(name, "n_estimators"),
                1, 200, 1,
            ),
        ),
        max_depth=scope.int(
            hp.quniform(
                get_full_name(name, 'max_depth'),
                1, 13, 1,
            ),
        ),
        min_child_weight=scope.int(
            hp.quniform(
                get_full_name(name, 'min_child_weight'),
                1, 6, 1,
            ),
        ),
        # subsample is a fraction in (0.5, 1), so it stays a float (no int cast)
        subsample=hp.uniform(
            get_full_name(name, 'subsample'),
            0.5, 1,
        ),
        gamma=hp.uniform(
            get_full_name(name, 'gamma'),
            0.5, 1,
        ),
        nthread=1,
        seed=RANDOM_STATE,
    )
def visitSearchSpaceNumber(self, space: SearchSpaceNumber, path: str, counter=None):
    label = self.mk_label(path, counter)

    if space.pgo is not None:
        return scope.pgo_sample(
            space.pgo, hp.quniform(label, 0, len(space.pgo) - 1, 1)
        )

    dist = "uniform"
    if space.distribution:
        dist = space.distribution

    if space.maximum is None:
        raise SearchSpaceError(
            path, f"maximum not specified for a number with distribution {dist}"
        )
    max = space.getInclusiveMax()
    # if the maximum is not None, the inclusive maximum should not be None
    assert max is not None

    # These distributions need only a maximum
    if dist == "integer":
        if not space.discrete:
            raise SearchSpaceError(
                path,
                "integer distribution specified for a non-discrete numeric type",
            )
        return hp.randint(label, max)

    if space.minimum is None:
        raise SearchSpaceError(
            path, f"minimum not specified for a number with distribution {dist}"
        )
    min = space.getInclusiveMin()
    # if the minimum is not None, the inclusive minimum should not be None
    assert min is not None

    if dist == "uniform":
        if space.discrete:
            return scope.int(hp.quniform(label, min, max, 1))
        else:
            return hp.uniform(label, min, max)
    elif dist == "loguniform":
        # for log distributions, hyperopt requires the log of the min/max
        if min <= 0:
            raise SearchSpaceError(
                path,
                f"minimum of 0 specified with a {dist} distribution. This is not allowed; please set it (possibly using minimumForOptimizer) to be positive",
            )
        if min > 0:
            min = math.log(min)
        if max > 0:
            max = math.log(max)
        if space.discrete:
            return scope.int(hp.qloguniform(label, min, max, 1))
        else:
            return hp.loguniform(label, min, max)
    else:
        raise SearchSpaceError(path, f"Unknown distribution type: {dist}")
def bernoulli_rbm_hp_space(n_components=None,
                           learning_rate=None,
                           batch_size=None,
                           n_iter=None,
                           verbose=False,
                           random_state=None):
    rval = dict(
        n_components=scope.int(
            hp.qloguniform(
                'n_components',
                low=np.log(0.51),
                high=np.log(999.5),
                q=1.0)) if n_components is None else n_components,
        learning_rate=hp.lognormal(
            'learning_rate',
            np.log(0.01),
            np.log(10),
        ) if learning_rate is None else learning_rate,
        batch_size=scope.int(
            hp.qloguniform(
                '.batch_size',
                np.log(1),
                np.log(100),
                q=1,
            )) if batch_size is None else batch_size,
        n_iter=scope.int(
            hp.qloguniform(
                'n_iter',
                np.log(1),
                np.log(1000),  # -- max sweeps over the *whole* train set
                q=1,
            )) if n_iter is None else n_iter,
        verbose=verbose,
        random_state=_random_state('rstate', random_state),
    )
    return rval
class DecisionTreeModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return DecisionTreeRegressor(random_state=RANDOM_STATE, presort=True, **args)

    hp_space = {
        "criterion": hp.choice("criterion", ["mse", "friedman_mse", "mae"]),
        "max_depth": hp.pchoice(
            "max_depth_enabled",
            [
                (0.7, None),
                (0.3, 1 + scope.int(hp.qlognormal("max_depth", np.log(30), 0.5, 3))),
            ],
        ),
        "splitter": hp.choice("splitter_str", ["best", "random"]),
        "max_features": hp.pchoice(
            "max_features_str",
            [
                (0.2, "sqrt"),  # most common choice.
                (0.1, "log2"),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform("max_features_str_frac", 0.0, 1.0)),
            ],
        ),
        "min_samples_split": scope.int(hp.quniform("min_samples_split_str", 2, 10, 1)),
        "min_samples_leaf": hp.choice(
            "min_samples_leaf_enabled",
            [
                1,
                scope.int(
                    hp.qloguniform("min_samples_leaf", np.log(1.5), np.log(50.5), 1)
                ),
            ],
        ),
    }
def optimize(cls, trials, score, evals_rounds, mon_cons, categorical):
    """
    This function specifies the hyperparameter search space and minimises the score function

    :param trials: hyperopt.Trials
        hyperopt trials object responsible for the hyperparameter search
    :param score: function
        the loss or score function to be minimised
    :param evals_rounds: int
        number of evaluation rounds for hyperparameter tuning
    :param mon_cons: str(tuple) for xgboost, tuple for lightgbm
        index of monotonic constraints
    :param categorical: list
        index of categorical feature for lightgbm
    :return best: dict
        the best hyperparameters
    """
    space = {
        "n_estimators": scope.int(hp.quniform("n_estimators", 10, 3000, 5)),
        "learning_rate": hp.quniform("learning_rate", 0.05, 0.3, 0.025),
        "max_depth": scope.int(hp.quniform("max_depth", 1, 20, 1)),
        "num_leaves": scope.int(hp.quniform("num_leaves", 2, 1024, 2)),
        "min_child_samples": scope.int(hp.quniform("min_child_samples", 2, 100, 1)),
        "subsample": hp.quniform("subsample", 0.6, 1, 0.05),  # bagging_fraction
        "colsample_bytree": hp.quniform("colsample_bytree", 0.4, 1, 0.1),  # feature_fraction
        "min_sum_hessian_in_leaf": hp.quniform("min_sum_hessian_in_leaf", 0.001, 0.9, 0.001),
        "reg_lambda": hp.quniform("reg_lambda", 0.01, 1, 0.01),
        "reg_alpha": hp.quniform("reg_alpha", 1, 10, 0.01),
        "monotone_constraints": mon_cons,
        # 'categorical_feature': categorical
    }

    best = fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=evals_rounds)

    # Convert the relevant hyperparameters to int
    best["n_estimators"] = int(best["n_estimators"])
    best["max_depth"] = int(best["max_depth"])
    best["num_leaves"] = int(best["num_leaves"])
    best["min_child_samples"] = int(best["min_child_samples"])

    logger.info("BEST_PARAMETERS")
    logger.info(best)
    return best
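# An alternative to the manual int() casts above: hyperopt's space_eval re-evaluates
# the search-space expression at the point fmin returned, so scope.int nodes come back
# as Python ints and literal entries pass through unchanged (several later snippets use
# this). A sketch, assuming the same `space` dict and `best` result as in the function above:
from hyperopt import space_eval

best_params = space_eval(space, best)  # ints wherever scope.int was used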
def get_hyperopt(self, label):
    from hyperopt import hp
    from hyperopt.pyll import scope

    if self.log:
        return scope.int(
            hp.qloguniform(label, np.log(self.lower), np.log(self.upper), 1))
    else:
        return scope.int(hp.quniform(label, self.lower, self.upper, 1))
class RandomForestConf(ModelConf):
    param_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 1, 20, 1)),
        'max_features': scope.int(hp.quniform('max_features', 1, 150, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 100, 500, 1)),
        'criterion': hp.choice('criterion', ["gini", "entropy"])
    }
    name = "random_forest"

    def instance(self, param):
        return RandomForestClassifier(**param)
def optimize(
        # trials,
        random_state=SEED):
    """
    This is the optimization function that, given a space of hyperparameters
    and a scoring function (score_model here), finds the best hyperparameters.
    """
    space = {
        'max_depth': scope.int(hp.uniform('max_depth', 5, 15)),
        'subsample': hp.uniform('subsample', 0.03, 1),
        'learning_rate': hp.loguniform('learning_rate', np.log(0.005), np.log(0.5)) - 0.0001,
        'colsample_bytree': hp.uniform('colsample_bytree', 0.3, 1),
        'reg_alpha': hp.loguniform('reg_alpha', np.log(0.005), np.log(5)) - 0.0001,
        'reg_lambda': hp.loguniform('reg_lambda', np.log(1), np.log(5)),
        'bagging_freq': hp.choice('bagging_freq', [0, 1]),
        'num_leaves': scope.int(hp.uniform('num_leaves', 10, 128)),
        'n_estimators': 1000,
        'boosting': 'gbdt',
        'objective': 'multiclass',
        'num_class': 12,
        'metric': 'None',
        'is_unbalance': 'true',
        # 'min_data_per_group': 1000,
        'verbose': -1,
        'random_seed': 42,
    }

    # Use the fmin function from hyperopt to find the best hyperparameters
    best = fmin(
        score_model, space, algo=tpe.suggest,
        # trials=trials,
        max_evals=hyperopt_niters)
    return best
def param_space(self) -> Dict[str, Any]:
    return {
        'batch_size': hp.choice('batch_size', options=[2 ** x for x in range(4, 6 + 1)]),
        'learning_rate': hp.loguniform('learning_rate', low=np.log(0.0001), high=np.log(1)),
        'num_blocks': scope.int(hp.quniform('num_blocks', low=2, high=6, q=1)),
        'block_size': scope.int(hp.quniform('block_size', low=1, high=3, q=1)),
        'fcl_num_layers': scope.int(hp.quniform('fcl_num_layers', low=1, high=4, q=1)),
        'fcl_layer_size': hp.choice('fcl_layer_size', options=[512, 768, 1024, 1536]),
        'fcl_dropout_rate': hp.quniform('fcl_dropout_rate', low=0.05, high=0.5, q=0.05),
        'activation': hp.choice('activation', options=['relu', 'selu', 'tanh']),
        'optimizer': hp.choice('optimizer', options=['adam', 'adamax', 'nadam', 'rms-prop'])
    }
class GradientBoostingModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return GradientBoostingRegressor(random_state=RANDOM_STATE, presort=True, **args)

    loss_alpha = hp.choice('loss_alpha',
                           [('ls', 0.9),
                            ('lad', 0.9),
                            ('huber', hp.uniform('gbr_alpha', 0.85, 0.95)),
                            ('quantile', 0.5)])

    hp_space = {
        'n_estimators': scope.int(
            hp.qloguniform('n_estimators', np.log(10.5), np.log(1000.5), 1)),
        'learning_rate': hp.lognormal('learning_rate', np.log(0.01), np.log(10.0)),
        'criterion': hp.choice('criterion', ['mse', 'friedman_mse', 'mae']),
        'max_depth': hp.pchoice('max_depth', [(0.2, 2), (0.5, 3), (0.2, 4), (0.1, 5)]),
        'min_samples_leaf': hp.choice(
            'min_samples_leaf_enabled',
            [
                1,  # most common choice.
                scope.int(
                    hp.qloguniform('min_samples_leaf', np.log(1.5), np.log(50.5), 1))
            ]),
        'subsample': hp.pchoice(
            'subsample_enabled',
            [
                (0.2, 1.0),  # default choice.
                (0.8, hp.uniform('subsample', 0.5, 1.0))  # stochastic grad boosting.
            ]),
        'max_features': hp.pchoice(
            'max_features_str',
            [
                (0.1, 'sqrt'),  # most common choice.
                (0.2, 'log2'),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform('max_features_str_frac', 0., 1.))
            ]),
        'loss': loss_alpha[0],
        'alpha': loss_alpha[1]
    }
def hyperopt(X_train, X_test, y_train, y_test, param_space, num_eval):
    ## Setting Hyperparameter Grid
    param_hyperopt = {
        'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(1)),
        'max_depth': scope.int(hp.quniform('max_depth', 3, 15, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 5, 100, 1)),
        'num_leaves': scope.int(hp.quniform('num_leaves', 5, 50, 1)),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.6, 1.0),
        'boosting_type': 'gbdt',
    }

    ## Defining Objective Function for Tuning
    def objective_function(params):
        # Evaluating LightGBM Classification Model on Tuning Parameters
        clf = lgb.LGBMClassifier(**params)
        evaluation = [(X_train, y_train), (X_test, y_test)]
        # Training Model
        clf.fit(X_train, y_train, eval_set=evaluation, eval_metric='auc',
                early_stopping_rounds=10, verbose=False)
        # Score Model on Validation to Obtain Predicted Probabilities
        preds = clf.predict_proba(X_test)
        preds = preds[:, 1]
        # Adjusting Intercept of Predictions to Account for Oversampling Bias
        # Change to fit Target Proportion in Training and Oversampled Training
        newpreds = (preds * 0.8 * 0.02) / ((1 - preds) * 0.2 * 0.98 + preds * 0.8 * 0.02)
        # Evaluate Model and Adjust Hyperparameters to Maximize AUC
        auc = roc_auc_score(y_test, newpreds)
        print('Score:', auc)
        return {'loss': -auc, 'status': STATUS_OK}

    trials = Trials()
    # Parameter Tuning
    best_param = fmin(objective_function, param_space, algo=tpe.suggest,
                      max_evals=num_eval, trials=trials,
                      rstate=np.random.RandomState(1))
    return best_param
class SvmConf(ModelConf):
    param_space = {
        'C': hp.uniform('C', 0.1, 2.0),
        'kernel': hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
        'degree': scope.int(hp.quniform('degree', 2, 5, 1)),
        'gamma': hp.choice('gamma', ['auto', 'scale']),
        'tol': hp.loguniform('tol', np.log(1e-5), np.log(1e-2)),
        'max_iter': scope.int(hp.quniform('max_iter', -1, 100, 1))
    }
    name = "svm_classifier"

    def instance(self, param):
        return SVC(**param)
def hyperopt_constructor(loader: yaml.BaseLoader, suffix: str, node: yaml.Node):
    if suffix not in [
        "choice",
        "pchoice",
        "normal",
        "qnormal",
        "lognormal",
        "qlognormal",
        "uniform",
        "quniform",
        "shiftedquniform",
        "loguniform",
        "qloguniform",
        "randint",
    ]:
        raise ValueError(f"{suffix} is not a valid function")

    from hyperopt import hp
    from hyperopt.pyll import scope

    loader.hp_label_inc = getattr(loader, "hp_label_inc", -1) + 1
    label = f"label{loader.hp_label_inc}"
    func = getattr(hp, suffix, None)

    if suffix == "choice" or suffix == "pchoice":
        return func(label, loader.construct_sequence(node, deep=True))

    if suffix == "shiftedquniform":
        # shift hp.quniform so that low is 0 and shift back after rounding
        kwargs = _construct_hyperopt_params(loader, node, ["low", "high", "q"])
        low = kwargs["low"]
        kwargs["low"] = 0
        kwargs["high"] -= low
        apply = getattr(hp, "quniform")(label, **kwargs) + low
        if isinstance(kwargs["q"], int) and isinstance(low, int):
            # convert to int if low and q are ints
            apply = scope.int(apply)
        return apply

    if suffix[0] == "q":
        if suffix == "quniform" or suffix == "qloguniform":
            args_order = ["low", "high", "q"]
        else:
            args_order = ["mu", "sigma", "q"]
        kwargs = _construct_hyperopt_params(loader, node, args_order)
        if isinstance(kwargs["q"], int):
            # convert to int if q is an int
            return scope.int(func(label, **kwargs))
        return func(label, **kwargs)

    if isinstance(node, yaml.SequenceNode):
        return func(label, *loader.construct_sequence(node, deep=True))
    else:
        return func(label, **loader.construct_mapping(node, deep=True))
def find_hyperopt(df_train: pd.DataFrame, folds: pd.DataFrame) -> Dict:
    log = logging.getLogger(__name__)
    cols_all, col_target = get_cols(df_train)
    results = {}

    space = {
        'num_leaves': scope.int(hp.quniform('num_leaves', 3, 100, 1)),
        'max_depth': scope.int(hp.quniform('max_depth', 10, 70, 1)),
        'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 5, 150, 1)),
        'feature_fraction': hp.uniform('feature_fraction', 0.85, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.85, 1.0),
        'min_sum_hessian_in_leaf': hp.loguniform('min_sum_hessian_in_leaf', 0, 2.3),
        'lambda_l1': hp.uniform('lambda_l1', 1e-4, 2),
        'lambda_l2': hp.uniform('lambda_l2', 1e-4, 2),
        'seed': random_state,
        'feature_fraction_seed': random_state,
        'bagging_seed': random_state,
        'drop_seed': random_state,
        'data_random_seed': random_state,
        'verbose': -1,
        'bagging_freq': 5,
        'max_bin': 255,
        'learning_rate': 0.001,
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'auc',
    }

    for col in col_target:
        cols_all, _ = get_cols(df_train, col)

        def score(params):
            cv_score = CV_score(params=params,
                                cols_all=cols_all,
                                col_target=col,
                                num_boost_round=99999999,
                                early_stopping_rounds=50,
                                valid=True)
            return cv_score.fit(df=df_train, folds=folds)

        trials = Trials()
        best = fmin(fn=score,
                    space=space,
                    algo=tpe.suggest,
                    trials=trials,
                    max_evals=max_evals)
        results[col] = space_eval(space, best)
    return results
def xgboost_classifier_bayesian_space():
    return {
        "max_depth": scope.int(hp.quniform("x_max_depth", 1, 3, 1)),
        "n_estimators": scope.int(hp.quniform("x_n_estimators", 100, 1000, 1)),
        "min_child_weight": scope.int(hp.quniform("x_min_child", 1, 10, 1)),
        "subsample": hp.uniform("x_subsample", 0.5, 0.9),
        "gamma": hp.uniform("x_gamma", 0.0, 0.2),
        "colsample_bytree": hp.uniform("x_colsample_bytree", 0.5, 1.),
        "colsample_bylevel": hp.uniform("x_colsample_bylevel", 0.5, 1.),
        "colsample_bynode": hp.uniform("x_colsample_bynode", 0.5, 1.),
        # "max_delta_step": scope.int(hp.quniform("x_max_delta_step", 0, 8, 1)),
        "reg_lambda": hp.uniform("x_reg_lambda", 0, 1),
        "reg_alpha": hp.uniform("x_reg_alpha", 0, 1),
        "learning_rate": hp.uniform("x_learning_rate", 0.01, 0.5)
    }
def lgbm_hp_space(**kwargs):
    space = {
        'n_estimators': scope.int(hp.quniform('n_estimators', 10, 700, 1)),
        'num_leaves': scope.int(hp.quniform('num_leaves', 10, 200, 1)),
        'feature_fraction': hp.uniform('feature_fraction', 0.75, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.75, 1.0),
        'learning_rate': hp.loguniform('learning_rate', -5.0, -2.3),
        'max_bin': scope.int(hp.quniform('max_bin', 64, 512, 1)),
        'bagging_freq': scope.int(hp.quniform('bagging_freq', 1, 5, 1)),
        'lambda_l1': hp.uniform('lambda_l1', 0, 10),
        'lambda_l2': hp.uniform('lambda_l2', 0, 10),
        **kwargs
    }
    return space
def get_hyperopt_space(self, params={}, random_state=None):
    if random_state is None:
        random_state = self.random_state
    result = {
        'num_leaves': scope.int(hp.quniform('num_leaves', 100, 500, 1)),
        'max_depth': scope.int(hp.quniform('max_depth', 10, 70, 1)),
        'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 10, 150, 1)),
        'feature_fraction': hp.uniform('feature_fraction', 0.75, 1.0),
        'bagging_fraction': hp.uniform('bagging_fraction', 0.75, 1.0),
        'min_sum_hessian_in_leaf': hp.loguniform('min_sum_hessian_in_leaf', 0, 2.3),
        'lambda_l1': hp.uniform('lambda_l1', 1e-4, 2),
        'lambda_l2': hp.uniform('lambda_l2', 1e-4, 2),
        'seed': random_state,
        'feature_fraction_seed': random_state,
        'bagging_seed': random_state,
        'drop_seed': random_state,
        'data_random_seed': random_state,
        'verbose': -1,
        'bagging_freq': 5,
        'max_bin': 255,
        'learning_rate': 0.03,
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'auc',
    }
    if params != {}:
        result.update(params)
    return result
def test_preproc(self):
    """
    As a domain expert, I have a particular pre-processing that I believe
    reveals important patterns in my data. I would like to know how good
    a classifier can be built on top of my preprocessing algorithm.
    """
    # -- for testing purposes, suppose that the RBM is our "domain-specific
    #    pre-processing"
    algo = SklearnClassifier(
        partial(
            hyperopt_estimator,
            preprocessing=hp.choice('pp', [
                # -- VQ (alone)
                [
                    hpc.colkmeans('vq0', n_init=1),
                ],
                # -- VQ -> RBM
                [
                    hpc.colkmeans('vq1',
                                  n_clusters=scope.int(
                                      hp.quniform('vq1.n_clusters', 1, 5, q=1)),
                                  n_init=1),
                    hpc.rbm(name='rbm:alone', verbose=0)
                ],
                # -- VQ -> RBM -> PCA
                [
                    hpc.colkmeans('vq2',
                                  n_clusters=scope.int(
                                      hp.quniform('vq2.n_clusters', 1, 5, q=1)),
                                  n_init=1),
                    hpc.rbm(name='rbm:pre-pca', verbose=0),
                    hpc.pca('pca')
                ],
            ]),
            classifier=hpc.any_classifier('classif'),
            algo=tpe.suggest,
            max_evals=10,
        ))
    mean_test_error = self.view.protocol(algo)
    print('mean test error:', mean_test_error)
def ts_lagselector(name, lower_lags=1, upper_lags=1):
    rval = scope.ts_LagSelector(
        lag_size=scope.int(
            hp.quniform(name + '.lags', lower_lags - .5, upper_lags + .5, 1))
    )
    return rval
def linear_discriminant_analysis(name,
                                 solver=None,
                                 shrinkage=None,
                                 priors=None,
                                 n_components=None,
                                 store_covariance=False,
                                 tol=0.00001):
    def _name(msg):
        return '%s.%s_%s' % (name, 'lda', msg)

    solver_shrinkage = hp.choice(_name('solver_shrinkage_dual'),
                                 [('svd', None),
                                  ('lsqr', None),
                                  ('lsqr', 'auto'),
                                  ('eigen', None),
                                  ('eigen', 'auto')])

    rval = scope.sklearn_LinearDiscriminantAnalysis(
        solver=solver_shrinkage[0] if solver is None else solver,
        shrinkage=solver_shrinkage[1] if shrinkage is None else shrinkage,
        priors=priors,
        n_components=4 * scope.int(
            hp.qloguniform(
                _name('n_components'),
                low=np.log(0.51),
                high=np.log(30.5),
                q=1.0)) if n_components is None else n_components,
        store_covariance=store_covariance,
        tol=tol
    )
    return rval
def test_sparse_random_projection(self):
    # restrict n_components to be less than or equal to data dimension
    # to prevent sklearn warnings from printing during tests
    n_components = scope.int(hp.quniform(
        'preprocessing.n_components', low=1, high=8, q=1
    ))
    model = hyperopt_estimator(
        classifier=components.gaussian_nb('classifier'),
        preprocessing=[
            components.sparse_random_projection(
                'preprocessing',
                n_components=n_components,
            )
        ],
        algo=rand.suggest,
        trial_timeout=5.0,
        max_evals=5,
    )
    X_train = np.random.randn(1000, 8)
    Y_train = (self.X_train[:, 0] > 0).astype('int')
    X_test = np.random.randn(1000, 8)
    Y_test = (self.X_test[:, 0] > 0).astype('int')
    model.fit(X_train, Y_train)
    model.score(X_test, Y_test)
class ExplainableBoostingMachineModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        feature_names = [f"feature_{i}" for i in range(train_data[0].shape[1])]
        return ExplainableBoostingClassifier(random_state=RANDOM_STATE,
                                             feature_names=feature_names,
                                             **args)

    hp_space = {
        "learning_rate": hp.loguniform("learning_rate", np.log(0.0001), np.log(1.0)),
        "max_bins": scope.int(hp.quniform("max_bins", 20, 400, 3)),
        "max_leaves": scope.int(hp.loguniform("max_leaves", np.log(2), np.log(100))),
    }
def passive_aggressive(name,
                       loss=None,
                       C=None,
                       fit_intercept=False,
                       n_iter=None,
                       n_jobs=1,
                       shuffle=True,
                       random_state=None,
                       verbose=False):
    def _name(msg):
        return '%s.%s_%s' % (name, 'sgd', msg)

    rval = scope.sklearn_PassiveAggressiveClassifier(
        loss=hp.choice(
            _name('loss'),
            ['hinge', 'squared_hinge']) if loss is None else loss,
        C=hp.lognormal(
            _name('learning_rate'),
            np.log(0.01),
            np.log(10),
        ) if C is None else C,
        fit_intercept=fit_intercept,
        n_iter=scope.int(
            hp.qloguniform(
                _name('n_iter'),
                np.log(1),
                np.log(1000),
                q=1,
            )) if n_iter is None else n_iter,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose
    )
    return rval
def tfidf(name,
          analyzer=None,
          ngram_range=None,
          stop_words=None,
          lowercase=None,
          max_df=1.0,
          min_df=1,
          max_features=None,
          binary=None,
          norm=None,
          use_idf=False,
          smooth_idf=False,
          sublinear_tf=False,
          ):
    def _name(msg):
        return '%s.%s_%s' % (name, 'tfidf', msg)

    max_ngram = scope.int(hp.quniform(_name('max_ngram'), 1, 4, 1))

    rval = scope.sklearn_Tfidf(
        stop_words=hp.choice(_name('stop_words'), ['english', None])
        if analyzer is None else analyzer,
        lowercase=hp_bool(_name('lowercase')) if lowercase is None else lowercase,
        max_df=max_df,
        min_df=min_df,
        binary=hp_bool(_name('binary')) if binary is None else binary,
        ngram_range=(1, max_ngram) if ngram_range is None else ngram_range,
        norm=norm,
        use_idf=use_idf,
        smooth_idf=smooth_idf,
        sublinear_tf=sublinear_tf,
    )
    return rval
def _svc_max_iter(name):
    return scope.patience_param(
        scope.int(
            hp.loguniform(
                name + '.max_iter',
                np.log(1e7),
                np.log(1e9))))
class KnnConf(ModelConf):
    param_space = {
        "n_neighbors": scope.int(hp.quniform("n_neighbors", 1, 50, 1))
    }
    name = "kneibors_classifier"

    def instance(self, param):
        return KNeighborsClassifier(**param)
def colkmeans(name,
              n_clusters=None,
              init=None,
              n_init=None,
              max_iter=None,
              tol=None,
              precompute_distances=True,
              verbose=0,
              random_state=None,
              copy_x=True,
              n_jobs=1):
    rval = scope.sklearn_ColumnKMeans(
        n_clusters=scope.int(
            hp.qloguniform(
                name + '.n_clusters',
                low=np.log(1.51),
                high=np.log(19.5),
                q=1.0)) if n_clusters is None else n_clusters,
        init=hp.choice(
            name + '.init',
            ['k-means++', 'random'],
        ) if init is None else init,
        n_init=hp.choice(
            name + '.n_init',
            [1, 2, 10, 20],
        ) if n_init is None else n_init,
        max_iter=scope.int(
            hp.qlognormal(
                name + '.max_iter',
                np.log(300),
                np.log(10),
                q=1,
            )) if max_iter is None else max_iter,
        tol=hp.lognormal(
            name + '.tol',
            np.log(0.0001),
            np.log(10),
        ) if tol is None else tol,
        precompute_distances=precompute_distances,
        verbose=verbose,
        random_state=random_state,
        copy_x=copy_x,
        n_jobs=n_jobs,
    )
    return rval
def random_forest(name,
                  n_estimators=None,
                  criterion=None,
                  max_features=None,
                  max_depth=None,
                  min_samples_split=None,
                  min_samples_leaf=None,
                  bootstrap=None,
                  oob_score=None,
                  n_jobs=1,
                  random_state=None,
                  verbose=False):
    def _name(msg):
        return '%s.%s_%s' % (name, 'random_forest', msg)

    """
    Out of bag estimation only available if bootstrap=True
    """
    bootstrap_oob = hp.choice(_name('bootstrap_oob'),
                              [(True, True),
                               (True, False),
                               (False, False)])

    rval = scope.sklearn_RandomForestClassifier(
        n_estimators=scope.int(hp.quniform(
            _name('n_estimators'),
            1, 50, 1)) if n_estimators is None else n_estimators,
        criterion=hp.choice(
            _name('criterion'),
            ['gini', 'entropy']) if criterion is None else criterion,
        max_features=hp.choice(
            _name('max_features'),
            ['sqrt', 'log2', None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        bootstrap=bootstrap_oob[0] if bootstrap is None else bootstrap,
        oob_score=bootstrap_oob[1] if oob_score is None else oob_score,
        #bootstrap=hp.choice(
        #    _name('bootstrap'),
        #    [True, False]) if bootstrap is None else bootstrap,
        #oob_score=hp.choice(
        #    _name('oob_score'),
        #    [True, False]) if oob_score is None else oob_score,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose,
    )
    return rval
def rbm(name,
        n_components=None,
        learning_rate=None,
        batch_size=None,
        n_iter=None,
        verbose=False,
        random_state=None):
    def _name(msg):
        return '%s.%s_%s' % (name, 'rbm', msg)

    rval = scope.sklearn_BernoulliRBM(
        n_components=scope.int(
            hp.qloguniform(
                name + '.n_components',
                low=np.log(0.51),
                high=np.log(999.5),
                q=1.0)) if n_components is None else n_components,
        learning_rate=hp.lognormal(
            name + '.learning_rate',
            np.log(0.01),
            np.log(10),
        ) if learning_rate is None else learning_rate,
        batch_size=scope.int(
            hp.qloguniform(
                name + '.batch_size',
                np.log(1),
                np.log(100),
                q=1,
            )) if batch_size is None else batch_size,
        n_iter=scope.int(
            hp.qloguniform(
                name + '.n_iter',
                np.log(1),
                np.log(1000),  # -- max sweeps over the *whole* train set
                q=1,
            )) if n_iter is None else n_iter,
        verbose=verbose,
        random_state=_random_state(_name('rstate'), random_state),
    )
    return rval
def knn_regression(name, sparse_data=False,
                   n_neighbors=None,
                   weights=None,
                   leaf_size=None,
                   metric=None,
                   p=None,
                   **kwargs):
    def _name(msg):
        return '%s.%s_%s' % (name, 'knn_regression', msg)

    if sparse_data:
        metric_args = {'metric': 'euclidean'}
    else:
        metric_args = hp.pchoice(_name('metric'), [
            (0.05, {'metric': 'euclidean'}),
            (0.10, {'metric': 'manhattan'}),
            (0.10, {'metric': 'chebyshev'}),
            (0.10, {'metric': 'minkowski',
                    'p': scope.int(hp.quniform(_name('minkowski_p'), 1, 5, 1))}),
            # (0.05, {'metric': 'wminkowski',
            #         'p': scope.int(hp.quniform(_name('wminkowski_p'), 1, 5, 1)),
            #         'w': hp.uniform(_name('wminkowski_w'), 0, 100)}),
        ])

    rval = scope.sklearn_KNeighborsRegressor(
        n_neighbors=scope.int(hp.quniform(
            _name('n_neighbors'),
            0.5, 50, 1)) if n_neighbors is None else n_neighbors,
        weights=hp.choice(
            _name('weights'),
            ['uniform', 'distance']) if weights is None else weights,
        leaf_size=scope.int(hp.quniform(
            _name('leaf_size'),
            0.51, 100, 1)) if leaf_size is None else leaf_size,
        starstar_kwargs=metric_args
    )
    return rval
def pca(name, n_components=None, whiten=None, copy=True):
    rval = scope.sklearn_PCA(
        # -- qloguniform is missing a "scale" parameter so we
        #    lower the "high" parameter and multiply by 4 out front
        n_components=4 * scope.int(
            hp.qloguniform(
                name + '.n_components',
                low=np.log(0.51),
                high=np.log(30.5),
                q=1.0)) if n_components is None else n_components,
        whiten=hp_bool(
            name + '.whiten',
        ) if whiten is None else whiten,
        copy=copy,
    )
    return rval
def extra_trees_regressor(name,
                          n_estimators=None,
                          criterion=None,
                          max_features=None,
                          max_depth=None,
                          min_samples_split=None,
                          min_samples_leaf=None,
                          bootstrap=None,
                          oob_score=None,
                          n_jobs=1,
                          random_state=None,
                          verbose=False):
    def _name(msg):
        return '%s.%s_%s' % (name, 'extra_trees', msg)

    bootstrap_oob = hp.choice(_name('bootstrap_oob'),
                              [(True, True),
                               (True, False),
                               (False, False)])

    rval = scope.sklearn_ExtraTreesRegressor(
        n_estimators=scope.int(hp.quniform(
            _name('n_estimators'),
            1, 50, 1)) if n_estimators is None else n_estimators,
        criterion=hp.choice(
            _name('criterion'),
            ['mse']) if criterion is None else criterion,
        max_features=hp.choice(
            _name('max_features'),
            ['auto', 'sqrt', 'log2', None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        bootstrap=bootstrap_oob[0] if bootstrap is None else bootstrap,
        oob_score=bootstrap_oob[1] if oob_score is None else oob_score,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose,
    )
    return rval
def nystrom(name, n_components=None, kernel=None, max_components=np.Inf, copy=True):
    def _name(msg):
        return '%s.%s_%s' % (name, 'nystrom', msg)

    rval = scope.sklearn_Nystrom(
        n_components=4 * scope.int(
            hp.qloguniform(
                name + '.n_components',
                low=np.log(0.51),
                high=np.log(min(max_components / 4, 30.5)),
                q=1.0)) if n_components is None else n_components,
        kernel=hp.pchoice(
            _name('kernel'),
            [
                (0.35, 'sigmoid'),
                (0.35, 'rbf'),
                (0.30, 'poly')]) if kernel is None else kernel,
        gamma=_svc_gamma('gamma'),
        coef0=hp.uniform(_name('coef0'), 0.0, 1.0)
    )
    return rval
def knn(name,
        n_neighbors=None,
        weights=None,
        algorithm=None,
        leaf_size=None,
        metric=None,
        p=None,
        **kwargs):
    def _name(msg):
        return '%s.%s_%s' % (name, 'knn', msg)

    """
    metric_arg = hp.choice(_name('metric'), [
        ('euclidean', None, None, None),
        ('manhattan', None, None, None),
        ('chebyshev', None, None, None),
        ('minkowski', hp.quniform(_name('minkowski_p'), 1, 5, 1), None, None),
        ('wminkowski', hp.quniform(_name('wminkowski_p'), 1, 5, 1),
         hp.uniform(_name('wminkowski_w'), 0, 100), None),
        ('seuclidean', None, None, hp.uniform(_name('seuclidean_V'), 0, 100)),
        ('mahalanobis', None, None, hp.uniform(_name('mahalanobis_V'), 0, 100)),
    ])
    """
    """
    metric_args = hp.choice(_name('metric'), [
        {'metric': 'euclidean'},
        {'metric': 'manhattan'},
        {'metric': 'chebyshev'},
        {'metric': 'minkowski',
         'p': scope.int(hp.quniform(_name('minkowski_p'), 1, 5, 1))},
        {'metric': 'wminkowski',
         'p': scope.int(hp.quniform(_name('wminkowski_p'), 1, 5, 1)),
         'w': hp.uniform(_name('wminkowski_w'), 0, 100)},
        {'metric': 'seuclidean',
         'V': hp.uniform(_name('seuclidean_V'), 0, 100)},
        {'metric': 'mahalanobis',
         'V': hp.uniform(_name('mahalanobis_V'), 0, 100)},
    ])
    """

    rval = scope.sklearn_KNeighborsClassifier(
        n_neighbors=scope.int(hp.quniform(
            _name('n_neighbors'),
            0.5, 50, 1)) if n_neighbors is None else n_neighbors,
        weights=hp.choice(
            _name('weights'),
            ['uniform', 'distance']) if weights is None else weights,
        algorithm=hp.choice(
            _name('algorithm'),
            ['ball_tree', 'kd_tree', 'brute', 'auto']) if algorithm is None else algorithm,
        leaf_size=scope.int(hp.quniform(
            _name('leaf_size'),
            0.51, 100, 1)) if leaf_size is None else leaf_size,
        # TODO: more metrics available
        ###metric_args,
        ##metric=metric_arg[0] if metric is None else metric,
        ##p=metric_arg[1],
        ##w=metric_arg[2],
        ##V=metric_arg[3],
        #metric=hp.choice(
        #    _name('metric'),
        #    ['euclidean', 'manhattan', 'chebyshev',
        #     'minkowski']) if metric is None else metric,
        #p=hp.quniform(
        #    _name('p'),
        #    1, 5, 1) if p is None else p,
    )
    return rval
from hyperopt import hp
from hyperopt.pyll import scope

space = {'lrate': scope.int(hp.quniform('lrate', -0.50001, 10.49999, 1)),
         'l2_reg': scope.int(hp.quniform('l2_reg', -0.50001, 5.49999, 1)),
         'batchsize': scope.int(hp.quniform('batchsize', -0.50001, 7.49999, 1)),
         'n_epochs': scope.int(hp.quniform('n_epochs', -0.50001, 9.49999, 1))}
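# The shifted bounds above are a common trick: hp.quniform(-0.50001, 10.49999, 1)
# rounds to each integer 0..10 with roughly equal probability, and scope.int turns
# the sampled float into a usable index. A quick sanity check of the `space` defined
# above using hyperopt's stochastic sampler; the printed output is only an example.
from hyperopt.pyll.stochastic import sample

for _ in range(3):
    print(sample(space))  # e.g. {'batchsize': 2, 'l2_reg': 5, 'lrate': 7, 'n_epochs': 0}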
def _knn_neighbors(name):
    return scope.int(hp.qloguniform(name, np.log(0.5), np.log(50.5), 1))
def _trees_n_estimators(name):
    return scope.int(hp.qloguniform(name, np.log(9.5), np.log(3000.5), 1))
def convnet_space_to_tpe(convnet_space):
    """
    Convert a search space defined as ConvNetSearchSpace to the TPE format.

    returns: search space in the TPE format.
    """
    assert isinstance(convnet_space, ConvNetSearchSpace)
    params = []
    #params = {}
    params.append({"format": "tpe"})

    preprocessing_params = convnet_space.get_preprocessing_parameter_subspace()
    params.append(subspace_to_tpe("preprocessing", preprocessing_params))
    #add_to_dict(params, subspace_to_tpe("preprocessing", preprocessing_params))

    network_params = convnet_space.get_network_parameter_subspace()
    if isinstance(network_params["num_conv_layers"], Parameter):
        assert network_params["num_conv_layers"].min_val == 0
    if isinstance(network_params["num_fc_layers"], Parameter):
        assert network_params["num_fc_layers"].min_val == 1

    #in hyperopt we will represent the number of conv layers as a choice object
    #that's why we can strip them here:
    #num_conv_layers = network_params.pop("num_conv_layers")
    #num_fc_layers = network_params.pop("num_fc_layers")

    network_param_subspace = subspace_to_tpe("network", network_params)
    params.append(network_param_subspace)
    #add_to_dict(params, network_param_subspace)

    #Convolutional layers:
    conv_layer_subspaces = []
    for layer_id in range(1, convnet_space.max_conv_layers + 1):
        conv_layer_params = convnet_space.get_conv_layer_subspace(layer_id)
        label = "conv-layer-%d" % (layer_id)
        conv_layer_subspace = subspace_to_tpe(label, conv_layer_params)
        conv_layer_subspaces.append(conv_layer_subspace)

    #to stay consistent with the fc layers we reverse the order, see below
    conv_layer_subspaces.reverse()
    conv_layers_combinations = get_stacked_layers_subspace(conv_layer_subspaces)
    #conv_layers_combinations.insert(0, [])  #no conv layers
    #if isinstance(num_conv_layers, int):
    #    #fixed number of layers
    #    conv_layers_space = conv_layers_combinations[num_conv_layers]
    #else:
    #    conv_layers_space = hp.choice('num_conv_layers', conv_layers_combinations)
    #Unfortunately scope.switch is not supported by the converter!
    conv_layers_space = scope.switch(
        scope.int(network_param_subspace["network/num_conv_layers"]),
        [],  #no conv layers
        *conv_layers_combinations)
    params.append(conv_layers_space)
    #add_to_dict(params, {"conv-layers": conv_layers_space})

    #Fully connected layers
    fc_layer_subspaces = []
    for layer_id in range(1, convnet_space.max_fc_layers + 1):
        fc_layer_params = convnet_space.get_fc_layer_subspace(layer_id)
        label = "fc-layer-%d" % (layer_id)
        fc_layer_subspace = subspace_to_tpe(label, fc_layer_params)
        fc_layer_subspaces.append(fc_layer_subspace)

    """
    We always want the last layer to show up, because it has special parameters.
    [[fc3], [fc2, fc3], [fc1, fc2, fc3]]
    """
    fc_layer_subspaces.reverse()
    fc_layers_combinations = get_stacked_layers_subspace(fc_layer_subspaces)
    #if isinstance(num_fc_layers, int):
    #    #fixed number of layers
    #    fc_layers_space = fc_layers_combinations[num_fc_layers]
    #else:
    #    fc_layers_space = hp.choice("num_fc_layers",
    #                                fc_layers_combinations)
    fc_layers_space = scope.switch(
        scope.int(network_param_subspace["network/num_fc_layers"]),
        None,  #no fc layers
        *fc_layers_combinations)
    params.append(fc_layers_space)
    #add_to_dict(params, {"fc-layers": fc_layers_space})

    return params
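# scope.switch(index, *options) evaluates to the option picked by the integer index;
# that is how the function above selects a stack of layer sub-spaces from the sampled
# num_conv_layers / num_fc_layers. A minimal, self-contained illustration; the
# 'n_layers' label and the string options are placeholders, not part of the code above.
from hyperopt import hp
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample

n_layers = scope.int(hp.quniform('n_layers', -0.5, 2.49, 1))  # 0, 1 or 2
layer_stack = scope.switch(
    n_layers,
    [],                       # 0 layers
    ['layer-1'],              # 1 layer
    ['layer-1', 'layer-2'],   # 2 layers
)
print(sample(layer_stack))    # e.g. ['layer-1']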
def _trees_min_samples_leaf(name):
    return hp.choice(name, [
        1,  # most common choice.
        scope.int(hp.qloguniform(name + '.gt1', np.log(1.5), np.log(50.5), 1))
    ])
weight_norm_0 = hp.uniform("weight_norm_0", 0.25, 8)
weight_norm_1 = hp.uniform("weight_norm_1", 0.25, 8)
weight_norm_2 = hp.uniform("weight_norm_2", 0.25, 8)
weight_norm_3 = hp.uniform("weight_norm_3", 0.25, 8)
weight_norm_4 = hp.uniform("weight_norm_4", 0.25, 8)
weight_norm_5 = hp.uniform("weight_norm_5", 0.25, 8)

dropout_0 = hp.uniform("dropout_0", 0, 0.8)
dropout_1 = hp.uniform("dropout_1", 0, 0.8)
dropout_2 = hp.uniform("dropout_2", 0, 0.8)
dropout_3 = hp.uniform("dropout_3", 0, 0.8)
dropout_4 = hp.uniform("dropout_4", 0, 0.8)
dropout_5 = hp.uniform("dropout_5", 0, 0.8)

space = scope.switch(
    scope.int(depth),
    {"depth": 0,
     "log_base_epsilon_0": log_base_epsilon_0,
     "weight_norm_0": weight_norm_0,
     "dropout_0": dropout_0},
    {
        "depth": 1,
        "log_base_epsilon_0": log_base_epsilon_0,
        "weight_norm_0": weight_norm_0,
        "dropout_0": dropout_0,
        "log_base_epsilon_1": log_base_epsilon_1,
        "weight_norm_1": weight_norm_1,
        "dropout_1": dropout_1,
        "num_units_1": num_units_1,
    },
    {
        "depth": 2,
        "log_base_epsilon_0": log_base_epsilon_0,
        "weight_norm_0": weight_norm_0,
import copy
from collections import OrderedDict

import numpy as np

try:
    from hyperopt.pyll import scope
except ImportError:
    print('Trying standalone pyll')
    from pyll import scope

from hyperopt.pyll_utils import hp_uniform, hp_loguniform, hp_quniform, hp_qloguniform
from hyperopt.pyll_utils import hp_normal, hp_lognormal, hp_qnormal, hp_qlognormal
from hyperopt.pyll_utils import hp_choice

num_filters1 = scope.int(hp_qloguniform('num_filters1', np.log(16), np.log(96), q=16))
filter1_size = scope.int(hp_quniform('filter1_shape', 2, 12, 1))
num_filters2 = scope.int(hp_qloguniform('num_filters2', np.log(16), np.log(96), q=16))
filter2_size = scope.int(hp_quniform('filter2_shape', 2, 12, 1))
num_filters3 = scope.int(hp_qloguniform('num_filters3', np.log(16), np.log(96), q=16))
filter3_size = scope.int(hp_quniform('filter3_shape', 2, 9, 1))
num_filters4 = scope.int(hp_qloguniform('num_filters4', np.log(16), np.log(64), q=16))
filter4_size = scope.int(hp_quniform('filter4_shape', 2, 9, 1))

pool1_sizex = scope.int(hp_quniform('pool1_sizex', 2, 5, 1))
pool1_type = hp_choice('pool1_type', ['max', 'avg', hp_uniform('pool_order_1', 1, 12)])
pool2_sizex = scope.int(hp_quniform('pool2_sizex', 2, 5, 1))
pool2_type = hp_choice('pool2_type', ['max', 'avg', hp_uniform('pool_order_2', 1, 4)])
def _boosting_n_estimators(name):
    return scope.int(hp.qloguniform(name, np.log(10.5), np.log(1000.5), 1))
      '_factory': modelFactory,
      'types': ['phi', 'psi'],
      'sincos': True},
     {'_class': DihedralFeaturizer,
      '_factory': modelFactory,
      'types': ['phi', 'psi', 'chi1'],
      'sincos': True},
     {'_class': DihedralFeaturizer,
      '_factory': modelFactory,
      'types': ['phi', 'psi', 'chi1', 'chi2'],
      'sincos': True},
 ]),
 hp.choice('preprocessing', [
     {'_class': PCA,
      '_factory': modelFactory,
      'n_components': scope.int(hp.quniform('pca_n_components', 2, 20, 1)),
      'copy': False},
     {'_class': tICA,
      '_factory': modelFactory,
      'n_components': scope.int(hp.quniform('tica_n_components', 2, 20, 1)),
      'gamma': hp.choice('tica_gamma', [0, 1e-7, 1e-5, 1e-3, 1e-1]),
      'weighted_transform': hp.choice('tica_weighted_transform', [True, False])}
 ]),
 hp.choice('cluster', [
     {'_class': MiniBatchKMeans,
      '_factory': modelFactory,
      'n_clusters': scope.int(hp.quniform('kmeans_n_clusters', 10, 1000, 10)),
      'batch_size': 10000,
      'n_init': 1,
      },
import copy
from collections import OrderedDict

import numpy as np

try:
    from hyperopt.pyll import scope
except ImportError:
    print('Trying standalone pyll')
    from pyll import scope

from hyperopt.pyll_utils import hp_uniform, hp_loguniform, hp_quniform, hp_qloguniform
from hyperopt.pyll_utils import hp_normal, hp_lognormal, hp_qnormal, hp_qlognormal
from hyperopt.pyll_utils import hp_choice

num_filters1 = scope.int(hp_quniform('num_filters1', 32, 128, 16))
filter1_size = scope.int(hp_quniform('filter1_shape', 5, 12, 1))
num_filters2 = scope.int(hp_quniform('num_filters2', 64, 400, 16))
filter2_size = scope.int(hp_quniform('filter2_shape', 4, 7, 1))
num_filters3 = scope.int(hp_quniform('num_filters3', 64, 400, 16))
filter3_size = scope.int(hp_quniform('filter3_shape', 3, 5, 1))
num_filters4 = scope.int(hp_quniform('num_filters4', 64, 400, 16))
filter4_size = scope.int(hp_quniform('filter4_shape', 3, 4, 1))
num_filters5 = scope.int(hp_quniform('num_filters5', 64, 400, 16))
filter5_size = scope.int(hp_quniform('filter5_shape', 2, 3, 1))

pool1_sizex = scope.int(hp_quniform('pool1_sizex', 2, 4, 1))
pool1_type = hp_choice('pool1_type', ['max', 'avg', hp_uniform('pool_order_1', 1, 4)])
from hyperopt import hp
from hyperopt.pyll import scope

space = {"lrate": hp.uniform("lrate", 0, 10),
         "l2_reg": hp.uniform("l2_reg", 0, 1),
         "batchsize": scope.int(hp.quniform("batchsize", 20, 2000, 1)),
         "n_epochs": scope.int(hp.quniform("n_epochs", 5, 2000, 1))}
from hyperopt import hp
from hyperopt.pyll import scope

pca = {'preprocessing': 'pca',
       'pca:keep_variance': scope.int(
           hp.quniform('pca:keep_variance', 0, 1, 1))}

penalty_and_loss = hp.choice('penalty_and_loss',
                             [{'liblinear:penalty': 'l1', 'liblinear:loss': 'l2'},
                              {'liblinear:penalty': 'l2', 'liblinear:loss': 'l1'},
                              {'liblinear:penalty': 'l2', 'liblinear:loss': 'l2'}])
liblinear_LOG2_C = scope.int(hp.quniform('liblinear:LOG2_C', -5, 15, 1))
liblinear = {'classifier': 'liblinear',
             'liblinear:penalty_and_loss': penalty_and_loss,
             'liblinear:LOG2_C': liblinear_LOG2_C}

libsvm_LOG2_C = scope.int(hp.quniform('libsvm_svc:LOG2_C', -5, 15, 1))
libsvm_LOG2_gamma = scope.int(hp.quniform('libsvm_svc:LOG2_gamma', -15, 3, 1))
libsvm_svc = {'classifier': 'libsvm_svc',
              'libsvm_svc:LOG2_C': libsvm_LOG2_C,
              'libsvm_svc:LOG2_gamma': libsvm_LOG2_gamma}

criterion = hp.choice('random_forest:criterion', ['gini', 'entropy'])
max_features = scope.int(hp.quniform('random_forest:max_features', 1, 10, 1))
min_samples_split = scope.int(hp.quniform('random_forest:min_samples_split', 0, 4, 1))
random_forest = {'classifier': 'random_forest',
                 'random_forest:criterion': criterion,
                 'random_forest:max_features': max_features,
                 'random_forest:min_samples_split': min_samples_split}

preprocessors = {'None': 'None', 'pca': pca}
classifiers = {'libsvm_svc': libsvm_svc,
               'liblinear': liblinear,
               'random_forest': random_forest}

space = {'classifier': hp.choice('classifier', classifiers.values()),
         'preprocessing': hp.choice('preprocessing', preprocessors.values())}
def quniform_int(label, *args, **kwargs):
    return scope.int(
        scope.hyperopt_param(label, scope.quniform(*args, **kwargs)))
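# quniform_int builds essentially the same pyll expression as
# scope.int(hp.quniform(label, ...)): a quniform draw registered under `label`
# via scope.hyperopt_param, then cast to int. A hedged usage sketch; the
# 'max_depth' label and bounds are illustrative only.
space = {'max_depth': quniform_int('max_depth', 2, 10, 1)}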