Example #1
def test_skopt_rf_et():
    try:
        import skopt
        import pandas as pd
    except ImportError:
        print("Skipping test_skopt_rf_et!")
        return
    
    # Define an objective function for skopt to optimise.
    def objective_function(x):
        return x[0]**2 - x[1]**2 + x[1]*x[0]

    # Uneven bounds to prevent "objective has been evaluated" warnings.
    problem_bounds = [(-1e6, 3e6), (-1e6, 3e6)]

    # Don't worry about "objective has been evaluated" warnings.
    result_et = skopt.forest_minimize(objective_function, problem_bounds, n_calls=100, base_estimator="ET")
    result_rf = skopt.forest_minimize(objective_function, problem_bounds, n_calls=100, base_estimator="RF")

    et_df = pd.DataFrame(result_et.x_iters, columns=["X0", "X1"])

    # Explain the model's predictions.
    explainer_et = shap.TreeExplainer(result_et.models[-1], et_df)
    shap_values_et = explainer_et.shap_values(et_df)

    rf_df = pd.DataFrame(result_rf.x_iters, columns=["X0", "X1"])

    # Explain the model's predictions (Random forest).
    explainer_rf = shap.TreeExplainer(result_rf.models[-1], rf_df)
    shap_values_rf = explainer_rf.shap_values(rf_df)

    assert np.allclose(shap_values_et.sum(1) + explainer_et.expected_value, result_et.models[-1].predict(et_df))
    assert np.allclose(shap_values_rf.sum(1) + explainer_rf.expected_value, result_rf.models[-1].predict(rf_df))
Example #2
def test_forest_minimize_api(base_estimator):
    # invalid string value
    with pytest.raises(ValueError):
        forest_minimize(lambda x: 0., [], base_estimator='abc')

    # not a string nor a regressor
    with pytest.raises(ValueError):
        forest_minimize(lambda x: 0., [], base_estimator=base_estimator)
Example #3
def opt_sim_ensemble():
    global sim_ensemble, ev
    space = [
        Real(0.0, 1.0),  # CBF
        Real(0.0, 1.0),  # IBF
        Real(0.0, 1.0),  # CSLIM
    ]
    x0 = [1, 0, 0]
    x1 = [0, 1, 0]
    x2 = [0, 0, 1]
    x0s = [x0, x1, x2]
    # get the current fold
    ds = Dataset(load_tags=True, filter_tag=True)
    ds.set_track_attr_weights(1, 0.9, 0.2, 0.2, 0.2)
    ds.set_playlist_attr_weights(0.5, 0.5, 0.5, 0.05, 0.05)
    ev = Evaluator()
    ev.cross_validation(4, ds.train_final.copy())
    urm, tg_tracks, tg_playlist = ev.get_fold(ds)
    sim_ensemble.fit(urm, list(tg_playlist), list(tg_tracks), ds)
    res = forest_minimize(sim_objective,
                          space,
                          x0=x0s,
                          verbose=True,
                          n_random_starts=20,
                          n_calls=200,
                          n_jobs=-1,
                          callback=result)
    print('Maximum p@k found: {:6.5f}'.format(-res.fun))
    print('Optimal parameters:')
    params = ['CBF', 'IBF', 'CSLIM']
    for (p, x_) in zip(params, res.x):
        print('{}: {}'.format(p, x_))
Example #4
def main():
    
    # find optimal learning rate and rank
    bounds = [(10**-4, 1.0, 'log-uniform'), (10**-6, 10**-1, 'log-uniform')]
    opt_params = forest_minimize(optimal, bounds, verbose=True)
    opt_lr, opt_rank = opt_params.x[0], opt_params.x[1]

    # times and precisions for 3 data sets
    data_1_train, _, data_1_test = read_train_data("0.1_percent")
    data_2_train, _, data_2_test = read_train_data("0.5_percent")
    data_3_train, _, data_3_test = read_train_data("1_percent")

    dataset = [[data_1_train, data_1_test], [data_2_train, data_2_test], [data_3_train, data_3_test]]
    times = []
    precisions = []
    for data in dataset:
        train, test = sparse_train_test(data[0], data[1])
        start = time.time()
        model = LightFM(loss='warp', learning_rate=opt_lr, no_components=opt_rank)
        model.fit(train, epochs=10, verbose=True)
        precision = precision_at_k(model, test, k=50).mean()
        minutes = (time.time() - start) / 60
        print("TIME:", minutes, "PREC:", precision)
        times.append(minutes)
        precisions.append(precision)
    
    print("times:", times, "precisions", precisions)
Example #5
    def __optimize_n_clusters__(self):
        max_iter = self.cluster_max - self.cluster_min
        ntests = int(0.5 * (self.cluster_max - self.cluster_min))

        if self.method == "exhaustive" or max_iter < 30:
            # No-Optimize Full Test
            result = self.__cluster_metric__([self.cluster_min])
            best = self.cluster_min
            for k_val in range(self.cluster_min, self.cluster_max):
                run_result = self.__cluster_metric__([k_val])
                if run_result < result:
                    result = run_result
                    best = k_val
            return result, best, self.cluster_max - self.cluster_min
        
        elif self.method == "gprocess":
            # Gaussian Opt.
            # gp_minimize does Bayesian optimisation with a Gaussian process surrogate
            res = gp_minimize(self.__cluster_metric__, [(self.cluster_min, self.cluster_max)], n_calls=ntests)
            # res.fun #score
            # res.func_vals #all tested scores
            return res.fun, res.x[0], res.x_iters

        elif self.method == "dtree":
            # Tree-ensemble (random forest) optimisation
            res = forest_minimize(self.__cluster_metric__, [(self.cluster_min, self.cluster_max)], base_estimator='RF', n_calls=ntests)
            # res.fun #score
            # res.func_vals #all tested scores
            return res.fun, res.x[0], res.x_iters

        elif self.method == "dummy":
            # Random Opt.
            res = dummy_minimize(self.__cluster_metric__, [(self.cluster_min, self.cluster_max)], n_calls=ntests)
            return res.fun, res.x[0], res.x_iters
Example #6
 def optimize_forest(self, train_df, 
                    test_df, 
                    verbose=True, 
                    random_state=42, 
                    n_calls=50,
                    objective='rank'):
     """
     Finds optimal parameters using the skopt.forrest_minimize function
     
     Attributes:
         train_df   : dataset for training
         test_df    : test dataset
         n_calls    : maximum number of training the model
         objective  : the metric to be minimized
         
     """
     self.__train_df = train_df
     self.__test_df = test_df
     
     if objective == 'rank':
         objective_func = self.__objective_rank
     elif objective == 'recall':
         objective_func = self.__objective_recall
     elif objective == 'precision':
         objective_func = self.__objective_precision
     else:
         raise ValueError("objective must be 'rank', 'recall' or 'precision'")

     self.__optimized_params = forest_minimize(objective_func,
                                               self.__space,
                                               n_calls=n_calls,
                                               verbose=verbose,
                                               random_state=random_state)
     print("_________OPTIMIZATION FINISHED_________")
     print('optimal parameters:')
     for name, value in zip(self.params, self.__optimized_params.x):
         print(name, '=', value)
Example #7
 def optimize_all_xgb_params(self):
     res = forest_minimize(
         self.cv_test_comb_xgb_params,
         [
             (0.015, 0.035),
             (3, 8),
             (0.6, 0.9),
             (0.5, 1),
             (0, 0.5),
             (0.015, 0.035),
             (3, 8),
             (0.6, 0.9),
             (0.5, 1),
             (0, 0.5),
             (0.015, 0.035),
             (3, 8),
             (0.6, 0.9),
             (0.5, 1),
             (0, 0.5),
             (0.015, 0.035), (3, 8), (0.6, 0.9), (0.5, 1), (0, 0.5),
             (0.015, 0.035), (3, 8), (0.6, 0.9), (0.5, 1), (0, 0.5)
         ],
         x0=[
             0.0245, 5, 0.8, 1, 0, 0.03, 5, 0.8, 0.8, 0.4, 0.03, 5, 0.8, 1,
             0, 0.037, 5, 0.8, 0.8, 0.4, 0.033, 6, 0.8, 1, 0
         ],
         y0=0.05323593162735551,
         n_calls=75)
     dump(res, "xgb_all_opt.gz")
     print(res.x)
     print(res.fun)
     print("")
Example #8
    def run_session(self):
        super(SKOptSession, self)._write_headers(path=self.RESULTS_PATH)

        for planner in self.planners:
            params_set = dict(self.planner_config[planner].items())
            search_space = self._load_search_space(params_set)

            self.n_trial = 0  # Reset n_trial to zero for each planner
            self.planner = planner  # Keeping track of current planner
            self.start_time = timer()  # Keeping track of start_time
            if self.MAX_RUNTIME != 'None':
                self.end_time = self.start_time + self.MAX_RUNTIME  # Keeping track of end_time
                rospy.loginfo('Executing %s on %s for %d secs', self.MODE,
                              planner, self.MAX_RUNTIME)
            else:
                rospy.loginfo('Executing %s on %s for %d trials', self.MODE,
                              planner, self.MAX_TRIALS)

            if self.MODE == 'gp':
                result = gp_minimize(self._skopt_obj,
                                     search_space,
                                     n_calls=self.MAX_TRIALS,
                                     random_state=0,
                                     acq_func='gp_hedge')
                # gp_hedge probabilistically chooses between the LCB, EI and PI
                # acquisition functions at every iteration
            elif self.MODE == 'rf':
                result = forest_minimize(self._skopt_obj,
                                         search_space,
                                         n_calls=self.MAX_TRIALS,
                                         random_state=0,
                                         base_estimator='RF',
                                         acq_func='EI')
            elif self.MODE == 'et':
                result = forest_minimize(self._skopt_obj,
                                         search_space,
                                         n_calls=self.MAX_TRIALS,
                                         random_state=0,
                                         base_estimator='ET',
                                         acq_func='EI')
            elif self.MODE == 'gbrt':
                result = gbrt_minimize(self._skopt_obj,
                                       search_space,
                                       n_calls=self.MAX_TRIALS,
                                       random_state=0,
                                       acq_func='EI')

        rospy.loginfo('Saved results to %s\n', self.RESULTS_PATH)
Example #9
def run(args):
    # Create base serialization dir
    if not os.path.exists(args.serialization_dir):
        os.makedirs(args.serialization_dir)

    # Read in search configuration and create the blackbox function to optimize
    f, dimensions, x0, trial_paths, delete_worse_files_cb = setup(args)
    n_random_starts = max(1, args.n_random_starts) if x0 is None else args.n_random_starts
    callback = None if args.no_delete_worse else delete_worse_files_cb

    # Run the actual optimization
    if args.mode == 'gp':
        results = skopt.gp_minimize(
            f, dimensions,
            x0=x0,
            n_calls=args.n_calls,
            n_random_starts=n_random_starts,
            random_state=args.random_seed,
            verbose=True,
            acq_optimizer='sampling',
            xi=args.xi,
            kappa=args.kappa,
            callback=callback,
        )
    elif args.mode == 'random':
        results = skopt.dummy_minimize(
            f, dimensions,
            x0=x0,
            n_calls=args.n_calls,
            random_state=args.random_seed,
            verbose=True,
            callback=callback,
        )

    elif args.mode == 'tree':
        results = skopt.forest_minimize(
            f, dimensions,
            x0=x0,
            n_calls=args.n_calls,
            n_random_starts=n_random_starts,
            random_state=args.random_seed,
            verbose=True,
            xi=args.xi,
            kappa=args.kappa,
            callback=callback,
        )


    # Maybe evaluate the best model on the test dataset
    if args.evaluate_on_test:
        logger.info('EVALUATE ON TEST')
        evaluate_on_test(args, results, trial_paths)

    # Save a bunch of visualizations of the search process
    logger.info('PLOTTING RESULTS')
    plot_results(args.serialization_dir, results)

    logger.info('ALL DONE')
Example #10
def find_best_hyperparameters(model, X, y, dynamic_params_space, scoring, plot, nfold, **HPO_params):
    
    # filter these warnings - they are not consistent, arise even for float features
    from warnings import filterwarnings
    # simplefilter("ignore", UserWarning)
    filterwarnings("ignore", message="The objective has been evaluated at this point before", category=UserWarning)
  
    # Get model name
    model_name = model.__class__.__name__
    
    # Get dynamic parameter names:
    param_names = [dim.name for dim in dynamic_params_space]
        
    # Define an objective function
    @use_named_args(dynamic_params_space)
    def objective(**dynamic_params):
        #model.set_params(**static_params)
        model.set_params(**dynamic_params) 
        cv = StratifiedKFold(n_splits=nfold, random_state=seed, shuffle=True)
        scores = cross_validate(model, X, y, cv=cv, scoring = scoring, n_jobs=-1)
        val_score = np.mean(scores['test_score'])
        return -val_score
    
    print(model_name, 'model training...')
    # Load previously trained results and use the best point from the previous run as x0
    try:
        res = load(r'output/models/'+model_name)
        x0 = res.x
    # If not trained before -> no initial point provided
    except FileNotFoundError:
        x0 = None
    
    res = forest_minimize(objective, dynamic_params_space, x0=x0, **HPO_params)
    
    # add attribute - parameters names to the res
    res.param_names = param_names

    print('Optimized parameters:    ', res.param_names)
    print('Previous best parameters:', x0)
    print('Current  best parameters:', res.x)
    print('Best score:', -res.fun)
    
    # Save the optimization result
    dump(res, r'output/models/'+model_name, store_objective=False)
        
    if plot:
        plt.figure(figsize=(5, 2))
        plot_convergence(res)
        try:
            # plot_objective does not work if only one parameter was searched
            plot_objective(res)
        except Exception:
            pass
        plt.show()
Example #11
def main():
    print('loading data')
    train_features_path = os.path.join(
        FEATURES_DATA_PATH, 'train_features_' + FEATURE_NAME + '.csv')

    print('... train')
    train = pd.read_csv(train_features_path, nrows=TRAINING_PARAMS['nrows'])

    idx_split = int(
        (1 - VALIDATION_PARAMS['validation_fraction']) * len(train))
    train, valid = train[:idx_split], train[idx_split:]

    train = sample_negative_class(
        train,
        fraction=TRAINING_PARAMS['negative_sample_fraction'],
        seed=TRAINING_PARAMS['negative_sample_seed'])

    @skopt.utils.use_named_args(SPACE)
    def objective(**params):
        model_params = {**params, **STATIC_PARAMS}
        valid_preds = fit_predict(train,
                                  valid,
                                  None,
                                  model_params,
                                  TRAINING_PARAMS,
                                  fine_tuning=True)
        valid_auc = roc_auc_score(valid['isFraud'], valid_preds)
        return -1.0 * valid_auc

    experiment_params = {
        **STATIC_PARAMS,
        **TRAINING_PARAMS,
        **HPO_PARAMS,
    }

    with neptune.create_experiment(name='skopt forest sweep',
                                   params=experiment_params,
                                   tags=['skopt', 'forest', 'tune'],
                                   upload_source_files=get_filepaths()):
        print('logging data version')
        log_data_version(train_features_path, prefix='train_features_')

        results = skopt.forest_minimize(objective,
                                        SPACE,
                                        callback=[sk_utils.NeptuneMonitor()],
                                        **HPO_PARAMS)
        best_auc = -1.0 * results.fun
        best_params = results.x

        neptune.send_metric('valid_auc', best_auc)
        neptune.set_property('best_parameters', str(best_params))

        sk_utils.send_best_parameters(results)
        sk_utils.send_plot_convergence(results, channel_name='diagnostics_hpo')
        sk_utils.send_plot_evaluations(results, channel_name='diagnostics_hpo')
        sk_utils.send_plot_objective(results, channel_name='diagnostics_hpo')
Example #12
def optimizeLGBM(space):

    result = forest_minimize(LGBMTargetFunction,
                             space,
                             random_state=160745,
                             n_random_starts=20,
                             n_calls=50,
                             verbose=1)

    return (result.x, result.fun)
Example #13
def tune_hyperparams(model, space, X, y):
    """Tune hyper-parameters of a given model.
    
    Use mean cross-validation accuracy score to evaluate model.
    
    Parameters
    ----------
    model : instance
        a sklearn classifier
        
    space : list of skopt space
        a search space of model hyper-parameters
    
    X : pandas dataframe or numpy.ndarray
        features used for tuning
        
    y : numpy array
        target used for tuning
        
    Returns
    -------
    best_params : dict
        a dictionary containing best parameters
        
    best_score : float
        best mean cross validation accuracy score 
        
        
    Examples
    --------
    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from skopt.space import Real, Integer, Categorical
    >>> from src.hyperparams_tuning import tune_hyperparams
    >>> X, y = load_breast_cancer(return_X_y=True)
    >>> model = RandomForestClassifier(random_state=0)
    >>> space = [Integer(2, 20, name='max_depth'),
    ...          Integer(2, 20, name='max_leaf_nodes')]
    >>> best_params, best_score = tune_hyperparams(model, space, X, y)
    """
    @use_named_args(space)
    def objective(**params):
        model.set_params(**params)
        return -np.mean(cross_val_score(model, X, y, scoring='accuracy', cv=5))

    results = forest_minimize(objective, space, n_calls=10, random_state=0)

    best_params = dict()
    for i in range(len(space)):
        best_params[space[i].name] = results.x[i]

    best_score = -results.fun
    #plot_convergence(results)
    #plt.show()
    return best_params, best_score
Example #14
def _suggest_x(dims, x0, y0, random_start, random_state, opts):
    res = skopt.forest_minimize(lambda *args: 0,
                                dims,
                                n_calls=1,
                                n_random_starts=1 if random_start else 0,
                                x0=x0,
                                y0=y0,
                                random_state=random_state,
                                kappa=opts["kappa"],
                                xi=opts["xi"])
    return res.x_iters[-1], res.random_state
Example #15
    def run_minimize(self):

        params = forest_minimize(self.define_params_nn,
                                 dimensions=space_params,
                                 n_calls=ncalls,
                                 verbose=True,
                                 random_state=random_state)

        Train.write_best_params(params)
        self.history_df.to_csv(path_to_hist, index=False)
        print('Best params are : {}'.format(params.x))
Example #16
def test_categorical_integer():
    def f(params):
        return 0

    dims = [[1]]
    res = forest_minimize(f,
                          dims,
                          n_calls=1,
                          random_state=1,
                          n_random_starts=1)
    assert res.x_iters[0][0] == dims[0][0]
Example #17
    def run_minimize(self, args=None):
        from helper import write_best_params

        space = space_params_fit_gen
        params = forest_minimize(self.define_params_nn,
                                 dimensions=space,
                                 n_calls=ncalls,
                                 verbose=True,
                                 random_state=seed)

        write_best_params(params)
        print('Best params are : {}'.format(params.x))
Example #18
 def minimize(self,
              space,
              ncalls,
              minimize_seed,
              path_params='best_params.json'):
     exp_name = self.exp_name + '_{}'.format(datetime.datetime.now())
     mlflow.create_experiment(exp_name)
     mlflow.set_experiment(exp_name)
     best_params = forest_minimize(self.objective,
                                   space,
                                   n_calls=ncalls,
                                   random_state=minimize_seed)['x']
     save_params(best_params, path_params=path_params)
Example #19
 def minimize(self, ncalls=10, seed=2):
     self.rel_cls = Related(model_path=self.model_path,
                            path_to_alg=self.path_to_alg)
     best_params = forest_minimize(self.objective,
                                   dimensions=self.space,
                                   n_calls=ncalls,
                                   verbose=1,
                                   random_state=seed)['x']
     weights = dict(list(zip(self.fields, best_params)))
     self.rel_cls.create_related(
         path_to_actual_data=self.path_to_overall_data,
         weights_specific=weights)
     print('Generated best recommendations')
Example #20
 def optimize_xgb_comb_params(self):
     res = forest_minimize(
         self.cv_test_combined_models_with_xgb_comb_params, [(0.015, 0.035),
                                                             (3, 6),
                                                             (0.6, 0.9),
                                                             (0.5, 1),
                                                             (0, 0.5)],
         x0=[0.03, 5, 0.8, 0.8, 0.4],
         y0=0.053262479044449806,
         n_calls=10)
     dump(res, "xgb_com_opt.gz")
     print(res.x)
     print(res.fun)
     print("")
Example #21
def _init_trial(trial, state):
    import skopt
    random_starts, x0, y0, dims = state.minimize_inputs(trial.run_id)
    res = skopt.forest_minimize(lambda *args: 0,
                                dims,
                                n_calls=1,
                                n_random_starts=random_starts,
                                x0=x0,
                                y0=y0,
                                random_state=state.random_state,
                                kappa=state.batch_flags["kappa"],
                                xi=state.batch_flags["xi"])
    state.random_state = res.random_state
    return skopt_util.trial_flags(state.flag_names, res.x_iters[-1])
Example #22
def learn_hyperparams():
    # define the space
    space = [(2, 20),  # epochs
             (10**-3, 1.0, 'log-uniform'),  # learning_rate
             (100, 1000),  # no_components
             (10**-9, 10**-3, 'log-uniform'),  # item_alpha
             ]
    best_result = forest_minimize(objective, space, n_calls=100,
                                  random_state=0,
                                  verbose=True)
    print('Maximum p@k found: {:6.5f}'.format(-best_result.fun))
    print('Optimal parameters:')
    params = ['epochs', 'learning_rate', 'no_components', 'item_alpha']
    for (p, x_) in zip(params, best_result.x):
        print('{}: {}'.format(p, x_))
Example #23
            def tune(default_parameters=None):
                if default_parameters is None:
                    default_parameters = {}

                def train_evaluate(search_params):
                    global CALLS
                    # start_func_time = time.time()

                    parameters = {**default_parameters, **search_params}
                    print(parameters)
                    CALLS += 1
                    print('Call', CALLS)

                    PLANNER.update_parameters(parameters)
                    result = PLANNER.compute(START_END,
                                             startDate=DEPART,
                                             current=CURRENT,
                                             recompute=True)

                    # end_func_time = time.time() - start_func_time
                    avgFitList = [
                        subLog.chapters["fitness"].select("avg")
                        for log in result['logs'] for subLog in log
                    ]
                    avgFit = np.sum(np.sum(avgFitList, axis=0), axis=0)
                    # score = np.append(end_func_time, avgFit)
                    weights = np.array([1, 1 / 100])
                    weightedSum = np.dot(avgFit, weights)

                    return weightedSum

                @skopt.utils.use_named_args(SPACE)
                def objective(**params):
                    return train_evaluate(params)

                checkpoint_saver = CheckpointSaver("./checkpoint.pkl",
                                                   compress=9,
                                                   store_objective=False)
                res = skopt.forest_minimize(objective,
                                            SPACE,
                                            n_calls=N_CALLs,
                                            n_random_starts=N_POINTS,
                                            callback=[checkpoint_saver])
                return res
Example #24
    def _fit_single_classifier(self, id, X, y, **kwargs):
        """
        DO NOT USE DIRECTLY. Use fit instead. It performs the fitting of a single model.
        :param id: The id of the model to be tested. The method fit handles this.
        :param X: numpy array with the features. Each feature is a column.
        :param y: numpy array with the labels.
        :param kwargs: The parameters of forest_minimize from scikit-optimize
        :return: None. Results are stored in self.outputs['classifiers'].
        """

        if self.objective is None:
            self.objective = self._objective

        self._model = self._models[id]
        self.history = self.all_history[id]

        objective = lambda p: self.objective(p, X, y)

        self.names = list(self.space[id].keys())
        res_fm = forest_minimize(objective,
                                 list(self.space[id].values()),
                                 n_calls=self.num_iter,
                                 random_state=self.random_state,
                                 verbose=self.verbose,
                                 **kwargs)

        tr_err, val_err, times = self.objective(res_fm.x,
                                                X,
                                                y,
                                                return_all=True)
        self.outputs['classifiers'].append({
            # Summary statistics could be stored instead, e.g.
            # {'mean': np.mean(tr_err), 'std': np.std(tr_err), 'n': len(tr_err), 'samples': tr_err}
            'training': tr_err,
            'validation': val_err,
            'time': times,
            'best_parameters': {key: val
                                for key, val in zip(self.space[id], res_fm.x)}
        })

        self.training_error = self.score(X, y)
Example #25
def main():
    space = [
        (1.0, 5.0),  # alfa (continuous, so the float starting point below is valid)
        (1e-7, 1e-3, 'log-uniform'),  # l1
        (1e-6, 1e-2),  # l2
    ]
    # best individual
    # 2.598883982624128, 1e-05, 3.8223372852050046e-05
    x0 = [2.59, 1e-05, 3.822 * 1e-05]
    res = forest_minimize(objective,
                          space,
                          verbose=True,
                          x0=x0,
                          n_calls=10,
                          y0=-0.1126)
    print('Maximum p@k found: {:6.5f}'.format(-res.fun))
    print('Optimal parameters:')
    params = ['alfa', 'l1', 'l2']
    for (p, x_) in zip(params, res.x):
        print('{}: {}'.format(p, x_))
Example #26
def cluster_ensemble():
    """
    Ensemble in which we have different weights for each cluster of users
    """
    n_cluster = 20
    space = []
    for i in range(n_cluster):
        space.append((0.0, 1.0))

    res = forest_minimize(fit_cluster,
                          space,
                          verbose=True,
                          n_calls=1000,
                          n_jobs=-1,
                          callback=result)
    print('Maximum p@k found: {:6.5f}'.format(-res.fun))
    print('Optimal parameters:')
    for i, x_ in enumerate(res.x):
        print('cluster {}: {}'.format(i, x_))
Example #27
def batch_size_optim_objective(batch_size, dset_path):
    """

    :param batch_size:
    :return:
    """
    # Define the list of hyper-param to optimise
    space = [Integer(4, 50),  # Number of hidden dimensions
             Integer(1, 3),  # Number of layers
             Integer(0, 1),  # SGD==0 ADAM==1
             Real(3.9e-4, 0.154), # Learning Rate
             Real(0, 9.9)]  # Regularisation strength

    datasets = get_datasets(dset_path)
    loaders = get_loaders(datasets, b_size=batch_size)
    train_loader = loaders['train']
    valid_loader = loaders['test']
    inp_shape = loaders['train'].dataset.x.shape[1]

    def objective(params):

        hidden_dim = params[0]
        num_layers = params[1]
        optimizer = 'SGD' if params[2] == 0 else 'ADAM'
        lr = params[3]
        wd = params[4]

        model = ToyModel(inp_shape, hidden_dim, num_layers)
        model.cuda()
        out = fit_toy_model(100, model, train_loader, valid_loader,
                            patience=2000, patience_increase=2,
                            optimizer_type=optimizer,
                            lr=lr,
                            weight_decay=wd)
        return -out['ci_test']

    res_min = forest_minimize(objective, space, n_calls=15, random_state=0)

    best_param = res_min.x

    return res_min, best_param
Example #28
 def tune_param_func(self, interactions, space, test_percentage = 0.25, random_state = 2020, n_calls = 10, loss = "warp"):
     from lightfm.evaluation import auc_score
     import numpy as np
     from lightfm.cross_validation import random_train_test_split
     from skopt import forest_minimize
     from lightfm import LightFM
     
     def objective(params):
         # unpack
         epochs, learning_rate, no_components = params
     
         model = LightFM(loss=loss,
                         random_state=random_state,
                         learning_rate=learning_rate,
                         no_components=no_components)
         model.fit(train, epochs=epochs,
                   num_threads=4, verbose=True)
     
         aucs = auc_score(model, test, num_threads=4)
         mean_auc = np.mean(aucs)
         # Make negative because we want to _minimize_ the objective
         out = -mean_auc
         # Guard against degenerate scores caused by numerical issues
         if np.abs(out + 1) < 0.01 or out < -1.0:
             return 0.0
         else:
             return out
     
     train, test = random_train_test_split(interactions, test_percentage=test_percentage, random_state=random_state)
     
     res_fm = forest_minimize(objective, space, n_calls=n_calls,
                              random_state=random_state,
                              verbose=False)
     max_auc = -res_fm.fun
     
     params = ['epochs', 'learning_rate', 'no_components']
     params_list = []
     for (p, x_) in zip(params, res_fm.x):
         params_list.append((p, x_))
         
     return max_auc, params_list
Example #29
    def hyperparametertuning(self):

        space = [
            (50, 100),  # epochs
            (10**-4, 0.2, 'log-uniform'),  # learning_rate
            (5, 35),  # no_components
            (10**-6, 10**-1, 'log-uniform'),  # alpha
        ]

        res_fm = forest_minimize(self.objective,
                                 space,
                                 n_calls=20,
                                 random_state=0,
                                 verbose=True)

        paramsdict = {}
        params = ['epochs', 'learning_rate', 'no_components', 'alpha']
        for (p, x_) in zip(params, res_fm.x):
            paramsdict[p] = x_

        joblib.dump(paramsdict, 'models/hyperparams.pkl')
Example #30
def main():
    global args
    args = parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Data loading
    loaders = XRayLoaders(data_dir=args.data, batch_size=args.batch_size)
    global train_loader, val_loader
    train_loader = loaders.train_loader(imagetxt=args.traintxt)
    val_loader = loaders.val_loader(imagetxt=args.valtxt)

    global criterion
    criterion = nn.BCELoss(size_average=True)
    if args.cuda:
        criterion.cuda()

    space = [(16, 32), (2, 6), (2, 6), (2, 6), (2, 6), (1, 4)]
    res_rf = forest_minimize(objective, space, n_calls=15, random_state=0, verbose=True)
    dump(res_rf, 'optim_rf05202018')