Exemplo n.º 1
0
def main():
    """Grid-search every combination of optional column groups and report the best."""
    # Fixed base columns plus two optional groups the explorer may toggle.
    selector = GroupCombinationExplorer(
        base_cols=["v1", "v2"],
        list_groups={
            "group1": ["v3", "v4"],
            "group2": ["v5"],
        },
    )

    # Tiny toy frame: constant feature columns and a binary target.
    n_rows = 4
    df = pd.DataFrame({
        "v1": [1] * n_rows,
        "v2": [2] * n_rows,
        "v3": [2] * n_rows,
        "v4": [2] * n_rows,
        "v5": [2] * n_rows,
        "target": [0, 0, 0, 1],
    })

    # Exhaustive search: one trial per point of the selector's grid.
    sampler = GridSampler(selector.get_search_space())
    study = optuna.create_study(direction="maximize", sampler=sampler)
    study.optimize(
        partial(objective, df, selector),
        n_trials=selector.gridsearch_space_size(),
    )

    # Rehydrate the selector from the winning trial and show its choice.
    selector.from_trial(study.best_trial)
    print("Selected cols:", selector.get_selected_cols())
Exemplo n.º 2
0
def start_tuning(train_data, valid_data, model_path: str, param_path: str, sampler='TPE'):
    """Tune hyperparameters with Optuna and persist the best parameters.

    Args:
        train_data: training split, forwarded to ``objective``.
        valid_data: validation split, forwarded to ``objective``.
        model_path: checkpoint path forwarded to ``objective``.
        param_path: destination JSON file for the best parameters.
        sampler: search strategy — 'TPE', 'Grid' or 'Grid_with_two_lr'.

    Returns:
        Tuple ``(best_params, best_trial)`` of the finished study.

    Raises:
        ValueError: if ``sampler`` names an unknown strategy.
    """
    def _grid_size(space):
        # One trial per point of the cartesian product of the grid.
        total = 1
        for choices in space.values():
            total *= len(choices)
        return total

    if sampler == 'TPE':
        print('selecting tpe sampler')
        study = optuna.create_study(direction="maximize", sampler=TPESampler())
        study.optimize(lambda trial: objective(train_data, valid_data, model_path, trial), n_trials=30)
    elif sampler == 'Grid':
        print('selecting grid search sampler')
        search_space = {"lrmain": [5e-5, 4e-5, 3e-5, 2e-5], "drop_out": [0.0, 0.1, 0.2, 0.3]}
        study = optuna.create_study(direction="maximize", sampler=GridSampler(search_space))
        # Trial count derived from the space instead of a hard-coded 4 * 4,
        # so editing the grid cannot silently leave points unexplored.
        study.optimize(lambda trial: objective(train_data, valid_data, model_path, trial),
                       n_trials=_grid_size(search_space))
    elif sampler == 'Grid_with_two_lr':
        print('selecting grid search sampler 2lr')
        search_space = {"lrmain": [5e-5, 4e-5, 3e-5, 2e-5],
                        'lrclassifier': [1e-3, 1e-2, 1e-1],
                        "drop_out": [0.0, 0.1, 0.2, 0.3]}
        study = optuna.create_study(direction="maximize", sampler=GridSampler(search_space))
        study.optimize(lambda trial: objective(train_data, valid_data, model_path, trial),
                       n_trials=_grid_size(search_space))
    else:
        # Previously an unknown sampler fell through every branch and crashed
        # with UnboundLocalError on `study`; fail fast with a clear message.
        raise ValueError(f"Unknown sampler: {sampler!r}")

    best_params = study.best_params
    save_json(best_params, param_path)
    return best_params, study.best_trial
Exemplo n.º 3
0
 def __init__(self,
     engine,
     mode='fast_ai',
     epochs_warmup=2,
     max_lr=0.03,
     min_lr=4e-3,
     step=0.001,
     num_epochs=3,
     path_to_savefig='',
     seed=5,
     stop_callback=None,
     smooth_f=0.01,
     n_trials=30,
     **kwargs) -> None:
     r"""A pipeline for learning rate search.
     Args:
         engine: training engine exposing models, optimizers and config.
         mode (str, optional): mode for learning rate finder, "fast_ai", "grid_search", "TPE".
             Default is "fast_ai".
         epochs_warmup (int, optional): warm-up epochs before measuring. Default is 2.
         max_lr (float): upper bound for leaning rate
         min_lr (float): lower bound for leaning rate
         step (float, optional): number of step for learning rate searching space. Default is 1e-3
         num_epochs (int, optional): number of epochs to train for each learning rate. Default is 3
         path_to_savefig (str): if path given save plot loss/lr (only for fast_ai mode). Default: ''
         seed (int, optional): random seed for the TPE sampler. Default is 5.
         stop_callback (optional): callback used to stop a trial early.
         smooth_f (float, optional): loss smoothing factor. Default is 0.01.
         n_trials (int, optional): number of Optuna trials. Default is 30.
     """
     self.engine = engine
     # The first registered model/optimizer pair is the one being tuned.
     main_model_name = engine.get_model_names(None)[0]
     self.model = engine.models[main_model_name]
     self.optimizer = engine.optims[main_model_name]
     self.model_device = next(self.model.parameters()).device
     self.mode = mode
     self.min_lr = min_lr
     self.max_lr = max_lr
     self.step = step
     self.n_trials = n_trials
     self.num_epochs = num_epochs
     self.path_to_savefig = path_to_savefig
     self.seed = seed
     self.stop_callback = stop_callback
     self.epochs_warmup = epochs_warmup
     self.enable_sam = engine.enable_sam
     self.smooth_f = smooth_f
     self.engine_cfg = Dict(min_lr=min_lr, max_lr=max_lr, mode=mode, step=step)
     # Candidate learning rates: [min_lr, max_lr) sampled every `step`.
     search_space = np.arange(min_lr, max_lr, step)
     # BUG FIX: the TPE sampler was seeded with `seed=True` (i.e. seed 1),
     # silently ignoring the `seed` argument; use the caller-provided seed.
     self.samplers = {'grid_search': GridSampler(search_space={'lr': search_space}),
                         'TPE': TPESampler(n_startup_trials=5, seed=seed)}
Exemplo n.º 4
0
    def __init__(self, argument, grid_search_space=None):
        """Pick an Optuna sampler, trial budget and MLflow experiment from args.

        Args:
            argument: parsed options providing ``sampler``, ``seed``,
                ``n_trials`` and ``experiment``.
            grid_search_space: dict of parameter -> candidate values; required
                when ``argument.sampler == "grid"``.
        """
        self.name = ''
        self.argument = argument
        self.grid_search_space = grid_search_space

        sampler_kind = self.argument.sampler
        if sampler_kind == "grid":
            assert self.grid_search_space is not None, "grid search spaceを指定してください"

            self.sampler = GridSampler(self.grid_search_space)
            # One trial per point of the cartesian product of the grid.
            total = 1
            for choices in self.grid_search_space.values():
                total *= len(choices)
            self.n_trials = total
            # NOTE(review): a cap on the trial count used to live here but is
            # intentionally disabled.
            self.obj_func_name = self.objective_grid
        elif sampler_kind == "random":
            self.sampler = RandomSampler(seed=self.argument.seed)
            self.n_trials = self.argument.n_trials
            self.obj_func_name = self.objective_no_grid
        else:
            self.sampler = TPESampler(**TPESampler.hyperopt_parameters(),
                                      seed=self.argument.seed)
            self.n_trials = self.argument.n_trials
            self.obj_func_name = self.objective_no_grid

        # Single-trial runs reuse the base experiment name; multi-trial runs
        # get a timestamped name so repeated sweeps do not collide. MLflow
        # failures are reported but never abort construction.
        try:
            if self.n_trials == 1:
                mlflow.set_experiment(self.argument.experiment)
            else:
                mlflow.set_experiment(
                    self.argument.experiment + "_" +
                    datetime.now().strftime('%Y%m%d_%H:%M:%S'))
        except Exception as e:
            print(e)

        self.study = optuna.create_study(sampler=self.sampler)
def main():
    """Run nested cross-validation on a simulated survival dataset.

    ``--run`` selects the search strategy: 'hpo' (random search),
    'hpo-tpe' (TPE sampler) or 'grid' (fixed exhaustive grid).
    Results are written under ``result_folder``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--seed', required=False, type=int, default=1)
    parser.add_argument('--run',
                        required=True,
                        choices=['grid', 'hpo', 'hpo-tpe'])
    args = parser.parse_args()
    print(f'Dataset = {args.dataset}')
    data_folder = get_data(path_dir, args.dataset)
    inputs = data_folder.X
    labels = data_folder.y
    folds = data_folder.folds
    labels = preprocess_data(labels)
    result_folder = '../../../../result/simulated/'

    if args.run == 'hpo':
        run_nested_cv(
            inputs,
            labels,
            folds,
            seed=args.seed,
            dataset_name=args.dataset,
            search_obj=HPO(),
            n_trials=100,
            distributions=['normal', 'logistic', 'extreme'],
            sampler=RandomSampler(seed=args.seed),
            model_file_fmt=result_folder +
            '{dataset_name}/{distribution}-fold{test_fold_id}-model.json',
            trial_log_fmt=result_folder +
            '{dataset_name}/{distribution}-fold{test_fold_id}.json')
    elif args.run == 'hpo-tpe':
        run_nested_cv(
            inputs,
            labels,
            folds,
            seed=args.seed,
            dataset_name=args.dataset,
            search_obj=HPO(),
            n_trials=100,
            distributions=['normal', 'logistic', 'extreme'],
            sampler=TPESampler(seed=args.seed),
            # BUG FIX: this branch previously omitted the result_folder
            # prefix, so TPE outputs landed in the working directory instead
            # of next to the 'hpo' and 'grid' results.
            model_file_fmt=result_folder +
            '{dataset_name}/tpe-{distribution}-fold{test_fold_id}-model.json',
            trial_log_fmt=result_folder +
            '{dataset_name}/tpe-{distribution}-fold{test_fold_id}.json')
    elif args.run == 'grid':
        grid = {
            'learning_rate': [0.001, 0.01, 0.1, 1.0],
            'max_depth': [2, 3, 4, 5, 6, 7, 8, 9, 10],
            'min_child_weight': [0.1, 1.0, 10.0, 100.0],
            'reg_alpha': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
            'reg_lambda': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
            'aft_loss_distribution_scale': [1.0, 10.0, 100.0]
        }
        run_nested_cv(
            inputs,
            labels,
            folds,
            seed=args.seed,
            dataset_name=args.dataset,
            search_obj=Grid(),
            # NOTE(review): 100 trials covers only a slice of this grid
            # (4*9*4*7*7*3 points) — presumably intentional budgeting; confirm.
            n_trials=100,
            distributions=['normal'],
            sampler=GridSampler(search_space=grid),
            model_file_fmt=result_folder +
            '{dataset_name}/grid-{distribution}-fold{test_fold_id}-model.json',
            trial_log_fmt=result_folder +
            '{dataset_name}/grid-{distribution}-fold{test_fold_id}.json')
    else:
        # Unreachable: argparse `choices` rejects anything else. Kept as a
        # defensive guard in case the choices list changes.
        raise ValueError(f'Unknown run: {args.run}')