Пример #1
0
def test_study_optimize_command(options):
    # type: (List[str]) -> None
    """Invoke the ``optuna study optimize`` CLI and check the resulting study.

    ``options`` selects which CLI flags (``--storage``/``--config``) are passed.
    """

    with StorageConfigSupplier(TEST_CONFIG_TEMPLATE) as (storage_url,
                                                         config_path):
        storage = RDBStorage(storage_url)
        study_name = storage.get_study_name_from_id(
            storage.create_new_study_id())

        # Build the CLI invocation; storage/config flags are appended only
        # when requested via ``options``.
        cli_args = [
            'optuna', 'study', 'optimize', '--study', study_name,
            '--n-trials', '10', __file__, 'objective_func'
        ]
        cli_args = _add_option(cli_args, '--storage', storage_url,
                               'storage' in options)
        cli_args = _add_option(cli_args, '--config', config_path,
                               'config' in options)
        subprocess.check_call(cli_args)

        study = optuna.Study(storage=storage_url, study_name=study_name)
        assert len(study.trials) == 10
        assert 'x' in study.best_params

        # Check if a default value of study_name is stored in the storage.
        assert storage.get_study_name_from_id(
            study.study_id).startswith(DEFAULT_STUDY_NAME_PREFIX)
Пример #2
0
    def take_action(self, parsed_args):
        # type: (Namespace) -> int
        """Load a user-supplied objective from a file and optimize the study.

        Returns 0 on success, 1 when the requested method is missing.
        """
        loaded_config = optuna.config.load_optuna_config(self.app_args.config)
        storage_url = get_storage_url(self.app_args.storage, loaded_config)
        study = optuna.Study(storage=storage_url, study_name=parsed_args.study)

        # We force enabling the debug flag. As we are going to execute user
        # codes, we want to show exception stack traces by default.
        self.app.options.debug = True

        module = imp.load_source('optuna_target_module', parsed_args.file)
        try:
            objective = getattr(module, parsed_args.method)
        except AttributeError:
            self.logger.error('Method {} not found in file {}.'.format(
                parsed_args.method, parsed_args.file))
            return 1

        study.optimize(objective,
                       n_trials=parsed_args.n_trials,
                       timeout=parsed_args.timeout,
                       n_jobs=parsed_args.n_jobs)
        return 0
def main():
    """Parse CLI arguments, populate ``base_config``, and run (or resume)
    the hyper-parameter study for next-move evaluation."""
    parser = argparse.ArgumentParser()
    for positional in ("kifu", "engine", "value_ranges", "out_dir"):
        parser.add_argument(positional)
    parser.add_argument("--trials", type=int, default=10)
    parser.add_argument("--resume", action="store_true")
    args = parser.parse_args()

    base_config["engine"] = yaml_load(args.engine)
    base_config["value_ranges"] = yaml_load(args.value_ranges)
    base_config["kifu"] = args.kifu
    base_config["out_dir"] = args.out_dir

    os.makedirs(base_config["out_dir"], exist_ok=True)
    study_name = 'next_move_evaluation_opt'  # Unique identifier of the study.
    # An absolute path also appears to be acceptable for the sqlite URI.
    storage_uri = 'sqlite:///' + os.path.join(base_config["out_dir"],
                                              "optuna.db")
    if args.resume:
        # Attach to the already-existing study in the storage.
        study = optuna.Study(study_name=study_name, storage=storage_uri)
    else:
        study = optuna.create_study(study_name=study_name,
                                    storage=storage_uri)
    study.optimize(objective, n_trials=args.trials)
Пример #4
0
    def take_action(self, parsed_args):
        # type: (Namespace) -> None
        """Write a user attribute (key/value pair) onto the target study."""
        loaded_config = optuna.config.load_optuna_config(self.app_args.config)
        study = optuna.Study(
            storage=get_storage_url(self.app_args.storage, loaded_config),
            study_name=parsed_args.study)
        study.set_user_attr(parsed_args.key, parsed_args.value)
        self.logger.info('Attribute successfully written.')
Пример #5
0
    def take_action(self, parsed_args):
        # type: (Namespace) -> None
        """Serve the study dashboard, or write it to a file when ``--out``
        is given."""
        loaded_config = optuna.config.load_optuna_config(self.app_args.config)
        storage_url = get_storage_url(self.app_args.storage, loaded_config)
        study = optuna.Study(storage=storage_url, study_name=parsed_args.study)

        if parsed_args.out is not None:
            optuna.dashboard.write(study, parsed_args.out)
            self.logger.info('Report successfully written to: {}'.format(parsed_args.out))
        else:
            # No output path: serve the dashboard interactively.
            optuna.dashboard.serve(study)
Пример #6
0
def main(study_name, is_resume_study):
    """Load the training frame, bind it into the objective, and run
    (or resume) a 50-trial Optuna study backed by a local SQLite file."""
    df_train, _ = load_data(scaled=False)
    df_train = df_train.loc[:, sorted(df_train.columns)]
    # Columns that must not leak into the feature matrix.
    dropped = [
        'HasDetections', 'MachineIdentifier', 'machine_id', 'AvSigVersion_1',
        'test_probability'
    ]
    X_train = df_train.drop(dropped, axis=1)
    y_train = df_train['HasDetections']

    objective_fn = partial(objective, X_train, y_train, df_train)
    if is_resume_study:
        # Re-attach to the existing study instead of creating a new one.
        study = optuna.Study(study_name=study_name,
                             storage='sqlite:///example.db')
    else:
        study = optuna.create_study(study_name=study_name,
                                    storage='sqlite:///example.db')
    study.optimize(objective_fn, n_trials=50)
    print('params:', study.best_params)
Пример #7
0
        best_val_f_score = -np.inf
        for epoch, [val_loss, val_result] in enumerate(zip(val_losses, val_results)):
            if best_val_loss > val_loss:
                best_val_loss = val_loss
                best_val_loss_epoch = epoch
                best_val_loss_result = val_result
            if best_val_f_score < val_result["f_score"]:
                best_val_f_score = val_result["f_score"]
                best_val_f_score_loss = val_loss
                best_val_f_score_epoch = epoch
                best_val_f_score_result = val_result
        assert min(val_losses) == best_val_loss
        assert max(result["f_score"] for result in val_results) == best_val_f_score

        best_val_losses.append(best_val_loss)
        best_val_loss_results.append(best_val_loss_result)
        best_val_loss_epochs.append(best_val_loss_epoch)
        best_val_f_score_losses.append(best_val_f_score_loss)
        best_val_f_score_results.append(best_val_f_score_result)
        best_val_f_score_epochs.append(best_val_f_score_epoch)

    return best_val_losses, best_val_loss_results, best_val_loss_epochs, best_val_f_score_losses, best_val_f_score_results, best_val_f_score_epochs

import optuna
# Attach to an existing persisted study and run a single optimization trial.
OPTUNA_STUDY_NAME = "model"  # name of the study inside the storage
OPTUNA_STORAGE = "sqlite:///opt.db"  # SQLite file holding the trial history

# NOTE(review): ``wrapper`` (the objective callable) is presumably defined
# earlier in this file — confirm before running this section standalone.
study = optuna.Study(study_name=OPTUNA_STUDY_NAME, storage=OPTUNA_STORAGE)
study.optimize(wrapper, n_trials=1)

Пример #8
0
            ## Test ##
            with chainer.using_config('train', False):
                best_mic, best_mac = estimate_test(test_data, reader, epoch,
                                                   file_path, args.model,
                                                   model, args.multilabel,
                                                   best_mic, best_mac)

    return 1 - best_mic  ## microが最小になるように


if __name__ == "__main__":
    num_of_trials = 20  ## n_trialsはhyper_parameterを探索する試行回数
    args = parse_argument()
    reader, train_data, test_data = prepare()
    study = optuna.Study(study_name=args.dbname, storage=args.storagename)
    if args.model == "TRF-Delay-Multi" or args.model == "TRF-Sequential":
        study.optimize(objective, n_trials=num_of_trials)
    else:
        study.optimize(objective, n_trials=num_of_trials)

    with open(args.filepath + "/opt_result.txt", mode='w') as f:
        f.write('Number of finished trials: {}'.format(len(study.trials)) +
                "\n")

        f.write('Best trial:' + "\n")
        trial = study.best_trial

        f.write('  Value: {}'.format(trial.value) + "\n")
        f.write('  Params: ' + "\n")
        for key, value in trial.params.items():
Пример #9
0
    report = evaluator()

    # The following line mitigates the memory problem in CircleCI
    # (see https://github.com/pfnet/optuna/pull/325 for more details).
    gc.collect()

    return 1.0 - report['main/accuracy']


if __name__ == '__main__':
    # Please make sure common study and storage are shared among nodes.
    study_name = sys.argv[1]
    storage_url = sys.argv[2]

    study = optuna.Study(study_name,
                         storage_url,
                         pruner=optuna.pruners.MedianPruner())
    comm = chainermn.create_communicator('naive')
    if comm.rank == 0:
        print('Study name:', study_name)
        print('Storage URL:', storage_url)
        print('Number of nodes:', comm.size)

    # Run optimization!
    chainermn_study = optuna.integration.ChainerMNStudy(study, comm)
    chainermn_study.optimize(objective, n_trials=25)

    if comm.rank == 0:
        pruned_trials = [
            t for t in study.trials
            if t.state == optuna.structs.TrialState.PRUNED
Пример #10
0
    trainer.run()

    # Evaluate.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    return 1.0 - report['main/accuracy']


if __name__ == '__main__':
    # Please make sure common study and storage are shared among nodes.
    # Study name and storage URL come from the command line so that every
    # node in the distributed run points at the same study.
    study_name = sys.argv[1]
    storage_url = sys.argv[2]

    # NOTE(review): optuna.Study attaches to an existing study — it is
    # assumed the study was created in the shared storage beforehand.
    study = optuna.Study(study_name, storage_url)
    comm = chainermn.create_communicator('naive')
    if comm.rank == 0:
        # Only rank 0 prints, to avoid duplicated output from every node.
        print('Study name:', study_name)
        print('Storage URL:', storage_url)
        print('Number of nodes:', comm.size)

    # Run optimization!
    chainermn_study = optuna.integration.ChainerMNStudy(study, comm)
    chainermn_study.optimize(objective, n_trials=25)

    # Report results from a single rank only.
    if comm.rank == 0:
        print('Number of finished trials: ', len(study.trials))
        print('Best trial:')
        trial = study.best_trial
        print('  Value: ', trial.value)
Пример #11
0
def search(sim_sn_path, hsc_path, model_dir, batch_size, optimizer,
           adabound_gamma, adabound_final_lr, lr, seed, epochs, patience,
           n_trials, norm, input1, input2, threads, eval_frequency, task_name,
           remove_y, target_distmod):
    """Run an Optuna hyper-parameter search for the HSC regression task.

    Results are persisted to ``<model_dir>/example.db`` (SQLite) and logged
    to MLflow; a trials dataframe is written to ``<model_dir>/result.csv``
    after every trial.
    """
    storage = 'sqlite:///{}/example.db'.format(model_dir)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # NOTE(review): the Windows and non-Windows branches below are currently
    # identical — the platform check looks vestigial.
    if platform.system() == 'Windows':
        tmp = (Path(__file__).parents[1] / 'mlruns' / 'search-hsc-redshift' /
               'mlruns')
        uri = str(tmp.absolute().as_uri())
        # uri = 'file://' + str(tmp.absolute())
    else:
        tmp = (Path(__file__).parents[1] / 'mlruns' / 'search-hsc-redshift' /
               'mlruns')
        uri = str(tmp.absolute().as_uri())
    mlflow.set_tracking_uri(uri)

    print(model_dir)

    # Experiment name encodes the task, the input channels, and the target.
    name = '{flag}-{input1}-{input2}'.format(flag=task_name,
                                             input1=input1,
                                             input2=input2)
    if remove_y:
        name += '-remove-y'
    name += '-{}'.format('distmod' if target_distmod else 'redshift')
    mlflow.set_experiment(name)

    # Resume the study when the SQLite file already exists; create it otherwise.
    db_path = os.path.join(model_dir, 'example.db')
    sampler = MyTPESampler()
    if os.path.exists(db_path):
        study = optuna.Study(study_name='study190513',
                             storage=storage,
                             sampler=sampler)
    else:
        study = optuna.create_study(study_name='study190513',
                                    storage=storage,
                                    sampler=sampler)

    input_setting = InputSetting(batch_size=batch_size,
                                 mixup='none',
                                 mixup_alpha=2,
                                 mixup_beta=2)
    # Data fields are filled in later, once the splits are computed.
    input_data = InputData(training_data=None,
                           validation_data=None,
                           test_data=None,
                           mean=None,
                           std=None,
                           input1=input1,
                           input2=input2,
                           remove_y=remove_y,
                           is_hsc=True,
                           n_classes=1,
                           input_setting=input_setting)

    optimizer_setting = OptimizerSetting(name=optimizer,
                                         lr=lr,
                                         gamma=adabound_gamma,
                                         final_lr=adabound_final_lr)
    loop_setting = LoopSetting(epochs=epochs,
                               patience=patience,
                               eval_frequency=eval_frequency,
                               end_by_epochs=False)
    print('loading data')
    sim_sn, _ = load_hsc_data(sim_sn_path=sim_sn_path,
                              hsc_path=hsc_path,
                              remove_y=input_data.remove_y)
    sim_sn = sklearn.utils.shuffle(sim_sn, random_state=seed)
    if target_distmod:
        # Convert each redshift to a distance modulus (parallelized).
        lcdm = Cosmology()
        target = np.asarray(
            Parallel(n_jobs=threads)(delayed(lcdm.DistMod)(z)
                                     for z in sim_sn['redshift']))
    else:
        target = sim_sn['redshift']

    # 70/30 split into (dev+val) vs test, then 70/30 into dev vs val.
    tmp_x, test_x, tmp_y, test_y = train_test_split(sim_sn,
                                                    target,
                                                    test_size=0.3,
                                                    random_state=42)
    dev_x, val_x, dev_y, val_y = train_test_split(tmp_x,
                                                  tmp_y,
                                                  test_size=0.3,
                                                  random_state=44)

    if norm:
        # Standardize targets using statistics from the dev split only.
        output_mean, output_std = np.mean(dev_y), np.std(dev_y)
        dev_y = ((dev_y - output_mean) / output_std).astype(np.float32)
        val_y = ((val_y - output_mean) / output_std).astype(np.float32)
        test_y = ((test_y - output_mean) / output_std).astype(np.float32)
        # We are still in the middle of the hyper-parameter search, so the
        # mean and std are intentionally not saved.
    else:
        # output_mean, output_std = 0, 1

        dev_y = dev_y.astype(np.float32)
        val_y = val_y.astype(np.float32)
        test_y = test_y.astype(np.float32)
    mean, std = compute_moments(train_data=dev_x,
                                input1=input1,
                                input2=input2,
                                norm=norm,
                                use_redshift=False,
                                is_hsc=True,
                                threads=threads)
    input_data.mean, input_data.std = mean, std

    training_data = Data(x=dev_x, y=dev_y)
    validation_data = Data(x=val_x, y=val_y)
    test_data = Data(x=test_x, y=test_y)
    input_data.training_data = training_data
    input_data.validation_data = validation_data
    input_data.test_data = test_data

    # One trial at a time so the trials dataframe can be checkpointed to CSV
    # after each trial completes.
    for i in range(n_trials):
        study.optimize(
            lambda trial: objective_hsc(trial=trial,
                                        sim_sn_path=sim_sn_path,
                                        hsc_path=hsc_path,
                                        optimizer_setting=optimizer_setting,
                                        seed=seed,
                                        loop_setting=loop_setting,
                                        normalization=norm,
                                        threads=threads,
                                        input_data=input_data),
            n_trials=1)

        df = study.trials_dataframe()
        df.to_csv(os.path.join(model_dir, 'result.csv'))
def train_and_generate_model():
    """Build technical-indicator features from exchange-rate history, then
    either tune hyper-parameters with Optuna or train an XGBClassifier.

    Side effects: reads/writes feature pickles, writes progress/tune logs,
    saves ./xgb.model and ./xgb_model.raw.txt, and calls exit() after a
    tuning run instead of falling through to training.
    """

    #global log_fd

    global log_fd_opt

    global tr_input_arr

    global tr_angle_arr

    global val_input_arr

    global val_angle_arr

    data_len = len(exchange_rates)

    log_fd_tr = open("./train_progress_log_" +
                     dt.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt",
                     mode="w")

    # inner logger function for backtest

    def logfile_writeln_tr(log_str):

        nonlocal log_fd_tr

        log_fd_tr.write(log_str + "\n")

        log_fd_tr.flush()

    print("data size of rates: " + str(data_len))

    print("num of rate datas for tarin: " +
          str(COMPETITION_TRAIN_DATA_NUM_AT_RATE_ARR))

    print("input features sets for tarin: " + str(COMPETITION_TRAIN_DATA_NUM))

    logfile_writeln_tr("data size of rates: " + str(data_len))

    logfile_writeln_tr("num of rate datas for tarin: " +
                       str(COMPETITION_TRAIN_DATA_NUM_AT_RATE_ARR))

    tr_input_mat = []

    tr_angle_mat = []

    is_loaded_input_mat = False

    # Reuse cached features when present; building them below is expensive.
    if os.path.exists("./tr_input_mat.pickle"):

        with open('./tr_input_mat.pickle', 'rb') as f:

            tr_input_mat = pickle.load(f)

        with open('./tr_angle_mat.pickle', 'rb') as f:

            tr_angle_mat = pickle.load(f)

        is_loaded_input_mat = True

    else:

        # One feature row per sampled index, for both the original and the
        # reversed rate series (mirror augmentation).
        for i in range(DATA_HEAD_ASOBI,
                       len(exchange_rates) - DATA_HEAD_ASOBI - OUTPUT_LEN,
                       SLIDE_IDX_NUM_AT_GEN_INPUTS_AND_COLLECT_LABELS):

            tr_input_mat.append([
                exchange_rates[i],
                (exchange_rates[i] - exchange_rates[i - 1]) /
                exchange_rates[i - 1],
                get_rsi(exchange_rates, i),
                get_ma(exchange_rates, i),
                get_ma_kairi(exchange_rates, i),
                get_bb_1(exchange_rates, i),
                get_bb_2(exchange_rates, i),
                get_ema(exchange_rates, i),
                get_ema_rsi(exchange_rates, i),
                get_cci(exchange_rates, i),
                get_mo(exchange_rates, i),
                get_lw(exchange_rates, i),
                get_ss(exchange_rates, i),
                get_dmi(exchange_rates, i),
                get_vorarity(exchange_rates, i),
                get_macd(exchange_rates, i),
                str(judge_chart_type(exchange_rates[i - CHART_TYPE_JDG_LEN:i]))
            ])

            tr_input_mat.append([
                reverse_exchange_rates[i],
                (reverse_exchange_rates[i] - reverse_exchange_rates[i - 1]) /
                reverse_exchange_rates[i - 1],
                get_rsi(reverse_exchange_rates, i),
                get_ma(reverse_exchange_rates, i),
                get_ma_kairi(reverse_exchange_rates, i),
                get_bb_1(reverse_exchange_rates, i),
                get_bb_2(reverse_exchange_rates, i),
                get_ema(reverse_exchange_rates, i),
                get_ema_rsi(reverse_exchange_rates, i),
                get_cci(reverse_exchange_rates, i),
                get_mo(reverse_exchange_rates, i),
                get_lw(reverse_exchange_rates, i),
                get_ss(reverse_exchange_rates, i),
                get_dmi(reverse_exchange_rates, i),
                get_vorarity(reverse_exchange_rates, i),
                get_macd(reverse_exchange_rates, i),
                str(
                    judge_chart_type(
                        reverse_exchange_rates[i - CHART_TYPE_JDG_LEN:i]))
            ])

            # Binary label: 1 if the rate rises over the next OUTPUT_LEN steps.
            tmp = exchange_rates[i + OUTPUT_LEN] - exchange_rates[i]

            if tmp >= 0:

                tr_angle_mat.append(1)

            else:

                tr_angle_mat.append(0)

            tmp = reverse_exchange_rates[
                i + OUTPUT_LEN] - reverse_exchange_rates[i]

            if tmp >= 0:

                tr_angle_mat.append(1)

            else:

                tr_angle_mat.append(0)

        # NOTE(review): always true here — we are inside the cache-miss
        # branch where is_loaded_input_mat stays False. Kept as a guard.
        if is_loaded_input_mat == False:

            with open('tr_input_mat.pickle', 'wb') as f:

                pickle.dump(tr_input_mat, f)

            with open('tr_angle_mat.pickle', 'wb') as f:

                pickle.dump(tr_angle_mat, f)

    #log output for tensorboard

    #configure("logs/xgboost_trade_cpu_1")

    tr_input_arr = np.array(tr_input_mat[0:COMPETITION_TRAIN_DATA_NUM])

    tr_angle_arr = np.array(tr_angle_mat[0:COMPETITION_TRAIN_DATA_NUM])

    watchlist = None

    # Everything after the training slice is split off for validation
    # according to VALIDATION_DATA_RATIO.
    split_idx = COMPETITION_TRAIN_DATA_NUM + int(
        (len(tr_input_mat) - COMPETITION_TRAIN_DATA_NUM) *
        VALIDATION_DATA_RATIO)

    if VALIDATION_DATA_RATIO != 0.0:

        val_input_arr = np.array(
            tr_input_mat[COMPETITION_TRAIN_DATA_NUM:split_idx])

        val_angle_arr = np.array(
            tr_angle_mat[COMPETITION_TRAIN_DATA_NUM:split_idx])

        watchlist = [(tr_input_arr, tr_angle_arr),
                     (val_input_arr, val_angle_arr)]

    else:

        watchlist = [(tr_input_arr, tr_angle_arr)]

    start = time.time()

    # Hyper-parameter tuning path: run Optuna and exit without training.
    if is_param_tune_with_optuna:

        log_fd_opt = open("./tune_progress_log_" +
                          dt.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt",
                          mode="w")

        study = None

        if is_use_db_at_tune:

            # Shared SQLite storage allows distributed/resumable tuning.
            study = optuna.Study(study_name='fxsystrade',
                                 storage='sqlite:///../fxsystrade.db')

        else:

            study = optuna.create_study()

        parallel_num = RAPTOP_THREAD_NUM * 2

        if is_colab_cpu or is_exec_at_mba:

            parallel_num = COLAB_CPU_AND_MBA_THREAD_NUM * 2

        if special_optuna_parallel_num != -1:

            parallel_num = special_optuna_parallel_num

        study.optimize(opt, n_trials=OPTUNA_TRIAL_NUM, n_jobs=parallel_num)

        process_time = time.time() - start

        logfile_writeln_opt("best_params: " + str(study.best_params))

        logfile_writeln_opt("best_value: " + str(study.best_value))

        logfile_writeln_opt("best_trial: " + str(study.best_trial))

        logfile_writeln_opt("excecution time of tune: " + str(process_time))

        log_fd_opt.flush()

        log_fd_opt.close()

        # NOTE(review): exits the whole process; log_fd_tr is never closed
        # on this path (it is flushed per write, so no data is lost).
        exit()

    # Training path: assemble XGBoost parameters and fit the classifier.
    param = {}

    n_thread = RAPTOP_THREAD_NUM

    if is_use_gpu:

        param['tree_method'] = 'gpu_hist'

        param['max_bin'] = 16

        param['gpu_id'] = 0

        n_thread = COLAB_CPU_AND_MBA_THREAD_NUM

    if is_colab_cpu or is_exec_at_mba:

        n_thread = COLAB_CPU_AND_MBA_THREAD_NUM

    logfile_writeln_tr("training parameters are below...")

    logfile_writeln_tr(str(param))

    eval_result_dic = {}

    logfile_writeln_tr("num_round: " + str(NUM_ROUND))

    clf = XGBClassifier(max_depth=MAX_DEPTH,
                        random_state=42,
                        n_estimators=NUM_ROUND,
                        min_child_weight=18,
                        subsample=0.9,
                        colsample_bytree=0.6,
                        eta=ETA,
                        objective='binary:logistic',
                        verbosity=0,
                        n_thread=n_thread,
                        **param)

    verbosity = True

    if is_use_gpu or is_colab_cpu:

        verbosity = False

    clf.fit(tr_input_arr, tr_angle_arr, eval_set=watchlist, verbose=verbosity)

    process_time = time.time() - start

    logfile_writeln_tr("excecution time of training: " + str(process_time))

    clf.save_model('./xgb.model')

    booster = clf.get_booster()

    booster.dump_model('./xgb_model.raw.txt')

    eval_result_dic = clf.evals_result()

    # Log per-round error for the training set and, when present, the
    # validation set.
    for ii in range(len(eval_result_dic['validation_0']['error'])):

        if VALIDATION_DATA_RATIO != 0.0:

            logfile_writeln_tr(
                str(ii) + "," +
                str(eval_result_dic['validation_0']['error'][ii]) + "," +
                str(eval_result_dic['validation_1']['error'][ii]))

        else:

            logfile_writeln_tr(
                str(ii) + "," +
                str(eval_result_dic['validation_0']['error'][ii]))

    # Feature Importance

    fti = clf.feature_importances_

    logfile_writeln_tr('Feature Importances:')

    for i, feat in enumerate(FEATURE_NAMES):

        logfile_writeln_tr('\t{0:20s} : {1:>.6f}'.format(feat, fti[i]))

    log_fd_tr.flush()

    log_fd_tr.close()

    print("finished training and saved model.")
Пример #13
0
def train_and_generate_model():
    """Generate indicator features from exchange-rate history, then either
    tune hyper-parameters with Optuna or train an XGBoost booster.

    Side effects: reads/writes feature pickles, writes a progress log, saves
    ./xgb.model and ./xgb_model.raw.txt, and calls quit() after a tuning run.
    """
    global log_fd
    global tr_input_arr
    global tr_angle_arr
    global val_input_arr
    global val_angle_arr

    data_len = len(exchange_rates)
    # if is_param_tune_with_optuna:
    #     train_len = len(exchange_rates) - 1000 - OUTPUT_LEN
    # else:
    #     train_len = int(len(exchange_rates)/TRAINDATA_DIV)

    log_fd = open("./train_progress_log_" + dt.now().strftime("%Y-%m-%d_%H-%M-%S") + ".txt", mode = "w")

    print("data size of rates: " + str(data_len))
    print("num of rate datas for tarin: " + str(COMPETITION_TRAIN_DATA_NUM_AT_RATE_ARR))
    print("input features sets for tarin: " + str(COMPETITION_TRAIN_DATA_NUM))


    logfile_writeln("data size of rates: " + str(data_len))
    logfile_writeln("num of rate datas for tarin: " + str(COMPETITION_TRAIN_DATA_NUM_AT_RATE_ARR))

    tr_input_mat = []
    tr_angle_mat = []

    # Reuse cached features when present; building them below is expensive.
    if os.path.exists("./tr_input_mat.pickle"):
        with open('./tr_input_mat.pickle', 'rb') as f:
            tr_input_mat = pickle.load(f)
        with open('./tr_angle_mat.pickle', 'rb') as f:
            tr_angle_mat = pickle.load(f)
    else:
        # One feature row per sampled index, for both the original and the
        # reversed rate series (mirror augmentation).
        for i in range(DATA_HEAD_ASOBI, len(exchange_rates) - DATA_HEAD_ASOBI - OUTPUT_LEN, SLIDE_IDX_NUM_AT_GEN_INPUTS_AND_COLLECT_LABELS):
            # if "2006" in exchange_dates[i]:
            #     print(len(tr_input_mat))
            #     print(str(DATA_HEAD_ASOBI + i - 1))
            #     quit()
            tr_input_mat.append(
                [exchange_rates[i],
                 (exchange_rates[i] - exchange_rates[i - 1])/exchange_rates[i - 1],
                 get_rsi(exchange_rates, i),
                 get_ma(exchange_rates, i),
                 get_ma_kairi(exchange_rates, i),
                 get_bb_1(exchange_rates, i),
                 get_bb_2(exchange_rates, i),
                 get_ema(exchange_rates, i),
                 get_ema_rsi(exchange_rates, i),
                 get_cci(exchange_rates, i),
                 get_mo(exchange_rates, i),
                 get_lw(exchange_rates, i),
                 get_ss(exchange_rates, i),
                 get_dmi(exchange_rates, i),
                 get_vorarity(exchange_rates, i),
                 get_macd(exchange_rates, i),
                 str(judge_chart_type(exchange_rates[i-CHART_TYPE_JDG_LEN:i]))
             ]
                )
            tr_input_mat.append(
                [reverse_exchange_rates[i],
                 (reverse_exchange_rates[i] - reverse_exchange_rates[i - 1])/reverse_exchange_rates[i - 1],
                 get_rsi(reverse_exchange_rates, i),
                 get_ma(reverse_exchange_rates, i),
                 get_ma_kairi(reverse_exchange_rates, i),
                 get_bb_1(reverse_exchange_rates, i),
                 get_bb_2(reverse_exchange_rates, i),
                 get_ema(reverse_exchange_rates, i),
                 get_ema_rsi(reverse_exchange_rates, i),
                 get_cci(reverse_exchange_rates, i),
                 get_mo(reverse_exchange_rates, i),
                 get_lw(reverse_exchange_rates, i),
                 get_ss(reverse_exchange_rates, i),
                 get_dmi(reverse_exchange_rates, i),
                 get_vorarity(reverse_exchange_rates, i),
                 get_macd(reverse_exchange_rates, i),
                 str(judge_chart_type(reverse_exchange_rates[i-CHART_TYPE_JDG_LEN:i]))
             ]
                )

            # Binary label: 1 if the rate rises over the next OUTPUT_LEN steps.
            tmp = exchange_rates[i+OUTPUT_LEN] - exchange_rates[i]
            if tmp >= 0:
                tr_angle_mat.append(1)
            else:
                tr_angle_mat.append(0)
            tmp = reverse_exchange_rates[i+OUTPUT_LEN] - reverse_exchange_rates[i]
            if tmp >= 0:
                tr_angle_mat.append(1)
            else:
                tr_angle_mat.append(0)

        # Cache the generated features for subsequent runs.
        with open('tr_input_mat.pickle', 'wb') as f:
            pickle.dump(tr_input_mat, f)
        with open('tr_angle_mat.pickle', 'wb') as f:
            pickle.dump(tr_angle_mat, f)

    #log output for tensorboard
    #configure("logs/xgboost_trade_cpu_1")

    # if is_param_tune_with_optuna:
    #     gen_data_len = int(((len(exchange_rates)/TRAINDATA_DIV))/5.0)
    # else:
    #     gen_data_len = len(tr_input_mat)
    tr_input_arr = np.array(tr_input_mat[0:COMPETITION_TRAIN_DATA_NUM])
    tr_angle_arr = np.array(tr_angle_mat[0:COMPETITION_TRAIN_DATA_NUM])
    dtrain = xgb.DMatrix(tr_input_arr, label=tr_angle_arr)

    # Everything after the training slice is split off for validation
    # according to VALIDATION_DATA_RATIO.
    split_idx = COMPETITION_TRAIN_DATA_NUM + int((len(tr_input_mat) - COMPETITION_TRAIN_DATA_NUM) * VALIDATION_DATA_RATIO)
    if VALIDATION_DATA_RATIO != 0.0:
        val_input_arr = np.array(tr_input_mat[COMPETITION_TRAIN_DATA_NUM:split_idx])
        val_angle_arr = np.array(tr_angle_mat[COMPETITION_TRAIN_DATA_NUM:split_idx])
        dval = xgb.DMatrix(val_input_arr, label=val_angle_arr)
        watchlist  = [(dtrain,'train'),(dval,'validation')]
    else:
        watchlist  = [(dtrain,'train')]

    start = time.time()
    # Hyper-parameter tuning path: run Optuna and quit without training.
    if is_param_tune_with_optuna:
        #study = optuna.create_study()
        # Shared SQLite storage allows distributed/resumable tuning.
        study = optuna.Study(study_name='distributed-example', storage='sqlite:///example.db')
        study.optimize(opt, n_trials=100)
        process_time = time.time() - start
        logfile_writeln(str(study.best_params))
        logfile_writeln(str(study.best_value))
        logfile_writeln(str(study.best_trial))
        logfile_writeln("excecution time of tune: " + str(process_time))
        # NOTE(review): log_fd is flushed but never closed on this path.
        log_fd.flush()
        quit()

    param = {'max_depth':MAX_DEPTH, 'eta':ETA, 'objective':'binary:logistic', 'verbosity':0, 'n_thread':4,'random_state':42, 'n_estimators':NUM_ROUND, 'min_child_weight': 15, 'subsample': 0.7, 'colsample_bytree':0.7}

    #param = {'max_depth':6, 'learning_rate':0.1, 'subsumble':0.5, 'objective':'binary:logistic', 'verbosity':0, 'booster': 'dart',
    # 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 0.1, 'skip_drop': 0.5}

    #param = {'max_depth':3, 'eta':0.1, 'objective':'binary:logistic', 'verbosity':0, 'n_thread':4,
    #    'random_state':42, 'n_estimators':NUM_ROUND, 'min_child_weight': 15, 'subsample': 0.7, 'colsample_bytree':0.7,
    #    'booster': 'dart', 'sample_type': 'uniform', 'normalize_type': 'tree', 'rate_drop': 0.1, 'skip_drop': 0.5}

    if is_use_gpu:
        param['tree_method'] = 'gpu_hist'
        param['max_bin'] = 16
        param['gpu_id'] = 0
        param['n_thread'] = 2
    if is_colab_cpu:
        param['n_thread'] = 2

    logfile_writeln("training parameters are below...")
    logfile_writeln(str(param))
    eval_result_dic = {}

    logfile_writeln("num_round: " + str(NUM_ROUND))
    bst = xgb.train(param, dtrain, NUM_ROUND, evals=watchlist, evals_result=eval_result_dic, verbose_eval=int(NUM_ROUND/100))
    process_time = time.time() - start
    logfile_writeln("excecution time of training: " + str(process_time))

    bst.dump_model('./xgb_model.raw.txt')
    bst.save_model('./xgb.model')

    # Log per-round error for the training set and, when present, the
    # validation set.
    for ii in range(len(eval_result_dic['train']['error'])):
        if VALIDATION_DATA_RATIO != 0.0:
            logfile_writeln(str(ii) + "," + str(eval_result_dic['train']['error'][ii]) + "," + str(eval_result_dic['validation']['error'][ii]))
        else:
            logfile_writeln(str(ii) + "," + str(eval_result_dic['train']['error'][ii]))

    # feature importance
    create_feature_map()
    fti = bst.get_fscore(fmap='fx_systrade_xgb.fmap')

    logfile_writeln('Feature Importances:')
    logfile_writeln(str(fti))
    # for feat in FEATURE_NAMES:
    #     logfile_writeln('\t{0:10s} : {1:>12.4f}'.format(feat, fti[feat]))

    log_fd.flush()
    log_fd.close()

    print("finished training and saved model.")
Пример #14
0
 def confirm(self, directory):
     """Load the study named ``self.name`` from ``directory``'s SQLite
     storage and cache its trials dataframe on ``self.df``."""
     self.study = optuna.Study(
         study_name=self.name,
         storage='sqlite:///{}/hypara_search.db'.format(directory))
     self.df = self.study.trials_dataframe()
     """
Пример #15
0
                arglist.append(f"--{arg}")
                continue
            arglist.append(f"--{arg}={value}")

        statement = " ".join(arglist)
        print(f"statement for this study is: ")
        print(statement)

        # calling through subprocess to ensure that all cuda memory is fully released between experiments
        subprocess.check_call(arglist)

        result_file = open("logs/" + opt.study_name + ".txt", "r+")
        score = float(result_file.read())
        print(f"score for this study is {score}")
        return score  # want to return a value to minimize

    try:
        # study = optuna.create_study(study_name=opt.study_name, storage="mysql+pymysql://root:[email protected]/optuna")
        study = optuna.create_study(study_name=opt.study_name)
        print("Created optuna study")
    except ValueError as e:
        if "Please use a different name" in str(e):
            # study = optuna.Study(study_name=opt.study_name, storage="mysql+pymysql://root:[email protected]/optuna")
            study = optuna.Study(study_name=opt.study_name)
            print("Joined existing optuna study")
        else:
            raise
    except:
        raise
    study.optimize(objective, n_trials=opt.num_trials)
Пример #16
0
def main():
    """CLI entry point: Optuna hyper-parameter search for a seq2seq model.

    Loads TSV train/valid data via torchtext, then searches over learning
    rate, weight decay, depth, dropout, embedding initialization, and
    embedding sharing. With ``--study-name`` the script joins a pre-created
    SQLite-backed study (enabling distributed search); otherwise it creates
    a fresh in-memory study.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--train',
                        required=True,
                        help='TSV file for training (.tsv)')
    parser.add_argument('--valid',
                        required=True,
                        help='TSV file for validation (.tsv)')
    parser.add_argument('--save-dir', required=True, help='Save directory')
    parser.add_argument('--encoder',
                        default='LSTM',
                        choices=['LSTM', 'BiLSTM', 'NSE'],
                        help='Select type of encoder')
    parser.add_argument('--attn',
                        default=False,
                        action='store_true',
                        help='Whether to add attention layer')
    parser.add_argument('--batchsize', type=int, default=64, help='Batch size')
    parser.add_argument('--embsize',
                        type=int,
                        default=128,
                        help='Embedding size')
    parser.add_argument('--unit', type=int, default=128, help='Number of unit')
    parser.add_argument('--lr-schedule-gamma',
                        type=float,
                        default=0.95,
                        help='Gamma of lr-scheduler')
    parser.add_argument('--clip',
                        type=float,
                        default=10,
                        help='Clipping gradients')
    parser.add_argument('--epoch', type=int, default=25, help='Max epoch')
    parser.add_argument('--minfreq',
                        type=int,
                        default=2,
                        help='Min word frequency')
    parser.add_argument('--maxlen',
                        type=int,
                        default=70,
                        help='Max number of words for validation')
    parser.add_argument('--vocabsize',
                        type=int,
                        default=40000,
                        help='vocabulary size')
    parser.add_argument(
        '--early-stop-n',
        type=int,
        default=2,
        help='Stop training if the best score does not update  n epoch before')
    parser.add_argument('--n-trial',
                        type=int,
                        default=100,
                        help='Number of trial')
    parser.add_argument('--study-name',
                        default=None,
                        help='Study name for sqlite')
    args = parser.parse_args()

    ### setup data ###
    print('setup data...\n')
    SRC = Field(init_token='<sos>', eos_token='<eos>', lower=True)
    TRG = Field(init_token='<sos>', eos_token='<eos>', lower=True)

    train_data, valid_data = TabularDataset.splits(path='.',
                                                   train=args.train,
                                                   validation=args.valid,
                                                   format='tsv',
                                                   fields=[('src', SRC),
                                                           ('trg', TRG)])

    def objective(trial):
        """One Optuna trial: build, train, and validate a seq2seq model.

        Returns the best (lowest) validation loss seen across epochs.
        """
        ### setup trials ###
        # NOTE(review): suggest_loguniform is a pre-v2 Optuna API
        # (superseded by suggest_float(..., log=True)) -- confirm version.
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
        weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
        layer = trial.suggest_categorical('n_layer', [1, 2, 3])
        dropout = trial.suggest_categorical('dropout', [0.1, 0.2, 0.3])
        init_emb = trial.suggest_categorical('init-emb',
                                             ['fasttext', 'glove', 'none'])
        share_emb = trial.suggest_categorical('share-emb', [True, False])

        ### setup vocabularies ###
        if init_emb == 'none':
            SRC.build_vocab(train_data,
                            min_freq=args.minfreq,
                            max_size=args.vocabsize)
            TRG.build_vocab(train_data,
                            min_freq=args.minfreq,
                            max_size=args.vocabsize)
        else:
            if init_emb == 'fasttext':
                vectors = FastText(language='en')
            elif init_emb == 'glove':
                vectors = GloVe()
            SRC.build_vocab(train_data,
                            vectors=vectors,
                            min_freq=args.minfreq,
                            max_size=args.vocabsize)
            TRG.build_vocab(train_data,
                            vectors=vectors,
                            min_freq=args.minfreq,
                            max_size=args.vocabsize)
            # NOTE(review): this mutates args, so the changed embsize leaks
            # into every subsequent trial -- confirm intended.
            args.embsize = SRC.vocab.vectors.size()[1]

        # Saved alongside the model so inference can rebuild the vocab maps.
        vocabs = {
            'src_stoi': SRC.vocab.stoi,
            'src_itos': SRC.vocab.itos,
            'trg_stoi': TRG.vocab.stoi,
            'trg_itos': TRG.vocab.itos
        }

        train_iter, valid_iter = BucketIterator.splits(
            (train_data, valid_data),
            batch_size=args.batchsize,
            sort_within_batch=True,
            sort_key=lambda x: len(x.src),
            repeat=False,
            shuffle=True,
            device=device)

        train_size = len(train_data)
        valid_size = len(valid_data)
        src_vocabsize = len(SRC.vocab)
        trg_vocabsize = len(TRG.vocab)
        print(f'# training examples: {train_size}')
        print(f'# validation examples: {valid_size} \n')
        print(f'# unique tokens in source vocabulary: {src_vocabsize}')
        print(f'# unique tokens in target vocabulary: {trg_vocabsize} \n')

        ### setup model ###
        sos_id = TRG.vocab.stoi['<sos>']
        eos_id = TRG.vocab.stoi['<eos>']
        src_pad_id = SRC.vocab.stoi['<pad>']
        trg_pad_id = TRG.vocab.stoi['<pad>']

        # When sharing embeddings, one table must cover both vocabularies
        # and both sides must agree on the padding index.
        if share_emb:
            vocabsize = max(src_vocabsize, trg_vocabsize)
            assert src_pad_id == trg_pad_id
            embedding = EmbeddingLayer(vocabsize, args.embsize, src_pad_id)
            encoder_embedding = embedding
            decoder_embedding = embedding
        else:
            encoder_embedding = EmbeddingLayer(src_vocabsize, args.embsize,
                                               src_pad_id)
            decoder_embedding = EmbeddingLayer(trg_vocabsize, args.embsize,
                                               trg_pad_id)
        bidirectional = True if args.encoder == 'BiLSTM' else False

        if args.encoder == 'NSE':
            encoder = NSE(encoder_embedding, args.unit, layer, dropout)
        else:
            encoder = LSTMEncoder(encoder_embedding, args.unit, layer, dropout,
                                  bidirectional)

        decoder = LSTMDecoder(decoder_embedding, args.unit, layer, dropout,
                              args.attn, encoder.output_units)
        model = Seq2seq(encoder, decoder, sos_id, eos_id, device).to(device)
        parameter_num = count_parameters(model)
        print(model)

        # Multi GPU
        if device.__str__() == 'cuda':
            model = torch.nn.DataParallel(model)
            cudnn.benchmark = True

        # Padding positions are excluded from the loss.
        criterion = nn.CrossEntropyLoss(ignore_index=TRG.vocab.stoi['<pad>'])
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               weight_decay=weight_decay)
        # lr scheduling with exponential curve
        scheduler = lr_scheduler.ExponentialLR(optimizer,
                                               gamma=args.lr_schedule_gamma)

        ### make directory for saving ###
        # NOTE(review): every trial wipes save_dir, so checkpoints and
        # params.json from earlier trials are discarded -- confirm intended.
        if os.path.exists(args.save_dir):
            shutil.rmtree(args.save_dir)
        os.mkdir(args.save_dir)

        # Record the full configuration: CLI args plus the sampled trial
        # values and derived dataset statistics.
        params = args.__dict__
        params.update(lr=lr,
                      weight_decay=weight_decay,
                      layer=layer,
                      dropout=dropout,
                      init_emb=init_emb,
                      share_emb=share_emb,
                      train_size=train_size,
                      valid_size=valid_size,
                      src_vocabsize=src_vocabsize,
                      trg_vocabsize=trg_vocabsize,
                      parameter_num=parameter_num)
        json.dump(params,
                  open(f'{args.save_dir}/params.json', 'w', encoding='utf-8'),
                  ensure_ascii=False)
        pprint.pprint(params, indent=4)
        print()

        ### training and validation ###
        best_loss = float('inf')
        no_update_best_interval = 0
        for epoch in range(args.epoch):
            is_first = True if epoch == 0 else False
            # NOTE(review): stepping the scheduler before train() decays the
            # lr starting at epoch 0; recent PyTorch expects step() after the
            # optimizer updates -- confirm.
            scheduler.step()  # reduce lr
            train_loss = train(model, train_iter, optimizer, criterion,
                               args.clip)
            valid_loss = eval(model, valid_iter, criterion, SRC.vocab.itos,
                              TRG.vocab.itos, is_first)

            if valid_loss < best_loss:
                best_loss = valid_loss
                no_update_best_interval = 0
                # save best model
                model_path = f'{args.save_dir}/model-best.pt'
                state = {
                    'vocabs': vocabs,
                    'params': params,
                    'state_dict': model.state_dict()
                }
                torch.save(state, model_path)
            else:
                no_update_best_interval += 1

            # logging
            logs = f"Epoch: {epoch+1:02}\tTrain loss: {train_loss:.3f}\tVal. Loss: {valid_loss:.3f}\n"
            print(logs)

            # early stopping
            if no_update_best_interval >= args.early_stop_n:
                print('Early stopped in training')
                break

        return best_loss

    if args.study_name:
        # You have to do the following command in advance:
        # optuna create-study --study '{args.study_name}' --storage 'sqlite:///{args.study_name}.db'
        # NOTE(review): optuna.Study(...) is the pre-v1 API for joining an
        # existing study (later optuna.load_study) -- confirm version.
        study = optuna.Study(study_name=args.study_name,
                             storage=f'sqlite:///{args.study_name}.db')
    else:
        study = optuna.create_study()
    study.optimize(objective, n_trials=args.n_trial)
    print('\nbest params: ', study.best_params)
    print(f'best value: {study.best_value}')
    print(f'best trial: {study.best_trial.trial_id}')
Пример #17
0
def search_hsc(sim_sn_path, hsc_path, model_dir, batch_size, optimizer,
               adabound_gamma, adabound_final_lr, lr, seed, epochs, patience,
               n_trials, norm, input1, input2,
               mixup, threads, eval_frequency, binary, task_name, remove_y,
               mixup_alpha, mixup_beta):
    """Run an Optuna hyper-parameter search for HSC supernova classification.

    Trials are logged to MLflow and persisted in a SQLite-backed Optuna
    study under ``model_dir``; after every trial the cumulative trial
    table is written to ``model_dir/result.csv``.
    """
    # The Optuna study is persisted in a SQLite file inside model_dir.
    storage = 'sqlite:///{}/example.db'.format(model_dir)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Build a file:// tracking URI for MLflow.
    # NOTE(review): the two branches differ only in an extra .absolute()
    # call, and .absolute() is applied again before as_uri() in both --
    # confirm the platform split is still needed.
    if platform.system() == 'Windows':
        tmp = (Path(__file__).parents[1] / 'mlruns' /
               'search-hsc-classification' / 'mlruns')
        uri = str(tmp.absolute().as_uri())
        # uri = 'file://' + str(tmp.absolute())
    else:
        tmp = (Path(__file__).absolute().parents[1] / 'mlruns' /
               'search-hsc-classification' / 'mlruns')
        uri = str(tmp.absolute().as_uri())
    # Fix: set_tracking_uri was called twice in a row; once is enough.
    mlflow.set_tracking_uri(uri)

    # Experiment name encodes the classification setting.
    n_classes = 2 if binary else 3
    name = '{n_classes}-{task_name}-{input1}-{input2}'.format(
        n_classes=n_classes, task_name=task_name, input1=input1, input2=input2
    )
    if remove_y:
        name += '-remove-y'
    mlflow.set_experiment(name)

    print(model_dir)
    # Resume the existing study if its DB file is already present.
    db_path = os.path.join(model_dir, 'example.db')
    sampler = MyTPESampler()
    if os.path.exists(db_path):
        study = optuna.Study(study_name='study190513', storage=storage,
                             sampler=sampler)
    else:
        study = optuna.create_study(study_name='study190513', storage=storage,
                                    sampler=sampler)

    input_setting = InputSetting(
        batch_size=batch_size, mixup=mixup,
        mixup_alpha=mixup_alpha, mixup_beta=mixup_beta
    )
    input_data = InputData(
        training_data=None, validation_data=None, test_data=None,
        mean=None, std=None, input1=input1, input2=input2,
        remove_y=remove_y, is_hsc=True, n_classes=n_classes,
        input_setting=input_setting
    )

    optimizer_setting = OptimizerSetting(
        name=optimizer, lr=lr, gamma=adabound_gamma,
        final_lr=adabound_final_lr
    )
    loop_setting = LoopSetting(epochs=epochs, patience=patience,
                               eval_frequency=eval_frequency,
                               end_by_epochs=False)
    print('loading data')
    sim_sn, _ = load_hsc_data(
        sim_sn_path=sim_sn_path, hsc_path=hsc_path,
        remove_y=input_data.remove_y
    )
    sim_sn = sklearn.utils.shuffle(sim_sn, random_state=seed)

    # Map class labels to integer ids.
    label_map = get_label_map(binary=binary)
    sim_sn_y = np.array([label_map[c] for c in sim_sn['sn_type']])

    # Stratified 70/30 splits: first train+val vs. test, then train vs. val.
    sim_x1, sim_x2, sim_y1, sim_y2 = train_test_split(
        sim_sn, sim_sn_y, test_size=0.3, random_state=42, stratify=sim_sn_y
    )
    sim_dev_x, sim_val_x, sim_dev_y, sim_val_y = train_test_split(
        sim_x1, sim_y1, test_size=0.3, random_state=44, stratify=sim_y1
    )

    training_data = Data(x=sim_dev_x, y=sim_dev_y)
    validation_data = Data(x=sim_val_x, y=sim_val_y)
    test_data = Data(x=sim_x2, y=sim_y2)
    input_data.training_data = training_data
    input_data.validation_data = validation_data
    input_data.test_data = test_data

    # Normalization statistics come from the training split only.
    mean, std = compute_moments(
        train_data=training_data.x, input1=input1, input2=input2, norm=norm,
        use_redshift=False, is_hsc=True, threads=threads
    )
    input_data.mean, input_data.std = mean, std

    # Run trials one at a time so the cumulative result table is flushed
    # to CSV after every single trial.
    for i in range(n_trials):
        study.optimize(
            lambda trial: objective_hsc(
                trial=trial, sim_sn_path=sim_sn_path, hsc_path=hsc_path,
                optimizer_setting=optimizer_setting, seed=seed,
                loop_setting=loop_setting, normalization=norm,
                threads=threads, binary=binary, input_data=input_data
            ),
            n_trials=1
        )

        df = study.trials_dataframe()
        df.to_csv(os.path.join(model_dir, 'result.csv'))
Пример #18
0
def search_plasticc(sim_sn_path, training_cosmos_path, test_cosmos_path,
                    model_dir, batch_size, optimizer, adabound_gamma,
                    adabound_final_lr, lr, seed, epochs, patience, n_trials,
                    norm, flux_err, input1, input2, mixup, threads,
                    eval_frequency, binary,
                    mixup_alpha, mixup_beta):
    """Run an Optuna hyper-parameter search for PLAsTiCC supernova
    classification.

    Mixes simulated supernovae with COSMOS training data (weighted 90/10),
    logs trials to MLflow, and persists the study in a SQLite database
    under ``model_dir``; after every trial the cumulative trial table is
    written to ``model_dir/result.csv``.
    """
    # The Optuna study is persisted in a SQLite file inside model_dir.
    storage = 'sqlite:///{}/example.db'.format(model_dir)

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Build the MLflow tracking URI. (Fix: the original branched on
    # platform.system() but both branches computed exactly the same path,
    # so the check was redundant.)
    tmp = (Path(__file__).parents[1] / 'mlruns' /
           'search-plasticc-classification' / 'mlruns')
    uri = str(tmp.absolute().as_uri())
    mlflow.set_tracking_uri(uri)

    # Experiment name encodes the classification setting.
    n_classes = 2 if binary else 3
    name = '{n_classes}-{input1}-{input2}'.format(
        n_classes=n_classes, input1=input1, input2=input2
    )
    mlflow.set_experiment(name)

    # Resume the existing study if its DB file is already present.
    db_path = os.path.join(model_dir, 'example.db')
    sampler = MyTPESampler()
    if os.path.exists(db_path):
        study = optuna.Study(study_name='study190513', storage=storage,
                             sampler=sampler)
    else:
        study = optuna.create_study(study_name='study190513', storage=storage,
                                    sampler=sampler)

    input_setting = InputSetting(
        batch_size=batch_size, mixup=mixup, mixup_alpha=mixup_alpha,
        mixup_beta=mixup_beta, balance=False
    )
    input_data = InputData(
        training_data=None, validation_data=None, test_data=None,
        mean=None, std=None, input1=input1, input2=input2, remove_y=False,
        is_hsc=False, n_classes=n_classes, input_setting=input_setting
    )

    optimizer_setting = OptimizerSetting(
        name=optimizer, lr=lr, gamma=adabound_gamma,
        final_lr=adabound_final_lr
    )
    loop_setting = LoopSetting(epochs=epochs, patience=patience,
                               eval_frequency=eval_frequency,
                               end_by_epochs=False)

    print('loading data')
    # flux_err selects the error model: 1 = the conventional flux_err,
    # 2 = the new one.
    sim_sn, training_cosmos, _ = load_plasticc_data(
        sim_sn_path=sim_sn_path, training_cosmos_path=training_cosmos_path,
        test_cosmos_path=test_cosmos_path, use_flux_err2=flux_err == 2
    )
    sim_sn = sklearn.utils.shuffle(sim_sn, random_state=seed)
    training_cosmos = sklearn.utils.shuffle(training_cosmos,
                                            random_state=seed + 1)
    # Replace NaN fluxes/errors with zero in place.
    for data in (sim_sn, training_cosmos):
        for key in ('flux', 'flux_err'):
            tmp = data[key]
            data[key][np.isnan(tmp)] = 0

    # Map class labels to integer ids.
    label_map = get_label_map(binary=binary)
    sim_sn_y = np.array([label_map[c] for c in sim_sn['sn_type']])
    training_cosmos_y = np.array([label_map[c]
                                  for c in training_cosmos['sn_type']])

    # Stratified 70/30 splits: first train+val vs. test, then train vs. val,
    # done separately for the simulated and COSMOS samples.
    sim_x1, sim_x2, sim_y1, sim_y2 = train_test_split(
        sim_sn, sim_sn_y, test_size=0.3, random_state=42, stratify=sim_sn_y
    )
    cosmos_x1, cosmos_x2, cosmos_y1, cosmos_y2 = train_test_split(
        training_cosmos, training_cosmos_y, test_size=0.3, random_state=43,
        stratify=training_cosmos_y
    )

    sim_dev_x, sim_val_x, sim_dev_y, sim_val_y = train_test_split(
        sim_x1, sim_y1, test_size=0.3, random_state=44, stratify=sim_y1
    )
    cosmos_dev_x, cosmos_val_x, cosmos_dev_y, cosmos_val_y = train_test_split(
        cosmos_x1, cosmos_y1, test_size=0.3, random_state=45,
        stratify=cosmos_y1
    )

    # Per-sample weights: simulated data contributes 90% of the total
    # weight, COSMOS data the remaining 10%.
    weight = np.asarray([0.9 / len(sim_dev_y)] * len(sim_dev_y) +
                        [0.1 / len(cosmos_dev_y)] * len(cosmos_dev_y))
    training_data = Data(x=np.hstack([sim_dev_x, cosmos_dev_x]),
                         y=np.hstack([sim_dev_y, cosmos_dev_y]),
                         weight=weight)
    validation_data = Data(x=np.hstack([sim_val_x, cosmos_val_x]),
                           y=np.hstack([sim_val_y, cosmos_val_y]))
    test_data = Data(x=np.hstack([sim_x2, cosmos_x2]),
                     y=np.hstack([sim_y2, cosmos_y2]))
    input_data.training_data = training_data
    input_data.validation_data = validation_data
    input_data.test_data = test_data

    # Normalization statistics come from the training split only.
    mean, std = compute_moments(
        train_data=training_data.x, input1=input1, input2=input2, norm=norm,
        use_redshift=False, is_hsc=False, threads=threads
    )
    input_data.mean, input_data.std = mean, std

    # Run trials one at a time so the cumulative result table is flushed
    # to CSV after every single trial.
    for i in range(n_trials):
        study.optimize(
            lambda trial: objective_plasticc(
                trial=trial, input_data=input_data,
                optimizer_setting=optimizer_setting, seed=seed,
                loop_setting=loop_setting, normalization=norm,
                threads=threads, binary=binary, sim_sn_path=sim_sn_path,
                training_cosmos_path=training_cosmos_path, flux_err=flux_err
            ),
            n_trials=1
        )

        df = study.trials_dataframe()
        df.to_csv(os.path.join(model_dir, 'result.csv'))
Пример #19
0
                "bagging_fraction":
                trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
                "bagging_freq":
                trial.suggest_int("bagging_freq", 1, 7),
                "min_data_in_leaf":
                trial.suggest_int("min_data_in_leaf", 1, 50),
                "min_child_samples":
                trial.suggest_int("min_child_samples", 5, 100),
            },
        },
    }

    evals_result = dict()
    model = init_instance_by_config(task["model"])
    model.fit(dataset, evals_result=evals_result)
    return min(evals_result["valid"])


if __name__ == "__main__":

    # Download (or reuse, thanks to exists_skip) the China-region Qlib
    # dataset, then initialize qlib against it.
    provider_uri = "~/.qlib/qlib_data/cn_data"
    GetData().qlib_data(target_dir=provider_uri,
                        region=REG_CN,
                        exists_skip=True)
    qlib.init(provider_uri=provider_uri, region=REG_CN)

    dataset = init_instance_by_config(DATASET_CONFIG)

    # Attach to the SQLite-backed study and search with 6 parallel jobs.
    # NOTE(review): no n_trials is passed, so optimize() runs until
    # interrupted -- confirm intended.
    study = optuna.Study(study_name="LGBM_360", storage="sqlite:///db.sqlite3")
    study.optimize(objective, n_jobs=6)
    return history


#print(time_callback.times)


def objective(trial):
    """Optuna objective: sample an optimizer and learning rate, train,
    and return the final-epoch validation loss (to be minimized)."""
    # Hyper-parameters under search: optimizer family and a log-scale
    # learning rate.
    chosen_optimizer = trial.suggest_categorical("optimizer", ["sgd", "adam"])
    sampled_lr = trial.suggest_loguniform("learning_rate", 1e-4, 1e0)

    # Drop any graph/session state left over from the previous trial so
    # trials do not interfere with each other.
    K.clear_session()

    history = train(chosen_optimizer, sampled_lr, trial)
    # Minimize the validation loss of the last epoch.
    return history["val_loss"][-1]


#study = optuna.create_study()
# Join a shared SQLite-backed study so multiple workers can contribute
# trials (the commented line above would instead create a private
# in-memory study).
study = optuna.Study(study_name='distributed-raccoon',
                     storage='sqlite:///example.db')
study.optimize(objective, n_trials=10)

# Report the best hyper-parameters found so far.
print(study.best_params)
print(study.best_value)
print(study.best_trial)

# Dump the full trial history for offline analysis.
trial_df = study.trials_dataframe()
trial_df.to_csv("optuna_ssd_raccoon.csv")