Example #1
def test_integer():
    a = Integer(1, 10)
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 1, 11)
    random_values = a.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10,))
    assert_array_equal(a.transform(random_values), random_values)
    assert_array_equal(a.inverse_transform(random_values), random_values)
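The check_limits helper is defined elsewhere in the test suite; a minimal sketch consistent with the arguments above (Integer(1, 10).rvs() yields values in [1, 10], and the test passes 11 as the upper bound) could be:

def check_limits(value, low, high):
    # assumed semantics: a sampled value must fall in the half-open range [low, high)
    assert low <= value < high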
Example #2
 def test_select_method__auto_mixed_dims(self):
     cs = Config([Categorical(['one', 'two']), Integer(0, 1)], 'auto', 10)
     self.assertEqual(cs.selected_method, 'bayesian')
Example #3
        valid_split=cfg.TRAIN.VALID_SPLIT)

    # Set the device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device used: ", device)

    # Hyperparameter Search
    if cfg.HYPERSEARCH:

        # Define the parameters and search space
        dim_learning_rate = Real(low=1e-6,
                                 high=1e-2,
                                 prior='log-uniform',
                                 name='learning_rate')

        dim_num_dense_layers = Integer(low=0, high=2, name='num_dense_layers')

        dim_dropout = Real(low=0, high=0.9, name='dropout')
        dim_wd = Real(low=1e-6,
                      high=1e-2,
                      prior='log-uniform',
                      name='Weight_Decay')

        dimensions = [
            dim_learning_rate, dim_num_dense_layers, dim_dropout, dim_wd
        ]

        default_parameters = [0.001, 2, 0.2, 0.0005]

        # Path to save the best model found during the hyperparameter search
        path_best_model = "best_overall_model.pth"
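The snippet ends before the optimizer is invoked; a plausible continuation, assuming a fitness objective decorated with @use_named_args(dimensions) (both the name and the call count are assumptions, not part of the original):

        search_result = gp_minimize(func=fitness,
                                    dimensions=dimensions,
                                    x0=default_parameters,  # seed the search with the defaults above
                                    n_calls=40)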
Example #4
import pickle as pkl
from skopt import BayesSearchCV
from sklearn.ensemble import RandomForestRegressor
from skopt.space import Real, Categorical, Integer

with open("results/db_reg_close", "rb") as f:
    Xur_c_train, Xur_c_valid, Xr_c_train, Xr_c_valid, Y_c_train, Y_c_valid = pkl.load(f)

ur_model = BayesSearchCV(
    RandomForestRegressor(oob_score=True),
    {
        'max_depth': Integer(10, 30),
    },
    n_iter=32, verbose=1,
    cv=5
)
ur_model.fit(Xur_c_train, Y_c_train)
with open("results/UR_close_rf_1", "wb") as f:
    pkl.dump(ur_model, f)

r_model = BayesSearchCV(
    RandomForestRegressor(oob_score=True),
    {
        'max_depth': Integer(10, 30),
    },
    n_iter=32, verbose=1,
    cv=5
)
r_model.fit(Xr_c_train, Y_c_train)
with open("results/R_close_rf_1", "wb") as f:
    pkl.dump(r_model, f)
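After fitting, BayesSearchCV exposes the standard scikit-learn search attributes, so the tuned depth can be inspected directly (not shown in the original):

print(ur_model.best_params_)  # e.g. {'max_depth': ...}
print(ur_model.best_score_)   # best cross-validated score (R^2 by default for regressors)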
Example #5
 def sell_indicator_space() -> List[Dimension]:
     return [
         Integer(30, 90, name='sell-adx'),
         Real(0, 1.0, name='sell-fisher')
     ]
Example #6
                  beta=0.24087176329409027,
                  normalize_similarity=True)

# recommender_3 = UserKNNCFRecommender(urm_train)
# recommender_3.fit(shrink=2, topK=600, normalize=True)

W_sparse_CF = recommender_4.W_sparse

recommender_class = CFW_D_Similarity_Linalg

parameterSearch = SearchBayesianSkopt(recommender_class,
                                      evaluator_validation=evaluator_valid,
                                      evaluator_test=evaluator_test)

hyperparameters_range_dictionary = {}
hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
hyperparameters_range_dictionary["add_zeros_quota"] = Real(low=0,
                                                           high=1,
                                                           prior='uniform')
hyperparameters_range_dictionary["normalize_similarity"] = Categorical(
    [True, False])

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train, icm_asset, W_sparse_CF],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={})

output_folder_path = "result_experiments/"

import os
Example #7
    Y = params['Y']
    waveform_out = gen_waveform_from_coords([X, Y])

    peak_to_rms = sound_metrics.peak_to_rms(waveform_out)
    print("""The peak to RMS at X=%.6f Y=%.6f is %.6f""" % (X, Y, peak_to_rms))
    return -peak_to_rms


fund_freq_range = (50, 3000)
n_harmonics_range = [0, 4]
detuning_range_hz = [0, 100]
harmonics_power_range = [0, 1]

bigger_space = [
    Real(50, 3000, name='fund_freq'),
    Integer(0, 4, name='n_harmonics'),
    Integer(-99, 100, name='detuning_hz'),
    Real(0, 1, name='harmonics_power')
]


@use_named_args(bigger_space)
def objective_rms_to_peak_high_dimension(**params):
    fund_freq = params['fund_freq']
    n_harmonics = params['n_harmonics']
    detuning_hz = params['detuning_hz']
    harmonics_power = params['harmonics_power']

    params_1d = get_freqs_powers(
        np.array([[
            fund_freq, n_harmonics, detuning_hz, harmonics_power
Example #8
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     ICM_train=None,
                                     URM_train_last_test=None,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=50,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters, therefore only one evaluation is needed
            """

            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy(
                )
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                    0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        ##########################################################################################################

        if recommender_class in [
                ItemKNNCFRecommender, UserKNNCFRecommender,
                ItemKNNCBFRecommender
        ]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy(
                )
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                    0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=
                recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)

                pool.close()
                pool.join()

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0,
                                                            high=2,
                                                            prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class in [LinearHybrid001, LinearHybrid002]:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=1,
                                                             prior='uniform')
            hyperparameters_range_dictionary["l1_ratio"] = Real(
                low=0, high=1, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################
        if recommender_class is PipeHybrid001:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0,
                                                            high=2,
                                                            prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is MergedHybrid000:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=1,
                                                             prior='uniform')
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["adam"])  # Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical(
                [400])  # Changed! default = 500
            #hyperparameters_range_dictionary["use_bias"] = Categorical([True, False])
            #hyperparameters_range_dictionary["batch_size"] = Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["num_factors"] = Integer(
                160, 210)  # Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            #hyperparameters_range_dictionary["learning_rate"] = Real(low = 1e-4, high = 1e-1, prior = 'log-uniform')
            hyperparameters_range_dictionary[
                "negative_interactions_quota"] = Real(low=0.0,
                                                      high=0.5,
                                                      prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary[
                "negative_interactions_quota"] = Real(low=0.0,
                                                      high=0.5,
                                                      prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["batch_size"] = Categorical(
                [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["positive_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["negative_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs, "positive_threshold_BPR": None
                })

        ##########################################################################################################

        if recommender_class is IALSRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary[
                "confidence_scaling"] = Categorical(["linear", "log"])
            hyperparameters_range_dictionary["alpha"] = Real(
                low=1e-3, high=50.0, prior='log-uniform')
            hyperparameters_range_dictionary["epsilon"] = Real(
                low=1e-3, high=10.0, prior='log-uniform')
            hyperparameters_range_dictionary["reg"] = Real(low=1e-5,
                                                           high=1e-2,
                                                           prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class in [PureSVDRecommender, PureSVDItemRecommender]:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 500)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is NMFRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)
            hyperparameters_range_dictionary["solver"] = Categorical(
                ["multiplicative_update"])  # , "coordinate_descent"])
            hyperparameters_range_dictionary["init_type"] = Categorical(
                ["random", "nndsvda"])
            hyperparameters_range_dictionary["beta_loss"] = Categorical(
                ["frobenius", "kullback-leibler"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["symmetric"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["lambda_i"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["lambda_j"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs, "positive_threshold_BPR": None,
                    'train_with_sparse_weights': None
                })

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3,
                                                             high=1.0,
                                                             prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if recommender_class is EASE_R_Recommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Categorical(
                [None])  #Integer(5, 3000)
            hyperparameters_range_dictionary["normalize_matrix"] = Categorical(
                [False])
            hyperparameters_range_dictionary["l2_norm"] = Real(
                low=1e0, high=1e7, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
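A hypothetical invocation of the function above, assuming the recommender class and evaluators from the same framework have already been constructed (the metric and counts here are illustrative, not from the original):

runParameterSearch_Collaborative(P3alphaRecommender,
                                 URM_train,
                                 metric_to_optimize="MAP",
                                 evaluator_validation=evaluator_validation,
                                 evaluator_test=evaluator_test,
                                 n_cases=35,
                                 n_random_starts=5)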
Example #9
    print('Save predictors and categorical: ')
    np.save(PICKLEPATH + 'predictors' + VERSION_NUM + '.npy', predictors)
    np.save(PICKLEPATH + 'categorical' + VERSION_NUM + '.npy', categorical)

    # Delete test df now to save memory
    del test_df
    gc.collect()
    '''
    #######################
    #   MODELLING         #
    #######################
    '''
    start_time = time.time()
    space = [
        Integer(3, 10, name='max_depth'),
        Integer(6, 30, name='num_leaves'),
        Integer(20, 200, name='min_child_samples'),
        Real(250, 400, name='scale_pos_weight'),
        Real(0.2, 0.7, name='subsample'),
        Real(0.2, 0.7, name='colsample_bytree'),
        Integer(50, 255, name='max_bin')
    ]

    print("Preparing train and val datasets")
    xgtrain = lgb.Dataset(train_df[predictors].values,
                          label=train_df[target].values,
                          feature_name=predictors,
                          categorical_feature=categorical,
                          free_raw_data=False)
    del train_df
Example #10
    def get_optimize_params():
        space = [
            Real(0.8, 1, name='learning_rate'),
            Integer(2, 5, name='depth'),
            Real(0, 0.5, name='l2_leaf_reg'),
            #Integer(16, 48, name='max_leaves'),
            Real(0, 2, name='random_strength'),
        ]

        def get_MAE(arg_list):
            keys = ['learning_rate', 'depth', 'l2_leaf_reg', 'random_strength']
            val_params = {keys[i]: arg_list[i] for i in range(len(keys))}
            #learning_rate, depth, l2_leaf_reg, num_leaves, random_strength = arg_list
            """
            Melissa.send_message(f'starting val CATBOOST\nstuck in the middle of the road... obviously\n'
                                 f'{val_params}')
            """

            X, Y = data.dataset('local', 'train', onehot=False)

            weather_cols = [
                'WEATHER_-4', 'WEATHER_-3', 'WEATHER_-2', 'WEATHER_-1'
            ]
            X[weather_cols] = X[weather_cols].fillna('Unknown')

            weather_cols = [
                col for col in X.columns if col.startswith('WEATHER_')
            ]
            categorical_cols = [
                'EMERGENCY_LANE', 'ROAD_TYPE', 'EVENT_DETAIL', 'EVENT_TYPE'
            ] + weather_cols

            categorical_cols.extend(['WEEK_DAY', 'IS_WEEKEND'])

            weather_clusters_cols = [
                'WEATHER_-4_CL', 'WEATHER_-3_CL', 'WEATHER_-2_CL',
                'WEATHER_-1_CL'
            ]
            X[weather_clusters_cols] = X[weather_clusters_cols].fillna(
                'Unknown')

            # build params from default and validation ones
            params = {
                'X': X,
                'mode': 'local',
                'n_estimators': 10000,
                'loss_function': 'MAE',
                'eval_metric': 'MAE',
                'early_stopping_rounds': 100,
                'cat_features': categorical_cols
            }
            params.update(val_params)

            catboost = CatBoost(params)
            model = MultiOutputRegressor(catboost, n_jobs=-1)
            model.fit(X, Y)

            X_train, X_test, y_train, y_test = train_test_split(X,
                                                                Y,
                                                                test_size=0.2,
                                                                shuffle=False)
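            # note: the model above was fit on all of X, so X_test overlaps the
            # training data and this MAE estimate is optimistic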
            MAE = inout.evaluate(model, X_test, y_test)

            iterations = []
            for i in range(4):
                iterations.append(model.estimators_[i].model.best_iteration_)

            global _best_MAE
            if MAE < _best_MAE:
                _best_MAE = MAE
                Melissa.send_message(
                    f'CATBOOST\n ITERATIONS: {iterations} MAE: {MAE}\nparams:{val_params}\n'
                )
            return MAE

        return space, get_MAE
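A sketch of how the returned pair plugs into scikit-optimize (the import and call counts are assumptions; get_MAE takes a plain parameter list, which is exactly the calling convention gp_minimize expects):

from skopt import gp_minimize

space, get_MAE = get_optimize_params()
result = gp_minimize(get_MAE, space, n_calls=30, random_state=42)
print(result.x, result.fun)  # best [learning_rate, depth, l2_leaf_reg, random_strength] and its MAE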
Example #11
def create_params():

    param_grid_xgb = {
        'classifier__learning_rate': Real(0.1, 1.0, prior='log-uniform'),
        'classifier__max_depth': Integer(1, 20),
        'classifier__gamma': Real(0.01, 1.0, prior='log-uniform'),
        'classifier__subsample': Real(0.1, 1.0, prior='uniform'),
    }

    param_grid_lgbm = {
        'classifier__num_leaves': Integer(10, 150),
        'classifier__learning_rate': Real(0.01, 1),
        'classifier__max_depth': Integer(1, 30),
        'classifier__feature_fraction': Real(0.1, 1),
        'classifier__subsample': Real(0.1, 1)
    }

    param_grid_hgb = {
        'classifier__learning_rate': Real(0.001, 0.1, prior='log-uniform'),
        'classifier__max_leaf_nodes': Integer(2, 60),
        'classifier__max_depth': Integer(2, 50),
        'classifier__min_samples_leaf': Integer(2, 20),
        'classifier__l2_regularization': Real(1, 100)
    }

    param_grid_extra = {
        'classifier__n_estimators': Integer(100, 500),
        'classifier__min_samples_leaf': Integer(1, 10),
        'classifier__max_depth': Integer(1, 30),
        'classifier__max_features': Integer(2, 10),
        'classifier__class_weight': ['balanced'],
        'classifier__criterion': ['gini', 'entropy']
    }

    param_grid_log = {
        'classifier__penalty': ['l1', 'l2'],
        'classifier__C': Real(0.01, 100),
        'classifier__class_weight': ['balanced'],
        'classifier__solver': ['saga']
    }

    param_grid_rf = {
        'classifier__n_estimators': Integer(100, 500),
        'classifier__min_samples_leaf': Integer(1, 10),
        'classifier__max_depth': Integer(1, 30),
        'classifier__max_features': Integer(2, 10),
        'classifier__class_weight': ['balanced'],
        'classifier__criterion': ['gini', 'entropy']
    }

    param_grid_svm = {
        'classifier__C': Real(1000, 100000),
        'classifier__gamma': Real(0.000000001, 0.0001, prior='log-uniform'),
        'classifier__class_weight': ['balanced']
    }

    param_grid_nn = {
        'classifier__activation': ['tanh', 'relu'],
        'classifier__solver': ['sgd', 'adam'],
        'classifier__alpha': Real(0.01, 10),
        'classifier__learning_rate': ['constant', 'adaptive'],
    }

    param_grid_knn = {'classifier__n_neighbors': Integer(1, 150)}

    param_grid_tree = {
        'classifier__min_samples_split': Integer(2, 20),
        'classifier__max_depth': Integer(1, 20),
        'classifier__class_weight': ['balanced'],
    }

    param_grid_ada = {
        'classifier__n_estimators': Integer(20, 200),
        'classifier__learning_rate': Real(0.001, 0.1, prior='log-uniform')
    }

    param_grid_cat = {
        'classifier__learning_rate': Real(0.001, 0.1, prior='log-uniform'),
        'classifier__depth': Integer(1, 6),
        'classifier__l2_leaf_reg': Real(1, 100),
        'classifier__silent': [True]
    }

    PARAMS = {
        'XGBOOST': param_grid_xgb,
        'LGBM': param_grid_lgbm,
        'HGB': param_grid_hgb,
        'EXTRA': param_grid_extra,
        'LOG': param_grid_log,
        'RF': param_grid_rf,
        'SVM': param_grid_svm,
        'NN': param_grid_nn,
        'KNN': param_grid_knn,
        'TREE': param_grid_tree,
        'ADA': param_grid_ada,
        'CAT': param_grid_cat
    }

    return PARAMS
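The classifier__ prefixes imply these grids target a scikit-learn Pipeline with a step named classifier; a minimal sketch of the assumed wiring with BayesSearchCV:

from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from skopt import BayesSearchCV

pipe = Pipeline([('classifier', RandomForestClassifier())])
search = BayesSearchCV(pipe, create_params()['RF'], n_iter=32, cv=5)
# search.fit(X_train, y_train); search.best_params_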
Example #12
 def get_default_params_and_name(self, estimator):
     # print("get default estimator name and parameters")
     if isinstance(estimator, AdaBoostClassifier):
         base_estimator = [
             RandomForestClassifier(verbose=2, n_estimators=20),
             ExtraTreesClassifier(verbose=2, n_estimators=20),
             # GradientBoostingClassifier(verbose=2,n_estimators=20),
             # XGBClassifier(n_estimators=20),
             # LGBMClassifier(n_estimators=20)
         ]
         params = {
             "n_estimators": (30, 200),
             "learning_rate": (1e-6, 1.0, 'log-uniform'),
             "base_estimator": Categorical(base_estimator),
         }
         estimator_name = "adaboost"
     elif isinstance(estimator, GradientBoostingClassifier):
         params = {
             "n_estimators": (300, 1200),
             "learning_rate": (1e-6, 1.0, 'log-uniform'),
             "max_depth": (3, 30),
             "min_samples_leaf": (1, 128),
             "min_samples_split": (2, 256),
             "subsample": (0.6, 1.0, 'uniform'),
             "max_features": (0.6, 1.0, 'uniform'),
             "min_weight_fraction_leaf": (0.0, 0.5, 'uniform'),
             "min_impurity_decrease": (1e-6, 1e-1, 'log-uniform'),
         }
         estimator_name = "gbm"
     elif isinstance(estimator, XGBClassifier):
         params = {
             "n_estimators": (300, 1200),
             "max_depth": (3, 30),
             "min_child_weight": (1e-3, 1e+3, 'log-uniform'),
             "learning_rate": (1e-6, 1.0, 'log-uniform'),
             "colsample_bytree": (0.6, 1.0, 'uniform'),
             "subsample": (0.6, 1.0, 'uniform'),
             "gamma": (1e-6, 1.0, 'log-uniform'),
             'reg_alpha': (1e-3, 1e3, 'log-uniform'),
             'reg_lambda': (1e-3, 1e3, 'log-uniform'),
             "scale_pos_weight": (0.01, 1.0, 'uniform'),
         }
         estimator_name = "xgb"
     elif isinstance(estimator, LGBMClassifier):
         params = {
             "n_estimators": (300, 1200),
             "max_depth": (3, 30),
             "max_bin": (64, 256),
             "num_leaves": (30, 256),
             "min_child_weight": (1e-3, 1e3, 'log-uniform'),
             "min_child_samples": (8, 256),
             "min_split_gain": (1e-6, 1.0, 'log-uniform'),
             "learning_rate": (1e-6, 1.0, 'log-uniform'),
             "colsample_bytree": (0.6, 1.0, 'uniform'),
             "subsample": (0.6, 1.0, 'uniform'),
             'reg_alpha': (1e-3, 1e3, 'log-uniform'),
             'reg_lambda': (1e-3, 1e3, 'log-uniform'),
             "scale_pos_weight": (0.01, 1.0, 'uniform'),
         }
         estimator_name = "lgb"
     elif isinstance(estimator, CatBoostClassifier):
         params = {
             # 'iterations': hyperopt.hp.quniform("iterations", 300, 1200, 10),
             'depth':
             hyperopt.hp.quniform("depth", 3, 12, 1),
             # 'border_count': hyperopt.hp.quniform("border_count", 16, 224, 4),
             'learning_rate':
             hyperopt.hp.loguniform('learning_rate', 1e-6, 1e-1),
             'l2_leaf_reg':
             hyperopt.hp.qloguniform('l2_leaf_reg', 0, 3, 1),
             'bagging_temperature':
             hyperopt.hp.uniform('bagging_temperature', 0.6, 1.0),
             'rsm':
             hyperopt.hp.uniform('rsm', 0.8, 1.0)
         }
         estimator_name = "catboost"
     elif isinstance(estimator, RandomForestClassifier):
         params = {
             "n_estimators": (100, 1000),
             "criterion": Categorical(["gini", "entropy"]),
             "max_features": (0.8, 1.0, 'uniform'),
             "max_depth": (3, 30),
             "min_samples_split": (2, 256),
             "min_samples_leaf": (1, 128),
             "min_weight_fraction_leaf": (0.0, 0.5, 'uniform'),
             "max_leaf_nodes": (30, 256),
             "min_impurity_decrease": (1e-6, 1e-1, 'log-uniform'),
         }
         estimator_name = "rf"
     elif isinstance(estimator, ExtraTreesClassifier):
         params = {
             "n_estimators": (100, 1000),
             "criterion": Categorical(["gini", "entropy"]),
             "max_features": (0.8, 1.0, 'uniform'),
             "max_depth": (3, 30),
             "min_samples_split": (2, 256),
             "min_samples_leaf": (1, 128),
             "min_weight_fraction_leaf": (0.0, 0.5, 'uniform'),
             "max_leaf_nodes": (30, 256),
             "min_impurity_decrease": (1e-6, 1e-1, 'log-uniform'),
         }
         estimator_name = "et"
     elif isinstance(estimator, SVC):
         params = {
             "C": Real(1e-6, 1e+6, prior='log-uniform'),
             "gamma": Real(1e-6, 1e+1, prior='log-uniform'),
             "degree": Integer(1, 3),
             "kernel": Categorical(['linear', 'poly', 'rbf']),
         }
         estimator_name = "svc"
     elif isinstance(estimator, LogisticRegression):
         params = {
             "C": Real(1e-6, 1e+6, prior='log-uniform'),
             # "penalty": Categorical(['l1', 'l2']),
             "solver":
             Categorical(["newton-cg", "lbfgs", "liblinear", "saga"]),
         }
         estimator_name = "lr"
     elif isinstance(estimator, MLPClassifier):
         hls = []
         for i in [16, 32, 64]:
             hls.append((i * 2, i * 3))
             hls.append((i * 2, i * 3, i * 2))
             hls.append((i, i * 2, i * 4, i * 3))
         params = {
             "hidden_layer_sizes": Categorical(hls),
             "activation": Categorical(["logistic", "tanh", "relu"]),
             "solver": Categorical(["lbfgs", "sgd", "adam"]),
             "learning_rate": Categorical(["invscaling", "adaptive"]),
             "alpha": Categorical([0.00001, 0.0001, 0.001, 0.01]),
         }
         estimator_name = "mlp"
     else:
         print("wrong base estimator used")
         return
     return (estimator_name, params)
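Most entries above use skopt's tuple shorthand instead of explicit Dimension objects; BayesSearchCV and Space normalize (low, high) to Integer/Real and (low, high, 'log-uniform') to a log-uniform Real, as Example #15 below verifies:

from skopt.space import Space

print(Space([(30, 200), (1e-6, 1.0, 'log-uniform')]).dimensions)
# -> [Integer(low=30, high=200, ...), Real(low=1e-06, high=1.0, prior='log-uniform', ...)]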
Example #13
def test_integer_distance_out_of_range():
    ints = Integer(1, 10)
    assert_raises_regex(RuntimeError, "compute distance for values within",
                        ints.distance, 11, 10)
Example #14
def test_integer_distance():
    ints = Integer(1, 10)
    for i in range(1, 10 + 1):
        assert_equal(ints.distance(4, i), abs(4 - i))
Example #15
def test_space_consistency():
    # Reals (uniform)

    s1 = Space([Real(0.0, 1.0)])
    s2 = Space([Real(0.0, 1.0)])
    s3 = Space([Real(0, 1)])
    s4 = Space([(0.0, 1.0)])
    s5 = Space([(0.0, 1.0, "uniform")])
    s6 = Space([(0, 1.0)])
    s7 = Space([(np.float64(0.0), 1.0)])
    s8 = Space([(0, np.float64(1.0))])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    a4 = s4.rvs(n_samples=10, random_state=0)
    a5 = s5.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_equal(s1, s6)
    assert_equal(s1, s7)
    assert_equal(s1, s8)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)
    assert_array_equal(a1, a4)
    assert_array_equal(a1, a5)

    # Reals (log-uniform)
    s1 = Space([Real(10**-3.0, 10**3.0, prior="log-uniform")])
    s2 = Space([Real(10**-3.0, 10**3.0, prior="log-uniform")])
    s3 = Space([Real(10**-3, 10**3, prior="log-uniform")])
    s4 = Space([(10**-3.0, 10**3.0, "log-uniform")])
    s5 = Space([(np.float64(10**-3.0), 10**3.0, "log-uniform")])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    a4 = s4.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)
    assert_array_equal(a1, a4)

    # Integers
    s1 = Space([Integer(1, 5)])
    s2 = Space([Integer(1.0, 5.0)])
    s3 = Space([(1, 5)])
    s4 = Space([(np.int64(1.0), 5)])
    s5 = Space([(1, np.int64(5.0))])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)

    # Categoricals
    s1 = Space([Categorical(["a", "b", "c"])])
    s2 = Space([Categorical(["a", "b", "c"])])
    s3 = Space([["a", "b", "c"]])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_array_equal(a1, a2)
    assert_equal(s1, s3)
    assert_array_equal(a1, a3)

    s1 = Space([(True, False)])
    s2 = Space([Categorical([True, False])])
    assert s1 == s2
Example #16
# example of bayesian optimization with scikit-optimize
from numpy import mean
from sklearn.datasets import make_blobs
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from skopt.space import Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

# generate 2d classification dataset
X, y = make_blobs(n_samples=500, centers=3, n_features=2)
# define the model
model = KNeighborsClassifier()
# define the space of hyperparameters to search
search_space = [Integer(1, 5, name='n_neighbors'), Integer(1, 2, name='p')]


# define the function used to evaluate a given configuration
@use_named_args(search_space)
def evaluate_model(**params):
    # configure the model with the given hyperparameters
    model.set_params(**params)
    # calculate 5-fold cross validation
    result = cross_val_score(model, X, y, cv=5, n_jobs=-1, scoring='accuracy')
    # calculate the mean of the scores
    estimate = mean(result)
    return 1.0 - estimate


# perform optimization
result = gp_minimize(evaluate_model, search_space)
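gp_minimize returns an OptimizeResult, so a natural follow-up (standard skopt attributes, not shown above) is to report the best configuration found:

# result.fun is the minimized objective (1 - accuracy), result.x the best [n_neighbors, p]
print('Best accuracy: %.3f' % (1.0 - result.fun))
print('Best parameters: n_neighbors=%d, p=%d' % (result.x[0], result.x[1]))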
Example #17
init_type = "random"
#init_type = "sobol"
#init_type = "latin"

#####
# Experiment parameters
#####
boston = load_boston()
X, y = boston.data, boston.target
n_features = X.shape[1]

space = [
    Real(10**-5, 10**0, name='learning_rate_init'),
    Real(10**-5, 10**0, name='alpha'),
    Integer(1, 100, name='hidden_layer_sizes')
]

space_gpyopt = [
    {
        "name": "learning_rate_init",
        "type": "continuous",
        "domain": (10**-5, 10**0)
    },
    {
        "name": "alpha",
        "type": "continuous",
        "domain": (10**-5, 10**0)
    },
    {
        "name": "hidden_layer_sizes",
Example #18
def run_KNNRecommender_on_similarity_type(
        similarity_type,
        parameterSearch,
        parameter_search_space,
        recommender_input_args,
        n_cases,
        n_random_starts,
        resume_from_saved,
        save_model,
        output_folder_path,
        output_file_name_root,
        metric_to_optimize,
        allow_weighting=False,
        recommender_input_args_last_test=None):

    original_parameter_search_space = parameter_search_space

    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["topK"] = Categorical(range(
        5, 900, 5))  #Integer(5, 1000)
    hyperparameters_range_dictionary["shrink"] = Integer(0, 1000)
    hyperparameters_range_dictionary["similarity"] = Categorical(
        [similarity_type])
    hyperparameters_range_dictionary["normalize"] = Categorical([True, False])

    is_set_similarity = similarity_type in [
        "tversky", "dice", "jaccard", "tanimoto"
    ]
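    # note: TF-IDF / BM25 feature weighting is only added to the search space
    # further below when the similarity is not one of these set-based measures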

    if similarity_type == "asymmetric":
        hyperparameters_range_dictionary["asymmetric_alpha"] = Real(
            low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["normalize"] = Categorical([True])

    elif similarity_type == "tversky":
        hyperparameters_range_dictionary["tversky_alpha"] = Real(
            low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["tversky_beta"] = Real(
            low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["normalize"] = Categorical([True])

    elif similarity_type == "euclidean":
        hyperparameters_range_dictionary["normalize"] = Categorical(
            [True, False])
        hyperparameters_range_dictionary["normalize_avg_row"] = Categorical(
            [True, False])
        hyperparameters_range_dictionary[
            "similarity_from_distance_mode"] = Categorical(
                ["lin", "log", "exp"])

    if not is_set_similarity:

        if allow_weighting:
            hyperparameters_range_dictionary[
                "feature_weighting"] = Categorical(["none", "BM25", "TF-IDF"])

    local_parameter_search_space = {
        **hyperparameters_range_dictionary,
        **original_parameter_search_space
    }

    parameterSearch.search(
        recommender_input_args,
        parameter_search_space=local_parameter_search_space,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=resume_from_saved,
        save_model=save_model,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root + "_" + similarity_type,
        metric_to_optimize=metric_to_optimize,
        recommender_input_args_last_test=recommender_input_args_last_test)
Example #19
    _local_variables['docker_client'] = docker_client
    _local_variables['report_name'] = report_name


# Parameter names for appending the best configuration.
PARAMETERS = ('gop', 'bitrate', 'ip-period', 'init-qp', 'qpmin', 'qpmax',
              'disable-frame-skip', 'diff-qp-ip', 'diff-qp-ib',
              'num-ref-frame', 'rc-mode', 'profile', 'cabac', 'dct8x8',
              'deblock-filter', 'prefix-nal', 'idr-interval')

# All parameters available in qsv-h264 and their ranges
# https://github.com/intel/libyami-utils/blob/c64cad218e676cc02b426cb67b660d8eb2567d3b/tests/encodehelp.h
# https://github.com/intel/libyami/blob/apache/interface/VideoEncoderDefs.h
# https://github.com/intel/libyami-utils/blob/master/doc/yamitranscode.1
SPACE = [
    Integer(1, 250, name='gop'),
    Integer(100, 5000, name='bitrate'),
    Integer(0, 50, name='ip_period'),  # @TODO check range
    Integer(0, 51, name='init_qp'),
    Integer(0, 50, name='qpmin'),
    Integer(0, 51, name='qpmax'),
    Categorical((0, 1), name='disable_frame_skip'),
    Integer(0, 51, name='diff_qp_ip'),  # @TODO check range
    Integer(0, 51, name='diff_qp_ib'),  # @TODO check range
    Integer(0, 16, name='num_ref_frame'),
    Integer(0, 4, name='rc_mode'),
    Integer(0, 2, name='profile'),
    Categorical((0, 1), name='cabac'),
    Categorical((0, 1), name='dct8x8'),
    Categorical((0, 1), name='deblock_filter'),
    Categorical((0, 1), name='prefix_nal'),
Example #20
                        epochs=200,
                        batch_size=batch_size,
                        verbose=0,
                        class_weight=classWeight,
                        callbacks=[early_stopping, model_checkpoint],
                        validation_split=0.25)
    Y_predict = model.predict(X_test)
    fpr, tpr, thresholds = roc_curve(Y_test, Y_predict)
    roc_auc = auc(fpr, tpr)
    return roc_auc
    #best_acc = max(history.history['val_acc'])
    #return best_acc


space = [
    Integer(2, 5, name='hidden_layers'),
    Integer(32, 1024, name='initial_nodes'),
    Real(10**-6, 10**-3, "log-uniform", name='l2_lambda'),
    Real(0.0, 0.5, name='dropout'),
    Integer(256, 4096, name='batch_size'),
    Real(10**-5, 10**-1, "log-uniform", name='learning_rate'),
]


@use_named_args(space)
def objective(**X):
    print("New configuration: {}".format(X))

    model = build_custom_model(num_hiddens=X['hidden_layers'],
                               initial_node=X['initial_nodes'],
                               dropout=X['dropout'],
Example #21
def get_parameters_space(hidden_layers_comb):
    params = [conf_to_params(conf) for conf in Config.get("bayesianOpt")["hyperparameters"]]
    max_choice = len(hidden_layers_comb[-1]) - 1
    params.append(Integer(0, max_choice, name="hidden_layer_choice"))

    return params
Example #22
import numpy as np
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

boston = load_boston()
X, y = boston.data, boston.target
n_features = X.shape[1]

reg = GradientBoostingRegressor(n_estimators=50, random_state=0)

space = [
    Integer(1, 5, name='max_depth'),
    Real(10**-5, 10**0, "log-uniform", name='learning_rate'),
    Integer(1, n_features, name='max_features'),
    Integer(2, 100, name='min_samples_split'),
    Integer(1, 100, name='min_samples_leaf')
]


@use_named_args(space)
def objective(**params):
    reg.set_params(**params)

    return -np.mean(
        cross_val_score(
            reg, X, y, cv=5, n_jobs=-1, scoring="neg_mean_absolute_error"))
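The snippet stops before the search itself; following skopt's documented pattern, the next call would be (assumed continuation):

res = gp_minimize(objective, space, n_calls=50, random_state=0)
print("Best MAE: %.4f" % res.fun)
print("Best hyperparameters:", dict(zip([d.name for d in space], res.x)))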
Example #23
        return s.data.cpu().numpy()
    
    def fn_skopt(params):
        x, y, z = params

        px = torch.tensor(x, device='cpu', requires_grad=True)
        py = torch.tensor(y, device='cpu', requires_grad=True)
        s = torch.tensor(0.5, device='cpu', requires_grad=True)
        for i in trange(10, leave=False):
            s = s + 0.5 * px + py
            sleep(0.1)
        return float(s.data.cpu().numpy())

    if args.fn == 'fn_skopt':
        res_gp = gp_minimize(func=fn_skopt,
                             dimensions=[Real(-10, 10, 'uniform', name='x'),
                                         Real(-10, 10, 'uniform', name='y'),
                                         Integer(-10, 10, name='z')],
                             n_calls=15,
                             random_state=0)
        print("Best score=%.4f" % res_gp.fun)
        print('best param', res_gp.x)
        best = res_gp.fun
    else:
        bo = bayesopt(fn_bayes, {'x': [-10, 10], 'y': [-10, 10], 'z': [-10, 10]})
        bo.maximize(init_points=5, n_iter=10, kappa=2)
        best = bo.res['max']
        print(bo.res['all'])
    print(best)
Example #24
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     allow_weighting=True,
                                     similarity_type_list=None):
    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters, therefore only one evaluation is needed
            """

            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            parameterSearch.search(recommender_parameters,
                                   fit_parameters_values={},
                                   output_folder_path=output_folder_path,
                                   output_file_name_root=output_file_name_root)

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_parameters=recommender_parameters,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)

                pool.close()
                pool.join()

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0,
                                                            high=2,
                                                            prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 250)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3,
                                                             high=1.0,
                                                             prior='uniform')

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        ## Final step: run the search once the hyperparameter range has been defined for the selected algorithm
        parameterSearch.search(
            recommender_parameters,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
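# A hedged usage sketch for the search runner above. The enclosing function's
# name and signature are not visible in this excerpt, so `run_parameter_search`
# is a placeholder for it; URM_train is assumed to be the training user-rating
# matrix already loaded by the caller.
run_parameter_search(recommender_class=P3alphaRecommender,
                     URM_train=URM_train,
                     metric_to_optimize="MAP",
                     n_cases=35,
                     output_folder_path="result_experiments/",
                     output_file_name_root="P3alpha")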
Example #25
    # Imports this fragment needs (the enclosing function is not shown);
    # create_fold_groups, CustomSplitter, CustomBayesSearch, PredictionSubmitter,
    # sharpe and napi are project-specific names defined elsewhere in the source.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import make_scorer
    from skopt.space import Integer, Real

    N_SPLITS = 3
    groups = create_fold_groups(X, n_splits=N_SPLITS)
    kfold = CustomSplitter(n_splits=N_SPLITS)

    # Create estimator and metrics
    estimator = RandomForestClassifier(n_jobs=-1,
                                       random_state=42,
                                       n_estimators=100,
                                       class_weight='balanced')
    scorer = make_scorer(sharpe)

    # Define parameter search space
    search_spaces = {
        'max_features': Real(.05, .5),
        'max_samples': Real(.1, .99),
        'min_samples_leaf': Integer(1, 100),
    }

    opt = CustomBayesSearch(estimator=estimator,
                            search_spaces=search_spaces,
                            n_iter=100,
                            scoring=scorer,
                            cv=kfold)

    opt.fit(X, y, groups)

    submitter = PredictionSubmitter(napi=napi,
                                    model=opt.best_estimator_,
                                    napi_user='******')

    submitter.submit()
from __future__ import print_function
from skopt import gp_minimize
from skopt.utils import use_named_args
from skopt.space import Integer
import numpy as np
import scipy.io as sio
import BER_calc
from datetime import datetime
from sklearn.utils import shuffle
import tensorflow.compat.v1 as tf
from scipy import special

tf.disable_v2_behavior()

top = 100
dim_delay = Integer(low=1, high=50, name='lr')  # note: the delay dimension is named 'lr'
dim_num_dense_layers11 = Integer(low=1, high=top, name='node_layer11')
dim_num_dense_layers12 = Integer(low=1, high=top, name='node_layer12')
dim_num_dense_layers13 = Integer(low=1, high=top, name='node_layer13')
dim_num_dense_layers21 = Integer(low=1, high=top, name='node_layer21')
dim_num_dense_layers22 = Integer(low=1, high=top, name='node_layer22')
dim_num_dense_layers23 = Integer(low=1, high=top, name='node_layer23')
# dim_batch_size = Integer(low=200, high=200, name='batch_size')

dimensions = [
    dim_delay,
    dim_num_dense_layers11,
    dim_num_dense_layers12,
    dim_num_dense_layers13,
    dim_num_dense_layers21,
    dim_num_dense_layers22,
    dim_num_dense_layers23,
]
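# The scraped example stops before the dimensions are used. Below is a minimal
# sketch of the pattern that the gp_minimize/use_named_args imports above set
# up; the objective body and the call budget are assumptions, not the original
# author's code.
@use_named_args(dimensions=dimensions)
def fitness(lr, node_layer11, node_layer12, node_layer13,
            node_layer21, node_layer22, node_layer23):
    # Placeholder: build and train the two three-layer networks with these
    # sizes and return the quantity to minimize (e.g. the computed BER).
    return 0.0


search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            acq_func='EI',
                            n_calls=40)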
Example #27
import time
from datetime import datetime as dt

import numpy as np
from hyperopt import tpe, hp, fmin
from skopt.space import Integer, Real
from skopt.utils import use_named_args


SEED = 1  # random_seed
N_FOLD = 5  # number of folds in StratifiedKFold
MAX_CALLS = 20  # number of iterations for hyperparameter tuning
CHUNKS = 100000
CLASS_WEIGHTS_GAL = {6: 1, 16: 1, 53: 1, 65: 1, 92: 1}
CLASS_WEIGHTS_EXTRA = {15: 2, 42: 1, 52: 1, 62: 1, 64: 2, 67: 1, 88: 1, 90: 1, 95: 1}
TUNING = 'skopt' # 'skopt' or 'hyperopt'

# Tuning LightGBM parameters
space_skopt = [Integer(4, 7, name='max_depth'),
               Real(low=1e-3, high=1e-1, prior="log-uniform", name='learning_rate'),
               Integer(low=100, high=800, name='n_estimators')]

space_hyperopt = {'max_depth': hp.choice('max_depth', range(4, 8, 1)),
                  'learning_rate': hp.loguniform('learning_rate', np.log(1e-3), np.log(1e-1)),
                  'n_estimators': hp.choice('n_estimators', range(100, 801, 1))}


def multi_weighted_logloss(y_true, y_preds):
    """ Multi logloss for PLAsTiCC challenge """

    if len(np.unique(y_true)) == 5:
        class_weights = CLASS_WEIGHTS_GAL
    elif len(np.unique(y_true)) == 9:
        class_weights = CLASS_WEIGHTS_EXTRA
# Here we define the objective function that we evaluate.
import numpy as np
from skopt import gp_minimize
from skopt.space import Categorical, Integer
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier


def objective(params):
    clf = DecisionTreeClassifier(**{
        dim.name: val
        for dim, val in zip(SPACE, params) if dim.name != 'dummy'
    })
    # return_X_y=True replaces the long-deprecated positional boolean argument.
    return -np.mean(cross_val_score(clf, *load_breast_cancer(return_X_y=True)))


#############################################################################
# Bayesian optimization
# =====================
SPACE = [
    Integer(1, 20, name='max_depth'),
    Integer(2, 100, name='min_samples_split'),
    Integer(5, 30, name='min_samples_leaf'),
    Integer(1, 30, name='max_features'),
    Categorical(list('abc'), name='dummy'),
    Categorical(['gini', 'entropy'], name='criterion'),
    Categorical(list('def'), name='dummy'),
]

result = gp_minimize(objective, SPACE, n_calls=20)

#############################################################################
# Partial dependence plot
# =======================
#
# Here we see an example of using partial dependence.
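# The docs text is cut off above; a minimal sketch of the plot this section
# refers to, using skopt's plotting helper (assumed available in this setup).
from skopt.plots import plot_objective

_ = plot_objective(result)  # partial-dependence panels over the search space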
hpo_params = {
    'n_calls': 100,
    'n_random_starts': 10,
    'base_estimator': 'ET',
    'acq_func': 'EI',
    'xi': 0.02,
    'kappa': 1.96,
    'n_points': 10000,
}
rf_space = [
    Categorical([10, 100, 500], name='n_estimators'),
    Categorical(['auto', 'log2'], name='max_features'),
    Categorical([2, 5, 10, 20, None], name='max_depth'),
    Real(0.0001, 1, name='min_samples_split'),
    Integer(1, 5, name='min_samples_leaf'),
    Categorical([None, 50, 100, 150, 200], name='max_leaf_nodes')
    #Integer(1, 37, name = 'max_features')
]
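# The keys of hpo_params line up with skopt's forest_minimize keyword
# arguments ('ET' = extra-trees surrogate, 'EI' = expected improvement), so
# presumably the spaces are consumed like this; the objective is a
# placeholder, not the original code.
from skopt import forest_minimize


def rf_objective(params):
    # Placeholder: fit a RandomForestClassifier with `params`, return CV loss.
    return 0.0


rf_result = forest_minimize(rf_objective, rf_space, **hpo_params)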

ada_space = [  #Integer(200, 500, name = 'n_estimators'),
    Real(0.01, 1, prior="log-uniform", name='learning_rate')
]

gbc_space = [
    Real(0.01, 1, prior="log-uniform", name='learning_rate'),
    Integer(200, 500, name='n_estimators'),
    Integer(1, 10, name='max_depth'),
    Real(0.1, 1, name='min_samples_split'),
    Real(0.1, 0.5, name='min_samples_leaf'),
    Integer(1, 10, name='max_features')
]
with open(
        "/home/norberteke/PycharmProjects/Thesis/data/GH_recent_full_activity_corpus.txt",
        'w') as f:
    for text in texts:
        f.write(str(text) + "\n")

corpus = [dictionary.doc2bow(text) for text in texts]

model = LdaTransformer(id2word=dictionary,
                       alpha='auto',
                       iterations=100,
                       random_state=2019)

# The list of hyperparameters we want to optimize. For each one we define
# the bounds and the corresponding scikit-learn parameter name.
space = [Integer(20, 500, name='num_topics'), Real(0.001, 200, name='eta')]


# This decorator allows your objective function to receive the parameters as
# keyword arguments, which is particularly convenient when setting
# scikit-learn estimator parameters.
@use_named_args(space)
def objective(**params):
    model.set_params(**params)
    lda = model.fit(corpus)
    coherence = evaluateModel(lda.gensim_model)

    try:
        cm = CoherenceModel(model=lda.gensim_model,
                            corpus=corpus,
                            dictionary=dictionary,
                            coherence='u_mass')
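# The scrape truncates the objective inside the `try` block above. Assuming
# it ultimately returns a score to minimize (e.g. negated coherence), the
# search over `space` would presumably be launched like this:
from skopt import gp_minimize

res = gp_minimize(objective, space, n_calls=30, random_state=2019)
print("Best num_topics=%d, eta=%.3f" % (res.x[0], res.x[1]))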
Example #31
    # Fragment of a test module: `reals` (presumably a Real dimension) and
    # assert_equal are defined earlier in the scraped source file.
    for i in range(1, 10 + 1):
        assert_equal(reals.distance(4.1234, i), abs(4.1234 - i))


@pytest.mark.parametrize("dimension, bounds",
                         [(Real, (2, 1)), (Integer, (2, 1)), (Real, (2, 2)),
                          (Integer, (2, 2))])
def test_dimension_bounds(dimension, bounds):
    with pytest.raises(ValueError) as exc:
        dimension(*bounds)
    # The assertion must sit outside the `with` block: the constructor raises,
    # so any statement after it inside the block would never execute.
    assert "has to be less than the upper bound " in exc.value.args[0]


@pytest.mark.parametrize(
    "dimension, name", [(Real(1, 2, name="learning rate"), "learning rate"),
                        (Integer(1, 100, name="no of trees"), "no of trees"),
                        (Categorical(["red, blue"], name="colors"), "colors")])
def test_dimension_name(dimension, name):
    assert dimension.name == name


@pytest.mark.parametrize(
    "dimension",
    [Real(1, 2), Integer(1, 100),
     Categorical(["red, blue"])])
def test_dimension_name_none(dimension):
    assert dimension.name is None


def test_dimension_name_invalid():
    # A distinct name avoids shadowing the parametrized test_dimension_name
    # above (pytest would only collect the last same-named function).
    notnames = [1, 1., True]