示例#1
0
def run_decisiontree(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Any:
    """Search decision tree hyperparameters, cross-validate and return the model.

    Args:
        X_train: Training features.
        X_test: Held-out features, only used when ``config["test"]`` is truthy.
        y_train: Training targets.
        y_test: Held-out targets, only used when ``config["test"]`` is truthy.
        config: Run configuration; forwarded to ``hyperopt_search`` and read
            for the ``"test"`` flag.

    Returns:
        The pipeline wrapping the tuned ``DecisionTreeClassifier``, fitted on
        the training data.
    """
    LOGGER.info("Finding best decision tree..")
    search_space = {
        "type": "decision_tree",
        "max_depth": hp.uniformint("max_depth", 2, 15),
        # The hyperopt label has to name the parameter it samples; it used to
        # be "n_estimators", which is not a decision tree parameter.
        "min_samples_split": hp.uniformint("min_samples_split", 2, 20),
        # "class_weight": hp.choice("class_weight", ["balanced"]),
    }

    best_params = hyperopt_search(X_train, y_train, search_space, config)
    # Fit on the full training set so the returned pipeline is usable and the
    # predict() call below cannot hit an unfitted estimator.
    model = make_pipeline(DecisionTreeClassifier(**best_params)).fit(
        X_train, y_train)

    mean_cross_val_score = cross_validate_model(model, X_train, y_train)
    LOGGER.info(
        f"Decision tree cross validation score: {mean_cross_val_score}")

    if config["test"]:
        # classification_report expects (y_true, y_pred); swapping them
        # exchanges precision and recall in the printed report.
        print(classification_report(y_test, model.predict(X_test)))

    return model
class KMeansOptimizationJob(HyperparameterOptimizationJob):
    """Hyperparameter optimization job that clusters URL vectors with k-means.

    The search space samples ``k``, ``distance_measure``, ``window_size`` and
    ``s``; ``get_clusters`` itself only reads ``k`` and ``distance_measure``.
    """

    app_name: str = "KMeansOptimizationJob"

    # Single-option choice: the sampled value is the tuple
    # ('kmeans', <parameter dict>).
    search_space: Apply = hp.choice('model', [('kmeans', {
        'k': hp.uniformint('k', 4, 20),
        'distance_measure': hp.choice("distance_measure",
                                      ['euclidean', 'cosine']),
        'window_size': hp.uniformint('window_size', 2, 5),
        's': hp.uniform('s', .5, 5),
    })])

    def __init__(
        self,
        get_cluster_score: Callable[[DataFrame], float] = None,
        additional_weight_function: Callable[[int], float] = lambda e: 1.0
    ) -> None:
        """Store the optional scoring and weighting callables.

        Args:
            get_cluster_score: When truthy, replaces the ``get_cluster_score``
                attribute on this instance.
            additional_weight_function: Callable taking an int and returning
                a float; defaults to a constant ``1.0``.
        """
        self.additional_weight_function = additional_weight_function
        if get_cluster_score:
            self.get_cluster_score = get_cluster_score

    def get_clusters(self, parameters: dict,
                     urls_and_vectors: DataFrame) -> DataFrame:
        """Fit a k-means model on the vectors and return the transformed frame.

        Args:
            parameters: Sampled hyperparameters; ``"k"`` and
                ``"distance_measure"`` are read.
            urls_and_vectors: Input DataFrame, cached for the duration of the
                call.

        Returns:
            The DataFrame produced by ``model.transform``.
        """
        cached_vectors = urls_and_vectors.cache()
        try:
            model = KMeansJob.get_model(cached_vectors, parameters["k"],
                                        parameters["distance_measure"])
            return model.transform(cached_vectors)
        finally:
            # Release the cache even when fitting or transforming fails, so a
            # failed trial does not leave the DataFrame pinned.
            cached_vectors.unpersist()
示例#3
0
def test_uniformint_arguments(arguments):
    """
    Test whether uniformint can accept both positional and keyword arguments.
    Related to PR #704.

    ``arguments`` is either a list (unpacked positionally) or a dict
    (unpacked as keywords). Anything else is rejected up front instead of
    failing later with an unbound ``space``.
    """
    if isinstance(arguments, list):
        space = hp.uniformint(*arguments)
    elif isinstance(arguments, dict):
        space = hp.uniformint(**arguments)
    else:
        raise TypeError(
            "arguments must be a list or a dict, got "
            f"{type(arguments).__name__}")
    # Fixed seed: the expected values below are tied to this RandomState.
    rng = np.random.RandomState(123)
    values = [sample(space, rng=rng) for _ in range(10)]
    assert values == [7, 3, 2, 6, 7, 4, 10, 7, 5, 4]
示例#4
0
def test_uniformint_arguments(arguments):
    """
    Test whether uniformint can accept both positional and keyword arguments.
    Related to PR #704.

    ``arguments`` is either a list (unpacked positionally) or a dict
    (unpacked as keywords). Anything else is rejected up front instead of
    failing later with an unbound ``space``.
    """
    if isinstance(arguments, list):
        space = hp.uniformint(*arguments)
    elif isinstance(arguments, dict):
        space = hp.uniformint(**arguments)
    else:
        raise TypeError(
            "arguments must be a list or a dict, got "
            f"{type(arguments).__name__}")
    # Fixed seed: the expected values below are tied to this PCG64 generator.
    rng = np.random.default_rng(np.random.PCG64(123))
    values = [sample(space, rng=rng) for _ in range(10)]
    assert values == [7, 1, 2, 2, 2, 8, 9, 3, 8, 9]
示例#5
0
    def testConvertHyperOpt(self):
        """Check that a Tune search space converts to a matching HyperOpt space.

        Covers three things: grid search is rejected, a converted space and a
        hand-written HyperOpt space yield the same first suggestion under the
        same seed, and a space mixing Tune and HyperOpt definitions is
        accepted.
        """
        from ray.tune.suggest.hyperopt import HyperOptSearch
        from hyperopt import hp

        # Grid search not supported, should raise ValueError
        with self.assertRaises(ValueError):
            HyperOptSearch.convert_search_space({
                "grid": tune.grid_search([0, 1])
            })

        # Tune-native definition of the space ...
        config = {
            "a": tune.sample.Categorical([2, 3, 4]).uniform(),
            "b": {
                "x": tune.sample.Integer(-15, -10),
                "y": 4,
                "z": tune.sample.Float(1e-4, 1e-2).loguniform()
            }
        }
        converted_config = HyperOptSearch.convert_search_space(config)
        # ... and the hand-written HyperOpt space it is compared against.
        hyperopt_config = {
            "a": hp.choice("a", [2, 3, 4]),
            "b": {
                "x": hp.uniformint("x", -15, -10),
                "y": 4,
                "z": hp.loguniform("z", np.log(1e-4), np.log(1e-2))
            }
        }

        # Both searchers share the same seed so their first suggestions can
        # be compared for equality.
        searcher1 = HyperOptSearch(
            space=converted_config,
            random_state_seed=1234,
            metric="a",
            mode="max")
        searcher2 = HyperOptSearch(
            space=hyperopt_config,
            random_state_seed=1234,
            metric="a",
            mode="max")

        config1 = searcher1.suggest("0")
        config2 = searcher2.suggest("0")

        self.assertEqual(config1, config2)
        self.assertIn(config1["a"], [2, 3, 4])
        self.assertIn(config1["b"]["x"], list(range(-15, -10)))
        self.assertEqual(config1["b"]["y"], 4)
        self.assertLess(1e-4, config1["b"]["z"])
        self.assertLess(config1["b"]["z"], 1e-2)

        # The Tune config is passed to tune.run directly, with a searcher
        # that was created without a space.
        searcher = HyperOptSearch(metric="a", mode="max")
        analysis = tune.run(
            _mock_objective, config=config, search_alg=searcher, num_samples=1)
        trial = analysis.trials[0]
        assert trial.config["a"] in [2, 3, 4]

        # A space mixing a Tune domain and a HyperOpt expression.
        mixed_config = {"a": tune.uniform(5, 6), "b": hp.uniform("b", 8, 9)}
        searcher = HyperOptSearch(space=mixed_config, metric="a", mode="max")
        config = searcher.suggest("0")
        self.assertTrue(5 <= config["a"] <= 6)
        self.assertTrue(8 <= config["b"] <= 9)
示例#6
0
def _search_space_from_dict(dict_hyperparams):
    """Translate a hyperparameter specification dict into hyperopt expressions.

    Each value of ``dict_hyperparams`` is a mapping with a ``'type'`` key
    (``'int'``, ``'float'``, ``'bool'`` or ``'str'``) and, except for
    ``'bool'``, a ``'range'`` or ``'values'`` entry.

    Args:
        dict_hyperparams: Mapping of hyperparameter name to its specification.

    Returns:
        dict: Hyperparameter name mapped to the hyperopt expression sampling it.

    Raises:
        TypeError: If ``dict_hyperparams`` is not a dictionary.
        ValueError: If a hyperparameter declares an unsupported ``'type'``.
    """
    if not isinstance(dict_hyperparams, dict):
        raise TypeError('Hyperparams must be a dictionary.')

    hyperparams = {}

    for name, hyperparam in dict_hyperparams.items():
        hp_type = hyperparam['type']
        hp_range = hyperparam.get('range') or hyperparam.get('values')

        if hp_type == 'int':
            hp_min = min(hp_range) if hp_range else None
            hp_max = max(hp_range) if hp_range else None
            hp_instance = hp.uniformint(name, hp_min, hp_max)

        elif hp_type == 'float':
            hp_instance = hp.uniform(name, min(hp_range), max(hp_range))

        elif hp_type == 'bool':
            hp_instance = hp.choice(name, [True, False])

        elif hp_type == 'str':
            hp_instance = hp.choice(name, hp_range)

        else:
            # Without this branch an unknown type silently reused the
            # expression built for the previous hyperparameter (or raised
            # UnboundLocalError on the first one).
            raise ValueError(
                'Unsupported hyperparameter type {!r} for {!r}.'.format(
                    hp_type, name))

        hyperparams[name] = hp_instance

    return hyperparams
示例#7
0
    def full_hyper_space(self):
        """Return the parent's hyperparameter space plus ``fex_ngram_max``.

        Returns:
            tuple: ``(hyper_space, hyper_choices)`` as returned by the parent
            class, with a ``"fex_ngram_max"`` entry added to the space.
        """
        from hyperopt import hp

        space, choices = super(Tfidf, self).full_hyper_space()
        space["fex_ngram_max"] = hp.uniformint("fex_ngram_max", 1, 3)
        return space, choices
示例#8
0
def run_knn(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Any:
    """Search KNN hyperparameters, cross-validate and return the model.

    Args:
        X_train: Training features.
        X_test: Held-out features, only used when ``config["test"]`` is truthy.
        y_train: Training targets.
        y_test: Held-out targets, only used when ``config["test"]`` is truthy.
        config: Run configuration; forwarded to ``hyperopt_search`` and
            ``get_scaler`` and read for the ``"test"`` flag.

    Returns:
        The scaler + ``KNeighborsClassifier`` pipeline, fitted on the training
        data.
    """
    LOGGER.info("Finding best knn..")
    search_space = {
        "type": "knn",
        "n_neighbors": hp.uniformint("n_neighbors", 2, 15),
        "weights": hp.choice("weights", ["uniform", "distance"]),
    }

    best_params = hyperopt_search(X_train, y_train, search_space, config)
    # Fit on the full training set so the returned pipeline is usable and the
    # predict() call below cannot hit an unfitted estimator.
    model = make_pipeline(get_scaler(config),
                          KNeighborsClassifier(**best_params)).fit(
                              X_train, y_train)

    mean_cross_val_score = cross_validate_model(model, X_train, y_train)
    LOGGER.info(f"KNN cross validation score: {mean_cross_val_score}")

    if config["test"]:
        # classification_report expects (y_true, y_pred); swapping them
        # exchanges precision and recall in the printed report.
        print(classification_report(y_test, model.predict(X_test)))

    return model
示例#9
0
def run_gradient_boosting_classifier(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Tuple[Any, Any]:
    """Train a gradient boosting classifier and optionally score it on test data.

    When ``config["find_optimal_model"]`` is truthy the hyperparameters come
    from ``hyperopt_search``; otherwise they are read from
    ``config["models"]["gradient_boosting"]``.

    Args:
        X_train: Training features.
        X_test: Held-out features, only used when ``config["test"]`` is truthy.
        y_train: Training targets.
        y_test: Held-out targets, only used when ``config["test"]`` is truthy.
        config: Run configuration (``"find_optimal_model"``, ``"models"``,
            ``"test"``).

    Returns:
        ``(model, y_pred)`` where ``y_pred`` holds the test predictions, or
        ``None`` when ``config["test"]`` is falsy.
    """
    LOGGER.info("Running Gradient boosting classifier..")
    if config["find_optimal_model"]:
        search_space = {
            "type": "gradient_boosting",
            "max_depth": hp.uniformint("max_depth", 2, 15),
            "n_estimators": hp.uniformint("n_estimators", 50, 300),
            "max_features": hp.choice("max_features", ("auto", "sqrt", None)),
            "learning_rate": hp.quniform("learning_rate", 0.025, 0.5, 0.025),
        }

        best_params = hyperopt_search(X_train, y_train, search_space, config)
        model = make_pipeline(GradientBoostingClassifier(**best_params)).fit(
            X_train, y_train)

        mean_cross_val_score = cross_validate_model(model, X_train, y_train)
        LOGGER.info(
            f"Gradient boosting classifier cross validation score: {mean_cross_val_score}"
        )

    else:
        model = make_pipeline(
            GradientBoostingClassifier(
                **config["models"]["gradient_boosting"])).fit(
                    X_train, y_train)

    if not config["test"]:
        return model, None

    # Predict once and reuse the result for both the report and the score.
    y_pred = model.predict(X_test)

    if not config["find_optimal_model"]:
        # The report is only printed for the preset-parameter branch.
        # classification_report expects (y_true, y_pred).
        print(classification_report(y_test, y_pred))

    score = accuracy_score(y_pred=y_pred, y_true=y_test)

    # The score is computed on X_test, so it is a test accuracy.
    LOGGER.info(
        f"The gradient boosting classifier has a test accuracy of {score}"
    )

    return model, y_pred
示例#10
0
def test_remove_allpaths_int():
    """Both branches of the choice use ``z``, so ``z`` must be unconditional.

    After ``expr_to_config`` the recorded conditions for ``a`` and ``z`` are
    each expected to be exactly ``{(True,)}``.
    """
    shared_int = hp.uniformint("z", 0, 10)
    chooser = hp.choice("a", [shared_int + 1, shared_int - 1])
    collected = {}
    expr_to_config(chooser, (True,), collected)
    aconds = collected["a"]["conditions"]
    zconds = collected["z"]["conditions"]
    unconditional = {(True,)}
    assert aconds == unconditional, aconds
    assert zconds == unconditional, zconds
示例#11
0
def test_remove_allpaths_int():
    """Both branches of the choice use ``z``, so ``z`` must be unconditional.

    After ``expr_to_config`` the recorded conditions for ``a`` and ``z`` are
    each expected to be exactly ``{(True,)}``.
    """
    int_node = hp.uniformint('z', 0, 10)
    choice_node = hp.choice('a', [int_node + 1, int_node - 1])
    config_map = {}
    expr_to_config(choice_node, (True,), config_map)
    a_conditions = config_map['a']['conditions']
    z_conditions = config_map['z']['conditions']
    always = {(True,)}
    assert a_conditions == always, a_conditions
    assert z_conditions == always, z_conditions
def test_remove_allpaths_int():
    """Both branches of the choice use ``z``, so ``z`` must be unconditional.

    After ``expr_to_config`` the recorded conditions for ``a`` and ``z`` are
    each expected to be exactly ``{(True,)}``.
    """
    z_expr = hp.uniformint('z', 0, 10)
    a_expr = hp.choice('a', [z_expr + 1, z_expr - 1])
    found = {}
    expr_to_config(a_expr, (True,), found)
    conds_for_a = found['a']['conditions']
    conds_for_z = found['z']['conditions']
    only_true = {(True,)}
    assert conds_for_a == only_true, conds_for_a
    assert conds_for_z == only_true, conds_for_z
示例#13
0
    def full_hyper_space(self):
        """Return the parent's hyperparameter space extended with TF-IDF options.

        Adds ``"fex_stop_words"`` to the choices and both ``"fex_ngram_max"``
        and ``"fex_stop_words"`` to the space.

        Returns:
            tuple: ``(hyper_space, hyper_choices)``.
        """
        from hyperopt import hp

        space, choices = super(Tfidf, self).full_hyper_space()
        choices.update({"fex_stop_words": ["english", "none"]})

        ngram_max = hp.uniformint("fex_ngram_max", 1, 3)
        stop_words = hp.choice('fex_stop_words', choices["fex_stop_words"])
        space.update({
            "fex_ngram_max": ngram_max,
            "fex_stop_words": stop_words,
        })
        return space, choices
示例#14
0
def run_random_forest(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Tuple[Any, Any]:
    """Train a random forest classifier and optionally score it on test data.

    When ``config["find_optimal_model"]`` is truthy the hyperparameters come
    from ``hyperopt_search``; otherwise they are read from
    ``config["models"]["random_forest"]``. Both branches return a pipeline
    fitted on the training data.

    Args:
        X_train: Training features.
        X_test: Held-out features, only used when ``config["test"]`` is truthy.
        y_train: Training targets.
        y_test: Held-out targets, only used when ``config["test"]`` is truthy.
        config: Run configuration (``"find_optimal_model"``, ``"models"``,
            ``"test"``).

    Returns:
        ``(model, y_pred)`` where ``y_pred`` holds the test predictions, or
        ``None`` when ``config["test"]`` is falsy.
    """
    LOGGER.info("Running Random Forest model..")
    if config["find_optimal_model"]:
        search_space = {
            "type": "random_forest",
            "max_depth": hp.uniformint("max_depth", 2, 30),
            "n_estimators": hp.uniformint("n_estimators", 10, 1000),
            "max_features": hp.choice("max_features", ("auto", "sqrt", None)),
        }

        best_params = hyperopt_search(X_train, y_train, search_space, config)
        # Fit here as the preset branch does; previously this branch returned
        # (and predicted with) a pipeline that was never fitted.
        model = make_pipeline(RandomForestClassifier(**best_params)).fit(
            X_train, y_train)

        mean_cross_val_score = cross_validate_model(model, X_train, y_train)
        LOGGER.info(
            f"Random Forest classifier cross validation score: {mean_cross_val_score}"
        )

    else:
        model = make_pipeline(
            RandomForestClassifier(**config["models"]["random_forest"])).fit(
                X_train, y_train)

    if not config["test"]:
        return model, None

    # Predict once and reuse the result for both the report and the score.
    y_pred = model.predict(X_test)

    if not config["find_optimal_model"]:
        # The report is only printed for the preset-parameter branch.
        # classification_report expects (y_true, y_pred).
        print(classification_report(y_test, y_pred))

    score = accuracy_score(y_pred=y_pred, y_true=y_test)

    # The score is computed on X_test, so it is a test accuracy.
    LOGGER.info(f"Random forest model has a test accuracy of {score}")

    return model, y_pred
示例#15
0
def tune_custom_model_a_hyperparameters(
        episodes_folder: str,
        save_folder: str,
        potential_training_file_nbs: List[int],
        potential_validation_file_nbs: List[int],
        cpickled_trials_path: str = None):
    """Run (or resume) hyperparameter tuning, saving the trials after each run.

    cpickled_trials_path can be used to resume the tuning. By default it will
    be in the save_folder and have the file name Trials.xz (as we use
    lzma-compression with compress-pickle).

    Tuning continues until TUNING_NB_RUNS runs are recorded, then the last
    result of fmin and the best trial's loss are printed.
    """
    if cpickled_trials_path is None:
        cpickled_trials_path = os.path.join(save_folder, "Trials.xz")

    # Resume from a previous session when a trials file already exists.
    if os.path.exists(cpickled_trials_path):
        with open(cpickled_trials_path, 'rb') as trials_file:
            trials = compress_pickle.load(trials_file, compression="lzma")
        completed_runs = len(trials.trials)
    else:
        trials = Trials()
        completed_runs = 0

    log_lr_low, log_lr_high = math.log(10**-5), math.log(10**-3)
    log_reg_low, log_reg_high = math.log(10**-4), math.log(10**-2)
    search_space = (
        hp.loguniform('learning_rate', log_lr_low, log_lr_high),
        hp.loguniform('regularization_strength', log_reg_low, log_reg_high),
        hp.uniformint('nb_frames_to_stack', 2, 25),
        # Single-option choices: not really choices, just a way to pass more
        # parameters through to the objective.
        hp.choice('episodes_folder', [episodes_folder]),
        hp.choice('save_folder', [save_folder]),
        hp.choice('potential_training_file_nbs',
                  [potential_training_file_nbs]),
        hp.choice('potential_validation_file_nbs',
                  [potential_validation_file_nbs]),
    )

    best_hyperparameters = None
    while completed_runs < TUNING_NB_RUNS:
        # Ask for exactly one more evaluation than is already recorded
        # (Note: messes with the progress bar).
        completed_runs += 1  # == len(trials.trials) once fmin returns
        best_hyperparameters = fmin(
            tune_model_a,
            space=search_space,
            algo=tpe.suggest,
            max_evals=completed_runs,
            trials=trials)

        # Save after every tuning run
        with open(cpickled_trials_path, "wb") as trials_file:
            compress_pickle.dump(trials, trials_file, compression="lzma")

    print(best_hyperparameters)
    print(trials.best_trial["result"]["loss"])
示例#16
0
class BisectingKMeansOptimizationJob(HyperparameterOptimizationJob):
    """Hyperparameter optimization job that clusters URL vectors with bisecting k-means.

    The search space samples ``k``, ``distance_measure``, ``window_size`` and
    ``s``; ``get_clusters`` itself only reads ``k`` and ``distance_measure``.
    """

    app_name: str = "BisectingKMeansOptimizationJob"

    # Single-option choice: the sampled value is the tuple
    # ('bisecting_kmeans', <parameter dict>).
    search_space: Apply = hp.choice('model', [('bisecting_kmeans', {
        'k': hp.uniformint('k', 4, 20),
        'distance_measure': hp.choice("distance_measure",
                                      ['euclidean', 'cosine']),
        'window_size': hp.uniformint('window_size', 2, 5),
        's': hp.uniform('s', .5, 5),
    })])

    def __init__(self, get_cluster_score: Callable[[DataFrame], float] = None,
                 additional_weight_function: Callable[[int], float] = lambda e: 1.0) -> None:
        """Store the optional scoring and weighting callables.

        Args:
            get_cluster_score: When truthy, replaces the ``get_cluster_score``
                attribute on this instance.
            additional_weight_function: Callable taking an int and returning
                a float; defaults to a constant ``1.0``.
        """
        self.additional_weight_function = additional_weight_function
        if get_cluster_score:
            self.get_cluster_score = get_cluster_score

    def get_clusters(self, parameters: dict, urls_and_vectors: DataFrame) -> DataFrame:
        """Fit a bisecting k-means model and return the transformed frame.

        The estimator reads features from the ``"vector"`` column and writes
        predictions to ``"cluster_id"``.

        Args:
            parameters: Sampled hyperparameters; ``'k'`` and
                ``'distance_measure'`` are read.
            urls_and_vectors: Input DataFrame, cached for the duration of the
                call.

        Returns:
            The DataFrame produced by ``model.transform``.
        """
        cached_vectors = urls_and_vectors.cache()
        try:
            bisecting_kmeans = (
                BisectingKMeans()
                .setK(parameters['k'])
                .setDistanceMeasure(parameters['distance_measure'])
                .setFeaturesCol("vector")
                .setPredictionCol("cluster_id"))
            model = bisecting_kmeans.fit(cached_vectors)
            return model.transform(cached_vectors)
        finally:
            # Release the cache even when fitting or transforming fails, so a
            # failed trial does not leave the DataFrame pinned.
            cached_vectors.unpersist()
示例#17
0
 def _get_advisor(self):
     """Fill ``self._advisor`` with one hyperopt expression per train parameter.

     Entries of ``self._train_params`` whose first element is ``'choice'``
     become ``hp.choice`` over the second element. Other entries become
     ``hp.uniformint`` when the model class reports the parameter type as
     ``'int'`` and ``hp.uniform`` otherwise, bounded by the first two items
     of the second element. Any exception is logged and not re-raised.
     """
     logger.log(logging.INFO, 'Start Getting Train job Advisor')
     try:
         param_types = self._model_class.get_param_type()
         for param_key, param_value_list in self._train_params.items():
             if param_value_list[0] == 'choice':
                 self._advisor[param_key] = hp.choice(
                     param_key, param_value_list[1])
                 continue

             if param_types.get(param_key) == 'int':
                 sampler = hp.uniformint
             else:
                 sampler = hp.uniform
             self._advisor[param_key] = sampler(
                 param_key, param_value_list[1][0], param_value_list[1][1])
         logger.log(logging.INFO, 'Finish Getting Train job Advisor')
     except Exception as e:
         logger.log(logging.ERROR, repr(e))
示例#18
0
    'promotion_category': str,
    'marketing_campaign': str,
    'mouse_y': str,
    'marketing_channel': str,
    'marketing_creative_sub': str,
    'site_level': str,
    'acquired_date': str
}


# Hyperopt search space for the preprocessing steps. Every key doubles as the
# hyperopt label of the expression it maps to. The 'yes'/'no' entries toggle
# the log and category-combining steps; the two percentile entries are
# integers sampled from 1 to 100.
ENGINEERING_PARAM_GRID = {
    'preprocessor__numeric_transformer__log_creator__take_log': hp.choice(
        'preprocessor__numeric_transformer__log_creator__take_log', ['yes', 'no']),
    'preprocessor__categorical_transformer__category_combiner__combine_categories': hp.choice(
        'preprocessor__categorical_transformer__category_combiner__combine_categories', ['yes', 'no']),
    'preprocessor__categorical_transformer__feature_selector__percentile': hp.uniformint(
        'preprocessor__categorical_transformer__feature_selector__percentile', 1, 100),
    'preprocessor__numeric_transformer__feature_selector__percentile': hp.uniformint(
        'preprocessor__numeric_transformer__feature_selector__percentile', 1, 100),
}

# Hyperopt search space for the forest model's base estimator; keys and
# hyperopt labels are identical.
FOREST_PARAM_GRID = {
    'model__base_estimator__max_depth': hp.uniformint('model__base_estimator__max_depth', 3, 16),
    'model__base_estimator__min_samples_leaf': hp.uniform('model__base_estimator__min_samples_leaf', 0.001, 0.01),
    'model__base_estimator__max_features': hp.choice('model__base_estimator__max_features', ['log2', 'sqrt']),
}

XGBOOST_PARAM_GRID = {
    'model__base_estimator__learning_rate': hp.uniform('model__base_estimator__learning_ratee', 0.01, 0.5),
    'model__base_estimator__n_estimators': hp.randint('model__base_estimator__n_estimators', 75, 150),
    'model__base_estimator__max_depth': hp.randint('model__base_estimator__max_depth', 3, 16),
    'model__base_estimator__min_child_weight': hp.uniformint('model__base_estimator__min_child_weight', 2, 16),
示例#19
0
            'logsig': True,
            'sig_depth': 2,
            'add_time': True,
            "use_timestamps": True,
            "t_max": hp.uniform('t_max', 0, 1), # not to explode activations
            "t_scale": hp.uniform('t_scale', 86400, 604800), # days and weeks
            'leadlag': True,
            "split_paths": False,
            "min_count": 5,
            "batch_size": 128,
            "d_embedding": scope.int(hp.quniform('d_embedding', 16, 64, 1)),
            "epochs": 20,
            "lr": hp.loguniform('lr', np.log(1e-5), np.log(1e-2)),
            "wd": hp.loguniform('wd', np.log(1e-7), np.log(1e-4)),
            "patience": 5,
            "feedforward_num_layers": hp.uniformint('feedforward_num_layers', 1, 2),
            "embedding_dropout_p": 0,
            "verbose": True,
            # "testing_subsample_size": 1000
            "feedforward_hidden_dims": scope.int(hp.quniform('feedforward_hidden_dims', 32, 256, 4)),
            "feedforward_activations": "relu",
            "feedforward_dropout": hp.uniform('feedforward_dropout', 0, 0.7),
            "training_proportion": p,
            "evaluate_on_test": False
        }

        space.update({
            "name": "{name}_{version}_logsig{logsig}_sigdepth{sig_depth}_leadlag{leadlag}_addtime_{add_time}_timestamps{use_timestamps}_allcode{all_code_types}_trainprop{training_proportion}".format_map(
                space)})

        # Set mongo trail name
示例#20
0
    kf = StratifiedShuffleSplit(10, test_size=0.5, random_state=42)
    ep_fake = np.empty((288, 22, 500))
    lb_fake = np.r_[1 * np.ones(int(len(ep_fake) / 2)),
                    2 * np.ones(int(len(ep_fake) / 2))]
    for idx_search, idx_test in kf.split(ep_fake, lb_fake):

        # print('Search:', idx_search, '\n\n')
        # print('Test:', idx_test, '\n\n')

        ACC = []
        for suj in subjects:
            for class_ids in classes:
                H = H.iloc[0:0]  # cleaning df
                print(f'###### {suj} {class_ids} ######')
                fl_ = hp.uniformint("fl", 0, 15)
                fh_ = hp.uniformint("fh", 10, 25)
                space = (
                    {
                        "fl": fl_
                    },
                    {
                        "fh": (fh_ + fl_)
                    },  # fl_ + 20, # hp.uniform("fh", (10 + fl_), 40),
                    hp.quniform('tmin', 0, 2, 0.5),
                    hp.quniform('tmax', 2, 4, 0.5),
                    # hp.quniform('ncomp', 2, 10, 2),
                    hp.choice('ncomp', [2, 4, 6, 8, 22]),
                    hp.uniformint('nbands', 1, 25),
                    # hp.quniform('reg', -5, 0, 1),
                    hp.pchoice(
from ray.tune.suggest.hyperopt import HyperOptSearch

from tune_demo.train import get_iris_data, rf_cv


# %%
def eval_model(config):
    """Score ``config`` five times with ``rf_cv`` on the iris data, logging each result.

    Each score is reported to Tune under the ``acc`` key.
    """
    features, labels = get_iris_data()
    for _ in range(5):
        tune.track.log(acc=rf_cv(config, features, labels))


# %%
# Search space: two integer hyperparameters, each labelled with its own key.
space = {
    "max_depth": hp.uniformint("max_depth", 1, 20),
    "n_estimators": hp.uniformint("n_estimators", 10, 1000),
}

# HyperOpt-backed search algorithm over `space`, tracking the "acc" metric.
# NOTE(review): no `mode` is passed here — confirm the default direction is
# the intended one for "acc".
hyperopt_search = HyperOptSearch(
    space=space,
    max_concurrent=4,
    metric="acc",
    gamma=0.2,
)

analysis = tune.run(
    eval_model,
    num_samples=50,
    scheduler=ASHAScheduler(metric="acc", mode="max"),
    search_alg=hyperopt_search,
示例#22
0
        'clf_details', 'as_train', 'as_test', 'sb_dft', 'sb_iir', 'cla_dft',
        'cla_iir'
    ]
    R = pd.DataFrame(columns=header)

    ##%% ###########################################################################
    for suj in subjects:
        sname = 'A0' + str(suj) + ''
        data, events, info = np.load('/mnt/dados/eeg_data/IV2a/npy/' + sname +
                                     '.npy',
                                     allow_pickle=True)
        for class_ids in classes:
            # data, events, info = np.load('/mnt/dados/eeg_data/IV2a/npy/'+sname+'T.npy', allow_pickle=True)
            print(f'###### {suj} {class_ids} ######')
            space = (
                hp.uniformint('fl', 0, 20),
                hp.uniformint('fh', 21, 50),
                hp.quniform('tmin', 0, 2, 0.5),
                hp.quniform('tmax', 2, 4, 0.5),
                hp.quniform('ncomp', 2, 22, 2),
                hp.uniformint('nbands', 1, 50),  #
                hp.choice('clf', [{
                    'model': 'LDA'
                }, {
                    'model':
                    'SVM',
                    'C':
                    hp.quniform('C', -8, 0, 1),
                    'kernel':
                    hp.choice('kernel', [{
                        'kf': 'linear'
def optimization_space():
    """Return the hyperopt search space for the bot parameters.

    Every parameter is an ``hp.uniformint`` labelled with its own key, with a
    lower argument of 1 and the upper argument listed in the table below.
    """
    # (parameter name, upper argument passed to hp.uniformint)
    upper_bounds = (
        ('torch_health', 10),
        ('torch_dmg', 10),
        # ('torch_weight', 10),
        ('torch_torch_range', 4),
        ('torch_duration', 6),
        ('torch_cooldown', 6),
        ('torch_ticks_between_moves', 6),
        # SawBot parameters
        ('saw_health', 10),
        ('saw_dmg_min', 10),
        ('saw_dmg_max', 10),
        # ('saw_weight', 10),
        ('saw_duration', 6),
        ('saw_cooldown', 6),
        ('saw_ticks_between_moves', 6),
        # NailBot parameters
        ('nail_health', 10),
        ('nail_dmg', 10),
        # ('nail_weight', 10),
        ('nail_cooldown', 6),
        ('nail_ticks_between_moves', 6),
    )
    return {
        name: hp.uniformint(name, 1, upper)
        for name, upper in upper_bounds
    }
示例#24
0
                cpahis[53:]
            ]))) * 2.64  # rt in different period

    return r1, report5Q, cpahis, periodRt


def SimulationScenarioOrigin():
    """Run the baseline model 500 times and summarise the runs as 'originalResult'.

    Uses the module-level ``op`` and ``auxdata`` and a fixed configuration
    list; the summary values are unpacked but not returned.
    """
    config = [0, True, False, 0, False, 0, True]
    result = [
        SimulateBaselineModel(op, auxdata, config, TargetDay=81)
        for _ in range(0, 500)
    ]
    r1, report5Q, S_cHIS, periodRt = CreateSummary(result, 'originalResult')


# Hyperopt search space for the model parameters: two integer parameters
# sampled with bounds 1 and 10, the rest continuous uniform ranges.
# NOTE(review): 'baseline' and 'socialconnection' use the hyperopt labels 'x'
# and 'y' rather than their keys — confirm whether downstream code reads the
# fmin result by label before renaming them.
space = {
    'inik': hp.uniformint('inik', 1, 10),
    'iniexp': hp.uniformint('iniexp', 1, 10),
    'baseline': hp.uniform('x', 0.03, 0.07),  # baseline
    'socialconnection': hp.uniform('y', 0.3, 0.7),  # socialconnection
    'gamma1': hp.uniform('gamma1', 1 / 14, 1 / 9),  # gamma1
    'gamma2': hp.uniform('gamma2', 1 / 14, 1 / 6),  # gamma2
    'gamma3': hp.uniform('gamma3', 1 / 6, 1 / 4),  # gamma3
    'gamma4': hp.uniform('gamma4', 1 / 5, 1 / 3),  # gamma4
    'gamma5': hp.uniform('gamma5', 1 / 4, 1 / 2)  # gamma5
}

auxdata = list(GetLaiTestData(fed))

if OptimizeTheConfig:
    config = [0, True, False, 0, False, 0, True]
    best = fmin(fn=lambda x: BatchEvaluationBaselineModel(x, auxdata, config),
    previsoes[rodada] = {}
    resultados = []
    resultado = {}
    for clf in classificadores:
        neg_pos_rate = 20
        # espaco de busca
        if type(clf).__name__ == 'XGBClassifier':

            X_train = X_train_tree.copy()
            y_train = y_train_tree.copy()
            X_test = X_test_tree.copy()
            y_test = y_test_tree.copy()

            space = {
                'n_estimators': hp.uniformint('n_estimators', 50, 250),
                'max_depth': hp.uniformint('max_depth', 1, 14),
                'learning_rate': hp.loguniform('learning_rate', -5, 0),
                'min_child_weight': hp.uniformint('min_child_weight', 1, 10),
                'subsample': hp.uniform('subsample', 0.7, 1.0),
                'gamma': hp.uniform('gamma', 0.5, 1.2),
                'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 1.0),
                'alpha': hp.uniformint('alpha', 1, 2),
                'lambda': hp.uniform('lambda', 1.0, 2.0),
                'scale_pos_weight': neg_pos_rate
            }
            func = xgb_cv

        if type(clf).__name__ == 'LogisticRegression':

            X_train = X_train_linear.copy()
示例#26
0
    logger.debug(f"Running using params:\n{fullParams}")

    return -1 * run(fullParams)


if __name__ == "__main__":
    # Positional command-line arguments, in order:
    # templates directory, parameters file, device, device index.
    templatesDir = sys.argv[1]
    paramsFile = sys.argv[2]
    device = sys.argv[3]
    deviceIdx = sys.argv[4]

    # used for loguniform
    c = log(10.0)

    space = {
        "basisSize": hp.uniformint("basisSize", 3, 10),
        "embeddingSize": hp.uniformint("embeddingSize", 500, 5000),
        "l1_residuCoeff": hp.uniform("l1_residuCoeff", 0.1, 10),
    }

    trials = Trials()

    # Pass `trials` to fmin so the evaluations are recorded in it; it was
    # previously created but never handed over, so the debug log below always
    # showed an empty Trials object.
    best = fmin(objective,
                space,
                algo=tpe.suggest,
                catch_eval_exceptions=True,
                max_evals=200,
                trials=trials)
    logger.info(f"Best configuration:\n{best}")
    logger.debug(f"Trials:\n{trials}")
示例#27
0
    def svm_using_jda_feature(cls):
        """Tune an SVM trained on JDA-transformed features with hyperopt (TPE).

        Each trial loads JDA data from ``cls.loader`` using the sampled JDA
        kernel, ``lamb`` and ``T``, fits a StandardScaler + SVC pipeline on
        the source data and predicts the target data. The minimised loss is
        ``mmd(Xs_new, Xt_new)`` minus the macro-average recall. The best
        parameters and the best trial are printed after 100 evaluations.
        """
        # Search space: SVM regularisation and kernel, JDA kernel, lambda and
        # iteration count. Kernel-specific parameters live inside each choice.
        params = utils.ConfigDict(
            C=hp.uniform('C', 0.001, 100),
            svm_kernel=hp.choice('svm_kernel', [
                {
                    'name' : 'rbf',
                    'gamma': hp.uniform('rbf_gamma_uniform', 0.001, 10)
                },
                {
                    'name': 'linear',
                },
                {
                    'name' : 'sigmoid',
                    'gamma': hp.uniform('sigmoid_gamma_uniform', 0.001, 100),
                    'coef0': hp.uniform('sigmoid_coef0', 0, 10)
                },
                # {
                #     'name'  : 'poly',
                #     'gamma' : hp.uniform('poly_gamma_uniform', 0.001, 100),
                #     'coef0' : hp.uniform('poly_coef0', 0, 10),
                #     'degree': hp.uniformint('poly_degree', 2, 3),
                # }
            ]),
            jda_kernel=hp.choice('jda_kernel', [
                {
                    'name' : 'primal',
                    'gamma': 1,
                },
                {
                    'name' : 'linear',
                    'gamma': 1,
                },
                {
                    'name' : 'rbf',
                    'gamma': hp.uniform('jda_gamma', 0.001, 10)
                }
            ]),
            lamb=hp.uniform('jda_lamb', 0.001, 1),
            T=hp.uniformint('jda_iterations', 1, 20),
        )

        def choose_best_params(config):
            """Objective for fmin: evaluate one sampled configuration.

            Returns a dict with ``'loss'`` (MMD distance minus macro-average
            recall) and ``'status'``.
            """
            print('\n---------------------------------------------------------------------------------------')
            print('Params: ')
            print(config)
            jda_kernel_config = config['jda_kernel']
            # pop() removes 'name' in place so the remaining entries can be
            # forwarded as keyword arguments.
            jda_kernel = jda_kernel_config.pop('name')
            Xs_new, Ys_new, Xt_new, Yt_new = cls.loader.load_jda_data(cls.Xs, cls.Ys, cls.Xt, cls.Yt,
                                                                      kernel=jda_kernel,
                                                                      **jda_kernel_config,
                                                                      lamb=config['lamb'],
                                                                      T=config['T'])
            svm_kernel_config = config['svm_kernel']
            # Same pattern: what is left after popping 'name' goes to SVC.
            svm_kernel = svm_kernel_config.pop('name')
            C = config['C']

            # uar
            clf = make_pipeline(StandardScaler(),
                                SVC(kernel=svm_kernel, tol=0.001, random_state=666, shrinking=True, C=C,
                                    **svm_kernel_config))
            clf.fit(Xs_new, Ys_new)
            Yt_pred = clf.predict(Xt_new)
            distance = mmd(Xs_new, Xt_new)
            matrix = confusion_matrix(Yt_new, Yt_pred)
            report = classification_report(Yt_new, Yt_pred)
            print('Result:')
            print(matrix)
            print(report)
            print('---------------------------------------------------------------------------------------\n')
            # Second report as a dict, to read the macro-average recall.
            report_print = classification_report(Yt_new, Yt_pred, output_dict=True)
            uar = report_print['macro avg']['recall']
            return {
                # Lower is better: small source/target distance, high recall.
                'loss'  : distance - uar,
                'status': STATUS_OK
            }

        trials = Trials()
        best = fmin(fn=choose_best_params, space=params, algo=tpe.suggest, max_evals=100, trials=trials)

        print(best)
        print('best: ', trials.best_trial)
示例#28
0
 "leadlag":
 False,
 "batch_size":
 128,
 "verbose":
 True,
 "epochs":
 20,
 "lr":
 hp.loguniform('lr', np.log(1e-5), np.log(1e-1)),
 "wd":
 hp.loguniform('wd', np.log(1e-7), np.log(1e-2)),
 "hidden_rnn_sz":
 scope.int(hp.quniform('hidden_rnn_sz', 32, 128, 1)),
 "rnn_num_layers":
 hp.uniformint('rnn_num_layers', 1, 2),
 "patience":
 10,
 "rnn_dropout":
 hp.uniform('rnn_dropout', 0, 0.9),
 "feedforward_num_layers":
 hp.uniformint('feedforward_num_layers', 1, 3),
 "min_count":
 5,
 "testing_subsample_size":
 None,
 # "testing_subsample_size": 1000
 "feedforward_hidden_dims":
 scope.int(hp.quniform('feedforward_hidden_dims', 32, 256, 4)),
 "feedforward_activations":
 "relu",
示例#29
0
import argparse
import os
import pickle
import sys

import numpy as np
from hyperopt import hp, STATUS_OK, trials_from_docs, Trials, partial, tpe, fmin
from hyperopt.pyll import scope

from gym_locm.agents import MaxAttackBattleAgent, GreedyBattleAgent, MaxAttackDraftAgent
from gym_locm.toolbox.trainer import AsymmetricSelfPlay, model_builder_mlp, model_builder_lstm

# Hyperopt search space; every key is one tunable hyperparameter and the
# label passed to each ``hp.*`` call matches its key.
hyperparameter_space = dict(
    # Categorical choices.
    switch_freq=hp.choice('switch_freq', [10, 100, 1000]),
    # Integer ranges.
    layers=hp.uniformint('layers', 1, 3),
    neurons=hp.uniformint('neurons', 24, 256),
    activation=hp.choice('activation', ['tanh', 'relu', 'elu']),
    # Quantized draws cast to int via ``scope.int``.
    n_steps=scope.int(hp.quniform('n_steps', 30, 300, 30)),
    nminibatches=scope.int(hp.quniform('nminibatches', 1, 300, 1)),
    noptepochs=scope.int(hp.quniform('noptepochs', 3, 20, 1)),
    # Quantized draws left as floats (step 0.1 and 0.5 respectively).
    cliprange=hp.quniform('cliprange', 0.1, 0.3, 0.1),
    vf_coef=hp.quniform('vf_coef', 0.5, 1.0, 0.5),
    ent_coef=hp.uniform('ent_coef', 0, 0.01),
    # Log-uniform between 0.00005 and 0.01.
    learning_rate=hp.loguniform(
        'learning_rate', np.log(0.00005), np.log(0.01)),
)

# Module-level counter, starts at zero.
_counter = 0


def get_arg_parser():
def bo_tpe_lightgbm(X, y):
    """Tune a LightGBM regressor with hyperopt's TPE and score it on a test split.

    The data is split twice (20% test, then 25% of the remainder as
    validation), hyperparameters are searched with ``fmin`` using 3-fold
    cross-validated MSE on the training part as the loss, and a final model
    is fitted with early stopping against the validation part.

    Args:
        X: Feature data, passed straight to ``train_test_split``.
        y: Regression targets aligned with ``X``.

    Returns:
        A tuple ``(test_mse, elapsed, best_params)`` where ``test_mse`` is the
        mean squared error on the test split, ``elapsed`` is a
        ``datetime.timedelta`` covering search + final fit, and
        ``best_params`` is the tuned subset of hyperparameters as returned by
        ``space_eval`` (the fixed defaults are not included).
    """
    # Reference:
    # https://qiita.com/TomokIshii/items/3729c1b9c658cc48b5cb

    # Split twice so we end up with three partitions: test / train / validation.
    X_intermediate, X_test, y_intermediate, y_test = train_test_split(
        X, y, shuffle=True, test_size=0.2, random_state=1)

    # train/validation split (gives us train and validation sets)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_intermediate,
        y_intermediate,
        shuffle=False,
        test_size=0.25,
        random_state=1)

    # delete intermediate variables
    del X_intermediate, y_intermediate

    # Show how the samples were distributed across the three partitions.
    total = len(y)
    print('train: {}% | validation: {}% | test {}%'.format(
        round((len(y_train) / total) * 100, 2),
        round((len(y_validation) / total) * 100, 2),
        round((len(y_test) / total) * 100, 2)))

    starttime = datetime.datetime.now()

    # Hyperparameters explored by the search.
    space = {
        "lambda_l1": hp.uniform("lambda_l1", 1e-8, 1.0),
        "lambda_l2": hp.uniform("lambda_l2", 1e-8, 1.0),
        "min_child_samples": hp.uniformint("min_child_samples", 5, 100),
        'learning_rate': hp.uniform("learning_rate", 0.001, 0.5),
        "n_estimators": hp.uniformint("n_estimators", 10, 100),
        "num_leaves": hp.uniformint("num_leaves", 5, 35)
    }

    # Fixed settings shared by every candidate AND by the final model.
    # (n_estimators, num_leaves and learning_rate are tuned above instead.)
    default_params = {
        "random_state": 1,
        "objective": "regression",
        "boosting_type": "gbdt",
        "feature_fraction": 0.9,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
        "verbose": -1,
    }

    def objective(params):
        """Return the mean 3-fold cross-validated MSE for one sampled config."""
        # Merge into a new dict so the sample handed over by hyperopt is not
        # mutated; the fixed defaults win on any key collision.
        clf = lgb.LGBMRegressor(**{**params, **default_params})
        score = -np.mean(
            cross_val_score(clf,
                            X_train,
                            y_train,
                            cv=3,
                            n_jobs=-1,
                            scoring="neg_mean_squared_error"))
        return {'loss': score, 'status': STATUS_OK}

    trials_lgb = Trials()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        best = fmin(
            fn=objective,
            space=space,
            algo=tpe.suggest,
            # max_evals is the number of parameter sets tried; more sets may
            # improve the result but lengthen the search.
            max_evals=50,
            trials=trials_lgb)

    best_params = space_eval(space, best)

    # Fit the final model with the same fixed defaults the search used;
    # otherwise the tuned values would be applied to a differently
    # configured regressor than the one that was scored.
    # NOTE(review): `verbose` / `early_stopping_rounds` as fit() keywords are
    # not accepted by newer LightGBM releases, which expect callbacks
    # instead - confirm against the installed version.
    lgb_model = lgb.LGBMRegressor(**{**best_params, **default_params}).fit(
        X_train,
        y_train,
        eval_set=[(X_validation, y_validation)],
        verbose=-1,
        # With early stopping, training may finish before all n_estimators
        # boosting rounds have run.
        early_stopping_rounds=2)

    y_pred = lgb_model.predict(X_test)
    # scikit-learn's argument order is (y_true, y_pred).
    test_MSE_lgb = mean_squared_error(y_test, y_pred)
    print("LightGBM MSE score:%.4f" % test_MSE_lgb)
    endtime = datetime.datetime.now()
    process_time_lgb = endtime - starttime
    print("程序执行时间(秒):{}".format(process_time_lgb))
    print("最佳超参数值集合:", best_params)
    # NOTE(review): the tags say 'NGBoost' although a LightGBM model is being
    # saved - looks like a copy/paste leftover; confirm before renaming since
    # the tags may determine where the model is stored.
    save_model_object(lgb_model, 'BO-TPE', 'NGBoost', 'NGBoost')
    return test_MSE_lgb, process_time_lgb, best_params
示例#31
0
from tune_sklearn import TuneSearchCV
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from hyperopt import hp

# Load the scikit-learn digits dataset and pull out features and labels.
digits = datasets.load_digits()
X, y = digits.data, digits.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
)

# Search space for the random forest. Two entries are hyperopt integer
# ranges; ``min_weight_fraction_leaf`` is given as a plain tuple
# (presumably read as a (low, high) range by TuneSearchCV - confirm).
space = dict(
    n_estimators=hp.uniformint("n_estimators", 100, 200),
    min_weight_fraction_leaf=(0.0, 0.5),
    min_samples_leaf=hp.uniformint("min_samples_leaf", 1, 5),
)

# Run three hyperopt-driven trials over the space above.
tune_search = TuneSearchCV(
    RandomForestClassifier(),
    space,
    n_trials=3,
    search_optimization="hyperopt",
)
tune_search.fit(X_train, y_train)

# Report the per-trial results, then the winning parameter set.
print(tune_search.cv_results_)
print(tune_search.best_params_)