示例#1
0
    def testOptuna(self):
        """Round-trip an OptunaSearch through the ``_save``/``_restore`` helpers."""
        from ray.tune.suggest.optuna import OptunaSearch

        def fresh_searcher():
            # Both halves of the round trip use identically configured
            # searchers built from this test case's config and metric name.
            return OptunaSearch(
                space=self.config, metric=self.metric_name, mode="max")

        # Persist the state of one instance ...
        self._save(fresh_searcher())

        # ... and load it back into a brand-new instance.
        self._restore(fresh_searcher())
示例#2
0
    def testOptuna(self):
        """Exercise how OptunaSearch records already-evaluated points.

        Covers three things: points supplied at construction time via
        ``points_to_evaluate``/``evaluated_rewards``, points added afterwards
        with ``add_evaluated_point`` (plain, ``error=True`` and
        ``pruned=True``), and the ``TypeError`` expected when the search
        space is a define-by-run function.
        """
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.trial import TrialState

        # Points handed over at construction must already show up as trials.
        preloaded = OptunaSearch(
            space=self.space,
            metric="metric",
            mode="max",
            points_to_evaluate=[{self.param_name: self.valid_value}],
            evaluated_rewards=[1.0],
        )
        self.assertGreater(len(preloaded._ot_study.trials), 0)

        # A searcher without preloaded points starts with an empty study.
        searcher = OptunaSearch(space=self.space, metric="metric", mode="max")
        point = {self.param_name: self.valid_value}
        self.assertEqual(len(searcher._ot_study.trials), 0)

        # Each call appends exactly one trial; the extra flag decides the
        # state the newest trial must end up in.
        expectations = (
            ({}, TrialState.COMPLETE),
            ({"error": True}, TrialState.FAIL),
            ({"pruned": True}, TrialState.PRUNED),
        )
        for trial_count, (flags, state) in enumerate(expectations, start=1):
            searcher.add_evaluated_point(
                point, 1.0, intermediate_values=[0.8, 0.9], **flags)
            self.assertEqual(len(searcher._ot_study.trials), trial_count)
            self.assertTrue(searcher._ot_study.trials[-1].state == state)

        def dbr_space(trial):
            # Define-by-run space: the parameter is sampled from the trial.
            sampled = trial.suggest_float(self.param_name, 0.0, 5.0)
            return {self.param_name: sampled}

        dbr_searcher = OptunaSearch(space=dbr_space, metric="metric", mode="max")
        # Adding evaluated points is expected to be rejected in this mode.
        with self.assertRaises(TypeError):
            dbr_searcher.add_evaluated_point(point, 1.0)
示例#3
0
    def testConvertOptuna(self):
        """Check that a Tune search space converts to an equivalent Optuna one.

        A converted Tune config and a hand-written list of Optuna ``param``
        suggestions are sampled with identically seeded samplers and must
        yield the same suggestion. The test then runs Tune end to end with
        the unconverted config, and finally passes a plain dict of Tune
        domains directly as ``space``.
        """
        from ray.tune.suggest.optuna import OptunaSearch, param
        from optuna.samplers import RandomSampler

        tune_space = {
            "a": tune.sample.Categorical([2, 3, 4]).uniform(),
            "b": {
                "x": tune.sample.Integer(0, 5).quantized(2),
                "y": 4,
                "z": tune.sample.Float(1e-4, 1e-2).loguniform()
            }
        }
        from_tune = OptunaSearch.convert_search_space(tune_space)
        # Hand-written counterpart; nested keys are addressed as "b/x", "b/z".
        handwritten = [
            param.suggest_categorical("a", [2, 3, 4]),
            param.suggest_int("b/x", 0, 5, 2),
            param.suggest_loguniform("b/z", 1e-4, 1e-2)
        ]

        def seeded_searcher(space):
            # Same seed for every searcher so that suggestions are comparable.
            return OptunaSearch(
                space=space,
                sampler=RandomSampler(seed=1234),
                metric="a",
                mode="max")

        searcher1 = seeded_searcher(from_tune)
        searcher2 = seeded_searcher(handwritten)

        config1 = searcher1.suggest("0")
        config2 = searcher2.suggest("0")

        self.assertEqual(config1, config2)
        self.assertIn(config1["a"], [2, 3, 4])
        nested = config1["b"]
        self.assertIn(nested["x"], list(range(5)))
        self.assertLess(1e-4, nested["z"])
        self.assertLess(nested["z"], 1e-2)

        # End-to-end run: the searcher receives the Tune config via tune.run.
        analysis = tune.run(
            _mock_objective,
            config=tune_space,
            search_alg=OptunaSearch(metric="a", mode="max"),
            num_samples=1)
        first_trial = analysis.trials[0]
        assert first_trial.config["a"] in [2, 3, 4]

        # A flat dict of Tune domains passed directly as the search space.
        flat_space = {
            "a": tune.uniform(5, 6),
            "b": tune.uniform(8, 9)
        }
        flat_searcher = OptunaSearch(space=flat_space, metric="a", mode="max")
        suggestion = flat_searcher.suggest("0")
        self.assertTrue(5 <= suggestion["a"] <= 6)
        self.assertTrue(8 <= suggestion["b"] <= 9)
示例#4
0
def set_algorithm(experiment_name, config):
    '''
    Configure search algorithm.

    Builds the object selected by the module-level ``args.algorithm``. Every
    choice except 'random' and 'pbt' is additionally wrapped in a
    ``ConcurrencyLimiter`` capped at ``args.jobs`` concurrent trials.

    Args:
        experiment_name: Name given to the Ax experiment (only used for 'ax').
        config: Passed as ``parameters`` to the Ax experiment and as
            ``hyperparam_mutations`` to ``PopulationBasedTraining``; unused by
            the other choices.

    Returns:
        The configured object for the selected algorithm.

    Raises:
        ValueError: If ``args.algorithm`` is not one of 'hyperopt', 'ax',
            'nevergrad', 'optuna', 'pbt' or 'random'.
    '''
    choice = args.algorithm
    if choice == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif choice == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif choice == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif choice == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    elif choice == 'pbt':
        algorithm = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    elif choice == 'random':
        algorithm = BasicVariantGenerator(max_concurrent=args.jobs)
    else:
        # Previously an unknown name fell through and crashed below with an
        # UnboundLocalError on ``algorithm``; fail with a clear message instead.
        raise ValueError(
            'Unsupported search algorithm: {!r}'.format(choice))
    # 'random' already receives max_concurrent above and 'pbt' is returned
    # unwrapped; everything else is throttled here.
    if choice not in ['random', 'pbt']:
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
    return algorithm
示例#5
0
def tune_fn():
    """Run the "mlflow_gbdt" Tune experiment driven by Optuna.

    Selects the MLflow experiment named by the module-level
    ``experiment_name``, then launches 65 samples of ``objective`` with an
    ``OptunaSearch`` that maximizes "auroc". The result of ``tune.run`` is
    not returned.
    """
    mlflow.set_experiment(experiment_name=experiment_name)

    # Only the Optuna searcher is passed to tune.run. The AxSearch instance
    # that used to be constructed here was never used, so it has been removed.
    optuna_search = OptunaSearch(metric="auroc", mode="max")

    tune.run(objective,
             name="mlflow_gbdt",
             num_samples=65,
             config={
                 "num_leaves": tune.randint(5, 95),
                 "learning_rate": tune.loguniform(1e-4, 1.0),
                 "n_estimators": tune.randint(100, 100000),
                 "subsample": tune.loguniform(0.01, 1.0),
                 "subsample_freq": tune.randint(1, 5),
                 "objective": "binary",
                 "reg_alpha": tune.loguniform(1e-4, 1.0),
                 "reg_lambda": tune.loguniform(1e-4, 1.0),
                 "tree_learner": "feature",
                 "feature_sel": 0,
                 # Forwarded inside the trial config under the "mlflow" key.
                 "mlflow": {
                     "experiment_name": experiment_name,
                     "tracking_uri": mlflow.get_tracking_uri()
                 }
             },
             search_alg=optuna_search)
示例#6
0
    def testOptunaReportTooOften(self):
        """Results and completions reported after a trial completed must not raise."""
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.samplers import RandomSampler

        def report(iteration):
            # Every report carries the same metric value; only the
            # iteration counter advances.
            return {"training_iteration": iteration, "metric": 1}

        trial_id = "trial_1"
        searcher = OptunaSearch(
            sampler=RandomSampler(seed=1234),
            space=OptunaSearch.convert_search_space(self.config),
            metric="metric",
            mode="max",
        )

        # Regular lifecycle: suggest, one intermediate result, completion.
        searcher.suggest(trial_id)
        searcher.on_trial_result(trial_id, report(1))
        searcher.on_trial_complete(trial_id, report(2))

        # Report after complete should not fail
        searcher.on_trial_result(trial_id, report(3))

        # Neither should a second completion of the same trial.
        searcher.on_trial_complete(trial_id, report(4))
示例#7
0
    def set_basic_conf(self):
        """Return a seeded OptunaSearch over width/height and its cost function.

        Returns:
            tuple: ``(search_alg, cost)`` where ``search_alg`` minimizes
            "loss" and ``cost`` reports that loss for a sampled point.
        """
        from optuna.samplers import TPESampler

        # Search space expressed directly as Optuna parameter suggestions.
        dimensions = [
            ot_param.suggest_uniform("width", 0, 20),
            ot_param.suggest_uniform("height", -100, 100)
        ]

        def cost(space, reporter):
            height_term = (space["height"] - 14)**2
            width_term = abs(space["width"] - 3)
            reporter(loss=height_term - width_term)

        tpe = TPESampler(seed=10)
        return OptunaSearch(
            dimensions, sampler=tpe, metric="loss", mode="min"), cost
示例#8
0
def run_tune(method: str, num_samples: int) -> tune.ExperimentAnalysis:
    """Run ``trial`` under Optuna for the search space registered as ``method``.

    Args:
        method: Key into the module-level ``methods`` mapping of configs.
        num_samples: Number of samples passed to ``tune.run``.

    Returns:
        The analysis object returned by ``tune.run``.
    """
    searcher = OptunaSearch(metric="mean_accuracy", mode="max")
    run_options = dict(
        config=methods[method],
        num_samples=num_samples,
        search_alg=searcher,
        # resources_per_trial={"gpu": 1, "cpu": 16} was tried; per the
        # original author, using the GPU made the search a lot slower.
        verbose=1,
        # metric/mode are repeated here because, per the original author,
        # the results dataframe could not be obtained otherwise.
        metric="mean_accuracy",
        mode="max",
    )
    return tune.run(trial, **run_options)
示例#9
0
    def set_basic_conf(self):
        """Return a seeded OptunaSearch over a converted Tune space and its cost.

        Returns:
            tuple: ``(search_alg, cost)`` where ``search_alg`` minimizes
            "loss" and ``cost`` reports that loss for a sampled point.
        """
        from optuna.samplers import TPESampler

        # Tune domains are converted into an Optuna space up front.
        tune_domains = {
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100)
        }
        converted = OptunaSearch.convert_search_space(tune_domains)

        def cost(space, reporter):
            height_term = (space["height"] - 14)**2
            width_term = abs(space["width"] - 3)
            reporter(loss=height_term - width_term)

        tpe = TPESampler(seed=10)
        return OptunaSearch(
            converted, sampler=tpe, metric="loss", mode="min"), cost
示例#10
0
def init_search_algorithm(search_alg, metric=None, mode=None):
    """Build the searcher named by ``search_alg``; the backend must be pip-installed.

    'optuna' and 'bayesopt' return the corresponding Ray Tune searcher and
    require both ``metric`` and ``mode``. Any other value only logs a message
    and returns ``None``.
    See more details here: https://docs.ray.io/en/master/tune/api_docs/suggestion.html
    """
    # Guard clause: anything that is not a known backend is only logged.
    if search_alg not in ('optuna', 'bayesopt'):
        logging.info(f'{search_alg} search is found, run BasicVariantGenerator().')
        return None

    if search_alg == 'optuna':
        assert metric and mode, "Metric and mode cannot be None for optuna."
        # Imported lazily so the backend is only needed when selected.
        from ray.tune.suggest.optuna import OptunaSearch
        return OptunaSearch(metric=metric, mode=mode)

    assert metric and mode, "Metric and mode cannot be None for bayesian optimization."
    from ray.tune.suggest.bayesopt import BayesOptSearch
    return BayesOptSearch(metric=metric, mode=mode)
示例#11
0
    def testConvergenceOptuna(self):
        """A seeded OptunaSearch should bring the best ``x`` within 0.1 of zero."""
        from ray.tune.suggest.optuna import OptunaSearch

        np.random.seed(1)
        analysis = self._testConvergence(OptunaSearch(seed=1), top=5)

        # This check is much weaker than the one used for the BO case,
        # because TPE does not converge as tightly. Reaching this tolerance
        # with random search would still be unlikely
        # (5 * 0.1 = 0.5% chance).
        trial_count = len(analysis.trials)
        assert trial_count < 100
        best_x = analysis.best_config["x"]
        assert math.isclose(best_x, 0, abs_tol=1e-1)
示例#12
0
def run_optuna_tune(smoke_test=False):
    """Minimize "mean_loss" of ``easy_objective`` over a define-by-run space.

    Args:
        smoke_test (bool): Run 10 samples instead of 100 when true.
    """
    searcher = OptunaSearch(
        space=define_by_run_func, metric="mean_loss", mode="min")
    # At most four trials are allowed to run concurrently.
    limited_searcher = ConcurrencyLimiter(searcher, max_concurrent=4)
    hyperband = AsyncHyperBandScheduler()
    sample_count = 10 if smoke_test else 100

    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=limited_searcher,
        scheduler=hyperband,
        num_samples=sample_count,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
示例#13
0
    def testOptuna(self):
        """OptunaSearch must still pick a sane best trial for ``_invalid_objective``."""
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.samplers import RandomSampler

        np.random.seed(1000)  # At least one nan, inf, -inf and float

        searcher = OptunaSearch(sampler=RandomSampler(seed=1234))
        result = tune.run(
            _invalid_objective,
            search_alg=searcher,
            config=self.config,
            mode="max",
            num_samples=8,
            reuse_actors=False)

        winner = result.best_trial
        self.assertLessEqual(winner.config["report"], 2.0)
def run_optuna_tune(smoke_test=False):
    """Run a two-metric Optuna search: minimize "loss" and maximize "gain".

    Args:
        smoke_test (bool): Run 10 samples instead of 100 when true.
    """
    # One searcher handles both metrics; metric and mode lists pair up by index.
    searcher = OptunaSearch(metric=["loss", "gain"], mode=["min", "max"])
    limited_searcher = ConcurrencyLimiter(searcher, max_concurrent=4)
    sample_count = 10 if smoke_test else 100
    search_space = {
        "steps": 100,
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
        # This is an ignored parameter.
        "activation": tune.choice(["relu", "tanh"])
    }

    analysis = tune.run(
        easy_objective,
        search_alg=limited_searcher,
        num_samples=sample_count,
        config=search_space)

    # The best configuration is looked up separately for each metric.
    best_for_loss = analysis.get_best_config("loss", "min")
    print("Best hyperparameters for loss found were: ", best_for_loss)
    best_for_gain = analysis.get_best_config("gain", "max")
    print("Best hyperparameters for gain found were: ", best_for_gain)
示例#15
0
def init_search_algorithm(search_alg, metric=None, mode=None):
    """Build the requested search algorithm; its package must be pip-installed first.

    For any value other than 'optuna' or 'bayesopt' this function only logs a
    message and returns ``None``, leaving the default BasicVariantGenerator
    to be used.
    See more details here: https://docs.ray.io/en/master/tune/api_docs/suggestion.html

    Args:
        search_alg (str): One of 'basic_variant', 'bayesopt', or 'optuna'.
        metric (str): The metric the searcher optimizes; required for
            'optuna' and 'bayesopt'.
        mode (str): One of 'min' or 'max' to determine whether to minimize or
            maximize the metric; required for 'optuna' and 'bayesopt'.

    Returns:
        An ``OptunaSearch`` or ``BayesOptSearch`` instance, or ``None``.
    """
    wants_optuna = search_alg == 'optuna'
    wants_bayesopt = search_alg == 'bayesopt'

    if not (wants_optuna or wants_bayesopt):
        logging.info(f'{search_alg} search is found, run BasicVariantGenerator().')
        return None

    if wants_optuna:
        assert metric and mode, "Metric and mode cannot be None for optuna."
        # Lazy import: the backend is only required when it is selected.
        from ray.tune.suggest.optuna import OptunaSearch
        return OptunaSearch(metric=metric, mode=mode)

    assert metric and mode, "Metric and mode cannot be None for bayesian optimization."
    from ray.tune.suggest.bayesopt import BayesOptSearch
    return BayesOptSearch(metric=metric, mode=mode)
示例#16
0
def run_optuna_tune(smoke_test=False):
    """Minimize "mean_loss" of ``easy_objective`` with Optuna and async HyperBand.

    Args:
        smoke_test (bool): Run 10 samples instead of 100 when true.
    """
    # The searcher is created without metric/mode; both are given to tune.run.
    limited_searcher = ConcurrencyLimiter(OptunaSearch(), max_concurrent=4)
    hyperband = AsyncHyperBandScheduler()
    sample_count = 10 if smoke_test else 100
    search_space = {
        "steps": 100,
        "width": tune.uniform(0, 20),
        "height": tune.uniform(-100, 100),
        # This is an ignored parameter.
        "activation": tune.choice(["relu", "tanh"])
    }

    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=limited_searcher,
        scheduler=hyperband,
        num_samples=sample_count,
        config=search_space)

    print("Best hyperparameters found were: ", analysis.best_config)
示例#17
0
    def testOptuna(self):
        """Multi-objective OptunaSearch: each metric's best trial has config >= 0.8."""
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.samplers import RandomSampler

        np.random.seed(1000)

        searcher = OptunaSearch(
            sampler=RandomSampler(seed=1234),
            metric=["a", "b", "c"],
            mode=["max", "min", "max"],
        )
        out = tune.run(_multi_objective,
                       search_alg=searcher,
                       config=self.config,
                       num_samples=16,
                       reuse_actors=False)

        # For every metric, the best trial under that metric's mode must have
        # sampled a value of at least 0.8 for the same-named config key.
        for key, direction in (("a", "max"), ("b", "min"), ("c", "max")):
            winner = out.get_best_trial(key, direction)
            self.assertGreaterEqual(winner.config[key], 0.8)
示例#18
0
def _test_roberta(method='BlendSearch'):
    """Tune RoBERTa fine-tuning hyperparameters with the selected search method.

    Starts a local Ray runtime, runs ``ray.tune.run`` on ``train_roberta``
    under a one-hour time budget, shuts Ray down again, and logs the trial
    count, elapsed time, best ``HP_METRIC`` value and best configuration.

    Args:
        method (str): One of 'ASHA', 'BOHB', 'Optuna', 'CFO', 'BlendSearch',
            'Dragonfly', 'SkOpt', 'Nevergrad', 'ZOOpt', 'Ax' or 'HyperOpt'.

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    max_num_epoch = 100
    num_samples = -1
    time_budget_s = 3600

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-5, 3e-5),
        "weight_decay": flaml.tune.uniform(0, 0.3),
        "per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
        "seed": flaml.tune.choice([12, 22, 33, 42]),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    # Everything between init and shutdown is guarded so that a failing
    # import or run does not leave the Ray runtime initialized.
    try:
        if 'ASHA' == method:
            algo = None
        elif 'BOHB' == method:
            from ray.tune.schedulers import HyperBandForBOHB
            # The class is spelled TuneBOHB; the lowercase name used
            # previously made this import fail.
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB(max_concurrent=4)
            scheduler = HyperBandForBOHB(max_t=max_num_epoch)
        elif 'Optuna' == method:
            from ray.tune.suggest.optuna import OptunaSearch
            algo = OptunaSearch()
        elif 'CFO' == method:
            from flaml import CFO
            algo = CFO(points_to_evaluate=[{
                "num_train_epochs": 1,
                "per_device_train_batch_size": 128,
            }])
        elif 'BlendSearch' == method:
            from flaml import BlendSearch
            algo = BlendSearch(
                points_to_evaluate=[{
                    "num_train_epochs": 1,
                    "per_device_train_batch_size": 128,
                }])
        elif 'Dragonfly' == method:
            from ray.tune.suggest.dragonfly import DragonflySearch
            algo = DragonflySearch()
        elif 'SkOpt' == method:
            from ray.tune.suggest.skopt import SkOptSearch
            algo = SkOptSearch()
        elif 'Nevergrad' == method:
            from ray.tune.suggest.nevergrad import NevergradSearch
            import nevergrad as ng
            algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
        elif 'ZOOpt' == method:
            from ray.tune.suggest.zoopt import ZOOptSearch
            algo = ZOOptSearch(budget=num_samples)
        elif 'Ax' == method:
            from ray.tune.suggest.ax import AxSearch
            algo = AxSearch(max_concurrent=3)
        elif 'HyperOpt' == method:
            from ray.tune.suggest.hyperopt import HyperOptSearch
            algo = HyperOptSearch()
        else:
            # Previously an unknown name surfaced later as an unbound ``algo``.
            raise ValueError(f"Unknown search method: {method!r}")
        if method != 'BOHB':
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(max_t=max_num_epoch, grace_period=1)
        # NOTE(review): this unconditionally discards the scheduler chosen
        # above, so every method (including BOHB) runs without one. Kept
        # as-is because it may be deliberate -- confirm before removing.
        scheduler = None
        analysis = ray.tune.run(train_roberta,
                                metric=HP_METRIC,
                                mode=MODE,
                                resources_per_trial={
                                    "gpu": 4,
                                    "cpu": 4
                                },
                                config=search_space,
                                local_dir='logs/',
                                num_samples=num_samples,
                                time_budget_s=time_budget_s,
                                keep_checkpoints_num=1,
                                checkpoint_score_attr=HP_METRIC,
                                scheduler=scheduler,
                                search_alg=algo)
    finally:
        ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")
示例#19
0
def _test_distillbert(method='BlendSearch'):
    """Tune DistilBERT fine-tuning hyperparameters with the selected search method.

    Starts a local Ray runtime, runs ``ray.tune.run`` on ``train_distilbert``
    under a three-hour time budget, shuts Ray down again, and logs the trial
    count, elapsed time, best ``HP_METRIC`` value and best configuration.

    Args:
        method (str): One of 'ASHA', 'BOHB', 'Optuna', 'CFO', 'BlendSearch',
            'Dragonfly', 'SkOpt', 'Nevergrad', 'ZOOpt', 'Ax' or 'HyperOpt'.

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    max_num_epoch = 64
    num_samples = -1
    time_budget_s = 10800

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-6, 1e-4),
        "adam_beta1": flaml.tune.uniform(0.8, 0.99),
        "adam_beta2": flaml.tune.loguniform(98e-2, 9999e-4),
        "adam_epsilon": flaml.tune.loguniform(1e-9, 1e-7),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    # Everything between init and shutdown is guarded so that a failing
    # import or run does not leave the Ray runtime initialized.
    try:
        if 'ASHA' == method:
            algo = None
        elif 'BOHB' == method:
            from ray.tune.schedulers import HyperBandForBOHB
            # The class is spelled TuneBOHB; the lowercase name used
            # previously made this import fail.
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB(max_concurrent=4)
            scheduler = HyperBandForBOHB(max_t=max_num_epoch)
        elif 'Optuna' == method:
            from ray.tune.suggest.optuna import OptunaSearch
            algo = OptunaSearch()
        elif 'CFO' == method:
            from flaml import CFO
            algo = CFO(points_to_evaluate=[{
                "num_train_epochs": 1,
            }])
        elif 'BlendSearch' == method:
            from flaml import BlendSearch
            algo = BlendSearch(points_to_evaluate=[{
                "num_train_epochs": 1,
            }])
        elif 'Dragonfly' == method:
            from ray.tune.suggest.dragonfly import DragonflySearch
            algo = DragonflySearch()
        elif 'SkOpt' == method:
            from ray.tune.suggest.skopt import SkOptSearch
            algo = SkOptSearch()
        elif 'Nevergrad' == method:
            from ray.tune.suggest.nevergrad import NevergradSearch
            import nevergrad as ng
            algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
        elif 'ZOOpt' == method:
            from ray.tune.suggest.zoopt import ZOOptSearch
            algo = ZOOptSearch(budget=num_samples)
        elif 'Ax' == method:
            from ray.tune.suggest.ax import AxSearch
            algo = AxSearch()
        elif 'HyperOpt' == method:
            from ray.tune.suggest.hyperopt import HyperOptSearch
            algo = HyperOptSearch()
        else:
            # Previously an unknown name surfaced later as an unbound ``algo``.
            raise ValueError(f"Unknown search method: {method!r}")
        if method != 'BOHB':
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(max_t=max_num_epoch, grace_period=1)
        # NOTE(review): this unconditionally discards the scheduler chosen
        # above, so every method (including BOHB) runs without one. Kept
        # as-is because it may be deliberate -- confirm before removing.
        scheduler = None
        analysis = ray.tune.run(
            train_distilbert,
            metric=HP_METRIC,
            mode=MODE,
            # You can add "gpu": 1 to allocate GPUs
            resources_per_trial={"gpu": 1},
            config=search_space,
            local_dir='test/logs/',
            num_samples=num_samples,
            time_budget_s=time_budget_s,
            keep_checkpoints_num=1,
            checkpoint_score_attr=HP_METRIC,
            scheduler=scheduler,
            search_alg=algo)
    finally:
        ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")
示例#20
0
def _test_xgboost(method='BlendSearch'):
    """Tune XGBoost hyperparameters on ``train_breast_cancer`` with one method.

    Returns silently when Ray is not installed. Otherwise, for each
    configured sample count and CPU count, it starts a local Ray runtime,
    runs the search under a 60-second time budget, shuts Ray down again and
    logs the best trial's loss, accuracy and configuration.

    Args:
        method (str): 'BlendSearch' (run through ``flaml.tune``) or one of
            'ASHA', 'BOHB', 'Optuna', 'CFO', 'Dragonfly', 'SkOpt',
            'Nevergrad', 'ZOOpt', 'Ax', 'HyperOpt' (run through ``ray.tune``).

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    try:
        import ray
    except ImportError:
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        # NOTE(review): the upper bound differs by method; presumably these
        # three treat it as inclusive -- confirm.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = 60  # None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            # Guarded so that a failing import or run cannot leave the Ray
            # runtime initialized for the next iteration or test.
            try:
                if method == 'BlendSearch':
                    analysis = tune.run(
                        train_breast_cancer,
                        init_config={
                            "max_depth": 1,
                            "min_child_weight": 3,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                        metric="eval-logloss",
                        mode="min",
                        max_resource=max_iter,
                        min_resource=1,
                        report_intermediate_result=True,
                        # You can add "gpu": 0.1 to allocate GPUs
                        resources_per_trial={"cpu": 1},
                        config=search_space,
                        local_dir='logs/',
                        num_samples=num_samples*n_cpu,
                        time_budget_s=time_budget_s,
                        use_ray=True)
                else:
                    if 'ASHA' == method:
                        algo = None
                    elif 'BOHB' == method:
                        from ray.tune.schedulers import HyperBandForBOHB
                        from ray.tune.suggest.bohb import TuneBOHB
                        algo = TuneBOHB(max_concurrent=n_cpu)
                        scheduler = HyperBandForBOHB(max_t=max_iter)
                    elif 'Optuna' == method:
                        from ray.tune.suggest.optuna import OptunaSearch
                        algo = OptunaSearch()
                    elif 'CFO' == method:
                        from flaml import CFO
                        algo = CFO(points_to_evaluate=[{
                            "max_depth": 1,
                            "min_child_weight": 3,
                        }], cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        })
                    elif 'Dragonfly' == method:
                        from ray.tune.suggest.dragonfly import DragonflySearch
                        algo = DragonflySearch()
                    elif 'SkOpt' == method:
                        from ray.tune.suggest.skopt import SkOptSearch
                        algo = SkOptSearch()
                    elif 'Nevergrad' == method:
                        from ray.tune.suggest.nevergrad import NevergradSearch
                        import nevergrad as ng
                        algo = NevergradSearch(
                            optimizer=ng.optimizers.OnePlusOne)
                    elif 'ZOOpt' == method:
                        from ray.tune.suggest.zoopt import ZOOptSearch
                        algo = ZOOptSearch(budget=num_samples*n_cpu)
                    elif 'Ax' == method:
                        from ray.tune.suggest.ax import AxSearch
                        algo = AxSearch()
                    elif 'HyperOpt' == method:
                        from ray.tune.suggest.hyperopt import HyperOptSearch
                        algo = HyperOptSearch()
                    else:
                        # Previously an unknown name surfaced later as an
                        # unbound ``algo``.
                        raise ValueError(
                            f"Unknown search method: {method!r}")
                    # Every method except BOHB (which brings its own
                    # scheduler above) runs under ASHA.
                    if method != 'BOHB':
                        from ray.tune.schedulers import ASHAScheduler
                        scheduler = ASHAScheduler(
                            max_t=max_iter,
                            grace_period=1)
                    analysis = tune.run(
                        train_breast_cancer,
                        metric="eval-logloss",
                        mode="min",
                        # You can add "gpu": 0.1 to allocate GPUs
                        resources_per_trial={"cpu": 1},
                        config=search_space, local_dir='logs/',
                        num_samples=num_samples*n_cpu,
                        time_budget_s=time_budget_s,
                        scheduler=scheduler, search_alg=algo)
            finally:
                ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
示例#21
0
def cifar10_main(method='BlendSearch',
                 num_samples=10,
                 max_num_epochs=100,
                 gpus_per_trial=2):
    """Tune ``train_cifar`` with the chosen search method and log the result.

    Args:
        method (str): One of 'BlendSearch', 'ASHA', 'BOHB', 'Optuna', 'CFO'
            or 'Nevergrad'. 'BlendSearch' runs through ``flaml.tune``; every
            other method runs through ``ray.tune``.
        num_samples (int): Forwarded to ``tune.run`` as ``num_samples``.
        max_num_epochs (int): Upper bound of the ``num_epochs`` dimension and
            the maximum resource (``max_t`` / ``max_resource``) of the
            scheduler.
        gpus_per_trial (int): "gpu" entry of ``resources_per_trial``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    supported_methods = ('BlendSearch', 'ASHA', 'BOHB', 'Optuna', 'CFO',
                         'Nevergrad')
    if method not in supported_methods:
        # Fail fast: an unknown method used to leave ``algo`` unbound and only
        # crashed (UnboundLocalError) at ``tune.run``, after the download.
        raise ValueError(
            f"Unsupported method: {method!r}; "
            f"expected one of {supported_methods}")

    data_dir = os.path.abspath("test/data")
    load_data(data_dir)  # Download data for all trials before starting the run
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    # The two search spaces differ only in their upper bounds (one higher in
    # the second branch).
    if method in ['BlendSearch', 'BOHB', 'Optuna']:
        config = {
            "l1": tune.randint(2, 8),
            "l2": tune.randint(2, 8),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs, q=1),
            "batch_size": tune.randint(1, 4)  #tune.choice([2, 4, 8, 16])
        }
    else:
        config = {
            "l1": tune.randint(2, 9),
            "l2": tune.randint(2, 9),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs + 1, q=1),
            "batch_size": tune.randint(1, 5)  #tune.choice([2, 4, 8, 16])
        }
    import ray
    time_budget_s = 3600
    start_time = time.time()
    if method == 'BlendSearch':
        # flaml.tune.run: starts from a given initial config and uses
        # min_resource / max_resource instead of an explicit scheduler.
        result = tune.run(ray.tune.with_parameters(train_cifar,
                                                   data_dir=data_dir),
                          init_config={
                              "l1": 2,
                              "l2": 2,
                              "num_epochs": 1,
                              "batch_size": 4,
                          },
                          metric="loss",
                          mode="min",
                          max_resource=max_num_epochs,
                          min_resource=1,
                          report_intermediate_result=True,
                          resources_per_trial={
                              "cpu": 2,
                              "gpu": gpus_per_trial
                          },
                          config=config,
                          local_dir='logs/',
                          num_samples=num_samples,
                          time_budget_s=time_budget_s,
                          use_ray=True)
    else:
        if 'ASHA' == method:
            # No search algorithm: only the ASHA scheduler below is used.
            algo = None
        elif 'BOHB' == method:
            from ray.tune.schedulers import HyperBandForBOHB
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB()
            scheduler = HyperBandForBOHB(max_t=max_num_epochs)
        elif 'Optuna' == method:
            from ray.tune.suggest.optuna import OptunaSearch
            algo = OptunaSearch()
        elif 'CFO' == method:
            from flaml import CFO
            algo = CFO(points_to_evaluate=[{
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
                "batch_size": 4,
            }])
        elif 'Nevergrad' == method:
            from ray.tune.suggest.nevergrad import NevergradSearch
            import nevergrad as ng
            algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
        if method != 'BOHB':
            # BOHB brings its own scheduler; everything else uses ASHA.
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(max_t=max_num_epochs, grace_period=1)
        result = tune.run(tune.with_parameters(train_cifar, data_dir=data_dir),
                          resources_per_trial={
                              "cpu": 2,
                              "gpu": gpus_per_trial
                          },
                          config=config,
                          metric="loss",
                          mode="min",
                          num_samples=num_samples,
                          time_budget_s=time_budget_s,
                          scheduler=scheduler,
                          search_alg=algo)
    ray.shutdown()

    logger.info(f"method={method}")
    logger.info(f"n_samples={num_samples}")
    logger.info(f"time={time.time()-start_time}")
    best_trial = result.get_best_trial("loss", "min", "all")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_trial.metric_analysis["loss"]["min"]))
    logger.info("Best trial final validation accuracy: {}".format(
        best_trial.metric_analysis["accuracy"]["max"]))

    # Rebuild the network of the best trial; it is sized with 2**l1 / 2**l2.
    best_trained_model = Net(2**best_trial.config["l1"],
                             2**best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")

    # The checkpoint holds a (model_state, optimizer_state) pair; only the
    # model state is needed for evaluation.
    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    test_acc = _test_accuracy(best_trained_model, device)
    logger.info("Best trial test set accuracy: {}".format(test_acc))
示例#22
0
    def _tune_run(self, config, resources_per_trial):
        """Wrapper to call ``tune.run``.

        Multiple estimators (one clone per split) are generated when early
        stopping is configured, whereas a single estimator is used when it
        is not.

        Args:
            config (dict): Configurations such as hyperparameters to run
                ``tune.run`` on. It is mutated: ``"estimator_list"`` is
                added, and the search space is merged in when all parameter
                distributions are Tune domains.
            resources_per_trial (dict): Resources to use per trial within Ray.
                Accepted keys are `cpu`, `gpu` and custom resources, and values
                are integers specifying the number of each resource to use.

        Returns:
            analysis (`ExperimentAnalysis`): Object returned by
                `tune.run`.

        Raises:
            ValueError: If ``self.search_optimization`` is not a known
                search optimizer.

        """
        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)

        trainable = _Trainable
        if self.pipeline_auto_early_stop and check_is_pipeline(
                self.estimator) and self.early_stopping:
            trainable = _PipelineTrainable

        max_iter = self.max_iters
        if self.early_stopping is not None:
            config["estimator_list"] = [
                clone(self.estimator) for _ in range(self.n_splits)
            ]
            if hasattr(self.early_stopping, "_max_t_attr"):
                # we want to delegate stopping to schedulers which
                # support it, but we want it to stop eventually, just in case
                # the solution is to make the stop condition very big
                max_iter = self.max_iters * 10
        else:
            config["estimator_list"] = [self.estimator]

        stopper = MaximumIterationStopper(max_iter=max_iter)
        if self.stopper:
            stopper = CombinedStopper(stopper, self.stopper)

        run_args = dict(scheduler=self.early_stopping,
                        reuse_actors=True,
                        verbose=self.verbose,
                        stop=stopper,
                        num_samples=self.n_trials,
                        config=config,
                        fail_fast="raise",
                        resources_per_trial=resources_per_trial,
                        local_dir=os.path.expanduser(self.local_dir),
                        loggers=self.loggers,
                        time_budget_s=self.time_budget_s)

        if self.search_optimization == "random":
            if isinstance(self.param_distributions, list):
                search_algo = RandomListSearcher(self.param_distributions)
            else:
                search_algo = BasicVariantGenerator()
            run_args["search_alg"] = search_algo
        else:
            # When every distribution is already a Tune domain, the space is
            # passed through ``config`` and the searcher gets ``space=None``;
            # otherwise a library-specific space is built per optimizer.
            search_space = None
            override_search_space = True
            if self._is_param_distributions_all_tune_domains():
                run_args["config"].update(self.param_distributions)
                override_search_space = False

            search_kwargs = self.search_kwargs.copy()
            search_kwargs.update(metric=self._metric_name, mode="max")

            if self.search_optimization == "bayesian":
                from ray.tune.suggest.skopt import SkOptSearch
                if override_search_space:
                    search_space = self.param_distributions
                search_algo = SkOptSearch(space=search_space, **search_kwargs)
                run_args["search_alg"] = search_algo

            elif self.search_optimization == "bohb":
                from ray.tune.suggest.bohb import TuneBOHB
                if override_search_space:
                    search_space = self._get_bohb_config_space()
                # ``is not None`` (not truthiness) so that seed=0 also warns,
                # consistent with the seeding check at the top of the method.
                if self.seed is not None:
                    warnings.warn("'seed' is not implemented for BOHB.")
                # The seed is intentionally not forwarded to TuneBOHB.
                search_algo = TuneBOHB(space=search_space, **search_kwargs)
                run_args["search_alg"] = search_algo

            elif self.search_optimization == "optuna":
                from ray.tune.suggest.optuna import OptunaSearch
                from optuna.samplers import TPESampler
                sampler = TPESampler(seed=self.seed)
                if override_search_space:
                    search_space = self._get_optuna_params()
                search_algo = OptunaSearch(space=search_space,
                                           sampler=sampler,
                                           **search_kwargs)
                run_args["search_alg"] = search_algo

            elif self.search_optimization == "hyperopt":
                from ray.tune.suggest.hyperopt import HyperOptSearch
                if override_search_space:
                    search_space = self._get_hyperopt_params()
                search_algo = HyperOptSearch(space=search_space,
                                             random_state_seed=self.seed,
                                             **search_kwargs)
                run_args["search_alg"] = search_algo

            else:
                # This should not happen as we validate the input before
                # this method. Still, just to be sure, raise an error here.
                raise ValueError(
                    f"Invalid search optimizer: {self.search_optimization}")

        # Random search is never wrapped in a ConcurrencyLimiter.
        if isinstance(self.n_jobs, int) and self.n_jobs > 0 \
           and self.search_optimization != "random":
            search_algo = ConcurrencyLimiter(search_algo,
                                             max_concurrent=self.n_jobs)
            run_args["search_alg"] = search_algo

        with warnings.catch_warnings():
            # Silence the warning triggered by fail_fast="raise" above.
            warnings.filterwarnings("ignore",
                                    message="fail_fast='raise' "
                                    "detected.")
            analysis = tune.run(trainable, **run_args)
        return analysis
示例#23
0
    My_config.train_dataset_path = parser.parse_args().train_data_path
    My_config.validation_dataset_path = parser.parse_args().valid_data_path
    My_config.model = parser.parse_args().model
    My_config.train_batchsize
    for class_name in os.listdir(My_config.train_dataset_path):
        My_config.FolderNames2English_names[str(
            My_config.num_class)] = class_name
        My_config.num_class += 1
    My_config.class_num = My_config.num_class

    gpus_per_trial = 1
    sample_num = parser.parse_args().sample_num

    dim = random_dim_by_CompoundMethod(parser.parse_args().maxrange)

    algo = OptunaSearch(sampler=TPESampler())
    scheduler = AsyncHyperBandScheduler()

    search_space = {"dim": tune.choice(dim[0:len(dim)])}

    analysis = run(Train_Net,
                   name="Model_Scaling",
                   config=search_space,
                   num_samples=10,
                   metric="accuracy",
                   mode="max",
                   search_alg=algo,
                   scheduler=scheduler,
                   resources_per_trial={
                       "cpu": 48,
                       "gpu": gpus_per_trial
示例#24
0
def run_tuning(
    config: DictConfig,
    output_config_file: str,
    train_fn: Callable[[Dict[str, Any], Any, Any], None],
    config_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
    train: Any = None,
    val: Any = None,
):
    """Run hyperparameter tuning with ray tune and persist the best config.

    The search is driven by ``OptunaSearch``. Note that the ASHA scheduler is
    currently commented out in the ``tune.run`` call, so no scheduler is
    passed.

    Args:
        config (omegaconf.DictConfig): The parsed configuration.
        output_config_file (str): Path where the configuration of the best
            trial is dumped as YAML.
        train_fn (Callable[[Dict[str, Any], Any, Any], None]): Training
            function receiving the configuration as a plain dict plus the
            ``train`` and ``val`` keyword arguments. It is wrapped with
            ``tune.with_parameters`` to build the trainable.
        config_fn (Callable[[Dict[str, Any]], Dict[str, Any]]): Function that
            builds the search space by overriding entries of the input
            configuration dict.
        train (Dataset): Dataset handed to ``train_fn`` as ``train``.
        val (Dataset): Dataset handed to ``train_fn`` as ``val``.

    Returns:
        Dict[str, Any]: The configuration of the best trial, with the trial's
            last result stored under ``["tune"]["result"]``.

    Examples:
        Build a search space, define the training function, run the search::

            def configure_search_space(config):
                config["optimizer"] = tune.choice(["SGD", "Adam", "AdamW"])
                config["optim"]["lr"] = tune.loguniform(1e-4, 1e-1)
                config["optim"]["weight_decay"] = tune.loguniform(1e-4, 1e-1)
                config["data"]["batch_size"] = tune.choice([16, 32, 64, 128])
                return config

            def train_fn(config, train=None, val=None):
                # convert dict from ray tune to DictConfig
                config = OmegaConf.create(config)
                ldm = PLDataModuleFromDatasets(
                    train, val=val, seed=config.seed, no_test_set=True,
                    **config.data)
                model = Net(**config.model)
                optimizer = getattr(optim, config.optimizer)(
                    model.parameters(), **config.optim)
                criterion = nn.CrossEntropyLoss()
                lm = PLModule(
                    model, optimizer, criterion,
                    hparams=config,
                    # Logs train_acc and val_acc
                    metrics={"acc": FromLogits(
                        pl.metrics.classification.Accuracy())},
                )
                # map metrics from pl to ray tune
                metrics_map = {"accuracy": "val_acc",
                               "validation_loss": "val_loss"}
                trainer = make_trainer_for_ray_tune(
                    metrics_map=metrics_map, **config.trainer)
                trainer.fit(lm, datamodule=ldm)

            if __name__ == "__main__":
                config, train_dataset, val_dataset = ...
                best_config = run_tuning(
                    config,
                    "configs/best.tuning.config.yml",
                    train_fn,
                    configure_search_space,
                    train_dataset,
                    val_dataset,
                )
    """
    config = _extract_wandb_config(config)
    plain_config = cast(Dict[str, Any], OmegaConf.to_container(config))
    cfg = config_fn(plain_config)
    tune_opts = cfg["tune"]

    # The trainer needs a whole number of GPUs even for fractional trials.
    cfg["trainer"]["gpus"] = math.ceil(tune_opts["gpus_per_trial"])
    trainable = tune.with_parameters(train_fn, train=train, val=val)
    metric = tune_opts["metric"]
    mode = tune_opts["mode"]

    run_kwargs = dict(
        # WandbLogger logs experiment configurations and metrics reported
        # via tune.report() to W&B Dashboard
        loggers=[WandbLogger],
        resources_per_trial={
            "cpu": tune_opts["cpus_per_trial"],
            "gpu": tune_opts["gpus_per_trial"],
        },
        config=cfg,
        max_failures=10,
        num_samples=tune_opts["num_trials"],
        search_alg=OptunaSearch(metric=metric, mode=mode),
        metric=metric,
        mode=mode,
        # scheduler=tune.schedulers.ASHAScheduler(metric=metric, mode=mode, reduction_factor=2),
        name=f"{cfg['trainer']['experiment_name']}-tuning",
    )
    analysis = tune.run(trainable, **run_kwargs)

    best_config = analysis.get_best_config(metric, mode)
    best_trial = analysis.get_best_trial(metric=metric, mode=mode)
    best_result = best_trial.last_result
    logger.info(f"Best hyperparameters found were: {best_config}")
    logger.info(f"Best score: {best_result[metric]}")

    # Keep the winning trial's final metrics next to its hyperparameters.
    best_config["tune"]["result"] = best_result

    yaml_dump(best_config, output_config_file)

    return best_config
示例#25
0
    def _tune_run(self, config, resources_per_trial):
        """Wrapper to call ``tune.run``.

        Multiple estimators (one clone per split) are generated when early
        stopping is configured, whereas a single estimator is used when it
        is not.

        Args:
            config (dict): Configurations such as hyperparameters to run
                ``tune.run`` on. It is mutated: ``"estimator_list"`` is added.
            resources_per_trial (dict): Resources to use per trial within Ray.
                Accepted keys are `cpu`, `gpu` and custom resources, and values
                are integers specifying the number of each resource to use.

        Returns:
            analysis (`ExperimentAnalysis`): Object returned by
                `tune.run`.

        Raises:
            ValueError: If ``self.search_optimization`` is not one of
                "random", "bayesian", "bohb", "optuna" or "hyperopt".

        """
        stop_condition = {"training_iteration": self.max_iters}
        if self.early_stopping is not None:
            config["estimator_list"] = [
                clone(self.estimator) for _ in range(self.n_splits)
            ]
            if hasattr(self.early_stopping, "_max_t_attr"):
                # we want to delegate stopping to schedulers which
                # support it, but we want it to stop eventually, just in case
                # the solution is to make the stop condition very big
                stop_condition = {"training_iteration": self.max_iters * 10}
        else:
            config["estimator_list"] = [self.estimator]

        # Keyword arguments shared by every search optimizer.
        run_args = dict(scheduler=self.early_stopping,
                        reuse_actors=True,
                        verbose=self.verbose,
                        stop=stop_condition,
                        num_samples=self.num_samples,
                        config=config,
                        fail_fast=True,
                        resources_per_trial=resources_per_trial,
                        local_dir=os.path.expanduser(self.local_dir))

        if self.search_optimization == "random":
            # Without a list of distributions no ``search_alg`` is passed and
            # ``tune.run`` falls back to its default.
            if isinstance(self.param_distributions, list):
                run_args["search_alg"] = RandomListSearcher(
                    self.param_distributions)

            return tune.run(_Trainable, **run_args)

        if self.search_optimization == "bayesian":
            from skopt import Optimizer
            from ray.tune.suggest.skopt import SkOptSearch
            hyperparameter_names, spaces = self._get_skopt_params()
            search_algo = SkOptSearch(Optimizer(spaces),
                                      hyperparameter_names,
                                      metric="average_test_score",
                                      **self.search_kwargs)

        elif self.search_optimization == "bohb":
            from ray.tune.suggest.bohb import TuneBOHB
            config_space = self._get_bohb_config_space()
            search_algo = TuneBOHB(config_space,
                                   metric="average_test_score",
                                   mode="max",
                                   **self.search_kwargs)

        elif self.search_optimization == "optuna":
            from ray.tune.suggest.optuna import OptunaSearch
            config_space = self._get_optuna_params()
            search_algo = OptunaSearch(config_space,
                                       metric="average_test_score",
                                       mode="max",
                                       **self.search_kwargs)

        elif self.search_optimization == "hyperopt":
            from ray.tune.suggest.hyperopt import HyperOptSearch
            config_space = self._get_hyperopt_params()
            search_algo = HyperOptSearch(config_space,
                                         metric="average_test_score",
                                         mode="max",
                                         **self.search_kwargs)

        else:
            # Previously an unknown value left ``search_algo`` unbound and
            # surfaced as an UnboundLocalError further down.
            raise ValueError(
                f"Invalid search optimizer: {self.search_optimization}")

        if isinstance(self.n_jobs, int) and self.n_jobs > 0:
            search_algo = ConcurrencyLimiter(search_algo,
                                             max_concurrent=self.n_jobs)

        run_args["search_alg"] = search_algo
        return tune.run(_Trainable, **run_args)
示例#26
0
        # Feed the score back back to Tune.
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)


# Script entry point: parse the CLI, start Ray, and tune ``easy_objective``
# with OptunaSearch and AsyncHyperBandScheduler, minimizing "mean_loss".
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    # parse_known_args tolerates extra, unrecognized command-line arguments.
    args, _ = parser.parse_known_args()
    ray.init(configure_logging=False)

    # Search space expressed as a list of Optuna ``param.suggest_*`` entries.
    space = [
        param.suggest_uniform("width", 0, 20),
        param.suggest_uniform("height", -100, 100),
        # This is an ignored parameter.
        param.suggest_categorical("activation", ["relu", "tanh"])
    ]

    # Keyword arguments forwarded to ``tune.run``; the smoke test runs 10
    # samples instead of 100.
    config = {
        "num_samples": 10 if args.smoke_test else 100,
        "config": {
            "steps": 100,
        }
    }
    # Searcher and scheduler both minimize the same reported metric.
    algo = OptunaSearch(space, metric="mean_loss", mode="min")
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    tune.run(easy_objective, search_alg=algo, scheduler=scheduler, **config)
示例#27
0
    def testConvertOptuna(self):
        """Check that Tune search spaces convert to equivalent Optuna spaces.

        Three searchers are built from the same space (converted Tune config,
        native Optuna distributions, legacy ``param.suggest_*`` list) with
        identically seeded samplers and must suggest the same configuration.
        The test then runs ``tune.run`` with a searcher that receives its
        space through ``config``, and finally passes a Tune config directly
        as ``space``.
        """
        from ray.tune.suggest.optuna import OptunaSearch, param
        import optuna
        from optuna.samplers import RandomSampler

        # Grid search not supported, should raise ValueError
        with self.assertRaises(ValueError):
            OptunaSearch.convert_search_space(
                {"grid": tune.grid_search([0, 1])})

        config = {
            "a": tune.sample.Categorical([2, 3, 4]).uniform(),
            "b": {
                "x": tune.sample.Integer(0, 5).quantized(2),
                "y": 4,
                "z": tune.sample.Float(1e-4, 1e-2).loguniform()
            }
        }
        converted_config = OptunaSearch.convert_search_space(config)
        # The constant "y" has no counterpart in the Optuna spaces below.
        optuna_config = {
            "a": optuna.distributions.CategoricalDistribution([2, 3, 4]),
            "b": {
                "x": optuna.distributions.IntUniformDistribution(0, 5, step=2),
                "z": optuna.distributions.LogUniformDistribution(1e-4, 1e-2)
            }
        }
        legacy_optuna_config = [
            param.suggest_categorical("a", [2, 3, 4]),
            param.suggest_int("b/x", 0, 5, 2),
            param.suggest_loguniform("b/z", 1e-4, 1e-2)
        ]

        # Identical seeds so that all three searchers draw the same sample.
        sampler1 = RandomSampler(seed=1234)
        searcher1 = OptunaSearch(space=converted_config,
                                 sampler=sampler1,
                                 metric="a",
                                 mode="max")

        sampler2 = RandomSampler(seed=1234)
        searcher2 = OptunaSearch(space=optuna_config,
                                 sampler=sampler2,
                                 metric="a",
                                 mode="max")

        sampler3 = RandomSampler(seed=1234)
        searcher3 = OptunaSearch(space=legacy_optuna_config,
                                 sampler=sampler3,
                                 metric="a",
                                 mode="max")

        config1 = searcher1.suggest("0")
        config2 = searcher2.suggest("0")
        config3 = searcher3.suggest("0")

        self.assertEqual(config1, config2)
        self.assertEqual(config1, config3)
        self.assertIn(config1["a"], [2, 3, 4])
        self.assertIn(config1["b"]["x"], list(range(5)))
        self.assertLess(1e-4, config1["b"]["z"])
        self.assertLess(config1["b"]["z"], 1e-2)

        # Searcher without an explicit space: it is taken from ``config``.
        searcher = OptunaSearch(metric="a", mode="max")
        analysis = tune.run(_mock_objective,
                            config=config,
                            search_alg=searcher,
                            num_samples=1)
        trial = analysis.trials[0]
        # unittest assertion instead of a bare ``assert`` so the check is not
        # stripped under ``python -O`` and matches the rest of the test.
        self.assertIn(trial.config["a"], [2, 3, 4])

        # A Tune-domain config passed directly as ``space``.
        mixed_config = {
            "a": tune.uniform(5, 6),
            "b": tune.uniform(8, 9)
        }
        searcher = OptunaSearch(space=mixed_config, metric="a", mode="max")
        config = searcher.suggest("0")
        self.assertGreaterEqual(config["a"], 5)
        self.assertLessEqual(config["a"], 6)
        self.assertGreaterEqual(config["b"], 8)
        self.assertLessEqual(config["b"], 9)
示例#28
0
def main(args):
    """Tune, or run once from a config file, the selected IFCNet model.

    Args:
        args: Parsed arguments providing ``model`` (a ``Model`` member) and
            ``config_file`` (a path-like object with ``.open``, or None).
            Without a config file a 20-sample Optuna search with an ASHA
            scheduler is run; with one, a single trial is run using the
            loaded config and the training function is told to evaluate on
            the test set.

    Raises:
        ValueError: If ``args.model`` is not MVCNN, DGCNN or MeshNet.
    """
    model = args.model
    config_file = args.config_file
    log_dir = Path(f"./logs/{model.value}")
    log_dir.mkdir(exist_ok=True, parents=True)
    data_root = Path(f"./data/processed/{model.value}/IFCNetCore").absolute()

    with open("IFCNetCore_Classes.json", "r") as f:
        class_names = json.load(f)

    if model == Model.MVCNN:
        config = {
            "batch_size": 64,
            "learning_rate": tune.loguniform(1e-5, 1e-2),
            "weight_decay": tune.loguniform(1e-4, 1e-2),
            "cnn_name": tune.choice(["vgg11", "resnet34", "resnet50"]),
            "pretrained": True,
            "epochs": 30,
            "num_views": 12
        }
        train_impl = train_mvcnn
    elif model == Model.DGCNN:
        config = {
            "batch_size": 8,
            "learning_rate": tune.loguniform(1e-4, 1e-2),
            "weight_decay": tune.loguniform(1e-4, 1e-2),
            "k": tune.choice([20, 30, 40]),
            # NOTE(review): 516 looks like a typo for 512 - confirm before
            # changing; kept as-is to preserve the search space.
            "embedding_dim": tune.choice([516, 1024, 2048]),
            "dropout": 0.5,
            "epochs": 250
        }
        train_impl = train_dgcnn
    elif model == Model.MeshNet:
        config = {
            "batch_size": tune.choice([8, 16, 32]),
            "learning_rate": tune.loguniform(1e-4, 1e-2),
            "weight_decay": tune.loguniform(1e-4, 1e-2),
            "num_kernel": tune.choice([32, 64]),
            "sigma": tune.choice([0.1, 0.2, 0.3]),
            "aggregation_method": tune.choice(["Concat", "Max", "Average"]),
            "epochs": 150
        }
        train_impl = train_meshnet
    else:
        # Previously an unknown model left ``train_func`` unbound and failed
        # later with a NameError.
        raise ValueError(f"Unsupported model: {model}")

    # All training functions take the same extra keyword arguments.
    train_func = partial(train_impl,
                         data_root=data_root,
                         class_names=class_names,
                         eval_on_test=config_file is not None)

    if config_file:
        # A stored config replaces the search space entirely.
        with config_file.open("r") as f:
            config = json.load(f)

    searching = config_file is None
    scheduler = ASHAScheduler(max_t=250, grace_period=10)

    reporter = CLIReporter(metric_columns=[
        "train_balanced_accuracy_score", "val_balanced_accuracy_score",
        "training_iteration"
    ])

    result = tune.run(
        train_func,
        resources_per_trial={
            "cpu": 8,
            "gpu": 1
        },
        local_dir=log_dir,
        config=config,
        mode="max",
        metric="val_balanced_accuracy_score",
        search_alg=OptunaSearch() if searching else None,
        num_samples=20 if searching else 1,
        scheduler=scheduler if searching else None,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("val_balanced_accuracy_score", "max",
                                       "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation accuracy (balanced): {}".format(
        best_trial.last_result["val_balanced_accuracy_score"]))
示例#29
0
        # Feed the score back back to Tune.
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(configure_logging=False)

    algo = OptunaSearch()
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=10 if args.smoke_test else 100,
        config={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            # This is an ignored parameter.
            "activation": tune.choice(["relu", "tanh"])
示例#30
0
def _test_xgboost(method="BlendSearch"):
    try:
        import ray
    except ImportError:
        return
    if method == "BlendSearch":
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        "max_depth":
        tune.randint(1, 9)
        if method in ["BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight":
        tune.choice([1, 2, 3]),
        "subsample":
        tune.uniform(0.5, 1.0),
        "eta":
        tune.loguniform(1e-4, 1e-1),
    }
    max_iter = 10
    for num_samples in [128]:
        time_budget_s = 60
        for n_cpu in [2]:
            start_time = time.time()
            # ray.init(address='auto')
            if method == "BlendSearch":
                analysis = tune.run(
                    train_breast_cancer,
                    config=search_space,
                    low_cost_partial_config={
                        "max_depth": 1,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    scheduler="asha",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True,
                )
            else:
                if "ASHA" == method:
                    algo = None
                elif "BOHB" == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB

                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif "Optuna" == method:
                    from ray.tune.suggest.optuna import OptunaSearch

                    algo = OptunaSearch()
                elif "CFO" == method:
                    from flaml import CFO

                    algo = CFO(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif "CFOCat" == method:
                    from flaml.searcher.cfo_cat import CFOCat

                    algo = CFOCat(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif "Dragonfly" == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch

                    algo = DragonflySearch()
                elif "SkOpt" == method:
                    from ray.tune.suggest.skopt import SkOptSearch

                    algo = SkOptSearch()
                elif "Nevergrad" == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng

                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif "ZOOpt" == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch

                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif "Ax" == method:
                    from ray.tune.suggest.ax import AxSearch

                    algo = AxSearch()
                elif "HyperOpt" == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch

                    algo = HyperOptSearch()
                    scheduler = None
                if method != "BOHB":
                    from ray.tune.schedulers import ASHAScheduler

                    scheduler = ASHAScheduler(max_t=max_iter, grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler,
                    search_alg=algo,
                )
            # # Load the best model checkpoint
            # import os
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")