Example #1
    def testOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch

        searcher = OptunaSearch(space=self.config, metric=self.metric_name, mode="max")

        self._save(searcher)

        searcher = OptunaSearch(space=self.config, metric=self.metric_name, mode="max")
        self._restore(searcher)
Example #2
    def set_basic_conf(self):
        from optuna.samplers import TPESampler
        # Imports needed for this excerpt to stand alone:
        from ray import tune
        from ray.tune.suggest.optuna import OptunaSearch
        space = OptunaSearch.convert_search_space({
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100)
        })

        def cost(space, reporter):
            reporter(loss=(space["height"] - 14)**2 - abs(space["width"] - 3))

        search_alg = OptunaSearch(
            space, sampler=TPESampler(seed=10), metric="loss", mode="min")
        return search_alg, cost
Example #3
    def testOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.trial import TrialState

        searcher = OptunaSearch(
            space=self.space,
            metric="metric",
            mode="max",
            points_to_evaluate=[{self.param_name: self.valid_value}],
            evaluated_rewards=[1.0])

        self.assertGreater(len(searcher._ot_study.trials), 0)

        searcher = OptunaSearch(
            space=self.space,
            metric="metric",
            mode="max",
        )

        point = {
            self.param_name: self.valid_value,
        }

        self.assertEqual(len(searcher._ot_study.trials), 0)

        searcher.add_evaluated_point(point,
                                     1.0,
                                     intermediate_values=[0.8, 0.9])
        self.assertEqual(len(searcher._ot_study.trials), 1)
        self.assertTrue(
            searcher._ot_study.trials[-1].state == TrialState.COMPLETE)

        searcher.add_evaluated_point(point,
                                     1.0,
                                     intermediate_values=[0.8, 0.9],
                                     error=True)
        self.assertEqual(len(searcher._ot_study.trials), 2)
        self.assertTrue(searcher._ot_study.trials[-1].state == TrialState.FAIL)

        searcher.add_evaluated_point(point,
                                     1.0,
                                     intermediate_values=[0.8, 0.9],
                                     pruned=True)
        self.assertEqual(len(searcher._ot_study.trials), 3)
        self.assertTrue(
            searcher._ot_study.trials[-1].state == TrialState.PRUNED)
Example #4
def tune_fn():
    mlflow.set_experiment(experiment_name=experiment_name)

    optuna_search = OptunaSearch(metric="auroc", mode="max")

    # Note: ax_search is created but never used; optuna_search is passed to tune.run below.
    ax_search = AxSearch(metric="auroc", mode="max")

    tune.run(objective,
             name="mlflow_gbdt",
             num_samples=65,
             config={
                 "num_leaves": tune.randint(5, 95),
                 "learning_rate": tune.loguniform(1e-4, 1.0),
                 "n_estimators": tune.randint(100, 100000),
                 "subsample": tune.loguniform(0.01, 1.0),
                 "subsample_freq": tune.randint(1, 5),
                 "objective": "binary",
                 "reg_alpha": tune.loguniform(1e-4, 1.0),
                 "reg_lambda": tune.loguniform(1e-4, 1.0),
                 "tree_learner": "feature",
                 "feature_sel": 0,
                 "mlflow": {
                     "experiment_name": experiment_name,
                     "tracking_uri": mlflow.get_tracking_uri()
                 }
             },
             search_alg=optuna_search)
Example #5
def set_algorithm(experiment_name, config):
    '''
    Configure search algorithm.
    '''
    if args.algorithm == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif args.algorithm == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif args.algorithm == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif args.algorithm == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    elif args.algorithm == 'pbt':
        algorithm = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    elif args.algorithm == 'random':
        algorithm = BasicVariantGenerator(max_concurrent=args.jobs)
    if args.algorithm not in ['random', 'pbt']:
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
    return algorithm
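
For orientation, a minimal sketch of how this helper might be wired into `tune.run`; `objective`, `search_space`, and `args` are placeholders assumed from the surrounding script, not shown in the example:

# Hypothetical usage sketch: `objective`, `search_space`, and `args` are
# stand-ins from the surrounding script. PBT is a scheduler, while the
# other options returned by set_algorithm are search algorithms.
algorithm = set_algorithm("my_experiment", config)
analysis = tune.run(
    objective,
    config=search_space,
    search_alg=algorithm if args.algorithm != 'pbt' else None,
    scheduler=algorithm if args.algorithm == 'pbt' else None,
    metric="minimum",
    mode="min",
    num_samples=100)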
Example #6
    def set_search_properties(
        self,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        config: Optional[Dict] = None,
        setting: Optional[Dict] = None,
    ) -> bool:
        metric_changed = mode_changed = False
        if metric and self._metric != metric:
            metric_changed = True
            self._metric = metric
            if self._metric_constraints:
                # metric modified by lagrange
                metric += self.lagrange
                # TODO: don't change metric for global search methods that
                # can handle constraints already
        if mode and self._mode != mode:
            mode_changed = True
            self._mode = mode
        if not self._ls.space:
            # the search space can be set only once
            if self._gs is not None:
                # define-by-run is not supported via set_search_properties
                self._gs.set_search_properties(metric, mode, config)
                self._gs.space = config
            if config:
                add_cost_to_space(config, self._ls.init_config,
                                  self._cat_hp_cost)
            self._ls.set_search_properties(metric, mode, config)
            self._init_search()
        else:
            if metric_changed or mode_changed:
                # reset search when metric or mode changed
                self._ls.set_search_properties(metric, mode)
                if self._gs is not None:
                    self._gs = GlobalSearch(
                        space=self._gs._space,
                        metric=metric,
                        mode=mode,
                        sampler=self._gs._sampler,
                    )
                    self._gs.space = self._ls.space
                self._init_search()
        if setting:
            # CFO doesn't need these settings
            if "time_budget_s" in setting:
                self._time_budget_s = setting["time_budget_s"]  # budget from now
                now = time.time()
                self._time_used += now - self._start_time
                self._start_time = now
                self._set_deadline()
            if "metric_target" in setting:
                self._metric_target = setting.get("metric_target")
            if "num_samples" in setting:
                self._num_samples = (setting["num_samples"] +
                                     len(self._result) +
                                     len(self._trial_proposed_by))
        return True
Example #7
    def set_basic_conf(self):
        from optuna.samplers import TPESampler
        # Imports needed for this excerpt to stand alone:
        from ray.tune.suggest.optuna import OptunaSearch, param as ot_param
        space = [
            ot_param.suggest_uniform("width", 0, 20),
            ot_param.suggest_uniform("height", -100, 100)
        ]

        def cost(space, reporter):
            reporter(loss=(space["height"] - 14)**2 - abs(space["width"] - 3))

        search_alg = OptunaSearch(
            space, sampler=TPESampler(seed=10), metric="loss", mode="min")
        return search_alg, cost
Example #8
def run_tune(method: str, num_samples: int) -> tune.ExperimentAnalysis:
    optuna_search = OptunaSearch(metric="mean_accuracy", mode="max")
    return tune.run(
        trial,
        config=methods[method],
        num_samples=num_samples,
        search_alg=optuna_search,
        # resources_per_trial={"gpu": 1, "cpu": 16}, # Using the GPU makes the search process a lot slower on my machine
        verbose=1,
        metric="mean_accuracy",  # otherwise I cannot get the dataframe in the end for some reason
        mode="max",  # see above
    )
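
The `trial` trainable and the `methods` mapping are referenced above but not shown; a minimal sketch of what they might look like (both are illustrative assumptions, not part of the example):

from ray import tune

# Hypothetical registry of search spaces, keyed by method name.
methods = {
    "optuna": {"lr": tune.loguniform(1e-4, 1e-1)},
}

def trial(config):
    # Dummy trainable: report an accuracy derived from the sampled config
    # so the searcher has something to maximize.
    tune.report(mean_accuracy=1.0 - config["lr"])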
Example #9
def init_search_algorithm(search_alg, metric=None, mode=None):
    """Specify a search algorithm and you must pip install it first.
    See more details here: https://docs.ray.io/en/master/tune/api_docs/suggestion.html
    """
    if search_alg == 'optuna':
        assert metric and mode, "Metric and mode cannot be None for optuna."
        from ray.tune.suggest.optuna import OptunaSearch
        return OptunaSearch(metric=metric, mode=mode)
    elif search_alg == 'bayesopt':
        assert metric and mode, "Metric and mode cannot be None for bayesian optimization."
        from ray.tune.suggest.bayesopt import BayesOptSearch
        return BayesOptSearch(metric=metric, mode=mode)
    logging.info(f'{search_alg} search not found; falling back to BasicVariantGenerator().')
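
A hedged usage sketch for the helper above; the trainable and search space here are illustrative stand-ins, not part of the example:

from ray import tune

def trainable(config):
    # Illustrative objective: report a value for the searcher to minimize.
    tune.report(val_loss=(config["x"] - 1) ** 2)

search_alg = init_search_algorithm('optuna', metric='val_loss', mode='min')
tune.run(trainable, config={"x": tune.uniform(-5, 5)},
         search_alg=search_alg, num_samples=10)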
Example #10
    def testConvergenceOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch

        np.random.seed(1)
        searcher = OptunaSearch(seed=1)
        analysis = self._testConvergence(
            searcher,
            top=5,
        )

        # This assertion is much weaker than in the BO case, since TPE
        # doesn't converge as closely. It is still unlikely to reach this
        # tolerance with random search (5 * 0.1 = 0.5% chance).
        assert len(analysis.trials) < 100
        assert math.isclose(analysis.best_config["x"], 0, abs_tol=1e-1)
Example #11
    def testConvertOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch, param
        from optuna.samplers import RandomSampler

        config = {
            "a": tune.sample.Categorical([2, 3, 4]).uniform(),
            "b": {
                "x": tune.sample.Integer(0, 5).quantized(2),
                "y": 4,
                "z": tune.sample.Float(1e-4, 1e-2).loguniform()
            }
        }
        converted_config = OptunaSearch.convert_search_space(config)
        optuna_config = [
            param.suggest_categorical("a", [2, 3, 4]),
            param.suggest_int("b/x", 0, 5, 2),
            param.suggest_loguniform("b/z", 1e-4, 1e-2)
        ]

        sampler1 = RandomSampler(seed=1234)
        searcher1 = OptunaSearch(space=converted_config,
                                 sampler=sampler1,
                                 base_config=config)

        sampler2 = RandomSampler(seed=1234)
        searcher2 = OptunaSearch(space=optuna_config,
                                 sampler=sampler2,
                                 base_config=config)

        config1 = searcher1.suggest("0")
        config2 = searcher2.suggest("0")

        self.assertEqual(config1, config2)
        self.assertIn(config1["a"], [2, 3, 4])
        self.assertIn(config1["b"]["x"], list(range(5)))
        self.assertEqual(config1["b"]["y"], 4)
        self.assertLess(1e-4, config1["b"]["z"])
        self.assertLess(config1["b"]["z"], 1e-2)

        searcher = OptunaSearch(metric="a", mode="max")
        analysis = tune.run(_mock_objective,
                            config=config,
                            search_alg=searcher,
                            num_samples=1)
        trial = analysis.trials[0]
        assert trial.config["a"] in [2, 3, 4]
Example #12
def run_optuna_tune(smoke_test=False):
    algo = OptunaSearch(
        space=define_by_run_func, metric="mean_loss", mode="min")
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=10 if smoke_test else 100,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
Example #13
    def testOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.samplers import RandomSampler

        np.random.seed(1000)  # At least one nan, inf, -inf and float

        out = tune.run(
            _invalid_objective,
            search_alg=OptunaSearch(sampler=RandomSampler(seed=1234)),
            config=self.config,
            mode="max",
            num_samples=8,
            reuse_actors=False)

        best_trial = out.best_trial
        self.assertLessEqual(best_trial.config["report"], 2.0)
Example #14
def run_optuna_tune(smoke_test=False):
    algo = OptunaSearch(metric=["loss", "gain"], mode=["min", "max"])
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    analysis = tune.run(
        easy_objective,
        search_alg=algo,
        num_samples=10 if smoke_test else 100,
        config={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            # This is an ignored parameter.
            "activation": tune.choice(["relu", "tanh"])
        })

    print("Best hyperparameters for loss found were: ",
          analysis.get_best_config("loss", "min"))
    print("Best hyperparameters for gain found were: ",
          analysis.get_best_config("gain", "max"))
Example #15
def init_search_algorithm(search_alg, metric=None, mode=None):
    """Specify a search algorithm and you must pip install it first.
    If no search algorithm is specified, the default search algorithm is BasicVariantGenerator.
    See more details here: https://docs.ray.io/en/master/tune/api_docs/suggestion.html

    Args:
        search_alg (str): One of 'basic_variant', 'bayesopt', or 'optuna'.
        metric (str): The metric to monitor for early stopping.
        mode (str): One of 'min' or 'max' to determine whether to minimize or maximize the metric.
    """
    if search_alg == 'optuna':
        assert metric and mode, "Metric and mode cannot be None for optuna."
        from ray.tune.suggest.optuna import OptunaSearch
        return OptunaSearch(metric=metric, mode=mode)
    elif search_alg == 'bayesopt':
        assert metric and mode, "Metric and mode cannot be None for bayesian optimization."
        from ray.tune.suggest.bayesopt import BayesOptSearch
        return BayesOptSearch(metric=metric, mode=mode)
    logging.info(f'{search_alg} search not found; falling back to BasicVariantGenerator().')
Example #16
def run_optuna_tune(smoke_test=False):
    algo = OptunaSearch()
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=10 if smoke_test else 100,
        config={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            # This is an ignored parameter.
            "activation": tune.choice(["relu", "tanh"])
        })

    print("Best hyperparameters found were: ", analysis.best_config)
Example #17
    def testOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.samplers import RandomSampler

        np.random.seed(1000)

        out = tune.run(_multi_objective,
                       search_alg=OptunaSearch(
                           sampler=RandomSampler(seed=1234),
                           metric=["a", "b", "c"],
                           mode=["max", "min", "max"],
                       ),
                       config=self.config,
                       num_samples=16,
                       reuse_actors=False)

        best_trial_a = out.get_best_trial("a", "max")
        self.assertGreaterEqual(best_trial_a.config["a"], 0.8)
        best_trial_b = out.get_best_trial("b", "min")
        self.assertGreaterEqual(best_trial_b.config["b"], 0.8)
        best_trial_c = out.get_best_trial("c", "max")
        self.assertGreaterEqual(best_trial_c.config["c"], 0.8)
Example #18
class BlendSearch(Searcher):
    '''class for BlendSearch algorithm
    '''
    def __init__(self,
                 metric: Optional[str] = None,
                 mode: Optional[str] = None,
                 space: Optional[dict] = None,
                 points_to_evaluate: Optional[List[Dict]] = None,
                 cat_hp_cost: Optional[dict] = None,
                 prune_attr: Optional[str] = None,
                 min_resource: Optional[float] = None,
                 max_resource: Optional[float] = None,
                 reduction_factor: Optional[float] = None,
                 resources_per_trial: Optional[dict] = None,
                 global_search_alg: Optional[Searcher] = None,
                 mem_size=None):
        '''Constructor

        Args:
            metric: A string of the metric name to optimize for.
            mode: A string in ['min', 'max'] to specify the objective as
                minimization or maximization.
            space: A dictionary to specify the search space.
            points_to_evaluate: Initial parameter suggestions to be run first. 
                The first element needs to be a dictionary from a subset of 
                controlled dimensions to the initial low-cost values. 
                e.g.,
                
                .. code-block:: python
                
                    [{'epochs': 1}]

            cat_hp_cost: A dictionary from a subset of categorical dimensions
                to the relative cost of each choice. 
                e.g.,
                
                .. code-block:: python

                    {'tree_method': [1, 1, 2]}
                
                i.e., the relative cost of the 
                three choices of 'tree_method' is 1, 1 and 2 respectively.
            prune_attr: A string of the attribute used for pruning. 
                Not necessarily in space.
                When prune_attr is in space, it is a hyperparameter, e.g., 
                    'n_iters', and the best value is unknown.
                When prune_attr is not in space, it is a resource dimension, 
                    e.g., 'sample_size', and the peak performance is assumed
                    to be at the max_resource.
            min_resource: A float of the minimal resource to use for the 
                prune_attr; only valid if prune_attr is not in space.
            max_resource: A float of the maximal resource to use for the 
                prune_attr; only valid if prune_attr is not in space.
            reduction_factor: A float of the reduction factor used for
                incremental pruning.
            resources_per_trial: A dictionary of the resources permitted per
                trial, such as 'mem'.
            global_search_alg: A Searcher instance as the global search
                instance. If omitted, Optuna is used. The following algos have
                known issues when used as global_search_alg:
                - HyperOptSearch sometimes raises an exception
                - TuneBOHB has its own scheduler
            mem_size: A function to estimate the memory size for a given config.
        '''
        self._metric, self._mode = metric, mode
        if points_to_evaluate:
            init_config = points_to_evaluate[0]
        else:
            init_config = {}
        self._points_to_evaluate = points_to_evaluate
        if global_search_alg is not None:
            self._gs = global_search_alg
        elif getattr(self, '__name__', None) != 'CFO':
            self._gs = GlobalSearch(space=space, metric=metric, mode=mode)
        else:
            self._gs = None
        self._ls = LocalSearch(init_config, metric, mode, cat_hp_cost, space,
                               prune_attr, min_resource, max_resource,
                               reduction_factor)
        self._resources_per_trial = resources_per_trial
        self._mem_size = mem_size
        self._mem_threshold = resources_per_trial.get(
            'mem') if resources_per_trial else None
        self._init_search()

    def set_search_properties(self,
                              metric: Optional[str] = None,
                              mode: Optional[str] = None,
                              config: Optional[Dict] = None) -> bool:
        if self._ls.space:
            if 'time_budget_s' in config:
                self._deadline = config.get('time_budget_s') + time.time()
            if 'metric_target' in config:
                self._metric_target = config.get('metric_target')
        else:
            if metric: self._metric = metric
            if mode: self._mode = mode
            self._ls.set_search_properties(metric, mode, config)
            if self._gs is not None:
                self._gs.set_search_properties(metric, mode, config)
            self._init_search()
        return True

    def _init_search(self):
        '''initialize the search
        '''
        self._metric_target = np.inf * self._ls.metric_op
        self._search_thread_pool = {
            # id: int -> thread: SearchThread
            0: SearchThread(self._ls.mode, self._gs)
        }
        self._thread_count = 1  # total # threads created
        self._init_used = self._ls.init_config is None
        self._trial_proposed_by = {}  # trial_id: str -> thread_id: int
        self._admissible_min = self._ls.normalize(self._ls.init_config)
        self._admissible_max = self._admissible_min.copy()
        self._result = {}  # config_signature: tuple -> result: Dict
        self._deadline = np.inf

    def save(self, checkpoint_path: str):
        save_object = (self._metric_target, self._search_thread_pool,
                       self._thread_count, self._init_used,
                       self._trial_proposed_by, self._admissible_min,
                       self._admissible_max, self._result, self._deadline)
        with open(checkpoint_path, "wb") as outputFile:
            pickle.dump(save_object, outputFile)

    def restore(self, checkpoint_path: str):
        with open(checkpoint_path, "rb") as inputFile:
            save_object = pickle.load(inputFile)
        self._metric_target, self._search_thread_pool, \
            self._thread_count, self._init_used, self._trial_proposed_by, \
            self._admissible_min, self._admissible_max, self._result, \
            self._deadline = save_object

    def restore_from_dir(self, checkpoint_dir: str):
        super().restore_from_dir(checkpoint_dir)

    def on_trial_complete(self,
                          trial_id: str,
                          result: Optional[Dict] = None,
                          error: bool = False):
        ''' search thread updater and cleaner
        '''
        thread_id = self._trial_proposed_by.get(trial_id)
        if thread_id in self._search_thread_pool:
            self._search_thread_pool[thread_id].on_trial_complete(
                trial_id, result, error)
            del self._trial_proposed_by[trial_id]
            # if not thread_id: logger.info(f"result {result}")
        if result:
            config = {}
            for key, value in result.items():
                if key.startswith('config/'):
                    config[key[7:]] = value
            if error:  # remove from result cache
                del self._result[self._ls.config_signature(config)]
            else:  # add to result cache
                self._result[self._ls.config_signature(config)] = result
            # update target metric if improved
            if (result[self._metric] -
                    self._metric_target) * self._ls.metric_op < 0:
                self._metric_target = result[self._metric]
            if thread_id:  # from local search
                # update admissible region
                normalized_config = self._ls.normalize(config)
                for key in self._admissible_min:
                    value = normalized_config[key]
                    if value > self._admissible_max[key]:
                        self._admissible_max[key] = value
                    elif value < self._admissible_min[key]:
                        self._admissible_min[key] = value
            elif self._create_condition(result):
                # thread creator
                self._search_thread_pool[self._thread_count] = SearchThread(
                    self._ls.mode,
                    self._ls.create(config,
                                    result[self._metric],
                                    cost=result["time_total_s"]))
                thread_id = self._thread_count
                self._thread_count += 1

        # cleaner
        # logger.info(f"thread {thread_id} in search thread pool="
        #     f"{thread_id in self._search_thread_pool}")
        if thread_id and thread_id in self._search_thread_pool:
            # local search thread
            self._clean(thread_id)

    def _create_condition(self, result: Dict) -> bool:
        ''' create thread condition
        '''
        if len(self._search_thread_pool) < 2: return True
        obj_median = np.median([
            thread.obj_best1
            for id, thread in self._search_thread_pool.items() if id
        ])
        return result[self._metric] * self._ls.metric_op < obj_median

    def _clean(self, thread_id: int):
        ''' delete thread and increase admissible region if converged,
        merge local threads if they are close
        '''
        assert thread_id
        todelete = set()
        for id in self._search_thread_pool:
            if id and id != thread_id:
                if self._inferior(id, thread_id):
                    todelete.add(id)
        for id in self._search_thread_pool:
            if id and id != thread_id:
                if self._inferior(thread_id, id):
                    todelete.add(thread_id)
                    break
        # logger.info(f"thead {thread_id}.converged="
        #     f"{self._search_thread_pool[thread_id].converged}")
        if self._search_thread_pool[thread_id].converged:
            todelete.add(thread_id)
            for key in self._admissible_min:
                self._admissible_max[key] += self._ls.STEPSIZE
                self._admissible_min[key] -= self._ls.STEPSIZE
        for id in todelete:
            del self._search_thread_pool[id]

    def _inferior(self, id1: int, id2: int) -> bool:
        ''' whether thread id1 is inferior to id2
        '''
        t1 = self._search_thread_pool[id1]
        t2 = self._search_thread_pool[id2]
        if t1.obj_best1 < t2.obj_best2: return False
        elif t1.resource and t1.resource < t2.resource: return False
        elif t2.reach(t1): return True
        else: return False

    def on_trial_result(self, trial_id: str, result: Dict):
        if trial_id not in self._trial_proposed_by: return
        thread_id = self._trial_proposed_by[trial_id]
        if thread_id not in self._search_thread_pool: return
        self._search_thread_pool[thread_id].on_trial_result(trial_id, result)

    def suggest(self, trial_id: str) -> Optional[Dict]:
        ''' choose thread, suggest a valid config
        '''
        if self._init_used and not self._points_to_evaluate:
            choice, backup = self._select_thread()
            # logger.debug(f"choice={choice}, backup={backup}")
            if choice < 0: return None  # timeout
            self._use_rs = False
            config = self._search_thread_pool[choice].suggest(trial_id)
            skip = self._should_skip(choice, trial_id, config)
            if skip:
                if choice:
                    # logger.info(f"skipping choice={choice}, config={config}")
                    return None
                # use rs
                self._use_rs = True
                for _, generated in generate_variants(
                    {'config': self._ls.space}):
                    config = generated['config']
                    break
                # logger.debug(f"random config {config}")
                skip = self._should_skip(choice, trial_id, config)
                if skip: return None
            # if not choice: logger.info(config)
            if choice or backup == choice or self._valid(config):
                # LS or valid or no backup choice
                self._trial_proposed_by[trial_id] = choice
            else:  # invalid config proposed by GS
                if not self._use_rs:
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, {}, error=True)  # tell GS there is an error
                self._use_rs = False
                config = self._search_thread_pool[backup].suggest(trial_id)
                skip = self._should_skip(backup, trial_id, config)
                if skip:
                    return None
                self._trial_proposed_by[trial_id] = backup
                choice = backup
            # if choice: self._pending.add(choice) # local search thread pending
            if not choice:
                if self._ls._resource:
                    # TODO: add resource to config proposed by GS, min or median?
                    config[self._ls.prune_attr] = self._ls.min_resource
            self._result[self._ls.config_signature(config)] = {}
        else:  # use init config
            init_config = self._points_to_evaluate.pop(
                0) if self._points_to_evaluate else self._ls.init_config
            config = self._ls.complete_config(init_config,
                                              self._admissible_min,
                                              self._admissible_max)
            # logger.info(f"reset config to {config}")
            config_signature = self._ls.config_signature(config)
            result = self._result.get(config_signature)
            if result:  # tried before
                # self.on_trial_complete(trial_id, result)
                return None
            elif result is None:  # not tried before
                self._result[config_signature] = {}
            else:
                return None  # running but no result yet
            self._init_used = True
        # logger.info(f"config={config}")
        return config

    def _should_skip(self, choice, trial_id, config) -> bool:
        ''' return True if config is None, its result is already known,
            or it exceeds the memory threshold; otherwise return False
        '''
        if config is None: return True
        config_signature = self._ls.config_signature(config)
        exists = config_signature in self._result
        # check mem constraint
        if not exists and self._mem_threshold and self._mem_size(
                config) > self._mem_threshold:
            self._result[config_signature] = {
                self._metric: np.inf * self._ls.metric_op,
                'time_total_s': 1
            }
            exists = True
        if exists:
            if not self._use_rs:
                result = self._result.get(config_signature)
                if result:
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, result, error=False)
                    if choice:
                        # local search thread
                        self._clean(choice)
                else:
                    # tell the thread there is an error
                    self._search_thread_pool[choice].on_trial_complete(
                        trial_id, {}, error=True)
            return True
        return False

    def _select_thread(self) -> Tuple:
        ''' thread selector; use can_suggest to check LS availability
        '''
        # update priority
        min_eci = self._deadline - time.time()
        if min_eci <= 0: return -1, -1
        max_speed = 0
        for thread in self._search_thread_pool.values():
            if thread.speed > max_speed: max_speed = thread.speed
        for thread in self._search_thread_pool.values():
            thread.update_eci(self._metric_target, max_speed)
            if thread.eci < min_eci: min_eci = thread.eci
        for thread in self._search_thread_pool.values():
            thread.update_priority(min_eci)

        top_thread_id = backup_thread_id = 0
        priority1 = priority2 = self._search_thread_pool[0].priority
        # logger.debug(f"priority of thread 0={priority1}")
        for thread_id, thread in self._search_thread_pool.items():
            # if thread_id:
            #     logger.debug(
            #         f"priority of thread {thread_id}={thread.priority}")
            #     logger.debug(
            #         f"thread {thread_id}.can_suggest={thread.can_suggest}")
            if thread_id and thread.can_suggest:
                priority = thread.priority
                if priority > priority1:
                    priority1 = priority
                    top_thread_id = thread_id
                if priority > priority2 or backup_thread_id == 0:
                    priority2 = priority
                    backup_thread_id = thread_id
        return top_thread_id, backup_thread_id

    def _valid(self, config: Dict) -> bool:
        ''' config validator
        '''
        for key in self._admissible_min:
            if key in config:
                value = config[key]
                # logger.info(
                #     f"{key},{value},{self._admissible_min[key]},{self._admissible_max[key]}")
                if (value < self._admissible_min[key]
                        or value > self._admissible_max[key]):
                    return False
        return True
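
A minimal usage sketch for the class above, assuming it is exposed as `flaml.BlendSearch` and paired with Ray Tune; `my_trainable` and `search_space` are placeholders, not part of the example:

from flaml import BlendSearch
from ray import tune

algo = BlendSearch(
    metric="loss",
    mode="min",
    points_to_evaluate=[{"epochs": 1}],  # low-cost initial point, as documented above
)
# `my_trainable` and `search_space` are hypothetical stand-ins.
tune.run(my_trainable, config=search_space, search_alg=algo, num_samples=50)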
Example #19
def _test_xgboost(method='BlendSearch'):
    try:
        import ray
    except ImportError:
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = 60  # or None for no time budget
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples*n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
                if 'ASHA' == method:
                    algo = None
                elif 'BOHB' == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB
                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif 'Optuna' == method:
                    from ray.tune.suggest.optuna import OptunaSearch
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }], cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    })
                elif 'Dragonfly' == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch
                    algo = DragonflySearch()
                elif 'SkOpt' == method:
                    from ray.tune.suggest.skopt import SkOptSearch
                    algo = SkOptSearch()
                elif 'Nevergrad' == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples*n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
                elif 'HyperOpt' == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch
                    algo = HyperOptSearch()
                    scheduler = None
                if method != 'BOHB':
                    from ray.tune.schedulers import ASHAScheduler
                    scheduler = ASHAScheduler(
                        max_t=max_iter,
                        grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space, local_dir='logs/',
                    num_samples=num_samples*n_cpu, time_budget_s=time_budget_s,
                    scheduler=scheduler, search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
Example #20
def cifar10_main(method='BlendSearch',
                 num_samples=10,
                 max_num_epochs=100,
                 gpus_per_trial=2):
    data_dir = os.path.abspath("test/data")
    load_data(data_dir)  # Download data for all trials before starting the run
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    if method in ['BlendSearch', 'BOHB', 'Optuna']:
        config = {
            "l1": tune.randint(2, 8),
            "l2": tune.randint(2, 8),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs, q=1),
            "batch_size": tune.randint(1, 4)  #tune.choice([2, 4, 8, 16])
        }
    else:
        config = {
            "l1": tune.randint(2, 9),
            "l2": tune.randint(2, 9),
            "lr": tune.loguniform(1e-4, 1e-1),
            "num_epochs": tune.qloguniform(1, max_num_epochs + 1, q=1),
            "batch_size": tune.randint(1, 5)  #tune.choice([2, 4, 8, 16])
        }
    import ray
    time_budget_s = 3600
    start_time = time.time()
    if method == 'BlendSearch':
        result = tune.run(ray.tune.with_parameters(train_cifar,
                                                   data_dir=data_dir),
                          init_config={
                              "l1": 2,
                              "l2": 2,
                              "num_epochs": 1,
                              "batch_size": 4,
                          },
                          metric="loss",
                          mode="min",
                          max_resource=max_num_epochs,
                          min_resource=1,
                          report_intermediate_result=True,
                          resources_per_trial={
                              "cpu": 2,
                              "gpu": gpus_per_trial
                          },
                          config=config,
                          local_dir='logs/',
                          num_samples=num_samples,
                          time_budget_s=time_budget_s,
                          use_ray=True)
    else:
        if 'ASHA' == method:
            algo = None
        elif 'BOHB' == method:
            from ray.tune.schedulers import HyperBandForBOHB
            from ray.tune.suggest.bohb import TuneBOHB
            algo = TuneBOHB()
            scheduler = HyperBandForBOHB(max_t=max_num_epochs)
        elif 'Optuna' == method:
            from ray.tune.suggest.optuna import OptunaSearch
            algo = OptunaSearch()
        elif 'CFO' == method:
            from flaml import CFO
            algo = CFO(points_to_evaluate=[{
                "l1": 2,
                "l2": 2,
                "num_epochs": 1,
                "batch_size": 4,
            }])
        elif 'Nevergrad' == method:
            from ray.tune.suggest.nevergrad import NevergradSearch
            import nevergrad as ng
            algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
        if method != 'BOHB':
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(max_t=max_num_epochs, grace_period=1)
        result = tune.run(tune.with_parameters(train_cifar, data_dir=data_dir),
                          resources_per_trial={
                              "cpu": 2,
                              "gpu": gpus_per_trial
                          },
                          config=config,
                          metric="loss",
                          mode="min",
                          num_samples=num_samples,
                          time_budget_s=time_budget_s,
                          scheduler=scheduler,
                          search_alg=algo)
    ray.shutdown()

    logger.info(f"method={method}")
    logger.info(f"n_samples={num_samples}")
    logger.info(f"time={time.time()-start_time}")
    best_trial = result.get_best_trial("loss", "min", "all")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_trial.metric_analysis["loss"]["min"]))
    logger.info("Best trial final validation accuracy: {}".format(
        best_trial.metric_analysis["accuracy"]["max"]))

    best_trained_model = Net(2**best_trial.config["l1"],
                             2**best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")

    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    test_acc = _test_accuracy(best_trained_model, device)
    logger.info("Best trial test set accuracy: {}".format(test_acc))
Example #21
    def _tune_run(self, config, resources_per_trial):
        """Wrapper to call ``tune.run``. Multiple estimators are generated when
        early stopping is possible, whereas a single estimator is
        generated when early stopping is not possible.

        Args:
            config (dict): Configurations such as hyperparameters to run
            ``tune.run`` on.
            resources_per_trial (dict): Resources to use per trial within Ray.
                Accepted keys are `cpu`, `gpu` and custom resources, and values
                are integers specifying the number of each resource to use.

        Returns:
            analysis (`ExperimentAnalysis`): Object returned by
                `tune.run`.

        """
        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)

        trainable = _Trainable
        if self.pipeline_auto_early_stop and check_is_pipeline(
                self.estimator) and self.early_stopping:
            trainable = _PipelineTrainable

        max_iter = self.max_iters
        if self.early_stopping is not None:
            config["estimator_list"] = [
                clone(self.estimator) for _ in range(self.n_splits)
            ]
            if hasattr(self.early_stopping, "_max_t_attr"):
                # we want to delegate stopping to schedulers which
                # support it, but we want it to stop eventually, just in case
                # the solution is to make the stop condition very big
                max_iter = self.max_iters * 10
        else:
            config["estimator_list"] = [self.estimator]

        stopper = MaximumIterationStopper(max_iter=max_iter)
        if self.stopper:
            stopper = CombinedStopper(stopper, self.stopper)

        run_args = dict(scheduler=self.early_stopping,
                        reuse_actors=True,
                        verbose=self.verbose,
                        stop=stopper,
                        num_samples=self.n_trials,
                        config=config,
                        fail_fast="raise",
                        resources_per_trial=resources_per_trial,
                        local_dir=os.path.expanduser(self.local_dir),
                        loggers=self.loggers,
                        time_budget_s=self.time_budget_s)

        if self.search_optimization == "random":
            if isinstance(self.param_distributions, list):
                search_algo = RandomListSearcher(self.param_distributions)
            else:
                search_algo = BasicVariantGenerator()
            run_args["search_alg"] = search_algo
        else:
            search_space = None
            override_search_space = True
            if self._is_param_distributions_all_tune_domains():
                run_args["config"].update(self.param_distributions)
                override_search_space = False

            search_kwargs = self.search_kwargs.copy()
            search_kwargs.update(metric=self._metric_name, mode="max")

            if self.search_optimization == "bayesian":
                from ray.tune.suggest.skopt import SkOptSearch
                if override_search_space:
                    search_space = self.param_distributions
                search_algo = SkOptSearch(space=search_space, **search_kwargs)
                run_args["search_alg"] = search_algo

            elif self.search_optimization == "bohb":
                from ray.tune.suggest.bohb import TuneBOHB
                if override_search_space:
                    search_space = self._get_bohb_config_space()
                if self.seed:
                    warnings.warn("'seed' is not implemented for BOHB.")
                search_algo = TuneBOHB(space=search_space, **search_kwargs)
                # search_algo = TuneBOHB(
                #     space=search_space, seed=self.seed, **search_kwargs)
                run_args["search_alg"] = search_algo

            elif self.search_optimization == "optuna":
                from ray.tune.suggest.optuna import OptunaSearch
                from optuna.samplers import TPESampler
                sampler = TPESampler(seed=self.seed)
                if override_search_space:
                    search_space = self._get_optuna_params()
                search_algo = OptunaSearch(space=search_space,
                                           sampler=sampler,
                                           **search_kwargs)
                run_args["search_alg"] = search_algo

            elif self.search_optimization == "hyperopt":
                from ray.tune.suggest.hyperopt import HyperOptSearch
                if override_search_space:
                    search_space = self._get_hyperopt_params()
                search_algo = HyperOptSearch(space=search_space,
                                             random_state_seed=self.seed,
                                             **search_kwargs)
                run_args["search_alg"] = search_algo

            else:
                # This should not happen as we validate the input before
                # this method. Still, just to be sure, raise an error here.
                raise ValueError(
                    f"Invalid search optimizer: {self.search_optimization}")

        if isinstance(self.n_jobs, int) and self.n_jobs > 0 \
           and not self.search_optimization == "random":
            search_algo = ConcurrencyLimiter(search_algo,
                                             max_concurrent=self.n_jobs)
            run_args["search_alg"] = search_algo

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    message="fail_fast='raise' "
                                    "detected.")
            analysis = tune.run(trainable, **run_args)
        return analysis
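
This wrapper appears to come from tune-sklearn's `TuneSearchCV`; a hedged sketch of how the Optuna branch above would be reached from user code, under that assumption:

from ray import tune
from sklearn.linear_model import SGDClassifier
from tune_sklearn import TuneSearchCV

# Sketch assuming _tune_run above is TuneSearchCV internals.
tune_search = TuneSearchCV(
    SGDClassifier(),
    param_distributions={"alpha": tune.loguniform(1e-4, 1e-1)},
    search_optimization="optuna",
    n_trials=10,
)
# tune_search.fit(X_train, y_train) would invoke _tune_run internally.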
Example #22
    def testOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch
        from optuna.trial import TrialState

        searcher = OptunaSearch(
            space=self.space,
            metric="metric",
            mode="max",
            points_to_evaluate=[{self.param_name: self.valid_value}],
            evaluated_rewards=[1.0],
        )

        get_len = lambda s: len(s._ot_study.trials)  # noqa E731

        self.assertGreater(get_len(searcher), 0)

        searcher = OptunaSearch(
            space=self.space,
            metric="metric",
            mode="max",
        )

        point = {
            self.param_name: self.valid_value,
        }

        self.assertEqual(get_len(searcher), 0)

        searcher.add_evaluated_point(point, 1.0, intermediate_values=[0.8, 0.9])
        self.assertEqual(get_len(searcher), 1)
        self.assertTrue(searcher._ot_study.trials[-1].state == TrialState.COMPLETE)

        searcher.add_evaluated_point(
            point, 1.0, intermediate_values=[0.8, 0.9], error=True
        )
        self.assertEqual(get_len(searcher), 2)
        self.assertTrue(searcher._ot_study.trials[-1].state == TrialState.FAIL)

        searcher.add_evaluated_point(
            point, 1.0, intermediate_values=[0.8, 0.9], pruned=True
        )
        self.assertEqual(get_len(searcher), 3)
        self.assertTrue(searcher._ot_study.trials[-1].state == TrialState.PRUNED)

        searcher.suggest("1")

        searcher = OptunaSearch(
            space=self.space,
            metric="metric",
            mode="max",
        )

        self.run_add_evaluated_trials(searcher, get_len, get_len)

        def dbr_space(trial):
            return {self.param_name: trial.suggest_float(self.param_name, 0.0, 5.0)}

        dbr_searcher = OptunaSearch(
            space=dbr_space,
            metric="metric",
            mode="max",
        )
        with self.assertRaises(TypeError):
            dbr_searcher.add_evaluated_point(point, 1.0)
Example #23
        # Feed the score back to Tune.
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(configure_logging=False)

    algo = OptunaSearch()
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=10 if args.smoke_test else 100,
        config={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            # This is an ignored parameter.
            "activation": tune.choice(["relu", "tanh"])
Example #24
def _test_xgboost(method="BlendSearch"):
    try:
        import ray
    except ImportError:
        return
    if method == "BlendSearch":
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        "max_depth": tune.randint(1, 9)
        if method in ["BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
    }
    max_iter = 10
    for num_samples in [128]:
        time_budget_s = 60
        for n_cpu in [2]:
            start_time = time.time()
            # ray.init(address='auto')
            if method == "BlendSearch":
                analysis = tune.run(
                    train_breast_cancer,
                    config=search_space,
                    low_cost_partial_config={
                        "max_depth": 1,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    scheduler="asha",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True,
                )
            else:
                if "ASHA" == method:
                    algo = None
                elif "BOHB" == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB

                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif "Optuna" == method:
                    from ray.tune.suggest.optuna import OptunaSearch

                    algo = OptunaSearch()
                elif "CFO" == method:
                    from flaml import CFO

                    algo = CFO(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif "CFOCat" == method:
                    from flaml.searcher.cfo_cat import CFOCat

                    algo = CFOCat(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif "Dragonfly" == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch

                    algo = DragonflySearch()
                elif "SkOpt" == method:
                    from ray.tune.suggest.skopt import SkOptSearch

                    algo = SkOptSearch()
                elif "Nevergrad" == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng

                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif "ZOOpt" == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch

                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif "Ax" == method:
                    from ray.tune.suggest.ax import AxSearch

                    algo = AxSearch()
                elif "HyperOpt" == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch

                    algo = HyperOptSearch()
                if method != "BOHB":
                    from ray.tune.schedulers import ASHAScheduler

                    scheduler = ASHAScheduler(max_t=max_iter, grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler,
                    search_alg=algo,
                )
            # # Load the best model checkpoint
            # import os
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
Example no. 25
    def _tune_run(self, config, resources_per_trial):
        """Wrapper to call ``tune.run``. Multiple estimators are generated when
        early stopping is possible, whereas a single estimator is
        generated when early stopping is not possible.

        Args:
            config (dict): Configurations such as hyperparameters to run
            ``tune.run`` on.
            resources_per_trial (dict): Resources to use per trial within Ray.
                Accepted keys are `cpu`, `gpu` and custom resources, and values
                are integers specifying the number of each resource to use.

        Returns:
            analysis (`ExperimentAnalysis`): Object returned by
                `tune.run`.

        """
        stop_condition = {"training_iteration": self.max_iters}
        if self.early_stopping is not None:
            config["estimator_list"] = [
                clone(self.estimator) for _ in range(self.n_splits)
            ]
            if hasattr(self.early_stopping, "_max_t_attr"):
                # We want to delegate stopping to schedulers that support it,
                # but the run should still terminate eventually, just in case.
                # The solution is to make the stop condition very large.
                stop_condition = {"training_iteration": self.max_iters * 10}
        else:
            config["estimator_list"] = [self.estimator]

        if self.search_optimization == "random":
            run_args = dict(scheduler=self.early_stopping,
                            reuse_actors=True,
                            verbose=self.verbose,
                            stop=stop_condition,
                            num_samples=self.num_samples,
                            config=config,
                            fail_fast=True,
                            resources_per_trial=resources_per_trial,
                            local_dir=os.path.expanduser(self.local_dir))

            if isinstance(self.param_distributions, list):
                run_args["search_alg"] = RandomListSearcher(
                    self.param_distributions)

            analysis = tune.run(_Trainable, **run_args)
            return analysis

        elif self.search_optimization == "bayesian":
            from skopt import Optimizer
            from ray.tune.suggest.skopt import SkOptSearch
            hyperparameter_names, spaces = self._get_skopt_params()
            search_algo = SkOptSearch(Optimizer(spaces),
                                      hyperparameter_names,
                                      metric="average_test_score",
                                      **self.search_kwargs)

        elif self.search_optimization == "bohb":
            from ray.tune.suggest.bohb import TuneBOHB
            config_space = self._get_bohb_config_space()
            search_algo = TuneBOHB(config_space,
                                   metric="average_test_score",
                                   mode="max",
                                   **self.search_kwargs)

        elif self.search_optimization == "optuna":
            from ray.tune.suggest.optuna import OptunaSearch
            config_space = self._get_optuna_params()
            search_algo = OptunaSearch(config_space,
                                       metric="average_test_score",
                                       mode="max",
                                       **self.search_kwargs)

        elif self.search_optimization == "hyperopt":
            from ray.tune.suggest.hyperopt import HyperOptSearch
            config_space = self._get_hyperopt_params()
            search_algo = HyperOptSearch(config_space,
                                         metric="average_test_score",
                                         mode="max",
                                         **self.search_kwargs)

        if isinstance(self.n_jobs, int) and self.n_jobs > 0:
            search_algo = ConcurrencyLimiter(search_algo,
                                             max_concurrent=self.n_jobs)

        analysis = tune.run(_Trainable,
                            search_alg=search_algo,
                            scheduler=self.early_stopping,
                            reuse_actors=True,
                            verbose=self.verbose,
                            stop=stop_condition,
                            num_samples=self.num_samples,
                            config=config,
                            fail_fast=True,
                            resources_per_trial=resources_per_trial,
                            local_dir=os.path.expanduser(self.local_dir))

        return analysis
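
Stripped of the class machinery, the non-random branches above all reduce to the same pattern: construct a searcher, optionally cap its parallelism, and hand it to `tune.run` together with the stop condition. A standalone sketch under assumed names (`my_trainable` stands in for `_Trainable`; the search space is invented):

from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.optuna import OptunaSearch


def my_trainable(config):  # stand-in for _Trainable
    tune.report(average_test_score=1.0 - config["alpha"])


search_algo = OptunaSearch(metric="average_test_score", mode="max")
search_algo = ConcurrencyLimiter(search_algo, max_concurrent=4)  # cf. n_jobs

analysis = tune.run(
    my_trainable,
    search_alg=search_algo,
    stop={"training_iteration": 10},  # cf. stop_condition
    num_samples=20,
    config={"alpha": tune.loguniform(1e-4, 1e-1)})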
Example no. 26
        # Feed the score back to Tune.
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init(configure_logging=False)

    space = [
        param.suggest_uniform("width", 0, 20),
        param.suggest_uniform("height", -100, 100),
        # This is an ignored parameter.
        param.suggest_categorical("activation", ["relu", "tanh"])
    ]

    config = {
        "num_samples": 10 if args.smoke_test else 100,
        "config": {
            "steps": 100,
        }
    }
    algo = OptunaSearch(space, metric="mean_loss", mode="min")
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    tune.run(easy_objective, search_alg=algo, scheduler=scheduler, **config)
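
This example opens mid-function, so the objective and its imports sit outside the excerpt. A hypothetical reconstruction of the missing pieces (the score formula is invented for illustration; only the reporting lines are taken from the excerpt above):

import time

import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.optuna import OptunaSearch, param


def easy_objective(config):
    width, height = config["width"], config["height"]
    for step in range(config["steps"]):
        # Toy score: improves with the step and depends on both parameters.
        intermediate_score = (0.1 + width * step / 100) ** (-1) + height * 0.1
        # Feed the score back to Tune.
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)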
Example no. 27
    def testConvertOptuna(self):
        from ray.tune.suggest.optuna import OptunaSearch, param
        from optuna.samplers import RandomSampler

        # Grid search not supported, should raise ValueError
        with self.assertRaises(ValueError):
            OptunaSearch.convert_search_space(
                {"grid": tune.grid_search([0, 1])})

        config = {
            "a": tune.sample.Categorical([2, 3, 4]).uniform(),
            "b": {
                "x": tune.sample.Integer(0, 5).quantized(2),
                "y": 4,
                "z": tune.sample.Float(1e-4, 1e-2).loguniform()
            }
        }
        converted_config = OptunaSearch.convert_search_space(config)
        optuna_config = [
            param.suggest_categorical("a", [2, 3, 4]),
            param.suggest_int("b/x", 0, 5, 2),
            param.suggest_loguniform("b/z", 1e-4, 1e-2)
        ]

        sampler1 = RandomSampler(seed=1234)
        searcher1 = OptunaSearch(space=converted_config,
                                 sampler=sampler1,
                                 metric="a",
                                 mode="max")

        sampler2 = RandomSampler(seed=1234)
        searcher2 = OptunaSearch(space=optuna_config,
                                 sampler=sampler2,
                                 metric="a",
                                 mode="max")

        config1 = searcher1.suggest("0")
        config2 = searcher2.suggest("0")

        self.assertEqual(config1, config2)
        self.assertIn(config1["a"], [2, 3, 4])
        self.assertIn(config1["b"]["x"], list(range(5)))
        self.assertLess(1e-4, config1["b"]["z"])
        self.assertLess(config1["b"]["z"], 1e-2)

        searcher = OptunaSearch(metric="a", mode="max")
        analysis = tune.run(_mock_objective,
                            config=config,
                            search_alg=searcher,
                            num_samples=1)
        trial = analysis.trials[0]
        assert trial.config["a"] in [2, 3, 4]

        mixed_config = {
            "a": tune.uniform(5, 6),
            # List- and dict-style space definitions cannot be mixed,
            # so both entries here use the dict API.
            "b": tune.uniform(8, 9)
        }
        searcher = OptunaSearch(space=mixed_config, metric="a", mode="max")
        config = searcher.suggest("0")
        self.assertTrue(5 <= config["a"] <= 6)
        self.assertTrue(8 <= config["b"] <= 9)
Example no. 28
    def __init__(self,
                 metric: Optional[str] = None,
                 mode: Optional[str] = None,
                 space: Optional[dict] = None,
                 points_to_evaluate: Optional[List[Dict]] = None,
                 cat_hp_cost: Optional[dict] = None,
                 prune_attr: Optional[str] = None,
                 min_resource: Optional[float] = None,
                 max_resource: Optional[float] = None,
                 reduction_factor: Optional[float] = None,
                 resources_per_trial: Optional[dict] = None,
                 global_search_alg: Optional[Searcher] = None,
                 mem_size=None):
        '''Constructor

        Args:
            metric: A string of the metric name to optimize for.
            mode: A string in ['min', 'max'] to specify the objective as
                minimization or maximization.
            space: A dictionary to specify the search space.
            points_to_evaluate: Initial parameter suggestions to be run first. 
                The first element needs to be a dictionary from a subset of 
                controlled dimensions to the initial low-cost values. 
                e.g.,
                
                .. code-block:: python
                
                    [{'epochs': 1}]

            cat_hp_cost: A dictionary from a subset of categorical dimensions
                to the relative cost of each choice. 
                e.g.,
                
                .. code-block:: python

                    {'tree_method': [1, 1, 2]}
                
                i.e., the relative cost of the 
                three choices of 'tree_method' is 1, 1 and 2 respectively.
            prune_attr: A string of the attribute used for pruning.
                Not necessarily in space.
                When prune_attr is in space, it is a hyperparameter, e.g.,
                'n_iters', and the best value is unknown.
                When prune_attr is not in space, it is a resource dimension,
                e.g., 'sample_size', and the peak performance is assumed
                to be at the max_resource.
            min_resource: A float of the minimal resource to use for the 
                prune_attr; only valid if prune_attr is not in space.
            max_resource: A float of the maximal resource to use for the 
                prune_attr; only valid if prune_attr is not in space.
            reduction_factor: A float of the reduction factor used for
                incremental pruning.
            resources_per_trial: A dictionary of the resources permitted per
                trial, such as 'mem'.
            global_search_alg: A Searcher instance as the global search
                instance. If omitted, Optuna is used. The following algos have
                known issues when used as global_search_alg:
                - HyperOptSearch raises exception sometimes
                - TuneBOHB has its own scheduler
            mem_size: A function to estimate the memory size for a given config.
        '''
        self._metric, self._mode = metric, mode
        init_config = points_to_evaluate[0] if points_to_evaluate else {}
        self._points_to_evaluate = points_to_evaluate
        if global_search_alg is not None:
            self._gs = global_search_alg
        elif getattr(self, '__name__', None) != 'CFO':
            self._gs = GlobalSearch(space=space, metric=metric, mode=mode)
        else:
            self._gs = None
        self._ls = LocalSearch(init_config, metric, mode, cat_hp_cost, space,
                               prune_attr, min_resource, max_resource,
                               reduction_factor)
        self._resources_per_trial = resources_per_trial
        self._mem_size = mem_size
        self._mem_threshold = resources_per_trial.get(
            'mem') if resources_per_trial else None
        self._init_search()
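
Assuming this constructor belongs to flaml's BlendSearch (the 'CFO' check above suggests CFO subclasses it), a hedged usage sketch tying the documented arguments together; all names and values below are invented for illustration:

from flaml import BlendSearch, tune

searcher = BlendSearch(
    metric="loss",
    mode="min",
    space={
        "x": tune.uniform(0, 10),
        "booster": tune.choice(["gbtree", "gblinear"]),
    },
    points_to_evaluate=[{"x": 1.0}],  # low-cost initial point
    cat_hp_cost={"booster": [1, 2]},  # second choice assumed twice as costly
    prune_attr="sample_size",         # resource dimension, not in space
    min_resource=1000,
    max_resource=100000,
    reduction_factor=4,
)

Because prune_attr here is not in space, it is treated as a resource dimension whose peak performance is assumed at max_resource, matching the docstring above.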
Example no. 29
def _test_roberta(method='BlendSearch'):

    max_num_epoch = 100
    num_samples = -1
    time_budget_s = 3600

    search_space = {
        # You can mix constants with search space objects.
        "num_train_epochs": flaml.tune.loguniform(1, max_num_epoch),
        "learning_rate": flaml.tune.loguniform(1e-5, 3e-5),
        "weight_decay": flaml.tune.uniform(0, 0.3),
        "per_device_train_batch_size": flaml.tune.choice([16, 32, 64, 128]),
        "seed": flaml.tune.choice([12, 22, 33, 42]),
    }

    start_time = time.time()
    ray.init(num_cpus=4, num_gpus=4)
    if 'ASHA' == method:
        algo = None
    elif 'BOHB' == method:
        from ray.tune.schedulers import HyperBandForBOHB
        from ray.tune.suggest.bohb import TuneBOHB
        algo = TuneBOHB(max_concurrent=4)
        scheduler = HyperBandForBOHB(max_t=max_num_epoch)
    elif 'Optuna' == method:
        from ray.tune.suggest.optuna import OptunaSearch
        algo = OptunaSearch()
    elif 'CFO' == method:
        from flaml import CFO
        algo = CFO(points_to_evaluate=[{
            "num_train_epochs": 1,
            "per_device_train_batch_size": 128,
        }])
    elif 'BlendSearch' == method:
        from flaml import BlendSearch
        algo = BlendSearch(
            points_to_evaluate=[{
                "num_train_epochs": 1,
                "per_device_train_batch_size": 128,
            }])
    elif 'Dragonfly' == method:
        from ray.tune.suggest.dragonfly import DragonflySearch
        algo = DragonflySearch()
    elif 'SkOpt' == method:
        from ray.tune.suggest.skopt import SkOptSearch
        algo = SkOptSearch()
    elif 'Nevergrad' == method:
        from ray.tune.suggest.nevergrad import NevergradSearch
        import nevergrad as ng
        algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
    elif 'ZOOpt' == method:
        from ray.tune.suggest.zoopt import ZOOptSearch
        algo = ZOOptSearch(budget=num_samples)
    elif 'Ax' == method:
        from ray.tune.suggest.ax import AxSearch
        algo = AxSearch(max_concurrent=3)
    elif 'HyperOpt' == method:
        from ray.tune.suggest.hyperopt import HyperOptSearch
        algo = HyperOptSearch()
    if method != 'BOHB':
        from ray.tune.schedulers import ASHAScheduler
        scheduler = ASHAScheduler(max_t=max_num_epoch, grace_period=1)
    scheduler = None  # NOTE: overrides the ASHA scheduler configured above; remove this line to enable it
    analysis = ray.tune.run(train_roberta,
                            metric=HP_METRIC,
                            mode=MODE,
                            resources_per_trial={
                                "gpu": 4,
                                "cpu": 4
                            },
                            config=search_space,
                            local_dir='logs/',
                            num_samples=num_samples,
                            time_budget_s=time_budget_s,
                            keep_checkpoints_num=1,
                            checkpoint_score_attr=HP_METRIC,
                            scheduler=scheduler,
                            search_alg=algo)

    ray.shutdown()

    best_trial = analysis.get_best_trial(HP_METRIC, MODE, "all")
    metric = best_trial.metric_analysis[HP_METRIC][MODE]

    logger.info(f"method={method}")
    logger.info(f"n_trials={len(analysis.trials)}")
    logger.info(f"time={time.time()-start_time}")
    logger.info(f"Best model eval {HP_METRIC}: {metric:.4f}")
    logger.info(f"Best model parameters: {best_trial.config}")
Example no. 30
def main(args):
    model = args.model
    config_file = args.config_file
    log_dir = Path(f"./logs/{model.value}")
    log_dir.mkdir(exist_ok=True, parents=True)
    data_root = Path(f"./data/processed/{model.value}/IFCNetCore").absolute()

    with open("IFCNetCore_Classes.json", "r") as f:
        class_names = json.load(f)

    if model == Model.MVCNN:
        config = {
            "batch_size": 64,
            "learning_rate": tune.loguniform(1e-5, 1e-2),
            "weight_decay": tune.loguniform(1e-4, 1e-2),
            "cnn_name": tune.choice(["vgg11", "resnet34", "resnet50"]),
            "pretrained": True,
            "epochs": 30,
            "num_views": 12
        }

        train_func = partial(train_mvcnn,
                             data_root=data_root,
                             class_names=class_names,
                             eval_on_test=config_file is not None)
    elif model == Model.DGCNN:
        config = {
            "batch_size": 8,
            "learning_rate": tune.loguniform(1e-4, 1e-2),
            "weight_decay": tune.loguniform(1e-4, 1e-2),
            "k": tune.choice([20, 30, 40]),
            "embedding_dim": tune.choice([516, 1024, 2048]),
            "dropout": 0.5,
            "epochs": 250
        }

        train_func = partial(train_dgcnn,
                             data_root=data_root,
                             class_names=class_names,
                             eval_on_test=config_file is not None)
    elif model == Model.MeshNet:
        config = {
            "batch_size": tune.choice([8, 16, 32]),
            "learning_rate": tune.loguniform(1e-4, 1e-2),
            "weight_decay": tune.loguniform(1e-4, 1e-2),
            "num_kernel": tune.choice([32, 64]),
            "sigma": tune.choice([0.1, 0.2, 0.3]),
            "aggregation_method": tune.choice(["Concat", "Max", "Average"]),
            "epochs": 150
        }

        train_func = partial(train_meshnet,
                             data_root=data_root,
                             class_names=class_names,
                             eval_on_test=config_file is not None)

    if config_file:
        with config_file.open("r") as f:
            config = json.load(f)

    scheduler = ASHAScheduler(max_t=250, grace_period=10)

    reporter = CLIReporter(metric_columns=[
        "train_balanced_accuracy_score", "val_balanced_accuracy_score",
        "training_iteration"
    ])

    result = tune.run(
        train_func,
        resources_per_trial={
            "cpu": 8,
            "gpu": 1
        },
        local_dir=log_dir,
        config=config,
        mode="max",
        metric="val_balanced_accuracy_score",
        search_alg=OptunaSearch() if config_file is None else None,
        num_samples=20 if config_file is None else 1,
        scheduler=scheduler if config_file is None else None,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("val_balanced_accuracy_score", "max",
                                       "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation accuracy (balanced): {}".format(
        best_trial.last_result["val_balanced_accuracy_score"]))
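
`main` expects a parsed `args` object with `model` and `config_file` attributes, and `Model` is presumably a string-valued Enum covering the three architectures handled above. A hypothetical entry point consistent with that signature (it relies on `Model` and `main` being defined in this module):

if __name__ == "__main__":
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser()
    # Model is assumed to be a string-valued Enum defined alongside main().
    parser.add_argument("--model", type=Model, choices=list(Model),
                        required=True)
    parser.add_argument("--config_file", type=Path, default=None)
    main(parser.parse_args())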