Example #1
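    # Unit test: the rule compares a running trial only against completed
    # trials, so t2 continues until t1 completes with a better result.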
    def testMedianStoppingOnCompleteOnly(self):
        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
        t1, t2 = self.basicSetup(rule)
        self.assertEqual(
            rule.on_trial_result(None, t2, result(100, 0)),
            TrialScheduler.CONTINUE)
        rule.on_trial_complete(None, t1, result(10, 1000))
        self.assertEqual(
            rule.on_trial_result(None, t2, result(101, 0)),
            TrialScheduler.STOP)
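    # Note: these test snippets rely on two helpers from their test module,
    # result() and basicSetup(). A minimal sketch of what they might look
    # like (an assumption for readability, not the verbatim helpers):
    #
    #     def result(t, rew):
    #         return dict(time_total_s=t, episode_reward_mean=rew)
    #
    #     def basicSetup(self, rule):
    #         t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
    #         t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
    #         for i in range(10):
    #             rule.on_trial_result(None, t1, result(i, i * 100))
    #         for i in range(5):
    #             rule.on_trial_result(None, t2, result(i, 450))
    #         return t1, t2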
Example #2
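    # Unit test: the median of the completed trials' rewards (1000) exceeds
    # t3's reward of 260, so t3 is stopped.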
    def testMedianStoppingUsesMedian(self):
        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
        t1, t2 = self.basicSetup(rule)
        rule.on_trial_complete(None, t1, result(10, 1000))
        rule.on_trial_complete(None, t2, result(10, 1000))
        t3 = Trial("PPO")
        self.assertEqual(
            rule.on_trial_result(None, t3, result(1, 260)),
            TrialScheduler.CONTINUE)
        self.assertEqual(
            rule.on_trial_result(None, t3, result(2, 260)),
            TrialScheduler.STOP)
Example #3
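    # Unit test: time_attr/reward_attr (older Ray Tune argument names) point
    # the rule at alternate result fields such as neg_mean_loss.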
    def testAlternateMetrics(self):
        def result2(t, rew):
            return dict(training_iteration=t, neg_mean_loss=rew)

        rule = MedianStoppingRule(
            grace_period=0,
            min_samples_required=1,
            time_attr='training_iteration',
            reward_attr='neg_mean_loss')
        t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
        t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
        for i in range(10):
            self.assertEqual(
                rule.on_trial_result(None, t1, result2(i, i * 100)),
                TrialScheduler.CONTINUE)
        for i in range(5):
            self.assertEqual(
                rule.on_trial_result(None, t2, result2(i, 450)),
                TrialScheduler.CONTINUE)
        rule.on_trial_complete(None, t1, result2(10, 1000))
        self.assertEqual(
            rule.on_trial_result(None, t2, result2(5, 450)),
            TrialScheduler.CONTINUE)
        self.assertEqual(
            rule.on_trial_result(None, t2, result2(6, 0)),
            TrialScheduler.CONTINUE)
Example #4
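    # Unit test: t2's flat performance is tolerated at first, but it is
    # stopped once it reaches t=10 still below the completed trial's reward.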
    def testMedianStoppingConstantPerf(self):
        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
        t1, t2 = self.basicSetup(rule)
        rule.on_trial_complete(None, t1, result(10, 1000))
        self.assertEqual(rule.on_trial_result(None, t2, result(5, 450)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(None, t2, result(6, 0)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(None, t2, result(10, 450)),
                         TrialScheduler.STOP)
Example #5
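    # Unit test: min_samples_required=2 defers any stopping decision until at
    # least two trials have completed.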
    def testMedianStoppingMinSamples(self):
        rule = MedianStoppingRule(grace_period=0, min_samples_required=2)
        t1, t2 = self.basicSetup(rule)
        runner = mock_trial_runner()
        rule.on_trial_complete(runner, t1, result(10, 1000))
        t3 = Trial("PPO")
        # Insufficient samples to evaluate t3
        self.assertEqual(
            rule.on_trial_result(runner, t3, result(5, 10)),
            TrialScheduler.CONTINUE)
        rule.on_trial_complete(runner, t2, result(5, 1000))
        # Sufficient samples to evaluate t3
        self.assertEqual(
            rule.on_trial_result(runner, t3, result(5, 10)),
            TrialScheduler.STOP)
Example #6
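    # Unit test: grace_period=2.5 protects t3's first results; stopping only
    # applies once its time attribute exceeds the grace period.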
    def testMedianStoppingGracePeriod(self):
        rule = MedianStoppingRule(grace_period=2.5, min_samples_required=1)
        t1, t2 = self.basicSetup(rule)
        rule.on_trial_complete(None, t1, result(10, 1000))
        rule.on_trial_complete(None, t2, result(10, 1000))
        t3 = Trial("PPO")
        self.assertEqual(rule.on_trial_result(None, t3, result(1, 10)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(None, t3, result(2, 10)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(None, t3, result(3, 10)),
                         TrialScheduler.STOP)
Example #7
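    # A variant of Example #1 that passes a mock trial runner instead of None.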
    def testMedianStoppingOnCompleteOnly(self):
        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
        t1, t2 = self.basicSetup(rule)
        runner = mock_trial_runner()
        self.assertEqual(rule.on_trial_result(runner, t2, result(100, 0)),
                         TrialScheduler.CONTINUE)
        rule.on_trial_complete(runner, t1, result(101, 1000))
        self.assertEqual(rule.on_trial_result(runner, t2, result(101, 0)),
                         TrialScheduler.STOP)
Example #8
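    # Grid-search test (apparently adapted from scikit-learn's test suite):
    # an estimator without a score method must be given a scoring argument.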
    def test_grid_search_no_score(self):
        # Test grid-search on classifier that has no score function.
        clf = LinearSVC(random_state=0)
        X, y = make_blobs(random_state=0, centers=2)
        Cs = [0.1, 1, 10]
        clf_no_score = LinearSVCNoScore(random_state=0)

        # XXX: It seems there's some global shared state in LinearSVC - fitting
        # multiple `SVC` instances in parallel using threads sometimes results in
        # wrong results. This only happens with threads, not processes/sync.
        # For now, we'll fit using the sync scheduler.
        grid_search = tcv.TuneGridSearchCV(clf, {"C": Cs},
                                           scoring="accuracy",
                                           scheduler=MedianStoppingRule())
        grid_search.fit(X, y)

        grid_search_no_score = tcv.TuneGridSearchCV(
            clf_no_score, {"C": Cs},
            scoring="accuracy",
            scheduler=MedianStoppingRule())
        # smoketest grid search
        grid_search_no_score.fit(X, y)

        # check that best params are equal
        self.assertEqual(grid_search_no_score.best_params_,
                         grid_search.best_params_)
        # check that we can call score and that it gives the correct result
        self.assertEqual(grid_search.score(X, y),
                         grid_search_no_score.score(X, y))

        # giving no scoring function raises an error
        grid_search_no_score = tcv.TuneGridSearchCV(clf_no_score, {"C": Cs})
        with self.assertRaises(TypeError) as exc:
            grid_search_no_score.fit([[1]])

        self.assertTrue("no scoring" in str(exc.exception))
Example #9
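    # A variant of Example #2 that passes a mock trial runner instead of None.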
    def testMedianStoppingUsesMedian(self):
        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
        t1, t2 = self.basicSetup(rule)
        runner = mock_trial_runner()
        rule.on_trial_complete(runner, t1, result(10, 1000))
        rule.on_trial_complete(runner, t2, result(10, 1000))
        t3 = Trial("PPO")
        self.assertEqual(rule.on_trial_result(runner, t3, result(1, 260)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(runner, t3, result(2, 260)),
                         TrialScheduler.STOP)
Example #10
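# Wrapper that runs a single Tune experiment, defaulting to a
# MedianStoppingRule scheduler; most settings come from the config dict.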
def run_experiment(config, trainable):
    """Run a single tune experiment in parallel as a "remote" function.

    :param config: The experiment configuration
    :type config: dict
    :param trainable: tune.Trainable class with your experiment
    :type trainable: :class:`ray.tune.Trainable`
    """
    # Stopping criteria; defaults to the total number of iterations/epochs
    stop_criteria = {"training_iteration": config.get("iterations")}
    stop_criteria.update(config.get("stop", {}))

    tune.run(
        trainable,
        name=config["name"],
        local_dir=config["path"],
        stop=stop_criteria,
        config=config,
        num_samples=config.get("repetitions", 1),
        search_alg=config.get("search_alg", None),
        scheduler=config.get(
            "scheduler",
            MedianStoppingRule(
                time_attr="training_iteration",
                reward_attr="noise_accuracy",
                min_samples_required=3,
                grace_period=20,
                verbose=False,
            ),
        ),
        trial_name_creator=tune.function(trial_name_string),
        trial_executor=config.get("trial_executor", None),
        checkpoint_at_end=config.get("checkpoint_at_end", False),
        checkpoint_freq=config.get("checkpoint_freq", 0),
        upload_dir=config.get("upload_dir", None),
        sync_function=config.get("sync_function", None),
        resume=config.get("resume", False),
        reuse_actors=config.get("reuse_actors", False),
        verbose=config.get("verbose", 0),
        resources_per_trial={
            # With lots of trials, optimal seems to be 0.5, or 2 trials per GPU
            # If num trials <= num GPUs, 1.0 is better
            "cpu": 1,
            "gpu": config.get("gpu_percentage", 0.5),
        },
    )
Example #11
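    # Unit test: with hard_stop=False an underperforming trial is paused
    # (TrialScheduler.PAUSE) rather than stopped.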
    def testMedianStoppingSoftStop(self):
        rule = MedianStoppingRule(grace_period=0,
                                  min_samples_required=1,
                                  hard_stop=False)
        t1, t2 = self.basicSetup(rule)
        rule.on_trial_complete(None, t1, result(10, 1000))
        rule.on_trial_complete(None, t2, result(10, 1000))
        t3 = Trial("PPO")
        self.assertEqual(rule.on_trial_result(None, t3, result(1, 260)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(None, t3, result(2, 260)),
                         TrialScheduler.PAUSE)
Example #12
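# HyperOpt search plus median stopping; Trainable, reporter, and the
# *_FOLDER / *_ITERATION constants are defined elsewhere in the module.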
def main():
    space = {
        "batch_size": hp.choice("batch_size", [32, 64, 128]),
        "learning_rate": hp.choice("learning_rate", [0.01, 0.001, 0.0005]),
        "target_update": hp.choice("target_update", [4, 10, 100]),
    }

    hyperopt_search = HyperOptSearch(space, metric="mean_reward", mode="max")
    analysis = tune.run(Trainable,
                        stop={'training_iteration': MAX_TRAINING_ITERATION},
                        num_samples=10,
                        scheduler=MedianStoppingRule(metric="mean_reward",
                                                     mode="max"),
                        search_alg=hyperopt_search,
                        local_dir=TUNE_RESULTS_FOLDER,
                        progress_reporter=reporter,
                        checkpoint_freq=1,
                        verbose=1)
Example #13
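# Fits an SVM pipeline over a small grid with TuneGridSearchCV, using median
# stopping to prune poor configurations early.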
def train():
    # NOTE: Ray can be used through joblib, but that feature is not yet in the pip-installed release (it is ~2 weeks old)
    # see here: https://ray.readthedocs.io/en/latest/joblib.html
    # and pip install the nightly wheel from here: https://ray.readthedocs.io/en/latest/installation.html

    n_iter = 10
    cv = 5

    label_drawing_tuples = load_drawings(storage_location='normalized')
    if not label_drawing_tuples:
        raise ValueError("No training drawings found in db")
    labels, drawings = zip(*label_drawing_tuples)

    drawings = np.asarray(drawings)
    labels = np.asarray(labels)

    print(f"drawings.shape = {drawings.shape}")
    print(f"labels.shape = {labels.shape}")

    pipeline_nt = namedtuple('pipeline', ['pipeline', 'parameter_space'])
    pipelines = {
        'svm': pipeline_nt(svm_pipeline(), svm_parameter_space()),
    }

    for name, pipeline_tuple in pipelines.items():
        #     rs = RandomizedSearchCV(case.pipeline, case.parameter_space, n_iter=n_iter, n_jobs=-1, refit=False, cv=cv)
        print(f"running {name}")
        temp_params = {
            'svm__gamma': [0.0001, 0.001],
            'svm__C': [1, 10],
        }
        tune_search = TuneGridSearchCV(pipeline_tuple.pipeline,
                                       temp_params,
                                       scheduler=MedianStoppingRule())
        stuff = tune_search.fit(drawings, labels)

        pred = tune_search.predict(drawings)

        correct = 0
        for i in range(len(pred)):
            if pred[i] == labels[i]:
                correct += 1
        print(correct / len(pred))
        print(tune_search.cv_results_)
Example #14
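    # Parameterized variant of Example #3 that exercises the newer metric/mode
    # arguments in place of reward_attr.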
    def _test_metrics(self, result_func, metric, mode):
        rule = MedianStoppingRule(grace_period=0,
                                  min_samples_required=1,
                                  time_attr="training_iteration",
                                  metric=metric,
                                  mode=mode)
        t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
        t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
        for i in range(10):
            self.assertEqual(
                rule.on_trial_result(None, t1, result_func(i, i * 100)),
                TrialScheduler.CONTINUE)
        for i in range(5):
            self.assertEqual(
                rule.on_trial_result(None, t2, result_func(i, 450)),
                TrialScheduler.CONTINUE)
        rule.on_trial_complete(None, t1, result_func(10, 1000))
        self.assertEqual(rule.on_trial_result(None, t2, result_func(5, 450)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(None, t2, result_func(6, 0)),
                         TrialScheduler.CONTINUE)
Example #15
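    # Another copy of the testAlternateMetrics case from Example #3, with
    # slightly different formatting.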
    def testAlternateMetrics(self):
        def result2(t, rew):
            return dict(training_iteration=t, neg_mean_loss=rew)

        rule = MedianStoppingRule(grace_period=0,
                                  min_samples_required=1,
                                  time_attr='training_iteration',
                                  reward_attr='neg_mean_loss')
        t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
        t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
        for i in range(10):
            self.assertEqual(
                rule.on_trial_result(None, t1, result2(i, i * 100)),
                TrialScheduler.CONTINUE)
        for i in range(5):
            self.assertEqual(rule.on_trial_result(None, t2, result2(i, 450)),
                             TrialScheduler.CONTINUE)
        rule.on_trial_complete(None, t1, result2(10, 1000))
        self.assertEqual(rule.on_trial_result(None, t2, result2(5, 450)),
                         TrialScheduler.CONTINUE)
        self.assertEqual(rule.on_trial_result(None, t2, result2(6, 0)),
                         TrialScheduler.CONTINUE)
Example #16
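    # tune-sklearn test checking that TuneSearchCV writes trial results under
    # the local_dir it is given.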
    def test_local_dir(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        clf = SGDClassifier()
        parameter_grid = {
            "alpha": Real(1e-4, 1e-1, 1),
            "epsilon": Real(0.01, 0.1)
        }

        scheduler = MedianStoppingRule(grace_period=10.0)

        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   early_stopping=scheduler,
                                   max_iters=10,
                                   local_dir="./test-result")
        tune_search.fit(x, y)

        self.assertTrue(len(os.listdir("./test-result")) != 0)
Example #17
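    # Grid search over ridge-regression alphas; note the printed "error" is a
    # mean signed error, so positive and negative residuals cancel out.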
    def test_diabetes(self):
        # load the diabetes datasets
        dataset = datasets.load_diabetes()
        X = dataset.data
        y = dataset.target
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.5,
                                                            random_state=0)
        # prepare a range of alpha values to test
        alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
        param_grid = dict(alpha=alphas)
        # create and fit a ridge regression model, testing each alpha
        model = linear_model.Ridge()

        tune_search = tcv.TuneGridSearchCV(model, param_grid,
                                           MedianStoppingRule())
        tune_search.fit(X_train, y_train)

        pred = tune_search.predict(X_test)
        print(pred)
        error = sum(np.array(pred) - np.array(y_test)) / len(pred)
        print(error)
Example #18
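# Hyperparameter-tuning entry point for the LibCity pipeline: parses a search
# space file, then dispatches to the chosen search algorithm and trial
# scheduler (FIFO, ASHA, or MedianStoppingRule) via tune.run.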
def hyper_parameter(task=None,
                    model_name=None,
                    dataset_name=None,
                    config_file=None,
                    space_file=None,
                    scheduler=None,
                    search_alg=None,
                    other_args=None,
                    num_samples=5,
                    max_concurrent=1,
                    cpu_per_trial=1,
                    gpu_per_trial=1):
    """ Use Ray tune to hyper parameter tune

    Args:
        task(str): task name
        model_name(str): model name
        dataset_name(str): dataset name
        config_file(str): config filename used to modify the pipeline's
            settings; the config file should be JSON.
        space_file(str): the file which specifies the parameter search space
        scheduler(str): the trial scheduler which will be used in ray.tune.run
        search_alg(str): the search algorithm
        other_args(dict): the remaining args, which will be passed to the Config
    """
    # load config
    experiment_config = ConfigParser(task,
                                     model_name,
                                     dataset_name,
                                     config_file=config_file,
                                     other_args=other_args)
    # logger
    logger = get_logger(experiment_config)
    logger.info(experiment_config.config)
    # check space_file
    if space_file is None:
        logger.error(
            'space_file must not be None when tuning hyperparameters.')
        exit(1)
    # seed
    seed = experiment_config.get('seed', 0)
    set_random_seed(seed)
    # parse space_file
    search_space = parse_search_space(space_file)
    # load dataset
    dataset = get_dataset(experiment_config)
    # get train valid test data
    train_data, valid_data, test_data = dataset.get_data()
    data_feature = dataset.get_data_feature()

    def train(config,
              checkpoint_dir=None,
              experiment_config=None,
              train_data=None,
              valid_data=None,
              data_feature=None):
        """trainable function which meets ray tune API

        Args:
            config (dict): A dict of hyperparameter.
        """
        # modify experiment_config
        for key in config:
            if key in experiment_config:
                experiment_config[key] = config[key]
        experiment_config['hyper_tune'] = True
        logger = get_logger(experiment_config)
        logger.info(
            'Begin pipeline, task={}, model_name={}, dataset_name={}'.format(
                str(task), str(model_name), str(dataset_name)))
        logger.info('running parameters: ' + str(config))
        # load model
        model = get_model(experiment_config, data_feature)
        # load executor
        executor = get_executor(experiment_config, model, data_feature)
        # checkpoint by ray tune
        if checkpoint_dir:
            checkpoint = os.path.join(checkpoint_dir, 'checkpoint')
            executor.load_model(checkpoint)
        # train
        executor.train(train_data, valid_data)

    # init search algorithm and scheduler
    if search_alg == 'BasicSearch':
        algorithm = BasicVariantGenerator()
    elif search_alg == 'BayesOptSearch':
        algorithm = BayesOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm,
                                       max_concurrent=max_concurrent)
    elif search_alg == 'HyperOpt':
        algorithm = HyperOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm,
                                       max_concurrent=max_concurrent)
    else:
        raise ValueError('the search_alg is not supported.')
    if scheduler == 'FIFO':
        tune_scheduler = FIFOScheduler()
    elif scheduler == 'ASHA':
        tune_scheduler = ASHAScheduler()
    elif scheduler == 'MedianStoppingRule':
        tune_scheduler = MedianStoppingRule()
    else:
        raise ValueError('the scheduler is not supported.')
    # ray tune run
    ensure_dir('./libcity/cache/hyper_tune')
    result = tune.run(tune.with_parameters(train,
                                           experiment_config=experiment_config,
                                           train_data=train_data,
                                           valid_data=valid_data,
                                           data_feature=data_feature),
                      resources_per_trial={
                          'cpu': cpu_per_trial,
                          'gpu': gpu_per_trial
                      },
                      config=search_space,
                      metric='loss',
                      mode='min',
                      scheduler=tune_scheduler,
                      search_alg=algorithm,
                      local_dir='./libcity/cache/hyper_tune',
                      num_samples=num_samples)
    best_trial = result.get_best_trial("loss", "min", "last")
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    # save best
    best_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
    model_state, optimizer_state = torch.load(best_path)
    model_cache_file = './libcity/cache/model_cache/{}_{}.m'.format(
        model_name, dataset_name)
    ensure_dir('./libcity/cache/model_cache')
    torch.save((model_state, optimizer_state), model_cache_file)
Example #19
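# Standalone script: TuneSearchCV with Bayesian optimization over an
# SGDClassifier, using MedianStoppingRule for early stopping.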
from tune_sklearn import TuneSearchCV
from sklearn.linear_model import SGDClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from ray.tune.schedulers import MedianStoppingRule
import numpy as np

digits = datasets.load_digits()
x = digits.data
y = digits.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2)

clf = SGDClassifier()
parameter_grid = {"alpha": (1e-4, 1), "epsilon": (0.01, 0.1)}

scheduler = MedianStoppingRule(grace_period=10.0)

tune_search = TuneSearchCV(clf,
                           parameter_grid,
                           search_optimization="bayesian",
                           n_iter=3,
                           early_stopping=scheduler,
                           max_iters=10)
tune_search.fit(x_train, y_train)

pred = tune_search.predict(x_test)
accuracy = np.count_nonzero(np.array(pred) == np.array(y_test)) / len(pred)
print(accuracy)
Example #20
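# Grid/sampled search over network hyperparameters with median stopping on an
# 'acc' metric; Train is presumably a tune.Trainable defined elsewhere.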

if __name__ == "__main__":
    ray.init(num_gpus=8, num_cpus=32)

    config = {
        'network': tune.grid_search(['LSTM']),  # which network to use (could also try TCN)
        'loss_func': tune.grid_search([1, 2, 3, 4, 5, 6]),  # which loss function to use
        'batch_size': 100,  # how many samples to train on per draw
        'lr': tune.loguniform(1e-6, 1e-1),
        'hidden_size': tune.grid_search([128]),  # number of hidden units in the network
        'hidden_size2': tune.grid_search([16, 64])
    }
    tune.run(Train,
             config=config,
             resources_per_trial={
                 "cpu": 1,
                 "gpu": 0.3
             },
             local_dir=os.path.abspath(
                 os.path.join(__file__, os.pardir, 'TRY_ALL')),
             verbose=1,
             stop={"training_iteration": 2000},
             num_samples=10,
             checkpoint_freq=200,
             checkpoint_at_end=True,
             scheduler=MedianStoppingRule(time_attr='training_iteration',
                                          metric='acc',
                                          mode='max',
                                          grace_period=200))
Example #21
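            # Fragment (starts mid logger.info call): selects a trial scheduler
            # (hyperband, median, or pbt) from command-line flags before tune.run.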
                'Search hyper-parameters for %s based on configuration at %s',
                set_path, conf_path)

            exp_name = '%s-%s' % (setname, base_conf.model.experiment_name)
            if args.trial_scheduler == 'hyperband':
                scheduler = AsyncHyperBandScheduler(time_attr='progress',
                                                    metric='correct',
                                                    mode='max',
                                                    max_t=args.epoch,
                                                    grace_period=args.epoch / 10,
                                                    reduction_factor=2,
                                                    brackets=4)
            elif args.trial_scheduler == 'median':
                scheduler = MedianStoppingRule(time_attr='progress',
                                               metric='correct',
                                               grace_period=args.epoch / 10,
                                               min_time_slice=args.epoch / 2)
            elif args.trial_scheduler == 'pbt':
                scheduler = PopulationBasedTraining(
                    time_attr='progress',
                    metric='correct',
                    mode='max',
                    perturbation_interval=args.epoch / 4,
                    hyperparam_mutations=perturb_space)
            else:
                scheduler = None

            logger.info('Trial scheduler used: %s', str(scheduler))
            analysis = tune.run(RayTrainer,
                                name=exp_name,
                                config=sampling_space,
Example #22
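    # TuneBaseSearchCV-style constructor that resolves the early_stopping
    # argument into a concrete Tune scheduler (by name, instance, or True).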
    def __init__(self,
                 estimator,
                 early_stopping=None,
                 scoring=None,
                 n_jobs=None,
                 cv=5,
                 refit=True,
                 verbose=0,
                 error_score="raise",
                 return_train_score=False,
                 max_iters=10,
                 use_gpu=False):

        self.estimator = estimator

        if early_stopping and self._can_early_stop():
            self.max_iters = max_iters
            if early_stopping is True:
                # Override the early_stopping variable so
                # that it is resolved appropriately in
                # the next block
                early_stopping = "AsyncHyperBandScheduler"
            # Resolve the early stopping object
            if isinstance(early_stopping, str):
                if early_stopping in TuneBaseSearchCV.defined_schedulers:
                    if early_stopping == "PopulationBasedTraining":
                        self.early_stopping = PopulationBasedTraining(
                            metric="average_test_score")
                    elif early_stopping == "AsyncHyperBandScheduler":
                        self.early_stopping = AsyncHyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "HyperBandScheduler":
                        self.early_stopping = HyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "MedianStoppingRule":
                        self.early_stopping = MedianStoppingRule(
                            metric="average_test_score")
                    elif early_stopping == "ASHAScheduler":
                        self.early_stopping = ASHAScheduler(
                            metric="average_test_score")
                else:
                    raise ValueError(
                        "{} is not a defined scheduler. "
                        "Check the list of available schedulers.".format(
                            early_stopping))
            elif isinstance(early_stopping, TrialScheduler):
                self.early_stopping = early_stopping
                self.early_stopping.metric = "average_test_score"
            else:
                raise TypeError("`early_stopping` must be a str, boolean, "
                                "or tune scheduler")
        elif not early_stopping:
            warnings.warn("Early stopping is not enabled. "
                          "To enable early stopping, pass in a supported "
                          "scheduler from Tune and ensure the estimator "
                          "has `partial_fit`.")

            self.max_iters = 1
            self.early_stopping = None
        else:
            raise ValueError("Early stopping is not supported because "
                             "the estimator does not have `partial_fit`")

        self.cv = cv
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.refit = refit
        self.verbose = verbose
        self.error_score = error_score
        self.return_train_score = return_train_score
        self.use_gpu = use_gpu
Example #23
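    # A revision of the constructor in Example #22: it drops the
    # `early_stopping is True` shortcut and adds a HyperBandForBOHB branch.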
    def __init__(self,
                 estimator,
                 early_stopping=None,
                 scoring=None,
                 n_jobs=None,
                 cv=5,
                 refit=True,
                 verbose=0,
                 error_score="raise",
                 return_train_score=False,
                 max_iters=10,
                 use_gpu=False):

        self.estimator = estimator

        if early_stopping is not None and self._can_early_stop():
            self.max_iters = max_iters
            if isinstance(early_stopping, str):
                if early_stopping in TuneBaseSearchCV.defined_schedulers:
                    if early_stopping == "PopulationBasedTraining":
                        self.early_stopping = PopulationBasedTraining(
                            metric="average_test_score")
                    elif early_stopping == "AsyncHyperBandScheduler":
                        self.early_stopping = AsyncHyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "HyperBandScheduler":
                        self.early_stopping = HyperBandScheduler(
                            metric="average_test_score")
                    elif early_stopping == "HyperBandForBOHB":
                        self.early_stopping = HyperBandForBOHB(
                            metric="average_test_score")
                    elif early_stopping == "MedianStoppingRule":
                        self.early_stopping = MedianStoppingRule(
                            metric="average_test_score")
                    elif early_stopping == "ASHAScheduler":
                        self.early_stopping = ASHAScheduler(
                            metric="average_test_score")
                else:
                    raise ValueError(
                        "{} is not a defined scheduler. "
                        "Check the list of available schedulers.".format(
                            early_stopping))
            elif isinstance(early_stopping, TrialScheduler):
                self.early_stopping = early_stopping
                self.early_stopping.metric = "average_test_score"
            else:
                raise TypeError("Scheduler must be a str or tune scheduler")
        else:
            warnings.warn("Early stopping is not enabled. "
                          "To enable early stopping, pass in a supported "
                          "scheduler from Tune and ensure the estimator "
                          "has `partial_fit`.")

            self.max_iters = 1
            self.early_stopping = None

        self.cv = cv
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.refit = refit
        self.verbose = verbose
        self.error_score = error_score
        self.return_train_score = return_train_score
        self.use_gpu = use_gpu
Example #24
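# End-to-end hyperparameter search for an mlencrypt experiment: builds a
# search algorithm (hyperopt, bayesopt, nevergrad, skopt, dragonfly, bohb, or
# zoopt) and a scheduler ('msr' maps to MedianStoppingRule), then runs
# tune.run with Weights & Biases logging.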
def hparams(algorithm, scheduler, num_samples, tensorboard, bare):
    from glob import glob

    import tensorflow.summary
    from tensorflow import random as tfrandom, int64 as tfint64
    from ray import init as init_ray, shutdown as shutdown_ray
    from ray import tune
    from wandb.ray import WandbLogger
    from wandb import sweep as wandbsweep
    from wandb.apis import CommError as wandbCommError

    # fewer summaries are logged if MLENCRYPT_TB is TRUE (for efficiency)
    # TODO: use tf.summary.record_if?
    environ["MLENCRYPT_TB"] = str(tensorboard).upper()
    environ["MLENCRYPT_BARE"] = str(bare).upper()
    if getenv('MLENCRYPT_TB', 'FALSE') == 'TRUE' and \
            getenv('MLENCRYPT_BARE', 'FALSE') == 'TRUE':
        raise ValueError('TensorBoard logging cannot be enabled in bare mode.')

    logdir = f'logs/hparams/{datetime.now()}'

    # "These results show that K = 3 is the optimal choice for the
    # cryptographic application of neural synchronization. K = 1 and K = 2 are
    # too insecure in regard to the geometric attack. And for K > 3 the effort
    # of A and B grows exponentially with increasing L, while the simple attack
    # is quite successful in the limit K -> infinity. Consequently, one should
    # only use Tree Parity Machines with three hidden units for the neural
    # key-exchange protocol." (Ruttor, 2006)
    # https://arxiv.org/pdf/0711.2411.pdf#page=59

    update_rules = [
        'random-same',
        # 'random-different-A-B-E', 'random-different-A-B',
        'hebbian',
        'anti_hebbian',
        'random_walk'
    ]
    K_bounds = {'min': 4, 'max': 8}
    N_bounds = {'min': 4, 'max': 8}
    L_bounds = {'min': 4, 'max': 8}

    # TODO: don't use *_bounds.values() since .values doesn't preserve order

    def get_session_num(logdir):
        current_runs = glob(join(logdir, "run-*"))
        if current_runs:
            last_run_path = current_runs[-1]
            last_run_session_num = int(last_run_path.split('-')[-1])
            return last_run_session_num + 1
        else:  # there are no runs yet, start at 0
            return 0

    def trainable(config, reporter):
        """
        Args:
            config (dict): Parameters provided from the search algorithm
                or variant generation.
        """
        if not isinstance(config['update_rule'], str):
            update_rule = update_rules[int(config['update_rule'])]
        else:
            update_rule = config['update_rule']
        K, N, L = int(config['K']), int(config['N']), int(config['L'])

        run_name = f"run-{get_session_num(logdir)}"
        run_logdir = join(logdir, run_name)
        # for each attack, the TPMs should start with the same weights
        initial_weights_tensors = get_initial_weights(K, N, L)
        training_steps_ls = {}
        eve_scores_ls = {}
        losses_ls = {}
        # for each attack, the TPMs should use the same inputs
        seed = tfrandom.uniform([],
                                minval=0,
                                maxval=tfint64.max,
                                dtype=tfint64).numpy()
        for attack in ['none', 'geometric']:
            initial_weights = {
                tpm: weights_tensor_to_variable(weights, tpm)
                for tpm, weights in initial_weights_tensors.items()
            }
            tfrandom.set_seed(seed)

            if tensorboard:
                attack_logdir = join(run_logdir, attack)
                attack_writer = tensorflow.summary.create_file_writer(
                    attack_logdir)
                with attack_writer.as_default():
                    training_steps, sync_scores, loss = run(
                        update_rule, K, N, L, attack, initial_weights)
            else:
                training_steps, sync_scores, loss = run(
                    update_rule, K, N, L, attack, initial_weights)
            training_steps_ls[attack] = training_steps
            eve_scores_ls[attack] = sync_scores
            losses_ls[attack] = loss
        avg_training_steps = tensorflow.math.reduce_mean(
            list(training_steps_ls.values()))
        avg_eve_score = tensorflow.math.reduce_mean(
            list(eve_scores_ls.values()))
        mean_loss = tensorflow.math.reduce_mean(list(losses_ls.values()))
        reporter(
            avg_training_steps=avg_training_steps.numpy(),
            avg_eve_score=avg_eve_score.numpy(),
            mean_loss=mean_loss.numpy(),
            done=True,
        )

    if algorithm == 'hyperopt':
        from hyperopt import hp as hyperopt
        from hyperopt.pyll.base import scope
        from ray.tune.suggest.hyperopt import HyperOptSearch

        space = {
            'update_rule': hyperopt.choice(
                'update_rule',
                update_rules,
            ),
            'K': scope.int(hyperopt.quniform('K', *K_bounds.values(), q=1)),
            'N': scope.int(hyperopt.quniform('N', *N_bounds.values(), q=1)),
            'L': scope.int(hyperopt.quniform('L', *L_bounds.values(), q=1)),
        }
        algo = HyperOptSearch(
            space,
            metric='mean_loss',
            mode='min',
            points_to_evaluate=[
                {
                    'update_rule': 0,
                    'K': 3,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 128
                },
            ],
        )
    elif algorithm == 'bayesopt':
        from ray.tune.suggest.bayesopt import BayesOptSearch

        space = {
            'update_rule': (0, len(update_rules)),
            'K': tuple(K_bounds.values()),
            'N': tuple(N_bounds.values()),
            'L': tuple(L_bounds.values()),
        }
        algo = BayesOptSearch(
            space,
            metric="mean_loss",
            mode="min",
            # TODO: what is utility_kwargs for and why is it needed?
            utility_kwargs={
                "kind": "ucb",
                "kappa": 2.5,
                "xi": 0.0
            })
    elif algorithm == 'nevergrad':
        from ray.tune.suggest.nevergrad import NevergradSearch
        from nevergrad import optimizers
        from nevergrad import p as ngp

        algo = NevergradSearch(
            optimizers.TwoPointsDE(
                ngp.Instrumentation(
                    update_rule=ngp.Choice(update_rules),
                    K=ngp.Scalar(lower=K_bounds['min'],
                                 upper=K_bounds['max']).set_integer_casting(),
                    N=ngp.Scalar(lower=N_bounds['min'],
                                 upper=N_bounds['max']).set_integer_casting(),
                    L=ngp.Scalar(lower=L_bounds['min'],
                                 upper=L_bounds['max']).set_integer_casting(),
                )),
            None,  # since the optimizer is already instrumented with kwargs
            metric="mean_loss",
            mode="min")
    elif algorithm == 'skopt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch

        optimizer = Optimizer([
            update_rules,
            tuple(K_bounds.values()),
            tuple(N_bounds.values()),
            tuple(L_bounds.values())
        ])
        algo = SkOptSearch(
            optimizer,
            ["update_rule", "K", "N", "L"],
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'dragonfly':
        # TODO: doesn't work
        from ray.tune.suggest.dragonfly import DragonflySearch
        from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
        from dragonfly.opt.gp_bandit import EuclideanGPBandit
        # from dragonfly.exd.experiment_caller import CPFunctionCaller
        # from dragonfly.opt.gp_bandit import CPGPBandit
        from dragonfly import load_config

        domain_config = load_config({
            "domain": [
                {
                    "name": "update_rule",
                    "type": "discrete",
                    "dim": 1,
                    "items": update_rules
                },
                {
                    "name": "K",
                    "type": "int",
                    "min": K_bounds['min'],
                    "max": K_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "N",
                    "type": "int",
                    "min": N_bounds['min'],
                    "max": N_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "L",
                    "type": "int",
                    "min": L_bounds['min'],
                    "max": L_bounds['max'],
                    # "dim": 1
                }
            ]
        })
        func_caller = EuclideanFunctionCaller(
            None, domain_config.domain.list_of_domains[0])
        optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
        algo = DragonflySearch(
            optimizer,
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'bohb':
        from ConfigSpace import ConfigurationSpace
        from ConfigSpace import hyperparameters as CSH
        from ray.tune.suggest.bohb import TuneBOHB

        config_space = ConfigurationSpace()
        config_space.add_hyperparameter(
            CSH.CategoricalHyperparameter("update_rule", choices=update_rules))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='K',
                                             lower=K_bounds['min'],
                                             upper=K_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='N',
                                             lower=N_bounds['min'],
                                             upper=N_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='L',
                                             lower=L_bounds['min'],
                                             upper=L_bounds['max']))
        algo = TuneBOHB(config_space, metric="mean_loss", mode="min")
    elif algorithm == 'zoopt':
        from ray.tune.suggest.zoopt import ZOOptSearch
        from zoopt import ValueType

        space = {
            "update_rule":
            (ValueType.DISCRETE, range(0, len(update_rules)), False),
            "K": (ValueType.DISCRETE,
                  range(K_bounds['min'], K_bounds['max'] + 1), True),
            "N": (ValueType.DISCRETE,
                  range(N_bounds['min'], N_bounds['max'] + 1), True),
            "L": (ValueType.DISCRETE,
                  range(L_bounds['min'], L_bounds['max'] + 1), True),
        }
        # TODO: change budget to a large value
        algo = ZOOptSearch(budget=10,
                           dim_dict=space,
                           metric="mean_loss",
                           mode="min")

    # TODO: use more appropriate arguments for schedulers:
    # https://docs.ray.io/en/master/tune/api_docs/schedulers.html
    if scheduler == 'fifo':
        sched = None  # Tune defaults to FIFO
    elif scheduler == 'pbt':
        from ray.tune.schedulers import PopulationBasedTraining
        from random import randint
        sched = PopulationBasedTraining(
            metric="mean_loss",
            mode="min",
            hyperparam_mutations={
                "update_rule": update_rules,
                "K": lambda: randint(K_bounds['min'], K_bounds['max']),
                "N": lambda: randint(N_bounds['min'], N_bounds['max']),
                "L": lambda: randint(L_bounds['min'], L_bounds['max']),
            })
    elif scheduler == 'ahb' or scheduler == 'asha':
        # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler
        from ray.tune.schedulers import AsyncHyperBandScheduler
        sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    elif scheduler == 'hb':
        from ray.tune.schedulers import HyperBandScheduler
        sched = HyperBandScheduler(metric="mean_loss", mode="min")
    elif algorithm == 'bohb' or scheduler == 'bohb':
        from ray.tune.schedulers import HyperBandForBOHB
        sched = HyperBandForBOHB(metric="mean_loss", mode="min")
    elif scheduler == 'msr':
        from ray.tune.schedulers import MedianStoppingRule
        sched = MedianStoppingRule(metric="mean_loss", mode="min")
    else:
        sched = None  # unrecognized name: fall back to Tune's default FIFO scheduler
    init_ray(
        address=getenv("ip_head"),
        redis_password=getenv('redis_password'),
    )
    analysis = tune.run(
        trainable,
        name='mlencrypt_research',
        config={
            "monitor": True,
            "env_config": {
                "wandb": {
                    "project": "mlencrypt-research",
                    "sync_tensorboard": True,
                },
            },
        },
        # resources_per_trial={"cpu": 1, "gpu": 3},
        local_dir='./ray_results',
        export_formats=['csv'],  # TODO: add other formats?
        num_samples=num_samples,
        loggers=[
            tune.logger.JsonLogger, tune.logger.CSVLogger,
            tune.logger.TBXLogger, WandbLogger
        ],
        search_alg=algo,
        scheduler=sched,
        queue_trials=True,
    )
    try:
        wandbsweep(analysis)
    except wandbCommError:
        # see https://docs.wandb.com/sweeps/ray-tune#feature-compatibility
        pass
    best_config = analysis.get_best_config(metric='mean_loss', mode='min')
    print(f"Best config: {best_config}")
    shutdown_ray()