def testMedianStoppingOnCompleteOnly(self):
    rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
    t1, t2 = self.basicSetup(rule)
    self.assertEqual(
        rule.on_trial_result(None, t2, result(100, 0)),
        TrialScheduler.CONTINUE)
    rule.on_trial_complete(None, t1, result(10, 1000))
    self.assertEqual(
        rule.on_trial_result(None, t2, result(101, 0)),
        TrialScheduler.STOP)
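# The tests in this section reference three helpers that are not defined
# here: result(), mock_trial_runner(), and basicSetup(). The sketch below is
# a plausible reconstruction modeled on the Ray Tune scheduler test suite;
# the result field names (time_total_s, episode_reward_mean) are assumptions,
# not taken from this section.
from unittest.mock import MagicMock


def result(t, rew):
    # One trial result: elapsed time and mean episode reward.
    return dict(time_total_s=t, episode_reward_mean=rew)


def mock_trial_runner():
    # Stand-in for ray.tune's TrialRunner; the scheduler only needs an
    # object to thread through its callbacks, so a MagicMock suffices.
    return MagicMock()


def basicSetup(self, rule):
    # Feed two trials through the rule: t1 improves linearly (mean 450,
    # max 900 over 10 steps); t2 stays flat at 450 for 5 steps. Neither
    # should be stopped during setup.
    t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
    t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
    for i in range(10):
        self.assertEqual(
            rule.on_trial_result(None, t1, result(i, i * 100)),
            TrialScheduler.CONTINUE)
    for i in range(5):
        self.assertEqual(
            rule.on_trial_result(None, t2, result(i, 450)),
            TrialScheduler.CONTINUE)
    return t1, t2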
def testMedianStoppingUsesMedian(self):
    rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
    t1, t2 = self.basicSetup(rule)
    rule.on_trial_complete(None, t1, result(10, 1000))
    rule.on_trial_complete(None, t2, result(10, 1000))
    t3 = Trial("PPO")
    self.assertEqual(
        rule.on_trial_result(None, t3, result(1, 260)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t3, result(2, 260)),
        TrialScheduler.STOP)
def testAlternateMetrics(self):
    def result2(t, rew):
        return dict(training_iteration=t, neg_mean_loss=rew)

    rule = MedianStoppingRule(
        grace_period=0,
        min_samples_required=1,
        time_attr='training_iteration',
        reward_attr='neg_mean_loss')
    t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
    t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
    for i in range(10):
        self.assertEqual(
            rule.on_trial_result(None, t1, result2(i, i * 100)),
            TrialScheduler.CONTINUE)
    for i in range(5):
        self.assertEqual(
            rule.on_trial_result(None, t2, result2(i, 450)),
            TrialScheduler.CONTINUE)
    rule.on_trial_complete(None, t1, result2(10, 1000))
    self.assertEqual(
        rule.on_trial_result(None, t2, result2(5, 450)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t2, result2(6, 0)),
        TrialScheduler.CONTINUE)
def testMedianStoppingConstantPerf(self):
    rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
    t1, t2 = self.basicSetup(rule)
    rule.on_trial_complete(None, t1, result(10, 1000))
    self.assertEqual(
        rule.on_trial_result(None, t2, result(5, 450)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t2, result(6, 0)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t2, result(10, 450)),
        TrialScheduler.STOP)
def testMedianStoppingMinSamples(self):
    rule = MedianStoppingRule(grace_period=0, min_samples_required=2)
    t1, t2 = self.basicSetup(rule)
    runner = mock_trial_runner()
    rule.on_trial_complete(runner, t1, result(10, 1000))
    t3 = Trial("PPO")
    # Insufficient samples to evaluate t3
    self.assertEqual(
        rule.on_trial_result(runner, t3, result(5, 10)),
        TrialScheduler.CONTINUE)
    rule.on_trial_complete(runner, t2, result(5, 1000))
    # Sufficient samples to evaluate t3
    self.assertEqual(
        rule.on_trial_result(runner, t3, result(5, 10)),
        TrialScheduler.STOP)
def testMedianStoppingGracePeriod(self):
    rule = MedianStoppingRule(grace_period=2.5, min_samples_required=1)
    t1, t2 = self.basicSetup(rule)
    rule.on_trial_complete(None, t1, result(10, 1000))
    rule.on_trial_complete(None, t2, result(10, 1000))
    t3 = Trial("PPO")
    self.assertEqual(
        rule.on_trial_result(None, t3, result(1, 10)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t3, result(2, 10)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t3, result(3, 10)),
        TrialScheduler.STOP)
def testMedianStoppingOnCompleteOnly(self):
    rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
    t1, t2 = self.basicSetup(rule)
    runner = mock_trial_runner()
    self.assertEqual(
        rule.on_trial_result(runner, t2, result(100, 0)),
        TrialScheduler.CONTINUE)
    rule.on_trial_complete(runner, t1, result(101, 1000))
    self.assertEqual(
        rule.on_trial_result(runner, t2, result(101, 0)),
        TrialScheduler.STOP)
def test_grid_search_no_score(self):
    # Test grid search on a classifier that has no score function.
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [0.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)

    # XXX: It seems there's some global shared state in LinearSVC - fitting
    # multiple `SVC` instances in parallel using threads sometimes results in
    # wrong results. This only happens with threads, not processes/sync.
    # For now, we'll fit using the sync scheduler.
    grid_search = tcv.TuneGridSearchCV(
        clf, {"C": Cs}, scoring="accuracy", scheduler=MedianStoppingRule())
    grid_search.fit(X, y)

    grid_search_no_score = tcv.TuneGridSearchCV(
        clf_no_score, {"C": Cs},
        scoring="accuracy",
        scheduler=MedianStoppingRule())
    # smoke test grid search
    grid_search_no_score.fit(X, y)

    # check that best params are equal
    self.assertEqual(grid_search_no_score.best_params_,
                     grid_search.best_params_)
    # check that we can call score and that it gives the correct result
    self.assertEqual(grid_search.score(X, y),
                     grid_search_no_score.score(X, y))

    # giving no scoring function raises an error
    grid_search_no_score = tcv.TuneGridSearchCV(clf_no_score, {"C": Cs})
    with self.assertRaises(TypeError) as exc:
        grid_search_no_score.fit([[1]])
    self.assertTrue("no scoring" in str(exc.exception))
def testMedianStoppingUsesMedian(self):
    rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
    t1, t2 = self.basicSetup(rule)
    runner = mock_trial_runner()
    rule.on_trial_complete(runner, t1, result(10, 1000))
    rule.on_trial_complete(runner, t2, result(10, 1000))
    t3 = Trial("PPO")
    self.assertEqual(
        rule.on_trial_result(runner, t3, result(1, 260)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(runner, t3, result(2, 260)),
        TrialScheduler.STOP)
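# Worked numbers for the two median tests above (an interpretation, assuming
# the rule compares a trial's best result against the median of the other
# trials' running-average results up to the same time): t1 and t2 both
# average roughly 450 reward over their lifetimes, so the median is ~450;
# once t3 is past the grace period and still reporting 260 < 450, its second
# result returns TrialScheduler.STOP (or PAUSE when hard_stop=False, as in
# testMedianStoppingSoftStop below).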
def run_experiment(config, trainable):
    """Run a single tune experiment in parallel as a "remote" function.

    :param config: The experiment configuration
    :type config: dict
    :param trainable: tune.Trainable class with your experiment
    :type trainable: :class:`ray.tune.Trainable`
    """
    # Stop criteria. Default to total number of iterations/epochs
    stop_criteria = {"training_iteration": config.get("iterations")}
    stop_criteria.update(config.get("stop", {}))

    tune.run(
        trainable,
        name=config["name"],
        local_dir=config["path"],
        stop=stop_criteria,
        config=config,
        num_samples=config.get("repetitions", 1),
        search_alg=config.get("search_alg", None),
        scheduler=config.get(
            "scheduler",
            MedianStoppingRule(
                time_attr="training_iteration",
                reward_attr="noise_accuracy",
                min_samples_required=3,
                grace_period=20,
                verbose=False,
            ),
        ),
        trial_name_creator=tune.function(trial_name_string),
        trial_executor=config.get("trial_executor", None),
        checkpoint_at_end=config.get("checkpoint_at_end", False),
        checkpoint_freq=config.get("checkpoint_freq", 0),
        upload_dir=config.get("upload_dir", None),
        sync_function=config.get("sync_function", None),
        resume=config.get("resume", False),
        reuse_actors=config.get("reuse_actors", False),
        verbose=config.get("verbose", 0),
        resources_per_trial={
            # With lots of trials, optimal seems to be 0.5, or 2 trials per
            # GPU. If num trials <= num GPUs, 1.0 is better.
            "cpu": 1,
            "gpu": config.get("gpu_percentage", 0.5),
        },
    )
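# A hypothetical config for run_experiment(); the keys are inferred from the
# config.get() calls above, and the values are illustrative only:
example_config = {
    "name": "noise_experiment",
    "path": "~/ray_results",
    "iterations": 100,     # becomes stop={"training_iteration": 100}
    "repetitions": 3,      # num_samples
    "gpu_percentage": 0.5,
}
# run_experiment(example_config, MyTrainable)
# (MyTrainable stands in for any tune.Trainable subclass.)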
def testMedianStoppingSoftStop(self):
    rule = MedianStoppingRule(
        grace_period=0, min_samples_required=1, hard_stop=False)
    t1, t2 = self.basicSetup(rule)
    rule.on_trial_complete(None, t1, result(10, 1000))
    rule.on_trial_complete(None, t2, result(10, 1000))
    t3 = Trial("PPO")
    self.assertEqual(
        rule.on_trial_result(None, t3, result(1, 260)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t3, result(2, 260)),
        TrialScheduler.PAUSE)
def main():
    space = {
        "batch_size": hp.choice("batch_size", [32, 64, 128]),
        "learning_rate": hp.choice("learning_rate", [0.01, 0.001, 0.0005]),
        "target_update": hp.choice("target_update", [4, 10, 100]),
    }
    hyperopt_search = HyperOptSearch(space, metric="mean_reward", mode="max")
    analysis = tune.run(
        Trainable,
        stop={"training_iteration": MAX_TRAINING_ITERATION},
        num_samples=10,
        scheduler=MedianStoppingRule(metric="mean_reward", mode="max"),
        search_alg=hyperopt_search,
        local_dir=TUNE_RESULTS_FOLDER,
        progress_reporter=reporter,
        checkpoint_freq=1,
        verbose=1)
def train():
    # NOTE: Ray can be used through joblib, but that feature (only ~2 weeks
    # old) is not included in the pip-installed release.
    # See https://ray.readthedocs.io/en/latest/joblib.html and pip install
    # the nightly wheel from
    # https://ray.readthedocs.io/en/latest/installation.html
    n_iter = 10
    cv = 5
    label_drawing_tuples = load_drawings(storage_location='normalized')
    if not label_drawing_tuples:
        raise ValueError("No training drawings found in db")
    labels, drawings = zip(*label_drawing_tuples)
    drawings = np.asarray(drawings)
    labels = np.asarray(labels)
    print(f"drawings.shape = {drawings.shape}")
    print(f"labels.shape = {labels.shape}")

    pipeline_nt = namedtuple('pipeline', ['pipeline', 'parameter_space'])
    pipelines = {
        'svm': pipeline_nt(svm_pipeline(), svm_parameter_space()),
    }
    for name, pipeline_tuple in pipelines.items():
        # rs = RandomizedSearchCV(case.pipeline, case.parameter_space,
        #                         n_iter=n_iter, n_jobs=-1, refit=False,
        #                         cv=cv)
        print(f"running {name}")
        temp_params = {
            'svm__gamma': [0.0001, 0.001],
            'svm__C': [1, 10],
        }
        tune_search = TuneGridSearchCV(
            pipeline_tuple.pipeline, temp_params,
            scheduler=MedianStoppingRule())
        tune_search.fit(drawings, labels)
        pred = tune_search.predict(drawings)
        correct = 0
        for i in range(len(pred)):
            if pred[i] == labels[i]:
                correct += 1
        print(correct / len(pred))
        print(tune_search.cv_results_)
def _test_metrics(self, result_func, metric, mode):
    rule = MedianStoppingRule(
        grace_period=0,
        min_samples_required=1,
        time_attr="training_iteration",
        metric=metric,
        mode=mode)
    t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
    t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
    for i in range(10):
        self.assertEqual(
            rule.on_trial_result(None, t1, result_func(i, i * 100)),
            TrialScheduler.CONTINUE)
    for i in range(5):
        self.assertEqual(
            rule.on_trial_result(None, t2, result_func(i, 450)),
            TrialScheduler.CONTINUE)
    rule.on_trial_complete(None, t1, result_func(10, 1000))
    self.assertEqual(
        rule.on_trial_result(None, t2, result_func(5, 450)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        rule.on_trial_result(None, t2, result_func(6, 0)),
        TrialScheduler.CONTINUE)
def test_local_dir(self):
    digits = datasets.load_digits()
    x = digits.data
    y = digits.target

    clf = SGDClassifier()
    parameter_grid = {
        "alpha": Real(1e-4, 1e-1, 1),
        "epsilon": Real(0.01, 0.1)
    }
    scheduler = MedianStoppingRule(grace_period=10.0)
    tune_search = TuneSearchCV(
        clf,
        parameter_grid,
        early_stopping=scheduler,
        max_iters=10,
        local_dir="./test-result")
    tune_search.fit(x, y)

    self.assertTrue(len(os.listdir("./test-result")) != 0)
def test_diabetes(self):
    # load the diabetes dataset
    dataset = datasets.load_diabetes()
    X = dataset.data
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)
    # prepare a range of alpha values to test
    alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
    param_grid = dict(alpha=alphas)
    # create and fit a ridge regression model, testing each alpha
    model = linear_model.Ridge()
    tune_search = tcv.TuneGridSearchCV(model, param_grid,
                                       MedianStoppingRule())
    tune_search.fit(X_train, y_train)

    pred = tune_search.predict(X_test)
    print(pred)
    # mean signed error of the predictions
    error = sum(np.array(pred) - np.array(y_test)) / len(pred)
    print(error)
def hyper_parameter(task=None, model_name=None, dataset_name=None,
                    config_file=None, space_file=None, scheduler=None,
                    search_alg=None, other_args=None, num_samples=5,
                    max_concurrent=1, cpu_per_trial=1, gpu_per_trial=1):
    """
    Use Ray Tune for hyperparameter tuning.

    Args:
        task (str): task name
        model_name (str): model name
        dataset_name (str): dataset name
        config_file (str): config filename used to modify the pipeline's
            settings; the config file should be JSON
        space_file (str): the file which specifies the parameter search space
        scheduler (str): the trial scheduler which will be used in
            ray.tune.run
        search_alg (str): the search algorithm
        other_args (dict): the remaining args, which will be passed to the
            Config
    """
    # load config
    experiment_config = ConfigParser(task, model_name, dataset_name,
                                     config_file=config_file,
                                     other_args=other_args)
    # logger
    logger = get_logger(experiment_config)
    logger.info(experiment_config.config)
    # check space_file
    if space_file is None:
        logger.error('the space_file should not be None when doing '
                     'hyperparameter tuning.')
        exit(0)
    # seed
    seed = experiment_config.get('seed', 0)
    set_random_seed(seed)
    # parse space_file
    search_space = parse_search_space(space_file)
    # load dataset
    dataset = get_dataset(experiment_config)
    # get train/valid/test data
    train_data, valid_data, test_data = dataset.get_data()
    data_feature = dataset.get_data_feature()

    def train(config, checkpoint_dir=None, experiment_config=None,
              train_data=None, valid_data=None, data_feature=None):
        """Trainable function which meets the Ray Tune API.

        Args:
            config (dict): A dict of hyperparameters.
        """
        # modify experiment_config
        for key in config:
            if key in experiment_config:
                experiment_config[key] = config[key]
        experiment_config['hyper_tune'] = True
        logger = get_logger(experiment_config)
        logger.info('Begin pipeline, task={}, model_name={}, dataset_name={}'
                    .format(str(task), str(model_name), str(dataset_name)))
        logger.info('running parameters: ' + str(config))
        # load model
        model = get_model(experiment_config, data_feature)
        # load executor
        executor = get_executor(experiment_config, model, data_feature)
        # checkpoint by ray tune
        if checkpoint_dir:
            checkpoint = os.path.join(checkpoint_dir, 'checkpoint')
            executor.load_model(checkpoint)
        # train
        executor.train(train_data, valid_data)

    # init search algorithm and scheduler
    if search_alg == 'BasicSearch':
        algorithm = BasicVariantGenerator()
    elif search_alg == 'BayesOptSearch':
        algorithm = BayesOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm,
                                       max_concurrent=max_concurrent)
    elif search_alg == 'HyperOpt':
        algorithm = HyperOptSearch(metric='loss', mode='min')
        # add concurrency limit
        algorithm = ConcurrencyLimiter(algorithm,
                                       max_concurrent=max_concurrent)
    else:
        raise ValueError('the search_alg is illegal.')

    if scheduler == 'FIFO':
        tune_scheduler = FIFOScheduler()
    elif scheduler == 'ASHA':
        tune_scheduler = ASHAScheduler()
    elif scheduler == 'MedianStoppingRule':
        tune_scheduler = MedianStoppingRule()
    else:
        raise ValueError('the scheduler is illegal.')

    # ray tune run
    ensure_dir('./libcity/cache/hyper_tune')
    result = tune.run(
        tune.with_parameters(train, experiment_config=experiment_config,
                             train_data=train_data, valid_data=valid_data,
                             data_feature=data_feature),
        resources_per_trial={'cpu': cpu_per_trial, 'gpu': gpu_per_trial},
        config=search_space,
        metric='loss',
        mode='min',
        scheduler=tune_scheduler,
        search_alg=algorithm,
        local_dir='./libcity/cache/hyper_tune',
        num_samples=num_samples)
    best_trial = result.get_best_trial("loss", "min", "last")
logger.info("Best trial config: {}".format(best_trial.config)) logger.info("Best trial final validation loss: {}".format( best_trial.last_result["loss"])) # save best best_path = os.path.join(best_trial.checkpoint.value, "checkpoint") model_state, optimizer_state = torch.load(best_path) model_cache_file = './libcity/cache/model_cache/{}_{}.m'.format( model_name, dataset_name) ensure_dir('./libcity/cache/model_cache') torch.save((model_state, optimizer_state), model_cache_file)
from tune_sklearn import TuneSearchCV
from sklearn.linear_model import SGDClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from ray.tune.schedulers import MedianStoppingRule
import numpy as np

digits = datasets.load_digits()
x = digits.data
y = digits.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2)

clf = SGDClassifier()
parameter_grid = {"alpha": (1e-4, 1), "epsilon": (0.01, 0.1)}

scheduler = MedianStoppingRule(grace_period=10.0)

tune_search = TuneSearchCV(
    clf,
    parameter_grid,
    search_optimization="bayesian",
    n_iter=3,
    early_stopping=scheduler,
    max_iters=10)
tune_search.fit(x_train, y_train)

pred = tune_search.predict(x_test)
accuracy = np.count_nonzero(np.array(pred) == np.array(y_test)) / len(pred)
print(accuracy)
if __name__ == "__main__":
    ray.init(num_gpus=8, num_cpus=32)
    config = {
        # which network to use (a TCN is also worth trying)
        'network': tune.grid_search(['LSTM']),
        # which loss function to use
        'loss_func': tune.grid_search([1, 2, 3, 4, 5, 6]),
        # how many samples to draw per training step
        'batch_size': 100,
        'lr': tune.loguniform(1e-6, 1e-1),
        # number of hidden units in the network
        'hidden_size': tune.grid_search([128]),
        'hidden_size2': tune.grid_search([16, 64])
    }
    tune.run(
        Train,
        config=config,
        resources_per_trial={"cpu": 1, "gpu": 0.3},
        local_dir=os.path.abspath(
            os.path.join(__file__, os.pardir, 'TRY_ALL')),
        verbose=1,
        stop={"training_iteration": 2000},
        num_samples=10,
        checkpoint_freq=200,
        checkpoint_at_end=True,
        scheduler=MedianStoppingRule(
            time_attr='training_iteration',
            metric='acc',
            mode='max',
            grace_period=200))
logger.info(
    'Search hyper-parameters for %s based on configuration at %s',
    set_path, conf_path)
exp_name = '%s-%s' % (setname, base_conf.model.experiment_name)
if args.trial_scheduler == 'hyperband':
    scheduler = AsyncHyperBandScheduler(
        time_attr='progress',
        metric='correct',
        mode='max',
        max_t=args.epoch,
        grace_period=args.epoch / 10,
        reduction_factor=2,
        brackets=4)
elif args.trial_scheduler == 'median':
    scheduler = MedianStoppingRule(
        time_attr='progress',
        metric='correct',
        grace_period=args.epoch / 10,
        min_time_slice=args.epoch / 2)
elif args.trial_scheduler == 'pbt':
    scheduler = PopulationBasedTraining(
        time_attr='progress',
        metric='correct',
        mode='max',
        perturbation_interval=args.epoch / 4,
        hyperparam_mutations=perturb_space)
else:
    scheduler = None
logger.info('Trial scheduler used: %s', str(scheduler))
analysis = tune.run(
    RayTrainer,
    name=exp_name,
    config=sampling_space,
def __init__(self,
             estimator,
             early_stopping=None,
             scoring=None,
             n_jobs=None,
             cv=5,
             refit=True,
             verbose=0,
             error_score="raise",
             return_train_score=False,
             max_iters=10,
             use_gpu=False):
    self.estimator = estimator

    if early_stopping and self._can_early_stop():
        self.max_iters = max_iters
        if early_stopping is True:
            # Override the early_stopping variable so that it is resolved
            # appropriately in the next block
            early_stopping = "AsyncHyperBandScheduler"
        # Resolve the early stopping object
        if isinstance(early_stopping, str):
            if early_stopping in TuneBaseSearchCV.defined_schedulers:
                if early_stopping == "PopulationBasedTraining":
                    self.early_stopping = PopulationBasedTraining(
                        metric="average_test_score")
                elif early_stopping == "AsyncHyperBandScheduler":
                    self.early_stopping = AsyncHyperBandScheduler(
                        metric="average_test_score")
                elif early_stopping == "HyperBandScheduler":
                    self.early_stopping = HyperBandScheduler(
                        metric="average_test_score")
                elif early_stopping == "MedianStoppingRule":
                    self.early_stopping = MedianStoppingRule(
                        metric="average_test_score")
                elif early_stopping == "ASHAScheduler":
                    self.early_stopping = ASHAScheduler(
                        metric="average_test_score")
            else:
                raise ValueError(
                    "{} is not a defined scheduler. "
                    "Check the list of available schedulers.".format(
                        early_stopping))
        elif isinstance(early_stopping, TrialScheduler):
            self.early_stopping = early_stopping
            self.early_stopping.metric = "average_test_score"
        else:
            raise TypeError("`early_stopping` must be a str, boolean, "
                            "or tune scheduler")
    elif not early_stopping:
        warnings.warn("Early stopping is not enabled. "
                      "To enable early stopping, pass in a supported "
                      "scheduler from Tune and ensure the estimator "
                      "has `partial_fit`.")
        self.max_iters = 1
        self.early_stopping = None
    else:
        raise ValueError("Early stopping is not supported because "
                         "the estimator does not have `partial_fit`")

    self.cv = cv
    self.scoring = scoring
    self.n_jobs = n_jobs
    self.refit = refit
    self.verbose = verbose
    self.error_score = error_score
    self.return_train_score = return_train_score
    self.use_gpu = use_gpu
def __init__(self,
             estimator,
             early_stopping=None,
             scoring=None,
             n_jobs=None,
             cv=5,
             refit=True,
             verbose=0,
             error_score="raise",
             return_train_score=False,
             max_iters=10,
             use_gpu=False):
    self.estimator = estimator

    if early_stopping is not None and self._can_early_stop():
        self.max_iters = max_iters
        if isinstance(early_stopping, str):
            if early_stopping in TuneBaseSearchCV.defined_schedulers:
                if early_stopping == "PopulationBasedTraining":
                    self.early_stopping = PopulationBasedTraining(
                        metric="average_test_score")
                elif early_stopping == "AsyncHyperBandScheduler":
                    self.early_stopping = AsyncHyperBandScheduler(
                        metric="average_test_score")
                elif early_stopping == "HyperBandScheduler":
                    self.early_stopping = HyperBandScheduler(
                        metric="average_test_score")
                elif early_stopping == "HyperBandForBOHB":
                    self.early_stopping = HyperBandForBOHB(
                        metric="average_test_score")
                elif early_stopping == "MedianStoppingRule":
                    self.early_stopping = MedianStoppingRule(
                        metric="average_test_score")
                elif early_stopping == "ASHAScheduler":
                    self.early_stopping = ASHAScheduler(
                        metric="average_test_score")
            else:
                raise ValueError(
                    "{} is not a defined scheduler. "
                    "Check the list of available schedulers.".format(
                        early_stopping))
        elif isinstance(early_stopping, TrialScheduler):
            self.early_stopping = early_stopping
            self.early_stopping.metric = "average_test_score"
        else:
            raise TypeError("Scheduler must be a str or tune scheduler")
    else:
        warnings.warn("Early stopping is not enabled. "
                      "To enable early stopping, pass in a supported "
                      "scheduler from Tune and ensure the estimator "
                      "has `partial_fit`.")
        self.max_iters = 1
        self.early_stopping = None

    self.cv = cv
    self.scoring = scoring
    self.n_jobs = n_jobs
    self.refit = refit
    self.verbose = verbose
    self.error_score = error_score
    self.return_train_score = return_train_score
    self.use_gpu = use_gpu
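# A minimal usage sketch of the string-based early_stopping resolution
# implemented above. TuneGridSearchCV is assumed to subclass TuneBaseSearchCV
# and forward these arguments; SGDClassifier is used because it implements
# partial_fit, which the early-stopping path requires:
from sklearn.linear_model import SGDClassifier

search = TuneGridSearchCV(
    SGDClassifier(),
    {"alpha": [1e-4, 1e-3, 1e-2]},
    early_stopping="MedianStoppingRule",  # resolved to MedianStoppingRule(
    max_iters=10)                         #   metric="average_test_score")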
def hparams(algorithm, scheduler, num_samples, tensorboard, bare):
    # The following four imports are assumed; the snippet uses environ,
    # getenv, join, and datetime but their imports were outside this excerpt.
    from os import environ, getenv
    from os.path import join
    from datetime import datetime
    from glob import glob
    import tensorflow.summary
    from tensorflow import random as tfrandom, int64 as tfint64
    from ray import init as init_ray, shutdown as shutdown_ray
    from ray import tune
    from wandb.ray import WandbLogger
    from wandb import sweep as wandbsweep
    from wandb.apis import CommError as wandbCommError

    # fewer summaries are logged if MLENCRYPT_TB is TRUE (for efficiency)
    # TODO: use tf.summary.record_if?
    environ["MLENCRYPT_TB"] = str(tensorboard).upper()
    environ["MLENCRYPT_BARE"] = str(bare).upper()
    if getenv('MLENCRYPT_TB', 'FALSE') == 'TRUE' and \
            getenv('MLENCRYPT_BARE', 'FALSE') == 'TRUE':
        raise ValueError('TensorBoard logging cannot be enabled in bare mode.')

    logdir = f'logs/hparams/{datetime.now()}'

    # "These results show that K = 3 is the optimal choice for the
    # cryptographic application of neural synchronization. K = 1 and K = 2
    # are too insecure in regard to the geometric attack. And for K > 3 the
    # effort of A and B grows exponentially with increasing L, while the
    # simple attack is quite successful in the limit K -> infinity.
    # Consequently, one should only use Tree Parity Machines with three
    # hidden units for the neural key-exchange protocol." (Ruttor, 2006)
    # https://arxiv.org/pdf/0711.2411.pdf#page=59
    update_rules = [
        'random-same',
        # 'random-different-A-B-E', 'random-different-A-B',
        'hebbian',
        'anti_hebbian',
        'random_walk',
    ]
    K_bounds = {'min': 4, 'max': 8}
    N_bounds = {'min': 4, 'max': 8}
    L_bounds = {'min': 4, 'max': 8}

    # TODO: don't use *_bounds.values() since .values doesn't preserve order

    def get_session_num(logdir):
        current_runs = glob(join(logdir, "run-*"))
        if current_runs:
            last_run_path = current_runs[-1]
            last_run_session_num = int(last_run_path.split('-')[-1])
            return last_run_session_num + 1
        else:
            # there are no runs yet, start at 0
            return 0

    def trainable(config, reporter):
        """
        Args:
            config (dict): Parameters provided from the search algorithm
                or variant generation.
        """
        if not isinstance(config['update_rule'], str):
            update_rule = update_rules[int(config['update_rule'])]
        else:
            update_rule = config['update_rule']
        K, N, L = int(config['K']), int(config['N']), int(config['L'])
        run_name = f"run-{get_session_num(logdir)}"
        run_logdir = join(logdir, run_name)
        # for each attack, the TPMs should start with the same weights
        initial_weights_tensors = get_initial_weights(K, N, L)
        training_steps_ls = {}
        eve_scores_ls = {}
        losses_ls = {}
        # for each attack, the TPMs should use the same inputs
        seed = tfrandom.uniform(
            [], minval=0, maxval=tfint64.max, dtype=tfint64).numpy()
        for attack in ['none', 'geometric']:
            initial_weights = {
                tpm: weights_tensor_to_variable(weights, tpm)
                for tpm, weights in initial_weights_tensors.items()
            }
            tfrandom.set_seed(seed)
            if tensorboard:
                attack_logdir = join(run_logdir, attack)
                attack_writer = tensorflow.summary.create_file_writer(
                    attack_logdir)
                with attack_writer.as_default():
                    training_steps, sync_scores, loss = run(
                        update_rule, K, N, L, attack, initial_weights)
            else:
                training_steps, sync_scores, loss = run(
                    update_rule, K, N, L, attack, initial_weights)
            training_steps_ls[attack] = training_steps
            eve_scores_ls[attack] = sync_scores
            losses_ls[attack] = loss
        avg_training_steps = tensorflow.math.reduce_mean(
            list(training_steps_ls.values()))
        avg_eve_score = tensorflow.math.reduce_mean(
            list(eve_scores_ls.values()))
        mean_loss = tensorflow.math.reduce_mean(list(losses_ls.values()))
        reporter(
            avg_training_steps=avg_training_steps.numpy(),
            avg_eve_score=avg_eve_score.numpy(),
            mean_loss=mean_loss.numpy(),
            done=True,
        )

    if algorithm == 'hyperopt':
        from hyperopt import hp as hyperopt
        from hyperopt.pyll.base import scope
        from ray.tune.suggest.hyperopt import HyperOptSearch
        space = {
            'update_rule': hyperopt.choice('update_rule', update_rules),
            'K': scope.int(hyperopt.quniform('K', *K_bounds.values(), q=1)),
            'N': scope.int(hyperopt.quniform('N', *N_bounds.values(), q=1)),
            'L': scope.int(hyperopt.quniform('L', *L_bounds.values(), q=1)),
        }
        algo = HyperOptSearch(
            space,
            metric='mean_loss',
            mode='min',
            points_to_evaluate=[
                {'update_rule': 0, 'K': 3, 'N': 16, 'L': 8},
                {'update_rule': 0, 'K': 8, 'N': 16, 'L': 8},
                {'update_rule': 0, 'K': 8, 'N': 16, 'L': 128},
            ],
        )
    elif algorithm == 'bayesopt':
        from ray.tune.suggest.bayesopt import BayesOptSearch
        space = {
            'update_rule': (0, len(update_rules)),
            'K': tuple(K_bounds.values()),
            'N': tuple(N_bounds.values()),
            'L': tuple(L_bounds.values()),
        }
        algo = BayesOptSearch(
            space,
            metric="mean_loss",
            mode="min",
            # TODO: what is utility_kwargs for and why is it needed?
            utility_kwargs={"kind": "ucb", "kappa": 2.5, "xi": 0.0})
    elif algorithm == 'nevergrad':
        from ray.tune.suggest.nevergrad import NevergradSearch
        from nevergrad import optimizers
        from nevergrad import p as ngp
        algo = NevergradSearch(
            optimizers.TwoPointsDE(
                ngp.Instrumentation(
                    update_rule=ngp.Choice(update_rules),
                    K=ngp.Scalar(
                        lower=K_bounds['min'],
                        upper=K_bounds['max']).set_integer_casting(),
                    N=ngp.Scalar(
                        lower=N_bounds['min'],
                        upper=N_bounds['max']).set_integer_casting(),
                    L=ngp.Scalar(
                        lower=L_bounds['min'],
                        upper=L_bounds['max']).set_integer_casting(),
                )),
            None,  # since the optimizer is already instrumented with kwargs
            metric="mean_loss",
            mode="min")
    elif algorithm == 'skopt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch
        optimizer = Optimizer([
            update_rules,
            tuple(K_bounds.values()),
            tuple(N_bounds.values()),
            tuple(L_bounds.values())
        ])
        algo = SkOptSearch(
            optimizer,
            ["update_rule", "K", "N", "L"],
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'dragonfly':
        # TODO: doesn't work
        from ray.tune.suggest.dragonfly import DragonflySearch
        from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
        from dragonfly.opt.gp_bandit import EuclideanGPBandit
        # from dragonfly.exd.experiment_caller import CPFunctionCaller
        # from dragonfly.opt.gp_bandit import CPGPBandit
        from dragonfly import load_config
        domain_config = load_config({
            "domain": [
                {
                    "name": "update_rule",
                    "type": "discrete",
                    "dim": 1,
                    "items": update_rules
                },
                {
                    "name": "K",
                    "type": "int",
                    "min": K_bounds['min'],
                    "max": K_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "N",
                    "type": "int",
                    "min": N_bounds['min'],
                    "max": N_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "L",
                    "type": "int",
                    "min": L_bounds['min'],
                    "max": L_bounds['max'],
                    # "dim": 1
                }
            ]
        })
        func_caller = EuclideanFunctionCaller(
            None, domain_config.domain.list_of_domains[0])
        optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
        algo = DragonflySearch(
            optimizer,
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'bohb':
        from ConfigSpace import ConfigurationSpace
        from ConfigSpace import hyperparameters as CSH
        from ray.tune.suggest.bohb import TuneBOHB
        config_space = ConfigurationSpace()
        config_space.add_hyperparameter(
            CSH.CategoricalHyperparameter("update_rule",
                                          choices=update_rules))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(
                name='K', lower=K_bounds['min'], upper=K_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(
                name='N', lower=N_bounds['min'], upper=N_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(
                name='L', lower=L_bounds['min'], upper=L_bounds['max']))
        algo = TuneBOHB(config_space, metric="mean_loss", mode="min")
    elif algorithm == 'zoopt':
        from ray.tune.suggest.zoopt import ZOOptSearch
        from zoopt import ValueType
        space = {
            "update_rule": (ValueType.DISCRETE,
                            range(0, len(update_rules)), False),
            "K": (ValueType.DISCRETE,
                  range(K_bounds['min'], K_bounds['max'] + 1), True),
            "N": (ValueType.DISCRETE,
                  range(N_bounds['min'], N_bounds['max'] + 1), True),
            "L": (ValueType.DISCRETE,
                  range(L_bounds['min'], L_bounds['max'] + 1), True),
        }
        # TODO: change budget to a large value
        algo = ZOOptSearch(
            budget=10, dim_dict=space, metric="mean_loss", mode="min")

    # TODO: use more appropriate arguments for the schedulers:
    # https://docs.ray.io/en/master/tune/api_docs/schedulers.html
    if scheduler == 'fifo':
        sched = None  # Tune defaults to FIFO
    elif scheduler == 'pbt':
        from ray.tune.schedulers import PopulationBasedTraining
        from random import randint
        sched = PopulationBasedTraining(
            metric="mean_loss",
            mode="min",
            hyperparam_mutations={
                "update_rule": update_rules,
                "K": lambda: randint(K_bounds['min'], K_bounds['max']),
                "N": lambda: randint(N_bounds['min'], N_bounds['max']),
                "L": lambda: randint(L_bounds['min'], L_bounds['max']),
            })
    elif scheduler == 'ahb' or scheduler == 'asha':
        # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler
        from ray.tune.schedulers import AsyncHyperBandScheduler
        sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    elif scheduler == 'hb':
        from ray.tune.schedulers import HyperBandScheduler
        sched = HyperBandScheduler(metric="mean_loss", mode="min")
    elif algorithm == 'bohb' or scheduler == 'bohb':
        from ray.tune.schedulers import HyperBandForBOHB
        sched = HyperBandForBOHB(metric="mean_loss", mode="min")
    elif scheduler == 'msr':
        from ray.tune.schedulers import MedianStoppingRule
        sched = MedianStoppingRule(metric="mean_loss", mode="min")

    init_ray(
        address=getenv("ip_head"),
        redis_password=getenv('redis_password'),
    )
    analysis = tune.run(
        trainable,
        name='mlencrypt_research',
        config={
            "monitor": True,
            "env_config": {
                "wandb": {
                    "project": "mlencrypt-research",
                    "sync_tensorboard": True,
                },
            },
        },
        # resources_per_trial={"cpu": 1, "gpu": 3},
        local_dir='./ray_results',
        export_formats=['csv'],  # TODO: add other formats?
        num_samples=num_samples,
        loggers=[
            tune.logger.JsonLogger,
            tune.logger.CSVLogger,
            tune.logger.TBXLogger,
            WandbLogger,
        ],
        search_alg=algo,
        scheduler=sched,
        queue_trials=True,
    )
    try:
        wandbsweep(analysis)
    except wandbCommError:
        # see https://docs.wandb.com/sweeps/ray-tune#feature-compatibility
        pass
    best_config = analysis.get_best_config(metric='mean_loss', mode='min')
    print(f"Best config: {best_config}")
    shutdown_ray()