def run(config_file, tunable_id, local_dir):
    """Run a HyperOpt + AsyncHyperBand parameter search through Ray Tune.

    Args:
        config_file: Path of the config file. It is loaded with
            ``config_util.load`` and must provide ``TUNE_SPACE`` and
            ``TUNE_SPEC`` entries.
        tunable_id: Name under which ``TrainTunable`` is registered and which
            the experiment spec runs.
        local_dir: Value stored as the experiment's ``local_dir``.
    """
    register_trainable(tunable_id, TrainTunable)
    lm_config = config_util.load(config_file)

    def easydict_to_dict(config):
        """Return ``config`` as a plain dict, converting nested EasyDicts."""
        if isinstance(config, EasyDict):
            config = dict(config)

        for key, value in config.items():
            if isinstance(value, EasyDict):
                # Only existing keys are reassigned, so mutating while
                # iterating is safe here.
                config[key] = easydict_to_dict(value)
        return config

    tune_space = easydict_to_dict(lm_config['TUNE_SPACE'])
    tune_spec = easydict_to_dict(lm_config['TUNE_SPEC'])
    tune_spec['run'] = tunable_id
    tune_spec['config'] = {'lm_config': os.path.join(os.getcwd(), config_file)}
    tune_spec['local_dir'] = local_dir
    tune_spec['trial_name_creator'] = ray.tune.function(trial_str_creator)

    # Expecting use of gpus to do parameter search.
    # Use half of the cores, but never fewer than one: on a single-core host
    # ``cpu_count() // 2`` is 0 and Ray would have no CPU to schedule on.
    num_cpus = max(multiprocessing.cpu_count() // 2, 1)
    ray.init(num_cpus=num_cpus, num_gpus=max(get_num_gpu(), 1))

    algo = HyperOptSearch(tune_space, max_concurrent=4, reward_attr="mean_accuracy")
    scheduler = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        reward_attr="mean_accuracy",
                                        max_t=200)
    trials = run_experiments(experiments={'exp_tune': tune_spec},
                             search_alg=algo,
                             scheduler=scheduler)
    print("The best result is", get_best_result(trials, metric="mean_accuracy", param='config'))
reward_list = [] searched_dict = {} searched_dict['scores'] = [] searched_dict['policies'] = [] for _ in range(1): # run multiple times. for cv_fold in range(cv_num): name = "search_%s_%s_fold%d_ratio%.1f" % (C.get()['dataset'], C.get()['model']['type'], cv_fold, args.cv_ratio) print(name) register_trainable( name, lambda augs, rpt: eval_tta(copy.deepcopy(copied_c), augs, rpt)) # register_trainable(name, eval_tta) algo = HyperOptSearch(space, max_concurrent=20, reward_attr=reward_attr) exp_config = { paths[cv_fold]: { 'run': name, 'num_samples': 4 if args.smoke_test else args.num_search, 'resources_per_trial': { 'gpu': 1 }, 'stop': { 'training_iteration': args.num_policy }, 'config': { 'dataroot': os.path.abspath(args.dataroot), 'save_path': paths[cv_fold],
# Script body: parse the --smoke-test flag, start Ray, register the "exp"
# trainable and run a HyperOpt search under an AsyncHyperBand scheduler.
import argparse
from hyperopt import hp

parser = argparse.ArgumentParser()
parser.add_argument("--smoke-test",
                    action="store_true",
                    help="Finish quickly for testing")
args, _ = parser.parse_known_args()

ray.init(redirect_output=True)
register_trainable("exp", easy_objective)

# Search space sampled by HyperOpt.
space = dict(
    width=hp.uniform('width', 0, 20),
    height=hp.uniform('height', -100, 100),
    activation=hp.choice("activation", ["relu", "tanh"]),
)

# A smoke test only draws a handful of samples.
if args.smoke_test:
    num_samples = 10
else:
    num_samples = 1000

config = {
    "my_exp": {
        "run": "exp",
        "num_samples": num_samples,
        "stop": {"training_iteration": 100},
    },
}

# Both the search algorithm and the scheduler optimise the same attribute.
algo = HyperOptSearch(space, max_concurrent=4, reward_attr="neg_mean_loss")
scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
run_experiments(config, search_alg=algo, scheduler=scheduler)
hp.uniform("momentum", 0, 0.99), 'weight_decay': hp.choice( "weight_decay", np.concatenate((10**-np.random.uniform(1, 5, size=100), [0]))) } current_best_params = [{ "factor": 0.1, "lr": 0.01, "momentum": 0.9, "weight_decay": 0 }] algo2 = HyperOptSearch(space2, max_concurrent=4, reward_attr="neg_mean_loss", points_to_evaluate=current_best_params) algo1 = BayesOptSearch( space1, max_concurrent=10, reward_attr="neg_mean_loss", #reward_attr="mean_accuracy", utility_kwargs={ # "kind": "ucb", # "kappa": 2.5, # "xi": 0.0 "kind": "ei", "kappa": 2.5, "xi": 0.01 },
def search(conf):
    """Search augmentation policies per fold, then train with and without them.

    The function, in order:
      1. initialises Ray (using ``conf['redis']`` as the address, or the local
         GPUs when that value is falsy);
      2. calls ``_train_no_aug(conf)``;
      3. for every cross-validation fold runs a HyperOpt search over
         ``_eval_tta`` and keeps the ``num_result_per_cv`` best policies;
      4. launches 5 "default" and 5 "augment" ``_train_model`` remote tasks,
         polls their checkpoint files for progress, and logs the averaged
         ``top1_test`` of each group.

    Args:
        conf: Nested configuration mapping. Keys read here: ``dataset.name``,
            ``dataroot``, ``redis``, ``smoke_test``, ``resume`` and, under
            ``autoaug``: ``loader`` (``aug``, ``val_ratio``, ``epochs``,
            ``val_fold``, ``cv_num``), ``model.type``, ``num_policy``,
            ``num_op``, ``num_search``, ``num_result_per_cv``.
    """
    sw = StopWatch.get()

    # region conf vars
    conf_dataset = conf['dataset']
    dataroot = conf['dataroot']
    redis_ip = conf['redis']
    conf_loader = conf['autoaug']['loader']
    conf_model = conf['autoaug']['model']
    model_type = conf_model['type']
    ds_name = conf_dataset['name']
    aug = conf_loader['aug']
    val_ratio = conf_loader['val_ratio']
    epochs = conf_loader['epochs']
    val_fold = conf_loader['val_fold']  # overwritten by the fold loop below
    cv_num = conf_loader['cv_num']
    num_policy = conf['autoaug']['num_policy']
    num_op = conf['autoaug']['num_op']
    num_search = conf['autoaug']['num_search']
    num_result_per_cv = conf['autoaug']['num_result_per_cv']
    smoke_test = conf['smoke_test']
    resume = conf['resume']
    # endregion

    ray.init(redis_address=redis_ip,
             # allocate all GPUs on local node if cluster is not specified
             num_gpus=torch.cuda.device_count() if not redis_ip else None)

    # first train with no aug
    _train_no_aug(conf)

    # get values from config
    num_samples = 4 if smoke_test else num_search

    logger.info('----- Search Test-Time Augmentation Policies -----')
    sw.start(tag='search')

    save_paths = [_get_model_filepath(ds_name, model_type,
                                      'ratio%.1f_fold%d' % (val_ratio, i))
                  for i in range(cv_num)]

    copied_c = copy.deepcopy(conf)
    ops = augment_list(False)
    space = {}
    for i in range(num_policy):
        for j in range(num_op):
            space['policy_%d_%d' % (i, j)] = hp.choice('policy_%d_%d' % (i, j),
                                                       list(range(0, len(ops))))
            # NOTE(review): the hyperopt labels below contain a space
            # ('prob_%d_ %d') unlike the dict keys; kept byte-identical so any
            # previously saved search state still matches -- confirm whether
            # this is intentional.
            space['prob_%d_%d' % (i, j)] = hp.uniform('prob_%d_ %d' % (i, j), 0.0, 1.0)
            space['level_%d_%d' % (i, j)] = hp.uniform('level_%d_ %d' % (i, j), 0.0, 1.0)

    final_policy_set = []
    total_computation = 0
    reward_attr = 'top1_valid'  # top1_valid or minus_loss
    for _ in range(1):  # run multiple times.
        for val_fold in range(cv_num):
            name = "search_%s_%s_fold%d_ratio%.1f" % (ds_name, model_type, val_fold, val_ratio)
            # logger.info(name)
            register_trainable(name, (lambda augs, rpt: _eval_tta(copy.deepcopy(copied_c), augs, rpt)))
            algo = HyperOptSearch(space, max_concurrent=4*20, reward_attr=reward_attr)

            exp_config = {
                name: {
                    'run': name,
                    'num_samples': num_samples,
                    'resources_per_trial': {'gpu': 1},
                    'stop': {'training_iteration': num_policy},
                    'config': {
                        'dataroot': dataroot,
                        'save_path': save_paths[val_fold],
                        'val_ratio': val_ratio,
                        'val_fold': val_fold,
                        'num_op': num_op,
                        'num_policy': num_policy
                    },
                }
            }
            results = run_experiments(exp_config, search_alg=algo, scheduler=None, verbose=0,
                                      queue_trials=True, resume=resume,
                                      raise_on_failed_trial=False)
            # Trials without a last result are dropped before ranking.
            results = [x for x in results if x.last_result is not None]
            results = sorted(results, key=lambda x: x.last_result[reward_attr], reverse=True)

            # calculate computation usage
            for result in results:
                total_computation += result.last_result['elapsed_time']

            for result in results[:num_result_per_cv]:
                final_policy = policy_decoder(result.config, num_policy, num_op)
                logger.info('loss=%.12f top1_valid=%.4f %s' %
                            (result.last_result['minus_loss'],
                             result.last_result['top1_valid'],
                             final_policy))

                final_policy = remove_deplicates(final_policy)
                final_policy_set.extend(final_policy)

    logger.info(json.dumps(final_policy_set))
    logger.info('final_policy=%d' % len(final_policy_set))
    logger.info('processed in %.4f secs, gpu hours=%.4f' %
                (sw.pause('search'), total_computation / 3600.))
    logger.info('----- Train with Augmentations model=%s dataset=%s aug=%s ratio(test)=%.1f -----'
                % (model_type, ds_name, aug, val_ratio))
    sw.start(tag='train_aug')

    num_experiments = 5
    default_path = [_get_model_filepath(ds_name, model_type,
                                        'ratio%.1f_default%d' % (val_ratio, i))
                    for i in range(num_experiments)]
    augment_path = [_get_model_filepath(ds_name, model_type,
                                        'ratio%.1f_augment%d' % (val_ratio, i))
                    for i in range(num_experiments)]
    reqs = [_train_model.remote(copy.deepcopy(copied_c), dataroot, aug, 0.0, 0,
                                save_path=default_path[i], only_eval=True)
            for i in range(num_experiments)] + \
           [_train_model.remote(copy.deepcopy(copied_c), dataroot, final_policy_set, 0.0, 0,
                                save_path=augment_path[i])
            for i in range(num_experiments)]

    # Progress display: advance the bar only once every experiment's
    # checkpoint has reached the bar's current epoch.
    tqdm_epoch = tqdm(range(epochs))
    is_done = False
    for epoch in tqdm_epoch:
        while True:
            # Must NOT be named `epochs`: that is the integer epoch budget
            # compared against below. Shadowing it with this dict made the
            # completion check compare an int with a dict.
            ckpt_epochs = OrderedDict()
            for exp_idx in range(num_experiments):
                for tag, path in (('default', default_path[exp_idx]),
                                  ('augment', augment_path[exp_idx])):
                    try:
                        if os.path.exists(path):
                            latest_ckpt = torch.load(path)
                            ckpt_epochs['%s_exp%d' % (tag, exp_idx + 1)] = latest_ckpt['epoch']
                    except Exception:
                        # Best effort: any failure to read a checkpoint is
                        # ignored (it may be mid-write) and the file is
                        # retried on the next poll. KeyboardInterrupt is
                        # deliberately not swallowed.
                        pass

            tqdm_epoch.set_postfix(ckpt_epochs)
            if len(ckpt_epochs) == num_experiments * 2:
                slowest = min(ckpt_epochs.values())
                if slowest >= epochs:
                    is_done = True
                if slowest >= epoch:
                    break
            time.sleep(10)
        if is_done:
            break

    logger.info('getting results...')
    final_results = ray.get(reqs)

    # `reqs` was built as all default runs followed by all augment runs, so
    # results are consumed from the front in that same order.
    for train_mode in ['default', 'augment']:
        avg = 0.
        for _ in range(num_experiments):
            r_model, r_cv, r_dict = final_results.pop(0)
            logger.info('[%s] top1_train=%.4f top1_test=%.4f' %
                        (train_mode, r_dict['top1_train'], r_dict['top1_test']))
            avg += r_dict['top1_test']
        avg /= num_experiments
        logger.info('[%s] top1_test average=%.4f (#experiments=%d)' %
                    (train_mode, avg, num_experiments))
    logger.info('processed in %.4f secs' % sw.pause('train_aug'))

    logger.info(sw)
"nc": 3, "nz": 100, "ngf": 64, "ndf": 64, "lrD": hp.loguniform('lrD', -8, -1), "lrG": hp.loguniform('lrG', -8, -1), "beta1": hp.uniform('beta1', 0, 1), "beta2": hp.uniform('beta2', 0, 1), "Diters": 5, "noBN": False, "type": hp.choice('type', ["dcgan", "mlp", "resnet"]), } ray.init() algo = HyperOptSearch(space, max_concurrent=4, reward_attr="inception") sched = AsyncHyperBandScheduler(time_attr="training_iteration", reward_attr="inception", max_t=8, grace_period=2) def train(config, reporter): args.update(config) main(args, reporter) tune.register_trainable("main", train) tune.run_experiments(
def get_best_model(x_train, y_train, **kwargs):
    """Tune a model with Ray Tune and return the best restorable trial's model.

    Args:
        x_train: Training inputs, passed to the model factory, ``fit`` and
            ``evaluate``.
        y_train: Not used in this function; targets are taken from
            ``kwargs['primal_data']['y_pred']``.
        **kwargs: Must contain ``primal_data`` (a mapping with ``y_pred`` and
            ``model_name``) and ``params`` (the Tune ``config``). May contain
            ``space``; when truthy it is used as a HyperOpt search space.

    Returns:
        The model rebuilt from the first trial, in ``get_sorted_trials``
        order, whose saved weights load successfully.

    Raises:
        RuntimeError: If no trial's model could be rebuilt and restored.
    """
    y_pred = kwargs['primal_data']['y_pred']
    model_name = kwargs['primal_data']['model_name']
    fn_name, param_name = get_model_design(model_name)
    mapping_instance = create_model(fn_name=fn_name, param_name=param_name)

    def train_model(config, reporter):
        '''
        This function is used by Tune to train the model with each iteration variations.

        Args:
            config(dict): A dictionary with the search params passed by Tune.
            Similar to the JSON we already have.
            reporter: A function used by Tune to keep a track of the metric by
            which the iterations should be optimized.
        '''
        model = mapping_instance(x_train=x_train, params=config)
        model.fit(x_train, y_pred)
        last_checkpoint = "weights_tune_{}.h5".format(config)
        model.save_weights(last_checkpoint)
        accuracy = model.evaluate(x_train, y_pred)[1]
        reporter(mean_accuracy=accuracy, checkpoint=last_checkpoint)

    # Define experiment configuration
    # NOTE(review): the stop threshold is 95; if `mean_accuracy` is reported
    # as a fraction in [0, 1] this never triggers -- confirm the metric scale.
    configuration = tune.Experiment("experiment_name",
                                    run=train_model,
                                    resources_per_trial={"cpu": 4},
                                    stop={"mean_accuracy": 95},
                                    config=kwargs['params'])

    # This validation is to check if the user has opted for hyperopt search
    # method. `.get` keeps a missing `space` equivalent to "not opted in".
    space = kwargs.get('space')
    if space:
        print('hyperopt choosen-------')
        hyperopt_search = HyperOptSearch(space, reward_attr="mean_accuracy")
        # TODO
        # Should this wrapper be avoided(instead the user passes the HyperOptSearch).
        # Add other args for hyperopt search.
        # Add the remaining search_algos if necessary.
        trials = tune.run_experiments(configuration,
                                      search_alg=hyperopt_search,
                                      verbose=2)
    else:
        trials = tune.run_experiments(configuration, verbose=2)

    metric = "mean_accuracy"

    # Restore a model from the best trial.
    sorted_trials = get_sorted_trials(trials, metric)
    best_model = None
    for best_trial in sorted_trials:
        try:
            print("Creating model...")
            candidate = mapping_instance(x_train=x_train,
                                         params=best_trial.config)  # TODO Pass config as argument
            # best_model = make_model(None)
            weights = os.path.join(best_trial.logdir,
                                   best_trial.last_result["checkpoint"])
            print("Loading from", weights)
            candidate.load_weights(weights)  # TODO Validate this loaded model.
        except Exception as e:
            print(e)
            print("Loading failed. Trying next model")
        else:
            # Only a model whose weights actually loaded counts as restored.
            best_model = candidate
            break

    if best_model is None:
        raise RuntimeError("No trial produced a model whose weights could be restored")
    return best_model
# stop condition for VOT and OTB if args.dataset.startswith('VOT'): stop = { "EAO": 0.50, # if EAO >= 0.50, this procedures will stop # "timesteps_total": 100, # iteration times } tune_spec['zp_tune']['stop'] = stop scheduler = AsyncHyperBandScheduler( # time_attr="timesteps_total", reward_attr="EAO", max_t=400, grace_period=20) algo = HyperOptSearch( params, max_concurrent=args.gpu_nums * 2 + 1, reward_attr="EAO") # max_concurrent: the max running task elif args.dataset.startswith('OTB'): stop = { # "timesteps_total": 100, # iteration times "AUC": 0.80 } tune_spec['zp_tune']['stop'] = stop scheduler = AsyncHyperBandScheduler( # time_attr="timesteps_total", reward_attr="AUC", max_t=400, grace_period=20) algo = HyperOptSearch(params, max_concurrent=args.gpu_nums * 2 + 1,
if __name__ == "__main__":
    # Script entry point: register the trainable, connect to an existing Ray
    # cluster and run a HyperOpt search under an AsyncHyperBand scheduler.
    from hyperopt import hp

    tune.register_trainable("my_class", memLstm)
    ray.init(redis_address="192.168.1.153:9023")

    space = {
        # hp.uniform takes (label, low, high); the bounds were previously
        # passed as (0.0005, 0.0001), i.e. high before low.
        'learning_rate': hp.uniform('learning_rate', 0.0001, 0.0005),
        # NOTE(review): hp.uniform yields floats; presumably the trainable
        # casts this dimension to int -- confirm.
        'memn2n_rnn_dim': hp.uniform('memn2n_rnn_dim', 128, 257),
        'hops': hp.choice('hops', [3, 4, 5, 6]),
        'amp': hp.choice('amp', [1, 2, 3, 4, 5])
    }

    config = {
        'my_exp': {
            'run': memLstm,
            'trial_resources': {
                'gpu': 1
            },
            'stop': {
                "training_iteration": 5
            },
            'num_samples': 8
        }
    }

    algo = HyperOptSearch(space, max_concurrent=4, reward_attr="mean_accuracy")
    scheduler = AsyncHyperBandScheduler(reward_attr="mean_accuracy")
    run_experiments(config, search_alg=algo, scheduler=scheduler)
# Register the model-building trainable, run a timed HyperOpt search, and
# collect each trial's final accuracy next to its sampled configuration.
register_trainable("exp", create_model)

# Hyperparameter space
space = {
    # '_features': hp.choice('_features', allowed_indices)
    # '_layers': hp.choice('_layers', [1, 2]),
    '_l1nn': hp.randint('_l1nn', 9),
    '_l2nn': hp.randint('_l2nn', 9),
    '_act': hp.choice('_act', ['relu', 'tanh']),
    '_lr': hp.uniform('_lr', 0.001, 0.05)
}

config = {"my_exp": {"run": "exp", "num_samples": 1000}}

start = time.time()
algo = HyperOptSearch(space, max_concurrent=10, reward_attr="mean_acc")
scheduler = AsyncHyperBandScheduler(reward_attr="mean_acc")
train_results = run_experiments(config, search_alg=algo, scheduler=scheduler)
end = time.time()

# One entry per trial, in the order returned by run_experiments.
results = [item.last_result['mean_acc'] for item in train_results]
configs = [item.config for item in train_results]

rdf = pd.DataFrame(results)
cdf = pd.DataFrame(configs)
# Attach the accuracy as an extra column of the configuration table.
cdf['_acc'] = rdf
reward_attr="mean_accuracy", max_t=200, grace_period=15) space = { 'L2': hp.loguniform('L2', -2.3 * 5, -2.3 * 9), 'mixing_ratio': hp.uniform('mixing_ratio', 0.9, 1) } exp = { 'run': "my_class", 'num_samples': 50, 'stop': { "training_iteration": 200 }, 'trial_resources': { "gpu": 1, "cpu": 1 }, 'config': { "L2": lambda spec: 10**(3 * random.random() - 8), "mixing_ratio": lambda spec: random.random() } } algo = HyperOptSearch(space, reward_attr="mean_accuracy") tune.run_experiments({"GRU_teach": exp}, search_alg=algo, scheduler=hyperband) print("Finished with the simulations")
def main(args):
    """Run a HyperOpt + AsyncHyperBand search over ``TrainerClass``.

    Starts Ray, builds the data loaders and pins them (and ``args``) in the
    object store via ``pinned_obj_dict``, then defines the scheduler, the
    search space and the experiment, optionally launches TensorBoard in a
    background thread, and finally runs the experiment.

    Args:
        args: Parsed options. Attributes read here: ``rayNumCpu``,
            ``rayNumGpu``, ``dataFolder``, ``numSamples``, ``trialNumCpu``,
            ``trialNumGpu``, ``checkpointFreq``, ``trainingIteration`` and
            ``runTensorBoard``.
    """
    ray.init(num_cpus=args.rayNumCpu, num_gpus=args.rayNumGpu)

    t_loader, v_loader = get_loaders(train_batch_size=16,
                                     num_workers=1,
                                     data_folder=args.dataFolder,
                                     cuda_available=torch.cuda.is_available())
    pinned_obj_dict['data_loader_train'] = pin_in_object_store(t_loader)
    pinned_obj_dict['data_loader_valid'] = pin_in_object_store(v_loader)
    pinned_obj_dict['args'] = pin_in_object_store(args)

    trainable_name = 'hyp_search_train'
    register_trainable(trainable_name, TrainerClass)

    reward_attr = "acc"

    #############################
    # Define hyperband scheduler
    #############################
    hpb = AsyncHyperBandScheduler(time_attr="training_iteration",
                                  reward_attr=reward_attr,
                                  grace_period=40,
                                  max_t=300)

    ##############################
    # Define hyperopt search algo
    ##############################
    space = {
        'lr': hp.uniform('lr', 0.001, 0.1),
        # 'Adadelta' is left out: Adadelta gets the worst results
        'optimizer': hp.choice("optimizer", ['SGD', 'Adam']),
        'batch_accumulation': hp.choice("batch_accumulation", [4, 8, 16])
    }
    hos = HyperOptSearch(space, max_concurrent=4, reward_attr=reward_attr)

    #####################
    # Define experiments
    #####################
    exp_name = "resnet152_hyp_search_hyperband_hyperopt_{}".format(
        time.strftime("%Y-%m-%d_%H.%M.%S"))
    exp = Experiment(
        name=exp_name,
        run=trainable_name,
        num_samples=args.numSamples,  # the number of experiments
        resources_per_trial={
            "cpu": args.trialNumCpu,
            "gpu": args.trialNumGpu
        },
        checkpoint_freq=args.checkpointFreq,
        checkpoint_at_end=True,
        stop={
            reward_attr: 0.95,
            # how many times a specific config will be trained
            "training_iteration": args.trainingIteration,
        })

    ##################
    # Run tensorboard
    ##################
    if args.runTensorBoard:
        # Launch once, in a background thread. The earlier code also called
        # launch_tensorboard(exp_name) a second time on the main thread,
        # duplicating the launch before the experiments started.
        thread = threading.Thread(target=launch_tensorboard, args=[exp_name])
        thread.start()

    ##################
    # Run experiments
    ##################
    run_experiments(exp, search_alg=hos, scheduler=hpb, verbose=False)