def hyperopt(source_dir, only, target_imgs, target_annotations, eval_imgs,
             eval_annotations, masks, batch_size, dataloader_workers, epochs,
             samples, name, load_gan, load, load_algo, out_dir):
    '''Optimize DIHE hyperparameters.'''
    config = {
        'batchnorm': tune.choice([True, False]),
        'multiplier': tune.uniform(0.5, 0.99999),
        'enc_lr': tune.uniform(1e-9, 1e-3),
    }
    algo = HyperOptSearch()
    if load_algo is not None:
        algo.restore(load_algo)
    scheduler = ASHAScheduler(max_t=epochs)
    result = tune.run(
        partial(hyperopt.dihe, source_dir=source_dir, target_imgs=target_imgs,
                target_annotations=target_annotations, eval_imgs=eval_imgs,
                eval_annotations=eval_annotations, load_gan=load_gan,
                masks=masks, source_only=only, target_skip=SKU110K_SKIP,
                eval_only=GP_TEST_VALIDATION_SET_SIZE, batch_size=batch_size,
                dataloader_workers=dataloader_workers, epochs=epochs),
        name=name,
        metric='accuracy',
        mode='max',
        resources_per_trial={'gpu': 1, 'cpu': dataloader_workers + 1},
        config=config,
        num_samples=samples,
        scheduler=scheduler,
        search_alg=algo,
        resume=load,
    )
    algo.save(os.path.join(out_dir, f'{name}_search.pkl'))
    df = result.results_df
    for batchnorm in (True, False):
        matching = df[df['config.batchnorm'] == batchnorm]
        print(f'Best with batchnorm={batchnorm}: '
              f'{matching.loc[matching["accuracy"].idxmax()]}')
        print()
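The `load_algo` / `algo.save` pair above uses HyperOptSearch's own checkpointing, which persists only the searcher's trial history and is separate from Tune's per-trial checkpoints. A minimal round-trip sketch, assuming Ray 1.x's `ray.tune.suggest` API (as in these snippets); the toy objective and file name are made up for illustration:

from ray import tune
from ray.tune.suggest.hyperopt import HyperOptSearch

def toy_objective(config):
    # Stand-in trainable: reports the metric the searcher optimizes.
    tune.report(accuracy=config['multiplier'])

# In Ray 1.x the searcher picks up the search space from the config
# passed to tune.run, so no explicit space is needed here.
algo = HyperOptSearch(metric='accuracy', mode='max')
tune.run(toy_objective,
         config={'multiplier': tune.uniform(0.5, 0.99999)},
         search_alg=algo,
         num_samples=4)
algo.save('toy_search.pkl')  # persist the searcher's trial history

# A later process restores that history and keeps sampling where it left off.
resumed = HyperOptSearch(metric='accuracy', mode='max')
resumed.restore('toy_search.pkl')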
def tune_training(config):
    global global_dict
    from hyper_tune import TuneTrainable
    import ray
    from ray import tune
    from ray.tune.schedulers import (HyperBandScheduler, HyperBandForBOHB,
                                     AsyncHyperBandScheduler, FIFOScheduler)
    import ray.tune.suggest as suggest
    from ray.tune import sample_from, Experiment
    import ConfigSpace as CS
    import ConfigSpace.hyperparameters as CSH
    from ray.tune.suggest.hyperopt import HyperOptSearch
    from hyperopt import hp

    ray.init()
    stop_dict = {
        'num_examples': config['tune']['max_t'],
        'no_change_in_accu': 2
    }

    if config['tune']['tuning_method'] == 'grid':
        config['training']['dataset_size'] = tune.grid_search(
            [0.2, 0.4, 0.6, 0.8])
        tune.run(TuneTrainable,
                 config=config,
                 num_samples=config['tune']['n_samples'],
                 name=config['experiment_name'],
                 resume=False,
                 checkpoint_at_end=False,
                 resources_per_trial=config['tune']['resources_per_trial'],
                 local_dir=config['tune']['working_dir'],
                 stop=stop_dict)
    elif config['tune']['tuning_method'] == 'bohb':
        config_space = CS.ConfigurationSpace(seed=42)
        # The 'replace|' prefix convention is a kludge required by BOHB's
        # specialized interface.
        config_space.add_hyperparameters([
            CSH.UniformIntegerHyperparameter('replace|num_layers', lower=1, upper=5),
            CSH.UniformIntegerHyperparameter('replace|hidden_size', lower=64, upper=512),
            CSH.UniformIntegerHyperparameter('replace|embedding_size', lower=64, upper=512),
            CSH.UniformFloatHyperparameter('replace|dropout', lower=0.0, upper=0.5),
            CSH.CategoricalHyperparameter('replace|bidirectional', choices=[True, False]),
            CSH.UniformFloatHyperparameter('replace|lr', lower=0.00001, upper=0.1, log=True)
        ])
        bohb_hyperband = HyperBandForBOHB(
            time_attr='num_examples',
            metric=config['tune']['discriminating_metric'],
            mode=config['tune']['discriminating_metric_mode'],
            max_t=config['tune']['max_t'])
        bohb_search = suggest.bohb.TuneBOHB(
            config_space,
            max_concurrent=1,
            metric=config['tune']['discriminating_metric'],
            mode=config['tune']['discriminating_metric_mode'])
        return tune.run(
            TuneTrainable,
            config=config,
            scheduler=bohb_hyperband,
            search_alg=bohb_search,
            num_samples=1,
            name=config['experiment_name'],
            resources_per_trial=config['tune']['resources_per_trial'],
            local_dir=config['tune']['working_dir'])
    elif config['tune']['tuning_method'] == 'hyperopt':

        def get_hyperopt_space(config):
            if config['model'] == 'simple_lstm':
                return {
                    "allocate|hidden_size": hp.quniform("hidden_size", 32, 700, 2),
                    "allocate|embedding_size": hp.quniform("embedding_size", 32, 700, 2),
                    "allocate|bidirectional": hp.choice("bidirectional", [True, False]),
                    "allocate|num_layers": hp.quniform("num_layers", 1, 5, 1),
                    "allocate|penalize_all_steps": hp.choice("penalize_all_steps", [True, False])
                }
            elif config['model'] == 'awd_rnn':
                return {
                    "allocate|hidden_size": hp.quniform("hidden_size", 32, 1024, 4),
                    "allocate|embedding_size": hp.quniform("embedding_size", 32, 1024, 4),
                    "allocate|num_layers": hp.quniform("num_layers", 1, 6, 1),
                    "allocate|penalize_all_steps": hp.choice("penalize_all_steps", [True, False]),
                    "allocate|dropouto": hp.normal("dropouto", 0.3, 0.2),
                    "allocate|dropouth": hp.normal("dropouth", 0.3, 0.2),
                    "allocate|dropouti": hp.normal("dropouti", 0.3, 0.2),
                    "allocate|dropoute": hp.normal("dropoute", 0.0, 0.13),
                    # "allocate|wdrop": hp.normal("wdrop", 0.0, 0.1),
                    "allocate|ar_alpha": hp.normal("ar_alpha", 2, 3),
                    "allocate|weight_decay": hp.lognormal("weight_decay", -13, 5),
                    "allocate|lr": hp.lognormal('lr', -6, 1),
                    "nested|tokens_config": hp.choice('tokens_config', [{
                        'allocate|tokenizer': 'standard_tokenizer',
                        'nested|tokenization_method': hp.choice('tokenization_method', [{
                            'allocate|tokenization': 'char'
                        }, {
                            'allocate|tokenization': 'word',
                            'allocate|per_class_vocab_size': hp.uniform('per_class_vocab_size', 1000, 10000)
                        }])
                    }, {
                        'allocate|tokenizer': 'youtokentome',
                        'allocate|vocab_size': hp.uniform('vocab_size', 50, 50000)
                    }])
                }
            elif config['model'] == 'vdcnn':
                return {
                    "allocate|embedding_size": hp.quniform("embedding_size", 32, 1024, 4),
                    "allocate|dropout": hp.normal("dropout", 0.3, 0.2),
                    "allocate|apply_shortcut": hp.choice("apply_shortcut", [True, False]),
                    "allocate|k": hp.normal("k", 8, 2),
                    "allocate|dense_nlayers": hp.normal("dense_nlayers", 3, 1),
                    "allocate|dense_nfeatures": hp.normal("dense_nfeatures", 2048, 900),
                    "allocate|conv1_nblocks": hp.uniform("conv1_nblocks", 0, 10),
                    "allocate|conv2_nblocks": hp.uniform("conv2_nblocks", 0, 10),
                    "allocate|conv3_nblocks": hp.uniform("conv3_nblocks", 0, 5),
                    "allocate|conv4_nblocks": hp.uniform("conv4_nblocks", 0, 5),
                    "allocate|conv0_nfmaps": hp.normal("conv0_nfmaps", 64, 20),
                    "allocate|conv1_nfmaps": hp.normal("conv1_nfmaps", 64, 20),
                    "allocate|conv2_nfmaps": hp.normal("conv2_nfmaps", 128, 30),
                    "allocate|conv3_nfmaps": hp.normal("conv3_nfmaps", 256, 50),
                    "allocate|conv4_nfmaps": hp.normal("conv4_nfmaps", 512, 100),
                    "allocate|weight_decay": hp.lognormal("weight_decay", -13, 5),
                    "allocate|lr": hp.lognormal('lr', -6, 1),
                    "nested|tokens_config": hp.choice('tokens_config', [{
                        'allocate|tokenizer': 'standard_tokenizer',
                        'nested|tokenization_method': hp.choice('tokenization_method', [{
                            'allocate|tokenization': 'char'
                        }, {
                            'allocate|tokenization': 'word',
                            'allocate|per_class_vocab_size': hp.uniform('per_class_vocab_size', 1000, 10000)
                        }])
                    }, {
                        'allocate|tokenizer': 'youtokentome',
                        'allocate|vocab_size': hp.uniform('vocab_size', 50, 50000)
                    }])
                }
            elif config['model'] == 'bert':
                return {
                    "allocate|hidden_dropout": hp.normal("hidden_dropout", 0.0, 0.2),
                    "allocate|att_dropout": hp.normal("att_dropout", 0.0, 0.2),
                    "allocate|hidden_size": hp.quniform("hidden_size", 32, 1024, 4),
                    "allocate|n_bert_layers": hp.uniform("n_bert_layers", 1, 8),
                    "allocate|n_att_heads": hp.uniform("n_att_heads", 1, 8),
                    "allocate|intermediate_dense_size": hp.quniform("intermediate_dense_size", 32, 1024, 4),
                    "allocate|penalize_all_steps": hp.choice("penalize_all_steps", [True, False]),
                    "allocate|weight_decay": hp.lognormal("weight_decay", -13, 5),
                    "allocate|lr": hp.lognormal('lr', -6, 1),
                    "nested|tokens_config": hp.choice('tokens_config', [{
                        'allocate|tokenizer': 'standard_tokenizer',
                        'nested|tokenization_method': hp.choice('tokenization_method', [{
                            'allocate|tokenization': 'char'
                        }, {
                            'allocate|tokenization': 'word',
                            'allocate|per_class_vocab_size': hp.uniform('per_class_vocab_size', 1000, 10000)
                        }])
                    }, {
                        'allocate|tokenizer': 'youtokentome',
                        'allocate|vocab_size': hp.uniform('vocab_size', 50, 50000)
                    }])
                }
            else:
                raise NotImplementedError()

        class HyperOptFIFO(FIFOScheduler):
            def on_trial_complete(self, trial_runner, trial, result):
                # Checkpoint the searcher after every completed trial so a
                # crashed experiment can warm-start from its trial history.
                algo.save(hyper_opt_checkpoint_dir)
                print(f'{Fore.GREEN} Checkpointing hyperopt ...{Style.RESET_ALL}')
                return super().on_trial_complete(trial_runner, trial, result)

        space = get_hyperopt_space(config)
        algo = HyperOptSearch(
            space,
            max_concurrent=1,
            metric=config['tune']['discriminating_metric'],
            mode=config['tune']['discriminating_metric_mode'],
            n_initial_points=7,
            random_state_seed=42)
        hyper_opt_checkpoint_dir = os.path.join(config['tune']['working_dir'],
                                                config['experiment_name'],
                                                'hyperopt')
        if config['tune']['resume']:
            try:
                algo.restore(hyper_opt_checkpoint_dir)
                n_trials = len(algo._hpopt_trials)
                print(f"{Fore.GREEN}{n_trials} trials loaded. "
                      f"Warm starting ...{Style.RESET_ALL}")
            except Exception:
                print(f'{Fore.RED}Unable to load trials. '
                      f'Cold starting ...{Style.RESET_ALL}')

        tune.run(TuneTrainable,
                 config=config,
                 search_alg=algo,
                 num_samples=config['tune']['n_samples'],
                 scheduler=HyperOptFIFO(),
                 name=config['experiment_name'],
                 resume=False,
                 checkpoint_at_end=False,
                 resources_per_trial=config['tune']['resources_per_trial'],
                 local_dir=config['tune']['working_dir'],
                 stop=stop_dict)
    elif config['tune']['tuning_method'] == 'no_search':
        tune.run(TuneTrainable,
                 config=config,
                 num_samples=config['tune']['n_samples'],
                 name=config['experiment_name'],
                 resume=False,
                 checkpoint_at_end=False,
                 resources_per_trial=config['tune']['resources_per_trial'],
                 local_dir=config['tune']['working_dir'],
                 stop=stop_dict)
    else:
        raise NotImplementedError()
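The `replace|` / `allocate|` / `nested|` key prefixes above are a naming convention that `TuneTrainable` presumably unpacks before building the model. A hypothetical resolver, purely to illustrate how such prefixed keys could be merged into a flat model config (none of these names come from the repo):

def resolve_prefixed_keys(sampled, model_config):
    # Strip the 'action|name' prefix from each sampled key and merge the
    # value into the model config; nested choices are resolved recursively.
    for key, value in sampled.items():
        if '|' not in key:
            continue
        action, name = key.split('|', 1)
        if action in ('replace', 'allocate'):
            model_config[name] = value
        elif action == 'nested':
            resolve_prefixed_keys(value, model_config)
    return model_config

print(resolve_prefixed_keys(
    {'replace|lr': 3e-4,
     'nested|tokens_config': {'allocate|tokenizer': 'youtokentome'}},
    {}))
# -> {'lr': 0.0003, 'tokenizer': 'youtokentome'}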
resources = {"cpu": int(args.numCPU)} else: resources = {"cpu": 10, "gpu": int(args.numGPU)} #to-do: update points_to_evaluate for ray 1.3.0 hyperopt = HyperOptSearch( metric="val_loss", mode="min", #points_to_evaluate=initial_best_config3, n_initial_points=4) local_dir = "" if args.restorePath: path = args.restorePath + "/experiment/" + args.restoreFile print('Restore from ' + path) hyperopt.restore(path) local_dir = args.restorePath print("Training logs will be saved to " + local_dir) else: local_dir = args.rayResult hyperopt_limited = ConcurrencyLimiter(hyperopt, max_concurrent=args.max_concurrent) trainable = DistributedTrainableCreator(training_initialization(), num_slots=int(args.numGPU), use_gpu=True) analysis = tune.run( trainable, #resources_per_trial=resources,
def hyperopt(imgs, annotations, eval_annotations, name, batch_size,
             dataloader_workers, epochs, samples, load, load_algo, out_dir):
    '''Optimize GLN hyperparameters.'''
    config = {
        'tanh': tune.choice([True, False]),
        'multiplier': tune.uniform(0.8, 0.99999),
        'scale_class': tune.uniform(0.1, 10),
        'scale_gaussian': tune.uniform(0.1, 100),
        'gauss_loss_neg_thresh': 0,
        'gauss_loss_pos_thresh': tune.uniform(0, 1),
    }
    initial_configs = [
        {
            'tanh': True,
            'multiplier': 0.99,
            'scale_class': 1,
            'scale_gaussian': 1,
            'gauss_loss_neg_thresh': 0,
            'gauss_loss_pos_thresh': 0.1,
        },
        {
            'tanh': False,
            'multiplier': 0.99,
            'scale_class': 1,
            'scale_gaussian': 1,
            'gauss_loss_neg_thresh': 0,
            'gauss_loss_pos_thresh': 0.1,
        },
    ]
    algo = HyperOptSearch(points_to_evaluate=initial_configs
                          if not load and load_algo is None else None)
    if load_algo is not None:
        algo.restore(load_algo)
    scheduler = ASHAScheduler(max_t=epochs, grace_period=2)
    result = tune.run(
        partial(hyperopt.gln, imgs=imgs, annotations=annotations,
                eval_annotations=eval_annotations, skip=SKU110K_SKIP,
                batch_size=batch_size, dataloader_workers=dataloader_workers,
                epochs=epochs),
        name=name,
        metric='average_precision',
        mode='max',
        resources_per_trial={'gpu': 1, 'cpu': dataloader_workers + 1},
        # Single-GPU training of GLN is prone to exploding gradients.
        max_failures=2,
        raise_on_failed_trial=False,
        config=config,
        num_samples=samples,
        scheduler=scheduler,
        search_alg=algo,
        resume=load,
    )
    algo.save(os.path.join(out_dir, f'{name}_search.pkl'))
    df = result.results_df
    for tanh in (True, False):
        matching = df[df['config.tanh'] == tanh]
        print(f'Best with tanh={tanh}: '
              f'{matching.loc[matching["average_precision"].idxmax()]}')
        print()
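The guard around `points_to_evaluate` above matters: seed configurations are replayed before sampled ones, so passing them again on a resumed or restored run would evaluate them twice. A stripped-down sketch of the same guard, assuming Ray 1.x; the objective and the `resuming` flag are stand-ins for the `load` / `load_algo` logic above:

from ray import tune
from ray.tune.suggest.hyperopt import HyperOptSearch

def toy_objective(config):
    tune.report(average_precision=config['multiplier'])

seed_configs = [{'multiplier': 0.99}]
resuming = False  # would be derived from load / load_algo

# Seed points only on a fresh run; a restored searcher already has them
# in its trial history.
algo = HyperOptSearch(
    metric='average_precision',
    mode='max',
    points_to_evaluate=None if resuming else seed_configs)

tune.run(toy_objective,
         config={'multiplier': tune.uniform(0.8, 0.99999)},
         search_alg=algo,
         num_samples=4)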
    t._train()
    exit()

ray.shutdown()
# num_cpus limits how many CPUs are assigned to Ray; by default it uses all of them.
ray.init(local_mode=True, dashboard_host="0.0.0.0", num_cpus=2)

# max_concurrent=1 makes the experiment add new trials one after another.
algo = HyperOptSearch(
    hp_space,
    metric="acc",
    mode="max",
    n_initial_points=5,
    max_concurrent=1)
hyperopt_h = os.getenv("HOME") + "/dcase/dev/ray_results/2019_diff_net_report/Trainable_1_batch_size=32,feature_folder=logmel_delta2_128_44k,lr=0.0001,mini_batch_cnt=1,mixup_alpha=0,mixup_concat_ori=False,out_2020-10-28_11-18-35ekvaaay3/hyperopt.cp"
print("hyperopt restore:", hyperopt_h)
algo.restore(hyperopt_h)

import asc.train
asc.train.set_hyperopt(algo)

scheduler = AsyncHyperBandScheduler(metric="acc", mode="max", max_t=200)
analysis = tune.run(exp,
                    resources_per_trial={"gpu": 1},
                    search_alg=algo,
                    scheduler=scheduler,
                    num_samples=60,
                    verbose=1,
                    resume=False)
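The `asc.train.set_hyperopt(algo)` call suggests the training module keeps a handle to the searcher so training code can checkpoint it between trials. A hypothetical version of that pattern; the function names mirror the call above but are assumptions, not the repo's actual implementation:

# Module-level handle to the searcher, as set_hyperopt appears to provide.
_hyperopt_algo = None

def set_hyperopt(algo):
    global _hyperopt_algo
    _hyperopt_algo = algo

def checkpoint_hyperopt(path):
    # Called from the training loop; a no-op when no searcher is registered.
    if _hyperopt_algo is not None:
        _hyperopt_algo.save(path)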
"pca_gens_n_comp": 0.7, "network": 0, "opt": 0 } ] hyperopt = HyperOptSearch( metric="valid_loss", mode="min", n_initial_points=NFOLDS*5, max_concurrent=1, # points_to_evaluate=current_best_params, space=space) hyperopt_cp = "./hyperopt.cp" if os.path.isfile(hyperopt_cp): print("Restore Hyperopt from checkpoint: ", hyperopt_cp) hyperopt.restore(hyperopt_cp) re_search_alg = Repeater(hyperopt, repeat=NFOLDS) from moa_utils.main import set_hyperopt set_hyperopt(hyperopt) ahb = AsyncHyperBandScheduler( time_attr="training_iteration", metric="valid_loss", mode="min", grace_period=5, max_t=100) tune.run(run_training, # config=config,