def fft_experiment_block(trainable, size, ntrials, nsteps, nepochsvalid,
                         result_dir, nthreads, smoke_test):
    """Assemble the Ray experiment that runs ``trainable`` on the 'dft' target.

    Args:
        trainable: Passed to ``RayExperiment`` as ``run``; its ``__name__``
            becomes part of the experiment name.
        size: Given to ``named_target_matrix('dft', size)`` and used in the name.
        ntrials: Forwarded as ``num_samples``.
        nsteps: Stored in the config as ``n_steps_per_epoch``.
        nepochsvalid: Stored in the config as ``n_epochs_per_validation``.
        result_dir: Forwarded as ``local_dir``.
        nthreads: CPUs requested per trial (the GPU request is always 0).
        smoke_test: When truthy, the ``training_iteration`` stop value is 1
            instead of 99999.

    Returns:
        The constructed ``RayExperiment``.
    """

    def _draw_lr(spec):
        # Log-uniform draw between 1e-4 and 5e-1.
        return math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))

    def _draw_seed(spec):
        return random.randint(0, 1 << 16)

    trial_config = dict(
        target_matrix=named_target_matrix('dft', size),
        lr=sample_from(_draw_lr),
        seed=sample_from(_draw_seed),
        n_steps_per_epoch=nsteps,
        n_epochs_per_validation=nepochsvalid,
        complex=True,
    )
    stop_criteria = {
        'training_iteration': 1 if smoke_test else 99999,
        'negative_loss': -1e-8,
    }
    return RayExperiment(
        name=f'Fft_factorization_{trainable.__name__}_{size}',
        run=trainable,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0},
        stop=stop_criteria,
        config=trial_config,
    )
def vandermonde_experiment_complex(fixed_order, softmax_fn, size, ntrials,
                                   nsteps, result_dir, nthreads, smoke_test):
    """Build the complex Vandermonde-evaluation factorization experiment.

    Args:
        fixed_order: Stored in the config and used in the experiment name.
        softmax_fn: Must be ``'softmax'`` or ``'sparsemax'`` (asserted).
        size: Stored in the config and used in the experiment name.
        ntrials: Forwarded as ``num_samples``.
        nsteps: Stored in the config as ``n_steps_per_epoch``.
        result_dir: Forwarded as ``local_dir``.
        nthreads: CPUs requested per trial (the GPU request is always 0).
        smoke_test: When truthy, the ``training_iteration`` stop value is 1
            instead of 99999.

    Returns:
        The constructed ``RayExperiment``.
    """
    assert softmax_fn in ('softmax', 'sparsemax')

    def _log_uniform(low, high):
        return math.exp(random.uniform(math.log(low), math.log(high)))

    trial_config = {
        'fixed_order': fixed_order,
        'softmax_fn': softmax_fn,
        'size': size,
        'lr': sample_from(lambda spec: _log_uniform(1e-4, 5e-1)),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'perm': sample_from(lambda spec: random.choice(['id', 'br', 'dct'])),
        'n_steps_per_epoch': nsteps,
    }
    # The semantic-loss weight is only added when the order is not fixed and
    # plain softmax is used.
    if not fixed_order:
        if softmax_fn == 'softmax':
            trial_config['semantic_loss_weight'] = sample_from(
                lambda spec: _log_uniform(5e-3, 5e-1))

    experiment_name = (
        f'VandermondeEval_factorization_complex_{fixed_order}_{softmax_fn}_{size}'
    )
    return RayExperiment(
        name=experiment_name,
        run=TrainableVandermondeComplex,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8,
        },
        config=trial_config,
    )
def ops_experiment(size, ntrials, nsteps, result_dir, nthreads, smoke_test):
    """Build the ``TrainableOps`` factorization experiment.

    Args:
        size: Stored in the config and used in the experiment name.
        ntrials: Forwarded as ``num_samples``.
        nsteps: Stored in the config as ``n_steps_per_epoch``.
        result_dir: Forwarded as ``local_dir``.
        nthreads: CPUs requested per trial (the GPU request is always 0).
        smoke_test: When truthy, the ``training_iteration`` stop value is 1
            instead of 99999.

    Returns:
        The constructed ``RayExperiment``.
    """
    log_lr_low, log_lr_high = math.log(1e-4), math.log(5e-1)
    max_iterations = 1 if smoke_test else 99999

    trial_config = dict(
        size=size,
        # Learning rate is drawn log-uniformly between 1e-4 and 5e-1.
        lr=sample_from(
            lambda spec: math.exp(random.uniform(log_lr_low, log_lr_high))),
        seed=sample_from(lambda spec: random.randint(0, 1 << 16)),
        n_steps_per_epoch=nsteps,
    )
    return RayExperiment(
        name=f'Ops_factorization_{size}',
        run=TrainableOps,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial=dict(cpu=nthreads, gpu=0),
        stop={'training_iteration': max_iterations, 'negative_loss': -1e-8},
        config=trial_config,
    )
def fft_factorization_sparsemax_perm_front(argv):
    """Parse ``argv`` and build the sparsemax-perm-front FFT experiment.

    Args:
        argv: Argument list handed to ``ArgumentParser.parse_args``.

    Returns:
        A ``(experiment, args)`` tuple: the constructed ``RayExperiment`` and
        the parsed argparse namespace.
    """
    # (flag, add_argument keyword options), registered in this order.
    option_table = (
        ('--size', dict(type=int, default=8,
                        help='Size of matrix to factor, must be power of 2')),
        ('--ntrials', dict(type=int, default=20,
                           help='Number of trials for hyperparameter tuning')),
        ('--nsteps', dict(type=int, default=200,
                          help='Number of steps per epoch')),
        # --nmaxepochs is accepted but not read anywhere in this function.
        ('--nmaxepochs', dict(type=int, default=200,
                              help='Maximum number of epochs')),
        ('--result-dir', dict(type=str, default='./results',
                              help='Directory to store results')),
        ('--nthreads', dict(type=int, default=1,
                            help='Number of CPU threads per job')),
        ('--smoke-test', dict(action='store_true',
                              help='Finish quickly for testing')),
    )
    parser = argparse.ArgumentParser(description='Learn to factor Fft matrix')
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    args = parser.parse_args(argv)

    def _draw_lr(spec):
        # Log-uniform draw between 1e-4 and 5e-1.
        return math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))

    def _draw_seed(spec):
        return random.randint(0, 1 << 16)

    experiment = RayExperiment(
        name=f'Fft_factorization_sparsemax_perm_front_{args.size}',
        run=TrainableFftFactorSparsemaxPermFront,
        local_dir=args.result_dir,
        num_samples=args.ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': args.nthreads, 'gpu': 0},
        stop={
            'training_iteration': 1 if args.smoke_test else 99999,
            'negative_loss': -1e-8,
        },
        config={
            'size': args.size,
            'lr': sample_from(_draw_lr),
            'seed': sample_from(_draw_seed),
            'n_steps_per_epoch': args.nsteps,
        },
    )
    return experiment, args
def distillation_experiment(model_args, objective, optimizer, ntrials,
                            result_dir, cuda, smoke_test, teacher_model,
                            teacher_model_path, input_cov_path, dataset,
                            min_lr, max_lr, momentum, nsteps, nepochsvalid):
    """Build the ``TrainableDistillCovModel`` distillation experiment.

    Args:
        model_args: Mapping of model arguments; copied into the config and
            flattened into the experiment name.
        objective: Stored in the config and used in the experiment name.
        optimizer: ``'Adam'`` or ``'SGD'`` (asserted).
        ntrials: Forwarded as ``num_samples``.
        result_dir: Forwarded as ``local_dir``.
        cuda: Selects ``'cuda'`` vs ``'cpu'`` and a GPU request of 0.5 vs 0.
        smoke_test: When truthy, the ``training_iteration`` stop value is 1
            instead of 9999.
        teacher_model: Stored in the config and used in the experiment name.
        teacher_model_path: Stored in the config.
        input_cov_path: Stored in the config.
        dataset: Stored in the config.
        min_lr: Lower bound of the log-uniform learning-rate draw.
        max_lr: Upper bound of the log-uniform learning-rate draw.
        momentum: Stored in the config.
        nsteps: Stored in the config as ``n_steps_per_epoch``.
        nepochsvalid: Stored in the config as ``n_epochs_per_validation``.

    Returns:
        The constructed ``RayExperiment``.

    Raises:
        AssertionError: If ``optimizer`` is neither ``'Adam'`` nor ``'SGD'``.
    """
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    config = {
        'objective': objective,
        'optimizer': optimizer,
        # The learning-rate range is the same for both optimizers, so no
        # branch on ``optimizer`` is needed here.
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(min_lr), math.log(max_lr)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        # Copy model_args: per the original note, sacred hands over a
        # read-only dict.
        'model_args': dict(model_args),
        'teacher_model': teacher_model,
        'teacher_model_path': teacher_model_path,
        'input_cov_path': input_cov_path,
        'dataset': dataset,
        'momentum': momentum,
        'n_steps_per_epoch': nsteps,
        'n_epochs_per_validation': nepochsvalid,
    }
    model_args_print = '_'.join(
        f'{key}_{value}' for key, value in model_args.items())
    experiment = RayExperiment(
        name=f'{teacher_model}_{objective}_{model_args_print}_{optimizer}',
        run=TrainableDistillCovModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=-1,
        resources_per_trial={'cpu': 2, 'gpu': 0.5 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else 9999},
        config=config,
    )
    return experiment
def transform_experiment(model, target, size, complex, param, lr_min, lr_max,
                         ntrials, nsteps, nepochsvalid, result_dir, cuda,
                         nthreads, smoke_test, b):
    """Build the ``TrainableBP`` transform-factorization experiment.

    No validation of ``model`` is performed here. The experiment name embeds
    the short git commit id of the current working directory, obtained by
    running ``git rev-parse --short HEAD``.

    Args:
        model: Stored in the config and used in the experiment name.
        target: Stored as ``target_matrix`` and used in the experiment name.
        size: Stored in the config and used in the experiment name.
        complex: Stored in the config and used in the experiment name.
        param: Stored in the config.
        lr_min: Lower bound of the log-uniform learning-rate draw.
        lr_max: Upper bound of the log-uniform learning-rate draw.
        ntrials: Forwarded as ``num_samples``.
        nsteps: Stored in the config as ``n_steps_per_epoch``.
        nepochsvalid: Stored in the config as ``n_epochs_per_validation``.
        result_dir: Forwarded as ``local_dir``.
        cuda: Selects ``'cuda'`` vs ``'cpu'`` and a GPU request of 0.25 vs 0.
        nthreads: CPUs requested per trial.
        smoke_test: When truthy, the ``training_iteration`` stop value is 1
            instead of 99999.
        b: Mapping stored as ``bfargs``; its items are flattened into the name.

    Returns:
        The constructed ``RayExperiment``.
    """

    def _draw_lr(spec):
        return math.exp(random.uniform(math.log(lr_min), math.log(lr_max)))

    def _draw_seed(spec):
        return random.randint(0, 1 << 16)

    trial_config = dict(
        model=model,
        target_matrix=target,
        size=size,
        complex=complex,
        share_logit=True,
        bfargs=b,
        param=param,
        lr=sample_from(_draw_lr),
        seed=sample_from(_draw_seed),
        n_steps_per_epoch=nsteps,
        n_epochs_per_validation=nepochsvalid,
        device='cuda' if cuda else 'cpu',
    )

    b_args = '_'.join(key + ':' + str(val) for key, val in b.items())
    raw_commit = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'])
    commit_id = raw_commit.strip().decode('utf-8')

    gpu_share = 0.25 if cuda else 0
    return RayExperiment(
        name=f'{size}_{target}_{model}_{b_args}_c{complex}_{commit_id}',
        run=TrainableBP,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': gpu_share},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8,
        },
        config=trial_config,
    )
def distillation_experiment(model, model_args, optimizer, ntrials, result_dir,
                            train_dir, workers, cuda, smoke_test,
                            teacher_model, dataset, min_lr, max_lr, momentum,
                            pretrained):
    """Build the ``TrainableModel`` distillation experiment.

    Args:
        model: Model name; stored under ``config['model']['name']`` and used
            in the experiment name.
        model_args: Mapping stored under ``config['model']['args']`` and
            flattened into the experiment name.
        optimizer: ``'Adam'`` or ``'SGD'`` (asserted).
        ntrials: Forwarded as ``num_samples``.
        result_dir: Forwarded as ``local_dir``.
        train_dir: Stored in the config.
        workers: Stored in the config.
        cuda: Selects ``'cuda'`` vs ``'cpu'`` and a GPU request of 1 vs 0.
        smoke_test: When truthy, the ``training_iteration`` stop value is 1
            instead of 9999.
        teacher_model: Stored in the config.
        dataset: Stored in the config.
        min_lr: Lower bound of the log-uniform learning-rate draw.
        max_lr: Upper bound of the log-uniform learning-rate draw.
        momentum: Stored in the config.
        pretrained: Stored in the config.

    Returns:
        The constructed ``RayExperiment``.

    Raises:
        AssertionError: If ``optimizer`` is neither ``'Adam'`` nor ``'SGD'``.
    """
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    config = {
        'optimizer': optimizer,
        # The learning-rate range is the same for both optimizers, so no
        # branch on ``optimizer`` is needed here.
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(min_lr), math.log(max_lr)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': model_args},
        'teacher_model': teacher_model,
        'train_dir': train_dir,
        'workers': workers,
        'dataset': dataset,
        'momentum': momentum,
        'pretrained': pretrained,
    }
    model_args_print = '_'.join(
        f'{key}_{value}' for key, value in model_args.items())
    experiment = RayExperiment(
        name=f'{model}_{model_args_print}_{optimizer}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=-1,
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else 9999},
        config=config,
    )
    return experiment
def dynamic_conv_experiment(model, model_args, encoder, decoder,
                            structure_lr_multiplier, nmaxupdates, ntrials,
                            result_dir, cuda, smoke_test):
    """Build the ``TrainableModel`` experiment with a grid over the structure lr.

    Learning rate, weight decay and dropout are fixed constants here; only the
    seed is sampled and ``structure-lr-multiplier`` is grid-searched. The
    ``structure_lr_multiplier`` and ``smoke_test`` arguments are not read by
    this function.

    Args:
        model: Model name; stored under ``config['model']['name']`` and used
            in the experiment name.
        model_args: Stored under ``config['model']['args']`` and formatted
            into the experiment name.
        encoder: Iterable of strings; copied to a list and joined with ``-``
            for the name.
        decoder: Iterable of strings; copied to a list and joined with ``-``
            for the name.
        structure_lr_multiplier: Unused (the grid below is used instead).
        nmaxupdates: Stored in the config.
        ntrials: Forwarded as ``num_samples``.
        result_dir: Forwarded as ``local_dir``; also the prefix of
            ``config['result_dir']``.
        cuda: Selects ``'cuda'`` vs ``'cpu'`` and a GPU request of 1 vs 0.
        smoke_test: Unused; the stop criterion is always one iteration.

    Returns:
        The constructed ``RayExperiment``.
    """
    encoder_tag = '-'.join(encoder)
    decoder_tag = '-'.join(decoder)
    name = (f"{model}_{model_args}_encoder_[{encoder_tag}]"
            f"_decoder_[{decoder_tag}]_structlr_grid")

    def _draw_seed(spec):
        return random.randint(0, 1 << 16)

    trial_config = {
        'lr': 5e-4,
        'weight_decay': 1e-4,
        # Transformer seems to need dropout 0.3
        'dropout': 0.3,
        'seed': sample_from(_draw_seed),
        # Copies are taken because sacred created read-only lists.
        'encoder': list(encoder),
        'decoder': list(decoder),
        'structure-lr-multiplier': grid_search([0.25, 0.5, 1.0, 2.0, 4.0]),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': model_args},
        'nmaxupdates': nmaxupdates,
        'result_dir': result_dir + '/' + name,
    }
    gpu_count = 1 if cuda else 0
    return RayExperiment(
        name=name,
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=False,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=-1,
        resources_per_trial={'cpu': 2, 'gpu': gpu_count},
        stop={"training_iteration": 1},
        config=trial_config,
    )
def transform_experiment(model, target, size, complex, ntrials, nsteps,
                         nepochsvalid, result_dir, cuda, nthreads, smoke_test):
    """Build the ``TrainableBP`` transform-factorization experiment.

    Args:
        model: One of ``'B'``, ``'BP'``, ``'PBT'``, ``'BPP'``, ``'BPBP'``
            (asserted); stored in the config and used in the name.
        target: Stored as ``target_matrix`` and used in the experiment name.
        size: Stored in the config and used in the experiment name.
        complex: Stored in the config and used in the experiment name.
        ntrials: Forwarded as ``num_samples``.
        nsteps: Stored in the config as ``n_steps_per_epoch``.
        nepochsvalid: Stored in the config as ``n_epochs_per_validation``.
        result_dir: Forwarded as ``local_dir``.
        cuda: Selects ``'cuda'`` vs ``'cpu'`` and a GPU request of 0.25 vs 0.
        nthreads: CPUs requested per trial.
        smoke_test: When truthy, the ``training_iteration`` stop value is 1
            instead of 99999.

    Returns:
        The constructed ``RayExperiment``.
    """
    supported_models = ('B', 'BP', 'PBT', 'BPP', 'BPBP')
    assert model in supported_models, f'Model {model} not implemented'

    def _draw_share_logit(spec):
        # Two independent True/False draws.
        return np.random.choice((True, False), size=2)

    def _draw_lr(spec):
        # Log-uniform draw between 1e-4 and 5e-1.
        return math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))

    def _draw_seed(spec):
        return random.randint(0, 1 << 16)

    trial_config = dict(
        model=model,
        target_matrix=target,
        size=size,
        complex=complex,
        share_logit=sample_from(_draw_share_logit),
        lr=sample_from(_draw_lr),
        seed=sample_from(_draw_seed),
        n_steps_per_epoch=nsteps,
        n_epochs_per_validation=nepochsvalid,
        device='cuda' if cuda else 'cpu',
    )
    stop_criteria = {
        'training_iteration': 1 if smoke_test else 99999,
        'negative_loss': -1e-8,
    }
    return RayExperiment(
        name=f'{target}_factorization_{model}_{complex}_{size}',
        run=TrainableBP,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0.25 if cuda else 0},
        stop=stop_criteria,
        config=trial_config,
    )
def cifar10_experiment(dataset, model, args, optimizer, use_hyperband, lr,
                       lr_decay, weight_decay, ntrials, nmaxepochs, batch,
                       resume_pth, result_dir, cuda, smoke_test):
    """Build the ``TrainableModel`` image-classification experiment.

    The experiment name embeds a second-resolution timestamp and the short git
    commit id obtained from ``git rev-parse --short HEAD``. The
    ``use_hyperband`` argument is not read by this function.

    Args:
        dataset: Dataset name; lower-cased in the experiment name.
        model: Model name.
        args: Mapping of model arguments; flattened into the experiment name.
        optimizer: ``'Adam'`` or ``'SGD'`` (asserted).
        use_hyperband: Unused here.
        lr: Mapping with keys ``'grid'``, ``'min'`` and ``'max'``. A non-None
            grid is grid-searched; otherwise lr is drawn log-uniformly
            between min and max.
        lr_decay: Decay settings, or None to use a default with factor 1.0.
        weight_decay: Truthy selects 5e-4, otherwise 0.0.
        ntrials: Number of samples (forced to 1 when ``smoke_test``).
        nmaxepochs: ``training_iteration`` stop value unless ``smoke_test``.
        batch: Stored under ``config['dataset']['batch']``.
        resume_pth: Forwarded as ``restore``.
        result_dir: Forwarded as ``local_dir``.
        cuda: Selects ``'cuda'`` vs ``'cpu'`` and a GPU request of 1 vs 0.
        smoke_test: Shortens the run and prefixes the name with ``smoke_``.

    Returns:
        The constructed ``RayExperiment``.
    """
    assert optimizer in ('Adam', 'SGD'), 'Only Adam and SGD are supported'
    if lr_decay is None:
        lr_decay = {'factor': 1.0, 'period': 1000, 'milestones': None}

    switch_ams = int(0.5 * nmaxepochs) if optimizer == 'Adam' else None

    def _draw_lr(spec):
        return math.exp(random.uniform(math.log(lr['min']), math.log(lr['max'])))

    if lr['grid'] is not None:
        lr_spec = grid_search(lr['grid'])
    else:
        lr_spec = sample_from(_draw_lr)

    trial_config = {
        'optimizer': optimizer,
        'switch_ams': switch_ams,
        'lr': lr_spec,
        'lr_decay': lr_decay,
        'weight_decay': 5e-4 if weight_decay else 0.0,
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': args},
        'dataset': {'name': dataset, 'batch': batch},
    }

    # The prefix makes throwaway smoke-test logs easy to find and delete.
    smoke_str = 'smoke_' if smoke_test else ''
    args_str = '_'.join(key + ':' + str(val) for key, val in args.items())
    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    raw_commit = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD'])
    commit_id = raw_commit.strip().decode('utf-8')

    return RayExperiment(
        name=f'{smoke_str}{dataset.lower()}_{model}_{args_str}_{optimizer}_epochs_{nmaxepochs}_{timestamp}_{commit_id}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=1 if smoke_test else ntrials,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=0,
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else nmaxepochs},
        restore=resume_pth,
        config=trial_config,
    )
def cifar10_experiment(
        dataset, model, args, optimizer, nmaxepochs, lr_decay, lr_decay_period,
        plr_min, plr_max, weight_decay, pwd, pwd_min, pwd_max, ntrials,
        result_dir, cuda, smoke_test, unsupervised, batch, tv_norm, tv_p,
        tv_sym, temp_min, temp_max, anneal_ent_min, anneal_ent_max,
        anneal_sqrt, entropy_p, restore_perm, resume_pth):
    """Build the ``TrainableModel`` experiment with permutation-learning options.

    The experiment name embeds a second-resolution timestamp and the short git
    commit id obtained from ``git rev-parse --short HEAD``.

    Args:
        dataset: Dataset name; lower-cased in the experiment name.
        model: Model name.
        args: Mapping of model arguments. A shallow copy (optionally extended
            with a sampled ``'temp'``) is stored in the config; the original
            is flattened into the experiment name.
        optimizer: ``'Adam'`` or ``'SGD'`` (asserted).
        nmaxepochs: ``training_iteration`` stop value unless ``smoke_test``.
        lr_decay: Truthy selects an ``lr_decay_factor`` of 0.2, otherwise 1.0.
        lr_decay_period: Stored in the config.
        plr_min: Lower bound of the log-uniform ``plr`` draw.
        plr_max: Upper bound of the log-uniform ``plr`` draw.
        weight_decay: Truthy selects 5e-4, otherwise 0.0.
        pwd: Truthy enables the log-uniform ``pwd`` draw, otherwise 0.0.
        pwd_min: Lower bound of the ``pwd`` draw.
        pwd_max: Upper bound of the ``pwd`` draw.
        ntrials: Forwarded as ``num_samples``.
        result_dir: Forwarded as ``local_dir``.
        cuda: Selects ``'cuda'`` vs ``'cpu'`` and a GPU request of 1 vs 0.
        smoke_test: Shortens the run and prefixes the name with ``smoke_``.
        unsupervised: Enables the ``tv`` config entry and two extra stop
            criteria (``model_ent`` and ``neg_ent``).
        batch: Stored under ``config['dataset']['batch']``.
        tv_norm: Stored under ``tv['norm']``.
        tv_p: Stored under ``tv['p']``.
        tv_sym: ``'true'``, ``'false'``, ``'random'`` or None (None means
            ``tv['sym'] = False``).
        temp_min: With ``temp_max``, bounds the log-uniform ``'temp'`` draw;
            the draw is skipped if either is None.
        temp_max: See ``temp_min``.
        anneal_ent_min: Lower bound of the ``anneal_entropy`` draw.
        anneal_ent_max: Upper bound of the draw; 0.0 fixes ``anneal_entropy``
            to 0.0.
        anneal_sqrt: Stored in the config.
        entropy_p: Stored in the config.
        restore_perm: Optional file name, prefixed with the saved-permutation
            directory. Mutually exclusive with ``resume_pth``.
        resume_pth: Forwarded as ``restore``.

    Returns:
        The constructed ``RayExperiment``.

    Raises:
        AssertionError: On an unsupported ``optimizer``, when both
            ``restore_perm`` and ``resume_pth`` are given, or on an
            unrecognised ``tv_sym``.
    """
    # TODO clean up and set min,max to pairs/dicts
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    # If we're fully resuming training from the checkpoint, there is no point
    # in restoring any part of the model.
    assert restore_perm is None or resume_pth is None
    if restore_perm is not None:
        restore_perm = '/dfs/scratch1/albertgu/learning-circuits/cnn/saved_perms/' + restore_perm
        print("Restoring permutation from", restore_perm)

    args_rand = args.copy()
    if temp_min is not None and temp_max is not None:
        args_rand['temp'] = sample_from(lambda spec: math.exp(
            random.uniform(math.log(temp_min), math.log(temp_max))))

    tv = {'norm': tv_norm, 'p': tv_p}
    # Compare by value: identity comparison against a string literal depends
    # on interning and can silently fall through to the assert below.
    if tv_sym == 'true':
        tv['sym'] = sample_from(lambda _: np.random.choice((True, )))
    elif tv_sym == 'false':
        tv['sym'] = sample_from(lambda _: np.random.choice((False, )))
    elif tv_sym == 'random':
        tv['sym'] = sample_from(lambda _: np.random.choice((True, False)))
    else:
        assert tv_sym is None, 'tv_sym must be true, false, or random'
        tv['sym'] = False

    # The prefix makes throwaway smoke-test logs easy to find and delete.
    name_smoke_test = 'smoke_' if smoke_test else ''
    name_args = '_'.join([k + ':' + str(v) for k, v in args.items()])
    config = {
        'optimizer': optimizer,
        # NOTE(review): for SGD this is drawn once, when the config is built,
        # so every trial shares the same value; wrap it in sample_from if
        # per-trial sampling is intended.
        'lr': 2e-4 if optimizer == 'Adam' else math.exp(
            random.uniform(math.log(0.025), math.log(0.2))),
        'plr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(plr_min), math.log(plr_max)))),
        'lr_decay_factor': 0.2 if lr_decay else 1.0,
        'lr_decay_period': lr_decay_period,
        'weight_decay': 5e-4 if weight_decay else 0.0,
        'pwd': sample_from(lambda spec: math.exp(
            random.uniform(math.log(pwd_min), math.log(pwd_max)))) if pwd else 0.0,
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': args_rand},
        'dataset': {'name': dataset, 'batch': batch},
        'unsupervised': unsupervised,
        'tv': tv if unsupervised else None,
        'anneal_entropy': 0.0 if anneal_ent_max == 0.0 else sample_from(
            lambda _: math.exp(
                random.uniform(math.log(anneal_ent_min), math.log(anneal_ent_max)))),
        'anneal_sqrt': anneal_sqrt,
        'entropy_p': entropy_p,
        'restore_perm': restore_perm,
    }

    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    commit_id = subprocess.check_output(
        ['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
    stopping_criteria = {"training_iteration": 1 if smoke_test else nmaxepochs}
    if unsupervised:  # TODO group all the unsupervised casework together
        stopping_criteria.update({'model_ent': 200, 'neg_ent': -5.0})

    experiment = RayExperiment(
        name=f'{name_smoke_test}{dataset.lower()}_{model}_{name_args}_{optimizer}_epochs_{nmaxepochs}_plr_{plr_min}-{plr_max}_{timestamp}_{commit_id}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        checkpoint_freq=500,  # Just to enable recovery with @max_failures
        max_failures=0,
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        stop=stopping_criteria,
        restore=resume_pth,
        config=config,
    )
    return experiment