def __init__(self, objectives, dim, n_test_rays, alpha, internal_solver, **kwargs):
    self.objectives = objectives
    self.n_test_rays = n_test_rays
    self.alpha = alpha
    self.K = len(objectives)

    if len(dim) == 1:
        # tabular
        hnet = FCPHNHyper(dim, ray_hidden_dim=100)
        net = FCPHNTarget()
    elif len(dim) == 3:
        # image
        hnet: nn.Module = LeNetPHNHyper([9, 5], ray_hidden_dim=100)
        net: nn.Module = LeNetPHNTargetWrapper([9, 5])
    else:
        raise ValueError(f"Unknown dim {dim}, expected len 1 or len 3")

    print("Number of parameters: {}".format(num_parameters(hnet)))

    self.model = hnet.cuda()
    self.net = net.cuda()

    if internal_solver == 'linear':
        self.solver = LinearScalarizationSolver(n_tasks=len(objectives))
    elif internal_solver == 'epo':
        self.solver = EPOSolver(n_tasks=len(objectives), n_params=num_parameters(hnet))
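# Hypothetical usage sketch (not part of the solver above): how a preference ray
# is typically drawn for a Pareto-hypernetwork training step. The Dirichlet
# concentration reuses the `alpha` constructor argument; `sample_ray` is a
# placeholder name.
import torch

def sample_ray(K, alpha):
    # one preference vector on the (K-1)-simplex; larger alpha concentrates
    # samples around the uniform ray (1/K, ..., 1/K)
    return torch.distributions.Dirichlet(torch.full((K,), float(alpha))).sample()

ray = sample_ray(K=2, alpha=0.2)
assert torch.isclose(ray.sum(), torch.tensor(1.0))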
def __init__(self, objectives, alpha, lamda, dim, n_test_rays, **kwargs):
    """
    Instantiate the cosmos solver.

    Args:
        objectives: A list of objectives
        alpha: Dirichlet sampling parameter (list or float)
        lamda: Cosine similarity penalty
        dim: Dimensions of the data
        n_test_rays: The number of test rays used for evaluation.
    """
    self.objectives = objectives
    self.K = len(objectives)
    self.alpha = alpha
    self.n_test_rays = n_test_rays
    self.lamda = lamda

    dim = list(dim)
    dim[0] = dim[0] + self.K  # one extra input channel per objective for the preference ray

    model = model_from_dataset(method='cosmos', dim=dim, **kwargs)
    self.model = Upsampler(self.K, model, dim).cuda()

    self.n_params = num_parameters(self.model)
    print("Number of parameters: {}".format(self.n_params))
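# Minimal sketch, assuming image inputs of shape (B, C, H, W): cosmos conditions
# the target network on the preference ray by appending one constant channel per
# objective, which is why dim[0] is increased by K above. This toy version is
# illustrative only; the real Upsampler may differ.
import torch

def append_ray_channels(x, ray):
    b, _, h, w = x.shape
    ray_channels = ray.view(1, -1, 1, 1).expand(b, -1, h, w)
    return torch.cat((x, ray_channels), dim=1)

x = torch.randn(8, 1, 36, 36)
ray = torch.tensor([0.7, 0.3])
print(append_ray_channels(x, ray).shape)  # torch.Size([8, 3, 36, 36])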
def main(settings):
    print("start processing with settings", settings)
    utils.set_seed(settings['seed'])
    global elapsed_time

    # create the experiment folders
    logdir = os.path.join(settings['logdir'], settings['method'],
                          settings['dataset'], utils.get_runname(settings))
    pathlib.Path(logdir).mkdir(parents=True, exist_ok=True)

    # prepare
    train_set = utils.dataset_from_name(split='train', **settings)
    val_set = utils.dataset_from_name(split='val', **settings)
    test_set = utils.dataset_from_name(split='test', **settings)

    train_loader = data.DataLoader(train_set, settings['batch_size'], shuffle=True,
                                   num_workers=settings['num_workers'])
    val_loader = data.DataLoader(val_set, settings['batch_size'], shuffle=True,
                                 num_workers=settings['num_workers'])
    test_loader = data.DataLoader(test_set, settings['batch_size'],
                                  num_workers=settings['num_workers'])

    objectives = from_name(settings.pop('objectives'), train_set.task_names())
    scores = from_objectives(objectives)

    rm1 = utils.RunningMean(400)
    rm2 = utils.RunningMean(400)

    method = method_from_name(objectives=objectives, **settings)

    train_results = dict(settings=settings,
                         num_parameters=utils.num_parameters(method.model_params()))
    val_results = dict(settings=settings,
                       num_parameters=utils.num_parameters(method.model_params()))
    test_results = dict(settings=settings,
                        num_parameters=utils.num_parameters(method.model_params()))

    with open(pathlib.Path(logdir) / "settings.json", "w") as file:
        json.dump(train_results, file)

    # main
    for j in range(settings['num_starts']):
        train_results[f"start_{j}"] = {}
        val_results[f"start_{j}"] = {}
        test_results[f"start_{j}"] = {}

        optimizer = torch.optim.Adam(method.model_params(), settings['lr'])
        if settings['use_scheduler']:
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer, settings['scheduler_milestones'],
                gamma=settings['scheduler_gamma'])

        for e in range(settings['epochs']):
            print(f"Epoch {e}")
            tick = time.time()
            method.new_epoch(e)

            for b, batch in enumerate(train_loader):
                batch = utils.dict_to_cuda(batch)
                optimizer.zero_grad()
                stats = method.step(batch)
                optimizer.step()

                loss, sim = stats if isinstance(stats, tuple) else (stats, 0)
                print("Epoch {:03d}, batch {:03d}, train_loss {:.4f}, sim {:.4f}, rm train_loss {:.3f}, rm sim {:.3f}".format(
                    e, b, loss, sim, rm1(loss), rm2(sim)))

            tock = time.time()
            elapsed_time += (tock - tick)

            if settings['use_scheduler']:
                val_results[f"start_{j}"][f"epoch_{e}"] = {'lr': scheduler.get_last_lr()[0]}
                scheduler.step()

            # run eval on train set (mainly for debugging)
            if settings['train_eval_every'] > 0 and (e + 1) % settings['train_eval_every'] == 0:
                train_results = evaluate(j, e, method, scores, train_loader, logdir,
                                         reference_point=settings['reference_point'],
                                         split='train', result_dict=train_results)

            if settings['eval_every'] > 0 and (e + 1) % settings['eval_every'] == 0:
                # Validation results
                val_results = evaluate(j, e, method, scores, val_loader, logdir,
                                       reference_point=settings['reference_point'],
                                       split='val', result_dict=val_results)

                # Test results
                test_results = evaluate(j, e, method, scores, test_loader, logdir,
                                        reference_point=settings['reference_point'],
                                        split='test', result_dict=test_results)

            # Checkpoints
            if settings['checkpoint_every'] > 0 and (e + 1) % settings['checkpoint_every'] == 0:
                pathlib.Path(os.path.join(logdir, 'checkpoints')).mkdir(parents=True, exist_ok=True)
                torch.save(method.model.state_dict(),
                           os.path.join(logdir, 'checkpoints', 'c_{}-{:03d}.pth'.format(j, e)))

        print("epoch_max={}, val_volume_max={}".format(epoch_max, volume_max))

        pathlib.Path(os.path.join(logdir, 'checkpoints')).mkdir(parents=True, exist_ok=True)
        torch.save(method.model.state_dict(),
                   os.path.join(logdir, 'checkpoints', 'c_{}-{:03d}.pth'.format(j, 999999)))

    return volume_max
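# Hedged sketch of the bookkeeping main() relies on: `epoch_max` and `volume_max`
# are presumably updated whenever the validation hypervolume improves during
# evaluate(). The helper below is hypothetical and only illustrates the pattern.
volume_max = -float('inf')
epoch_max = -1

def update_best(epoch, hv):
    # keep the epoch with the largest validation hypervolume
    global volume_max, epoch_max
    if hv > volume_max:
        volume_max, epoch_max = hv, epoch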
    batch_size=args.batch_size,
    size_max=args.size_max,
    num_workers=4,
    pin_memory=True,
    collate_fn=None)

num_classes = utils.get_num_classes(args.dataset)
imsize = next(iter(train_loader))[0].size()[1:]
input_dim = imsize[0] * imsize[1] * imsize[2]

model = models.classifiers.FCNHelper(num_layers=args.depth,
                                     input_dim=input_dim,
                                     num_classes=num_classes,
                                     width=args.width)

num_parameters = utils.num_parameters(model)
num_samples_train = size_train
num_samples_test = size_test

print('Number of parameters: {}'.format(num_parameters), file=logs)
print('Number of training samples: {}'.format(num_samples_train), file=logs)
print('Number of testing samples: {}'.format(num_samples_test), file=logs)
print('Image size: {}'.format(imsize), file=logs)
print('Model: {}'.format(str(model)), file=logs)
model.to(device)

if 'model' in checkpoint.keys():
    try:
        model.load_state_dict(checkpoint['model'])
        model.train()
    except RuntimeError as e:
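# Hypothetical sketch of the kind of network FCNHelper plausibly builds: a fully
# connected net with `num_layers` hidden layers of the given width. The real
# helper in models.classifiers may differ; this is for illustration only.
import torch.nn as nn

def fcn(num_layers, input_dim, num_classes, width):
    dims = [input_dim] + [width] * num_layers
    layers = []
    for d_in, d_out in zip(dims[:-1], dims[1:]):
        layers += [nn.Linear(d_in, d_out), nn.ReLU()]
    layers.append(nn.Linear(dims[-1], num_classes))
    return nn.Sequential(*layers)

print(fcn(num_layers=2, input_dim=3 * 32 * 32, num_classes=10, width=256))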
def eval(settings):
    """
    The full evaluation loop. Generate scores for all checkpoints found in the
    directory specified above.

    Uses the same ArgumentParser as main.py to determine the method and dataset.
    """
    settings['batch_size'] = 2048

    print("start evaluation with settings", settings)

    # create the experiment folders
    logdir = os.path.join(settings['logdir'], settings['method'],
                          settings['dataset'], utils.get_runname(settings))
    pathlib.Path(logdir).mkdir(parents=True, exist_ok=True)

    # prepare
    train_set = utils.dataset_from_name(split='train', **settings)
    val_set = utils.dataset_from_name(split='val', **settings)
    test_set = utils.dataset_from_name(split='test', **settings)

    train_loader = data.DataLoader(train_set, settings['batch_size'], shuffle=True,
                                   num_workers=settings['num_workers'])
    val_loader = data.DataLoader(val_set, settings['batch_size'], shuffle=True,
                                 num_workers=settings['num_workers'])
    test_loader = data.DataLoader(test_set, settings['batch_size'],
                                  num_workers=settings['num_workers'])

    objectives = from_name(settings.pop('objectives'), val_set.task_names())
    scores1 = from_objectives(objectives)
    scores2 = [mcr(o.label_name, o.logits_name) for o in objectives]

    solver = solver_from_name(objectives=objectives, **settings)

    train_results = dict(settings=settings,
                         num_parameters=utils.num_parameters(solver.model_params()))
    val_results = dict(settings=settings,
                       num_parameters=utils.num_parameters(solver.model_params()))
    test_results = dict(settings=settings,
                        num_parameters=utils.num_parameters(solver.model_params()))

    task_ids = settings['task_ids'] if settings['method'] == 'SingleTask' else [0]
    for j in task_ids:
        if settings['method'] == 'SingleTask':
            # we ran it in parallel
            checkpoints = pathlib.Path(CHECKPOINT_DIR).glob(f'**/*_{j:03d}/*/c_*.pth')
        else:
            checkpoints = pathlib.Path(CHECKPOINT_DIR).glob('**/c_*.pth')

        train_results[f"start_{j}"] = {}
        val_results[f"start_{j}"] = {}
        test_results[f"start_{j}"] = {}

        for c in sorted(checkpoints):
            print("checkpoint", c)
            _, e = c.stem.replace('c_', '').split('-')
            j = int(j)
            e = int(e)

            solver.model.load_state_dict(torch.load(c))

            # Validation results
            val_results = evaluate(j, e, solver, scores1, scores2, val_loader, logdir,
                                   reference_point=settings['reference_point'],
                                   split='val', result_dict=val_results)

            # Test results
            test_results = evaluate(j, e, solver, scores1, scores2, test_loader, logdir,
                                    reference_point=settings['reference_point'],
                                    split='test', result_dict=test_results)
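# Example of the checkpoint naming convention the loop above parses: main()
# saves files as 'c_{start}-{epoch:03d}.pth', so the stem splits into the
# start index and the epoch.
from pathlib import Path

stem = Path('c_0-024.pth').stem            # 'c_0-024'
j, e = stem.replace('c_', '').split('-')
print(int(j), int(e))                      # -> 0 24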
# losses
supported_losses = ["RaSGAN", "WGAN-GP"]
loss_name = (config["train_config"]["loss_fun"]
             if config["train_config"]["loss_fun"] in supported_losses else "WGAN-GP")
if loss_name == "RaSGAN":
    # RaSGAN
    loss_fun = RaSGANLoss()
else:
    # WGAN-GP
    loss_fun = WGAN_GPLoss(discriminator)

# mask out special tokens in the pretraining NLL loss
loss_weight = torch.ones(num_classes).to(device)
loss_weight[SOS_TOKEN] = 0.0
#loss_weight[EOS_TOKEN] = 0.0
#loss_weight[UNK_TOKEN] = 0.0
loss_weight[PAD_TOKEN] = 0.0
pretrain_loss_fun = nn.NLLLoss(weight=loss_weight)

np_g = num_parameters(generator)
np_d = num_parameters(discriminator)
print("Number of parameters for G: {}\nNumber of parameters for D: {}\nNumber of parameters in total: {}"
      .format(np_g, np_d, np_g + np_d))

def pretrain_generator(real_data, fake_data, optimizer):
    '''
    Pretrain the generator to generate realistic samples for a good initialization.
    '''
    # Reset gradients
    optimizer.zero_grad()

    # average the token-level NLL over the sequence length
    loss = 0
    fake_data = torch.log(fake_data + 1e-8)
    for i in range(fake_data.size(1)):
        loss += pretrain_loss_fun(fake_data[:, i, :], real_data[:, i])
    loss /= fake_data.size(1)
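# Shape sketch for the pretraining loss above, assuming fake_data holds per-step
# token probabilities of shape (batch, seq_len, vocab) and real_data holds target
# token ids of shape (batch, seq_len); all sizes below are toy values.
import torch
import torch.nn as nn

num_classes, batch, seq_len = 20, 4, 10
loss_fn = nn.NLLLoss()
fake = torch.softmax(torch.randn(batch, seq_len, num_classes), dim=-1)
real = torch.randint(0, num_classes, (batch, seq_len))

log_probs = torch.log(fake + 1e-8)
loss = sum(loss_fn(log_probs[:, i, :], real[:, i]) for i in range(seq_len)) / seq_len
print(loss.item())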
generator = getattr(generators, config["model_config"]["generator"]["name"])(
    hidden_size=config["model_config"]["generator"]["hidden_size"],
    noise_size=noise_size,
    output_size=num_classes,
    vocab=vocab,
    SOS_TOKEN=SOS_TOKEN,
    beam_width=config["model_config"]["generator"]["beam_width"]).to(device)
load_model(generator, summary_path)

text_log = open(os.path.join(summary_path, "eval_log.txt"), "a")

# losses
loss_weight = torch.ones(num_classes).to(device)
loss_weight[SOS_TOKEN] = 0.0
#loss_weight[EOS_TOKEN] = 0.0
#loss_weight[UNK_TOKEN] = 0.0
loss_weight[PAD_TOKEN] = 0.0
pretrain_loss_fun = nn.NLLLoss(weight=loss_weight)

np_g = num_parameters(generator)
text_log.write("Number of parameters for G: {}\n".format(np_g))

max_temperature = torch.FloatTensor([config["train_config"]["max_temperature"]]).to(device)

def nll_gen(real_data, fake_data):
    '''
    Evaluate the generator's ability to generate diverse samples.
    '''
    loss = 0
    fake_data = torch.log(fake_data + 1e-8)
    for i in range(fake_data.size(1)):
        loss += pretrain_loss_fun(fake_data[:, i, :], real_data[:, i])
    loss /= fake_data.size(1)
    return loss
#classifier = models.classifiers.Linear(model, args.ndraw, args.keep_ratio).to(device)
#classifier = models.classifiers.ClassifierFCN(model, num_tries=args.ndraw, Rs=args.remove, depth_max=args.depth_max).to(device)

#if args.remove is not None:
#    remove = args.remove  # the number of neurons
#else:  # fraction is not None
fraction = 1 / args.fraction
classifier = models.classifiers.AnnexVGG(model, F=fraction,
                                         idx_entry=args.entry_layer).to(device)

if 'classifier' in checkpoint.keys():
    classifier.load_state_dict(checkpoint['classifier'])

classifier.to(device)

num_parameters = utils.num_parameters(classifier, only_require_grad=False)
num_parameters_trainable = utils.num_parameters(classifier, only_require_grad=True)
num_layers = 1
num_samples_train = size_train
num_samples_test = size_test

print('Number of parameters: {}'.format(num_parameters), file=logs)
print('Number of trainable parameters: {}'.format(num_parameters_trainable), file=logs)
print('Number of training samples: {}'.format(num_samples_train), file=logs)
print('Number of testing samples: {}'.format(num_samples_test), file=logs)
#print('Layer dimensions: {}'.format(classifier.size_out), file=logs)
print('Annex classifier: {}'.format(str(classifier)), file=logs)
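# Minimal sketch of the utils.num_parameters(..., only_require_grad=...) helper
# used above; the signature is inferred from the call sites and the real
# implementation may differ.
import torch.nn as nn

def num_parameters_sketch(model, only_require_grad=True):
    return sum(p.numel() for p in model.parameters()
               if p.requires_grad or not only_require_grad)

net = nn.Linear(10, 5)
net.weight.requires_grad_(False)
print(num_parameters_sketch(net, only_require_grad=False))  # 55 (all parameters)
print(num_parameters_sketch(net, only_require_grad=True))   # 5 (bias only)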