                        help='Number of frames to skip')
args = parser.parse_args()
print(args)

utils.prepare_dir(args)
utils.print_host_info()

tf.get_variable_scope()._reuse = None

# Seed TensorFlow and NumPy from the same combined seed for reproducibility
_seed = args.base_seed + args.add_seed
tf.set_random_seed(_seed)
np.random.seed(_seed)

# Build the training and test graphs
tg, test_graph = graph_builder.build_graph_subsample(args)

tvars = tf.trainable_variables()
print([tvar.name for tvar in tvars])
print("Model size: {:.2f}M".format(utils.get_model_size(tvars)))

tg_ml_cost = tf.reduce_mean(tg.ml_cost)

global_step = tf.Variable(0, trainable=False, name="global_step")
lr = tf.Variable(args.lr, trainable=False, name="lr")
ml_opt_func = tf.train.AdamOptimizer(learning_rate=lr)

# Clip gradients by global norm before applying the Adam update
ml_grads, _ = tf.clip_by_global_norm(tf.gradients(tg_ml_cost, tvars),
                                     clip_norm=1.0)
ml_op = ml_opt_func.apply_gradients(zip(ml_grads, tvars),
                                    global_step=global_step)

# Stash hyperparameters in the graph so they can be recovered at load time
tf.add_to_collection('n_skip', args.n_skip)
tf.add_to_collection('n_hidden', args.n_hidden)
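The clip-then-apply pattern above can be isolated in a minimal, self-contained TF 1.x sketch. The names below (toy_w, toy_loss, toy_step) are illustrative only and are not part of the original script.

import tensorflow as tf

# Toy variable and quadratic loss, standing in for tvars and tg_ml_cost
toy_w = tf.Variable([2.0, -3.0], name="toy_w")
toy_loss = tf.reduce_sum(tf.square(toy_w))
toy_step = tf.Variable(0, trainable=False, name="toy_step")

toy_opt = tf.train.AdamOptimizer(learning_rate=0.01)
# Clip the gradient list to a global norm of 1.0, then apply the Adam update
toy_grads, _ = tf.clip_by_global_norm(tf.gradients(toy_loss, [toy_w]), clip_norm=1.0)
toy_train_op = toy_opt.apply_gradients(zip(toy_grads, [toy_w]), global_step=toy_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5):
        _, step_val, loss_val = sess.run([toy_train_op, toy_step, toy_loss])
        print(step_val, loss_val)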
)
args.eval_aligned = False
print("The parameters are: \n", args)

config, second_config = utils._get_config(args)
setattr(args, 'autoencoder', False)

# Data loaders and the pre-trained models that will be ensembled
train_loader, test_loader, retrain_loader = get_dataloaders(args, config)
models, accuracies = load_pretrained_models(args, config)
recheck_accuracy(args, models, test_loader)

for idx, model in enumerate(models):
    print(f'model {idx} size is ', utils.get_model_size(model))
    test_model(args, model, test_loader)

if args.gpu_id == -1:
    device = torch.device('cpu')
else:
    device = torch.device('cuda:{}'.format(args.gpu_id))

print("------- Prediction based ensembling -------")
prediction_acc = baseline.prediction_ensembling(args, models, test_loader)

print("------- Geometric Ensembling -------")
activations = utils.get_model_activations(args, models, config=config)
geometric_acc, geometric_model = wasserstein_ensemble.geometric_ensembling_modularized(
    args, models, train_loader, test_loader, activations)
utils.get_model_size(geometric_model)
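For reference, prediction-based ensembling usually means averaging the models' softmax outputs before taking the argmax. The helper below is a minimal illustration of that idea under those assumptions, not the baseline.prediction_ensembling implementation used above.

import torch
import torch.nn.functional as F

def average_prediction_accuracy(models, loader, device):
    """Score the ensemble obtained by averaging softmax outputs over models."""
    for m in models:
        m.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            # Mean of per-model class probabilities, then argmax
            probs = torch.stack([F.softmax(m(x), dim=1) for m in models]).mean(dim=0)
            correct += (probs.argmax(dim=1) == y).sum().item()
            total += y.size(0)
    return correct / total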
def train(self, t, train_data, valid_data, device='cuda'):
    self.writer.add_text(
        "ModelSize/Task_{}".format(t),
        "model size = {}".format(utils.get_model_size(self.model)))
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr

    # 1 define the optimizer and scheduler
    self.optimizer = self._get_optimizer(lr)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=self.lr_patience,
    #                                                        factor=self.lr_factor, threshold=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        self.optimizer, self.epochs)

    # 2 define the dataloader
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=self.batch,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=self.batch,
                                               shuffle=False,
                                               num_workers=4,
                                               pin_memory=True)

    # 3 training the model
    for e in range(self.epochs):
        # 3.1 train
        self.train_epoch(t, train_loader, device=device)

        # 3.2 compute training loss
        train_loss, train_acc = self.eval(t, train_loader, mode='train', device=device)

        # 3.3 compute valid loss
        valid_loss, valid_acc = self.eval(t, valid_loader, mode='train', device=device)

        # 3.4 logging
        print(
            '| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'
            .format(e, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))
        self.writer.add_scalars('Train_Loss/Task: {}'.format(t), {
            'train_loss': train_loss,
            'valid_loss': valid_loss
        }, global_step=e)
        self.writer.add_scalars('Train_Accuracy/Task: {}'.format(t), {
            'train_acc': train_acc * 100,
            'valid_acc': valid_acc * 100
        }, global_step=e)

        # 3.5 Adapt learning rate
        scheduler.step()

        # 3.6 update the best model
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)

    # 4 Restore best model
    utils.set_model_(self.model, best_model)

    # Update old
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)  # Freeze the weights

    # Fisher ops
    if t > 0:
        fisher_old = {}
        for n, _ in self.model.named_parameters():
            fisher_old[n] = self.fisher[n].clone()
    self.fisher = utils.fisher_matrix_diag(t, train_loader, self.model,
                                           self.criterion, device, self.batch)
    if t > 0:
        # Watch out! We do not want to keep t models (or fisher diagonals) in memory,
        # therefore we have to merge fisher diagonals
        for n, _ in self.model.named_parameters():
            self.fisher[n] = (self.fisher[n] + fisher_old[n] * t) / (t + 1)
    return
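The Fisher update at the end keeps a single running estimate per parameter rather than one diagonal per task. Below is a standalone sketch of that running average, with a toy tensor standing in for a per-parameter Fisher diagonal; the names are illustrative only.

import torch

def merge_fisher(fisher_new, fisher_old, t):
    # After task t (0-indexed), the stored diagonal becomes the mean of the
    # t + 1 per-task Fisher diagonals seen so far.
    return {n: (fisher_new[n] + fisher_old[n] * t) / (t + 1) for n in fisher_new}

f_old = {'w': torch.tensor([1.0, 1.0])}   # running average over the first 2 tasks
f_new = {'w': torch.tensor([4.0, 0.0])}   # Fisher diagonal estimated on task t = 2
print(merge_fisher(f_new, f_old, t=2)['w'])   # tensor([2.0000, 0.6667])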
test_data = MyDataset(data[u]['test'], debug=args.debug)
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=args.batch,
                                          shuffle=False,
                                          pin_memory=True,
                                          num_workers=4)
test_loss, test_acc = appr.eval(u, test_loader, mode='train', device=device)
print('>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.1f}% <<<'.format(
    u, data[u]['name'], test_loss, 100 * test_acc))
writer.add_scalars('Test/Loss', {'task{}'.format(u): test_loss},
                   global_step=t)
writer.add_scalars('Test/Accuracy', {'task{}'.format(u): test_acc * 100},
                   global_step=t)
acc[t, u] = test_acc
lss[t, u] = test_loss

model_size.append(utils.get_model_size(appr.model, mode='M'))
writer.add_scalars('ModelParameter(M)',
                   {'ModelParameter(M)': utils.get_model_size(appr.model, 'M')},
                   global_step=t)

# Done, logging the experiment results
print('*' * 100)
print('Accuracies =')
for i in range(acc.shape[0]):
    print('\t', end='')
    for j in range(acc.shape[1]):
        writer.add_text("Results/Acc", '{:5.1f}% '.format(100 * acc[i, j]), i)
        print('{:5.1f}% '.format(100 * acc[i, j]), end='')
    print()
print('*' * 100)
print('Done!')
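The matrix acc stores acc[t, u], the accuracy on task u after training through task t. A common summary, shown here only as an illustration with made-up numbers and not computed in the script above, is the mean accuracy over all tasks after the final one.

import numpy as np

# Toy 3-task accuracy matrix: row t holds accuracies measured after training task t
acc = np.array([[0.98, 0.00, 0.00],
                [0.95, 0.97, 0.00],
                [0.93, 0.94, 0.96]])

final_avg_acc = acc[-1, :].mean()   # average accuracy after the last task
print('Average accuracy = {:5.1f}%'.format(100 * final_avg_acc))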
elif args.partition_type == 'small_big':
    assert args.dataset == 'mnist'
    print("------- Split dataloaders wrt small big data setting-------")
    trailo_a, trailo_b, personal_trainset, other_trainset = partition.get_small_big_split(
        args, split_frac=args.personal_split_frac, is_train=True, return_dataset=True)
    teslo_a, teslo_b, personal_testset, other_testset = partition.get_small_big_split(
        args, split_frac=args.personal_split_frac, is_train=False, return_dataset=True)

    print("------- Training independent models -------")
    choices = list(range(0, 10))
    models, accuracies, local_accuracies = routines.train_data_separated_models(
        args, [trailo_a, trailo_b], [teslo_a, teslo_b], test_loader, [choices, choices])

for idx, model in enumerate(models):
    setattr(args, f'params_model_{idx}', utils.get_model_size(model))

personal_dataset = None
if args.partition_type == 'personalized' or args.partition_type == 'small_big':
    if args.partition_dataloader == 0:
        personal_dataset = personal_trainset
    elif args.partition_dataloader == 1:
        personal_dataset = other_trainset

activations = utils.get_model_activations(args, models, config=config,
                                          personal_dataset=personal_dataset)

# run geometric aka wasserstein ensembling
print("------- Geometric Ensembling -------")
geometric_acc, geometric_model = wasserstein_ensemble.geometric_ensembling_modularized(
    args, models, train_loader, test_loader, activations)
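As a rough illustration of what a split_frac-controlled partition does, the sketch below splits a dataset into a small fraction and the remainder with torch.utils.data.random_split. It is not the repo's partition.get_small_big_split; the function name fraction_split and its return layout are assumptions made for this example.

import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

def fraction_split(dataset, split_frac, batch_size=128, seed=0):
    # Split `dataset` into a small part of size split_frac and the big remainder
    n_small = int(len(dataset) * split_frac)
    gen = torch.Generator().manual_seed(seed)
    small_set, big_set = random_split(dataset, [n_small, len(dataset) - n_small],
                                      generator=gen)
    return (DataLoader(small_set, batch_size=batch_size, shuffle=True),
            DataLoader(big_set, batch_size=batch_size, shuffle=True),
            small_set, big_set)

mnist = datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.ToTensor())
loader_small, loader_big, small_set, big_set = fraction_split(mnist, split_frac=0.1)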