import torch
import torch.optim as optim

# Assumes module-level `feature`, `weight`, `out`, `idx_train`, `idx_val`,
# `args`, and the helpers `FNN`, `weight_init`, `train_epoch`, and `Loss`.
def train(epochs, layer, lr, lambd):
    # One-output FNN over the last feature dimension; hidden width 128.
    model = FNN(feature.shape[2], 1, layer, 128)
    model.apply(weight_init)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=lambd)
    if args.cuda:
        model = model.cuda()
    print("Training FNN for %d layers, %f learning rate, %f lambda" % (layer, lr, lambd))
    for epoch in range(epochs):
        train_epoch(epoch, model, optimizer, lambd)

    # Apply the trained model to each slice along dim 1 and stack the results.
    output = model(feature[:, 0, :]).unsqueeze(1)
    for i in range(1, feature.shape[1]):
        output = torch.cat((output, model(feature[:, i, :]).unsqueeze(1)), 1)

    # t_weight = torch.stack((weight, weight), 2)
    t_weight = weight
    output = output.squeeze()
    # Weighted sum over dim 1 collapses the per-slice predictions.
    output = torch.mul(t_weight, output)
    output = torch.sum(output, 1)

    loss_train = Loss(output[idx_train], out[idx_train])
    loss_val = Loss(output[idx_val], out[idx_val])
    print("Result for %d layers, %f learning rate, %f lambda" % (layer, lr, lambd))
    print('loss_val: {:.4f}'.format(loss_val.item()))
    return model, output, loss_val
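# Both train() variants pass a `weight_init` callable to `model.apply`, but
# its definition is not part of this snippet. The sketch below is a
# hypothetical stand-in, not the original source: Xavier-uniform
# initialization for linear layers is a common choice for this pattern.
import torch.nn as nn

def weight_init(m):
    # Initialize linear layers with Xavier-uniform weights and zero biases.
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        nn.init.zeros_(m.bias)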
import torch.nn.functional as F

# Variant of train() that takes the train/val index sets explicitly and
# evaluates the full feature matrix in a single forward pass.
def train(epochs, layer, lr, lambd, idx_train, idx_val):
    model = FNN(feature.shape[1], out.shape[1], layer, 128)
    model.apply(weight_init)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=lambd)
    if args.cuda:
        model = model.cuda()
    print("Training FNN for %d layers, %f learning rate, %f lambda" % (layer, lr, lambd))
    for epoch in range(epochs):
        train_epoch(epoch, model, optimizer, lambd, idx_train, idx_val)
    output = model(feature)
    loss_val = F.mse_loss(output[idx_val], out[idx_val])
    print("Result for %d layers, %f learning rate, %f lambda" % (layer, lr, lambd))
    print('loss_val: {:.4f}'.format(loss_val.item()))
    return output, loss_val
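# `train_epoch` is assumed to be defined elsewhere; the two train() variants
# call it with different signatures. A minimal sketch of the six-argument
# variant, assuming a full-batch MSE objective over the training indices
# (this is an illustration, not the original helper):
def train_epoch(epoch, model, optimizer, lambd, idx_train, idx_val):
    model.train()
    optimizer.zero_grad()
    output = model(feature)
    # `lambd` is already applied as weight decay inside the optimizer.
    loss_train = F.mse_loss(output[idx_train], out[idx_train])
    loss_train.backward()
    optimizer.step()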
import os
import torch
import torch.nn as nn

# Assumes `args`, `params_dataloader`, `params_model`, `device`, and the
# classes `SignalDataset` and `FNN` are defined elsewhere in the script.
params_op = {
    'lr': float(args.learning_rate),
    'momentum': float(args.momentum),
    'weight_decay': float(args.weight_decay),
}

path = args.path
training_set = SignalDataset(path, train=True)
train_loader = torch.utils.data.DataLoader(training_set, **params_dataloader)
num_classes = training_set.num_classes
test_set = SignalDataset(path, train=False)
test_loader = torch.utils.data.DataLoader(test_set, **params_dataloader)

model = FNN(**params_model, output_size=num_classes).to(device=device)
nll_loss = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), **params_op)

# Optionally resume from a checkpoint. (The original bound the optimizer to
# `op` but restored state into `optimizer`; the name is unified here.)
if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_acc1 = checkpoint['best_acc1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

best_acc_train = -1.
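# The resume block above expects checkpoints containing the keys 'epoch',
# 'best_acc1', 'state_dict', and 'optimizer'. A minimal sketch of the
# matching save step (the helper name `save_checkpoint` and the filename
# default are assumptions, not from the original source):
def save_checkpoint(epoch, best_acc1, model, optimizer, filename='checkpoint.pth.tar'):
    torch.save({
        'epoch': epoch,
        'best_acc1': best_acc1,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, filename)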
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm

# Assumes `args`, `check_for_data`, `prepare_data`, `FNN`, `Trainer`, `MI`,
# and `info_utils` are defined elsewhere.
def main_func(activation, data_path, save_path, batch_size, epochs, layer_sizes,
              mi_methods, test_size, num_bins=[30], num_runs=1, try_gpu=False):
    check_for_data(save_path)
    if try_gpu:
        cuda = torch.cuda.is_available()
        device = torch.device("cuda" if cuda else "cpu")
    else:
        device = torch.device("cpu")
    print("Using " + str(device))

    loss_function = nn.CrossEntropyLoss()  # Only one supported as of now
    max_values = []
    for i in tqdm.tqdm(range(args.start_from, num_runs)):
        # Seed every RNG so each run is reproducible.
        torch.manual_seed(i)
        torch.cuda.manual_seed(i)
        np.random.seed(i)

        train_loader, test_loader, act_full_loader = prepare_data(data_path, test_size, i, batch_size)
        model = FNN(layer_sizes, activation=activation, seed=i).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.0004)
        tr = Trainer(loss_function, epochs, model, optimizer, device)
        print("Start Training...")
        tr.train(train_loader, test_loader, act_full_loader)

        if args.save_train_error:
            print("Saving train and test error...")
            with open(save_path + '/training_history_run_{}_{}.pickle'.format(i, batch_size), 'wb') as f:
                pickle.dump([tr.error_train, tr.error_test], f, protocol=pickle.HIGHEST_PROTOCOL)
            with open(save_path + '/loss_run_{}_{}.pickle'.format(i, batch_size), 'wb') as f:
                pickle.dump([tr.train_loss, tr.val_loss], f, protocol=pickle.HIGHEST_PROTOCOL)

        if args.save_max_vals:
            print("Saving max activation values...")
            with open(save_path + '/max_values{}_{}.pickle'.format(i, batch_size), 'wb') as f:
                print(np.array(tr.max_value_layers_mi).max())
                pickle.dump(tr.max_value_layers_mi, f, protocol=pickle.HIGHEST_PROTOCOL)

        if args.save_mutual_information:
            for j in num_bins:
                print("Saving mutual information with {} bins...".format(j))
                if "variable" in mi_methods:
                    # Scale the bin count to the largest observed activation.
                    # The original rebound `num_bins` (the list being iterated)
                    # and never used it; the computed count is passed here.
                    max_value = info_utils.get_max_value(tr.hidden_activations)
                    variable_bins = int(max_value * 15)
                    mutual_inf = MI(tr.hidden_activations, act_full_loader,
                                    act=activation, num_of_bins=variable_bins)
                    MI_XH, MI_YH = mutual_inf.get_mi(method="fixed")
                    with open(save_path + '/MI_XH_MI_YH_run_{}_{}_{}variable.pickle'.format(i, batch_size, j), 'wb') as f:
                        pickle.dump([MI_XH, MI_YH], f, protocol=pickle.HIGHEST_PROTOCOL)
                if "fixed" in mi_methods:
                    mutual_inf = MI(tr.hidden_activations, act_full_loader,
                                    act=activation, num_of_bins=j)
                    MI_XH, MI_YH = mutual_inf.get_mi(method="fixed")
                    with open(save_path + '/MI_XH_MI_YH_run_{}_{}_{}bins.pickle'.format(i, batch_size, j), 'wb') as f:
                        pickle.dump([MI_XH, MI_YH], f, protocol=pickle.HIGHEST_PROTOCOL)
                if "adaptive" in mi_methods:
                    mutual_inf = MI(tr.hidden_activations, act_full_loader,
                                    act=activation, num_of_bins=j)
                    MI_XH, MI_YH = mutual_inf.get_mi(method="adaptive")
                    with open(save_path + '/MI_XH_MI_YH_run_{}_{}_{}adaptive.pickle'.format(i, batch_size, j), 'wb') as f:
                        pickle.dump([MI_XH, MI_YH], f, protocol=pickle.HIGHEST_PROTOCOL)

        minv, maxv = info_utils.get_min_max_vals(activation, tr.hidden_activations)
        max_values.append(maxv)
        print(max_values)

        # Delete per-run objects explicitly; otherwise Python keeps them alive
        # until they are rebound on the next iteration, which inflates RAM use.
        del model
        del tr
        if args.save_mutual_information:
            del mutual_inf
            del MI_XH
            del MI_YH
        del train_loader
        del test_loader
        del act_full_loader
    print("Done running...")
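# A hypothetical invocation of main_func; all values are illustrative, and
# `args` is assumed to come from an argparse parser exposing the fields used
# above (start_from, save_train_error, save_max_vals, save_mutual_information).
if __name__ == "__main__":
    main_func(activation="relu",
              data_path="data/",
              save_path="results/",
              batch_size=64,
              epochs=30,
              layer_sizes=[784, 256, 64, 10],
              mi_methods=["fixed", "adaptive"],
              test_size=0.2,
              num_bins=[30],
              num_runs=1,
              try_gpu=True)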