def train_network(train_loader, test_loader, depth, width, epochs, init_lr, decay):
    """Train a SimpleNet classifier and report train/test sign accuracy.

    Args:
        train_loader, test_loader: iterables yielding ``(data, target)`` batches.
        depth, width: SimpleNet architecture hyperparameters.
        epochs: number of passes over the training set.
        init_lr: initial learning rate for the Nero optimizer.
        decay: per-epoch multiplicative learning-rate decay factor.

    Returns:
        ``(train_acc_list, test_acc, model)`` — per-epoch training accuracy,
        final test accuracy, and the trained model.
    """
    model = SimpleNet(depth, width).cuda()  # assumes a CUDA device is available
    optim = Nero(model.parameters(), lr=init_lr)
    lr_lambda = lambda x: decay ** x
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda)

    model.train()
    train_acc_list = []
    for epoch in range(epochs):
        correct = 0
        total = 0
        for data, target in tqdm(train_loader):
            data, target = (data.cuda(), target.cuda())
            data, target = normalize_data(data, target)
            y_pred = model(data).squeeze()
            # L2 regression loss against the +/-1 targets; sign of the output
            # is the predicted class.
            loss = (y_pred - target).norm()
            correct += (target.float() == y_pred.sign()).sum().item()
            total += target.shape[0]
            model.zero_grad()
            loss.backward()
            optim.step()
        lr_scheduler.step()  # decay the learning rate once per epoch
        train_acc_list.append(correct / total)

    model.eval()
    correct = 0
    total = 0
    # Fix: run the test pass under no_grad so evaluation does not build
    # autograd graphs (saves memory/compute; predictions are unchanged).
    with torch.no_grad():
        for data, target in tqdm(test_loader):
            data, target = (data.cuda(), target.cuda())
            data, target = normalize_data(data, target)
            y_pred = model(data).squeeze()
            correct += (target.float() == y_pred.sign()).sum().item()
            total += target.shape[0]
    test_acc = correct / total

    return train_acc_list, test_acc, model
## Check kernel compared to random networks
depth = 3
width = 5000
num_train_examples = 5
num_networks = 10**3

_, _, train_loader, _ = get_data(num_train_examples=num_train_examples,
                                 num_test_examples=None,
                                 batch_size=num_train_examples,
                                 random_labels=False,
                                 binary_digits=False)

# Extract the single full-batch training batch.
# Fix: break explicitly — without it the loop would silently keep only the
# *last* batch, which is correct today only because batch_size equals the
# number of training examples.
for data, target in train_loader:
    data, target = normalize_data(data, target)
    break

# out_matrix[i, j] = output of random network j on training example i.
out_matrix = np.zeros((num_train_examples, num_networks))
with torch.no_grad():
    print(f"Sampling {num_networks} random networks")
    for network_idx in tqdm(range(num_networks)):
        model = SimpleNet(depth, width)
        # Re-initialise each weight with iid Gaussians scaled by 1/sqrt(fan-in).
        # NOTE(review): assumes every parameter is a 2-D weight matrix
        # (p.shape[1] is the fan-in) — confirm SimpleNet has no bias/1-D params.
        for p in model.parameters():
            p.data = torch.randn_like(p) / math.sqrt(p.shape[1])
        pred = model(data).squeeze()
        out_matrix[:, network_idx] = pred.numpy()

# Empirical mean and covariance of the network outputs across the ensemble,
# to be compared against the kernel prediction.
sample_mean = np.mean(out_matrix, axis=1)
sample_cov = np.cov(out_matrix)
def train_network(train_loader, test_loader, depth, width, init_lr, decay,
                  cuda, alpha, break_on_fit=True, epochs=100):
    """Train a residual SimpleNet and report train/test sign accuracy.

    Args:
        train_loader, test_loader: iterables yielding ``(data, target)`` batches.
        depth, width: SimpleNet architecture hyperparameters.
        init_lr: initial learning rate for the Nero optimizer.
        decay: per-epoch multiplicative learning-rate decay factor.
        cuda: move the model and every batch to the GPU when True.
        alpha: scale parameter forwarded to SimpleNet (residual branch).
        break_on_fit: stop training as soon as training accuracy hits 100%.
        epochs: maximum number of training epochs. Generalizes the previously
            hard-coded 100; the default preserves the old behavior.

    Returns:
        ``(train_acc_list, test_acc, model)`` — per-epoch training accuracy,
        final test accuracy, and the trained model.
    """
    model = SimpleNet(depth, width, alpha, residual=True)
    if cuda:
        model = model.cuda()
    optim = Nero(model.parameters(), lr=init_lr)
    lr_lambda = lambda x: decay ** x
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda)

    train_acc_list = []
    train_acc = 0
    for epoch in tqdm(range(epochs)):
        model.train()
        for data, target in train_loader:
            if cuda:
                data, target = (data.cuda(), target.cuda())
            data, target = normalize_data(data, target)
            y_pred = model(data).squeeze()
            # L2 regression loss against the +/-1 targets.
            loss = (y_pred - target).norm()
            model.zero_grad()
            loss.backward()
            optim.step()
        lr_scheduler.step()  # decay the learning rate once per epoch

        # Measure accuracy over the whole training set with frozen weights
        # (fix: under no_grad so evaluation builds no autograd graphs).
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data, target in train_loader:
                if cuda:
                    data, target = (data.cuda(), target.cuda())
                data, target = normalize_data(data, target)
                y_pred = model(data).squeeze()
                correct += (target.float() == y_pred.sign()).sum().item()
                total += target.shape[0]
        train_acc = correct / total
        train_acc_list.append(train_acc)
        if break_on_fit and train_acc == 1.0:
            break

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = (data.cuda(), target.cuda())
            data, target = normalize_data(data, target)
            y_pred = model(data).squeeze()
            correct += (target.float() == y_pred.sign()).sum().item()
            total += target.shape[0]
    test_acc = correct / total

    return train_acc_list, test_acc, model