def _plot_course(values, ylabel, title=None):
    """Plot one training course with the axis labels shared by all plots in main().

    params:
        * values: sequence of per-sample values (one entry per 5th epoch)
        * ylabel: y-axis label string
        * title: optional plot title
    """
    plt.plot(values)
    plt.xlabel('# epochs x 5')
    plt.ylabel(ylabel)
    if title is not None:
        plt.title(title)
    plt.show()


def main():
    """Train and compare four MNIST networks: nominal, L2-regularized,
    Lipschitz-regularized (LipSDP), and LMT-regularized.

    For each variant: trains via train_network(), saves the weights into a
    shared .mat file, certifies the Lipschitz constant with
    solve_SDP_multi.build_T_multi, saves the model, and plots the loss /
    CE-loss / Lipschitz / accuracy courses.
    """
    train_loader, test_loader = create_data_loaders()
    print(train_loader)
    print(test_loader)
    fname = os.path.join(os.getcwd(), 'saved_weights/mnist_weights.mat')
    # Accumulate every variant's weights here: the original rebuilt a
    # single-key dict before each savemat() call, so each save clobbered
    # the previously saved weights in the same file.
    saved = {}

    # --- nominal NN: define model and print summary ---
    net_dims = [INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE]
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")  # PyTorch v0.4.0
    model = Network(activation=nn.ReLU).to(device)
    summary(model, (392, 784))
    print("Beginning nominal NN training")
    t = time.time()
    (parametersNom, Lip_course, loss_course, CEloss_course,
     accuracy_course) = train_network(model, train_loader, test_loader)
    timeNom = time.time() - t
    print("Nominal training complete after {} seconds".format(timeNom))
    # save data to saved_weights/ directory
    weights, biases = model.extract_weights()
    # dtype=object: `np.object` was removed in NumPy 1.24
    saved['weights'] = np.array(weights, dtype=object)
    savemat(fname, saved)
    Lip_dic = solve_SDP_multi.build_T_multi(weights, biases, net_dims)
    Lip_nom = Lip_dic["Lipschitz"]
    torch.save(model, 'MNIST784_NomModel.pt')
    _plot_course(loss_course, 'Loss Nom')
    _plot_course(CEloss_course, 'CE-Loss Nom')
    _plot_course(Lip_course, 'Lip_course Nom')
    _plot_course(accuracy_course, 'accuracy_course Nom')

    # --- L2 NN ---
    modelL2 = Network(activation=nn.ReLU).to(device)
    summary(modelL2, (392, 784))  # fixed: original summarized `model`
    print("Beginning L2 training")
    t = time.time()
    (parametersL2, Lip_courseL2, loss_courseL2, CEloss_courseL2,
     accuracy_courseL2) = train_network(
        modelL2, train_loader, test_loader, lmbd=lmbd)
    timeL2 = time.time() - t
    print("L2 training complete after {} seconds".format(timeL2))
    weightsL2, biasesL2 = modelL2.extract_weights()
    saved['weightsL2'] = np.array(weightsL2, dtype=object)
    savemat(fname, saved)
    Lip_L2 = solve_SDP_multi.build_T_multi(weightsL2, biasesL2, net_dims)
    torch.save(modelL2, 'MNIST784_L2Model.pt')
    _plot_course(loss_courseL2, 'Loss L2')
    _plot_course(CEloss_courseL2, 'CE-Loss L2')
    _plot_course(Lip_courseL2, 'Lip_course L2')
    _plot_course(accuracy_courseL2, 'accuracy_course L2')

    # --- NN with Lipschitz (LipSDP) regularizer, warm-started from L2 ---
    modelLip = Network(activation=nn.ReLU).to(device)
    modelLip.load_state_dict(modelL2.state_dict())
    summary(modelLip, (392, 784))  # fixed: original summarized `model`
    L_des = Lip_L2["Lipschitz"]  # target Lipschitz constant
    print("Beginning parameters = solve_SDP1")
    t1 = time.time()
    parameters = solve_SDP_multi.initialize_parameters(weights, biases)
    timeSolveSDP1 = time.time() - t1
    print("Complete parameters = solve_SDP1 after {} seconds".format(
        timeSolveSDP1))
    print("Beginning parameters = solve_SDP2")
    t2 = time.time()
    parameters_L2 = solve_SDP_multi.initialize_parameters(weightsL2, biasesL2)
    timeSolveSDP2 = time.time() - t2
    print("Complete parameters = solve_SDP2 after {} seconds".format(
        timeSolveSDP2))
    init = 1  # 1: initialize from L2 NN, 2: initialize from nominal NN
    if init == 1:
        start_parameters, start_T = parameters_L2, Lip_L2["T"]
    else:
        start_parameters, start_T = parameters, Lip_dic["T"]
    print("Beginning LipSDP training")
    t3 = time.time()
    (parameters_Lip, Lip_courseLip, loss_courseLip, CEloss_courseLip,
     accuracy_courseLip) = train_network(
        modelLip, train_loader, test_loader, rho=rho, mu=mu,
        parameters=start_parameters, L_des=L_des, T=start_T)
    timeTrainSDP = time.time() - t3
    print("LipSDP training complete after {} seconds".format(timeTrainSDP))
    timeFullSDP = time.time() - t1
    print("Full LipSDP training complete after {} seconds".format(timeFullSDP))
    weightsLip, biasesLip = modelLip.extract_weights()
    Lip_Lip = solve_SDP_multi.build_T_multi(weightsLip, biasesLip, net_dims)
    saved['weightsLip'] = np.array(weightsLip, dtype=object)
    savemat(fname, saved)
    torch.save(modelLip, 'MNIST784_LipModel.pt')
    hyper = 'rho = ' + str(rho) + ', mu = ' + str(mu)
    _plot_course(loss_courseLip, 'Loss Lip', 'Loss with ' + hyper)
    _plot_course(CEloss_courseLip, 'CE-Loss Lip', 'CE-Loss with ' + hyper)
    _plot_course(Lip_courseLip, 'Lip_course Lip', 'Lip with ' + hyper)
    _plot_course(accuracy_courseLip, 'accuracy_course Lip',
                 'Accuracy with ' + hyper)

    # --- LMT NN ---
    modelLMT = Network(activation=nn.ReLU).to(device)
    summary(modelLMT, (392, 784))  # fixed: original summarized `model`
    print("Beginning LMT training")
    t = time.time()
    (parametersLMT, Lip_courseLMT, loss_courseLMT, CEloss_courseLMT,
     accuracy_courseLMT) = train_network(
        modelLMT, train_loader, test_loader, c=c)
    timeLMT = time.time() - t
    print("LMT training complete after {} seconds".format(timeLMT))
    weightsLMT, biasesLMT = modelLMT.extract_weights()
    saved['weightsLMT'] = np.array(weightsLMT, dtype=object)
    savemat(fname, saved)
    Lip_LMT = solve_SDP_multi.build_T_multi(weightsLMT, biasesLMT, net_dims)
    torch.save(modelLMT, 'MNIST784_LMTModel.pt')
    _plot_course(loss_courseLMT, 'Loss LMT')
    _plot_course(CEloss_courseLMT, 'CE-Loss LMT')
    _plot_course(Lip_courseLMT, 'Lip_course LMT')
    _plot_course(accuracy_courseLMT, 'accuracy_course LMT')
def train(self, lmbd=None, rho=None, parameters=None, c=None):
    """Full-batch training on the module-level `input` / `target_cross`
    tensors: 10000 fixed SGD steps, then extra steps until the loss change
    (measured two iterations apart) drops below 0.01.

    NOTE(review): this shadows nn.Module.train(mode=True); calling
    model.train() elsewhere will launch a whole training run — confirm
    this is intended.

    params:
        * lmbd: L2 regularization weight (optional)
        * rho, parameters: Lipschitz regularizer settings (optional)
        * c: LMT regularizer constant (optional)

    returns: (Lip_course, loss_course, CEloss_course, T) — the courses are
    sampled every 5000 iterations; T is the last SDP matrix from
    build_T_multi.
    """
    Lip_course = []     # SDP-certified Lipschitz bound, sampled every 5000 steps
    loss_course = []    # total (regularized) loss, same sampling
    CEloss_course = []  # plain cross-entropy loss, same sampling
    # `input` and `target_cross` are module-level tensors (`input` shadows
    # the builtin).
    out = self(input)
    criterion = nn.CrossEntropyLoss()
    loss = criterion(out, target_cross)
    loss_prev = 0
    loss_prevprev = 0
    # Phase 1: fixed number of full-batch gradient steps.
    for i in range(10000):
        out = self(input)
        loss_prev = loss
        loss = criterion(out, target_cross)
        if lmbd is not None:
            loss += self.l2_reg(lmbd)
        if rho is not None:
            loss += self.Lip_reg(rho, parameters)
        if c is not None:
            # NOTE(review): result discarded — LMT_reg is not added to the
            # loss; confirm it mutates state or should read `loss +=`.
            self.LMT_reg(c)
        if np.mod(i, 5000) == 0:
            # Periodically certify the Lipschitz constant via the SDP.
            weights, biases = self.extract_weights()
            Lip = solve_SDP_multi.build_T_multi(weights, biases, net_dims)
            L = Lip["Lipschitz"]
            T = Lip["T"]
            # Trivial upper bound: product of the layer spectral norms.
            L_W = 1
            for j in range(len(weights)):
                L_W = L_W * np.linalg.norm(weights[j], 2)
            Lip_course.append(L)
            loss_course.append(loss.item())
            crossEntropyLoss = criterion(out, target_cross)
            # NOTE(review): appends the tensor itself, not .item() —
            # confirm downstream consumers expect tensors here.
            CEloss_course.append(crossEntropyLoss)
            print(
                'Train Epoch: {}; Loss: {:.6f}; CE-Loss: {:.6f}; Lipschitz: {:.3f}; Trivial Lipschitz: {:.3f}'
                .format(i, loss.item(), crossEntropyLoss, L, L_W))
            # print(Lip["ok"])
        self.zero_grad()
        loss.backward()
        # NOTE(review): a fresh SGD optimizer is built every iteration;
        # harmless for plain SGD but would break stateful optimizers
        # (Adagrad/Adam below).
        optimizer = optim.SGD(self.parameters(), lr=lr)
        # optimizer = optim.Adagrad(self.parameters(), lr=lr)
        # optimizer = optim.Adam(self.parameters(), lr=lr)
        optimizer.step()
    # Phase 2: continue stepping (reusing the final `i`) until the loss from
    # two iterations ago is within 0.01 of the current loss.
    while abs(loss_prevprev - loss.item()) >= 0.01:
        out = self(input)
        loss_prevprev = loss_prev
        loss_prev = loss
        criterion = nn.CrossEntropyLoss()
        loss = criterion(out, target_cross)
        if lmbd is not None:
            loss += self.l2_reg(lmbd)
        if rho is not None:
            loss += self.Lip_reg(rho, parameters)
        if c is not None:
            self.LMT_reg(c)
        if np.mod(i, 5000) == 0:
            weights, biases = self.extract_weights()
            Lip = solve_SDP_multi.build_T_multi(weights, biases, net_dims)
            L = Lip["Lipschitz"]
            T = Lip["T"]
            L_W = 1
            for j in range(len(weights)):
                L_W = L_W * np.linalg.norm(weights[j], 2)
            Lip_course.append(L)
            loss_course.append(loss.item())
            crossEntropyLoss = criterion(out, target_cross)
            CEloss_course.append(crossEntropyLoss)
            print(
                'Train Epoch: {}; Loss: {:.6f}; CE-Loss: {:.6f}; Lipschitz: {:.3f}; Trivial Lipschitz: {:.3f}'
                .format(i, loss.item(), crossEntropyLoss, L, L_W))
            # print(Lip["ok"])
        self.zero_grad()
        loss.backward()
        optimizer = optim.SGD(self.parameters(), lr=lr)
        # optimizer = optim.Adagrad(self.parameters(), lr=lr)
        # optimizer = optim.Adam(self.parameters(), lr=lr)
        optimizer.step()
        i += 1
    # Final report reuses crossEntropyLoss/L/L_W from the last sampled
    # iteration (bound in the mod-5000 branch above).
    print(
        'Train Epoch: {}; Loss: {:.6f}; CE-Loss: {:.6f}; Lipschitz: {:.3f}; Trivial Lipschitz: {:.3f}'
        .format(i, loss.item(), crossEntropyLoss, L, L_W))
    if (lmbd is None) and (rho is None) and (c is None):
        Lip_dic = solve_SDP_multi.build_T_multi(weights, biases, net_dims)
        Lip_nom = Lip_dic["Lipschitz"]
    return Lip_course, loss_course, CEloss_course, T
def train_model(self, train_loader, test_loader, optimizer, criterion,
                lmbd=None, rho=None, mu=None, parameters=None, c=None,
                Lip_nom=None):
    """Train the model: 5 fixed epochs, then continue until the mean epoch
    loss changes by less than 0.01 between consecutive epochs.

    params:
        * train_loader / test_loader: DataLoader instances for train / test data
        * optimizer: torch.optim instance
        * criterion: loss function (e.g. torch.nn.CrossEntropyLoss instance)
        * lmbd: L2 regularization weight (optional)
        * rho, mu, parameters: Lipschitz (LipSDP) regularizer settings (optional)
        * c, Lip_nom: LMT regularizer settings (optional)

    returns: (loss_course, CEloss_course, Lip_course, accuracy_course, T)
        — one course entry per 5th epoch; T is the latest SDP matrix from
        build_T_multi.

    modifies: weights of this model instance.
    """
    self.train()  # Set model to training mode
    Lip_course, loss_course, CEloss_course, accuracy_course = [], [], [], []
    loss_prev = 0
    lossM = 0
    T = None  # set on epoch 0 by the logging branch below
    for epoch_num in range(5):
        epoch_lossM, epoch_CElossM = self._run_train_epoch(
            train_loader, optimizer, criterion, lmbd, rho, mu, parameters,
            c, Lip_nom)
        lossM = epoch_lossM
        if np.mod(epoch_num, 5) == 0:
            T = self._log_progress(
                epoch_num, epoch_lossM, epoch_CElossM, test_loader,
                Lip_course, loss_course, CEloss_course, accuracy_course)
    # Convergence phase: keep training until the mean epoch loss stabilizes.
    while abs(loss_prev - lossM) >= 0.01:
        epoch_num += 1
        # Fixed: the original computed epoch_lossM only inside the mod-5
        # logging branch, so the convergence test compared against a stale
        # value for 4 out of every 5 epochs.
        epoch_lossM, epoch_CElossM = self._run_train_epoch(
            train_loader, optimizer, criterion, lmbd, rho, mu, parameters,
            c, Lip_nom)
        if np.mod(epoch_num, 5) == 0:
            T = self._log_progress(
                epoch_num, epoch_lossM, epoch_CElossM, test_loader,
                Lip_course, loss_course, CEloss_course, accuracy_course)
        loss_prev = lossM
        lossM = epoch_lossM
    return loss_course, CEloss_course, Lip_course, accuracy_course, T


def _run_train_epoch(self, train_loader, optimizer, criterion, lmbd, rho,
                     mu, parameters, c, Lip_nom):
    """One optimization pass over train_loader.

    returns: (mean total loss, mean cross-entropy loss) over the epoch.
    """
    epoch_loss = 0.0
    epoch_CEloss = 0.0
    num_batches = 0
    for batch_id, (data, target) in enumerate(train_loader):
        data = data.view(BATCH_SIZE, -1)
        optimizer.zero_grad()  # Zero gradient buffers
        output = self(data)  # Pass data through the network
        loss = criterion(output, target)  # Calculate loss
        if lmbd is not None:
            loss += self.l2_reg(lmbd)
        if rho is not None:
            loss += self.Lip_reg(rho, mu, parameters)
        if c is not None:
            # NOTE(review): return value discarded (as in the original) —
            # presumably LMT_reg mutates state; confirm.
            self.LMT_reg(Lip_nom, data, c, output)
        loss.backward()  # Backpropagate
        optimizer.step()  # Update weights
        epoch_loss += loss.item()
        # .item() fix: the original accumulated the raw tensor, which
        # retained every batch's autograd graph for the whole epoch.
        epoch_CEloss += criterion(output, target).item()
        num_batches = batch_id + 1
    # Fixed off-by-one: the original divided by the last batch index
    # (count - 1), which also crashed with a single batch.
    return epoch_loss / num_batches, epoch_CEloss / num_batches


def _log_progress(self, epoch_num, epoch_lossM, epoch_CElossM, test_loader,
                  Lip_course, loss_course, CEloss_course, accuracy_course):
    """Certify the Lipschitz constant, evaluate test accuracy, print a
    progress line, and append all metrics to the running courses.

    returns: the SDP matrix T from build_T_multi.
    """
    weights, biases = self.extract_weights()
    Lip = solve_SDP_multi.build_T_multi(weights, biases, net_dims)
    # Trivial upper bound: product of the two layers' spectral norms.
    L_W = np.linalg.norm(weights[0], 2) * np.linalg.norm(weights[1], 2)
    epoch_LipM = Lip["Lipschitz"]
    accuracy = self.test_model(test_loader)
    print(
        'Train Epoch: {}; Loss: {:.6f}; Cross-Entropy Loss: {:.6f}; Lipschitz: {:.3f}; Trivial Lipschitz: {:.3f}; Test Accuracy: {:.3f}'
        .format(epoch_num, epoch_lossM, epoch_CElossM, epoch_LipM, L_W,
                accuracy))
    Lip_course.append(epoch_LipM)
    loss_course.append(epoch_lossM)
    CEloss_course.append(epoch_CElossM)
    accuracy_course.append(accuracy)
    return Lip["T"]
target_cross[j] += 1 else: target_cross[j] += 0 # Create NomNetz netz = MeinNetz() optimizer = optim.SGD(netz.parameters(), lr=lr) print("Beginnning nominal NN training") t = time.time() Lip_course, loss_course, CEloss_course, T = netz.train() timeNom = time.time() - t print("Nominal Training Complete after {} seconds".format(timeNom)) weights, biases = netz.extract_weights() Lip = solve_SDP_multi.build_T_multi(weights, biases, net_dims) Lip_dic = solve_SDP_multi.build_T_multi(weights, biases, net_dims) Lip_nom = Lip_dic["Lipschitz"] torch.save(netz, '2D_NomModel.pt') # NN with L2 regularizer net_L2 = MeinNetz() optimizer = optim.SGD(net_L2.parameters(), lr=lr) print("Beginnning L2 training") t = time.time() Lip_course_L2, loss_course_L2, CEloss_course_L2, T = net_L2.train(lmbd=lmbd) timeL2 = time.time() - t print("L2 Training Complete after {} seconds".format(timeL2))