def testNetwork():
    """Run one forward/backward/update cycle on random data as a smoke test.

    Builds a Linear(10, 64) -> ReLU -> Linear(64, 2) -> Sigmoid network,
    evaluates an MSE loss on a random batch of 32 samples, back-propagates
    the loss gradient and applies a single SGD step.  Nothing is asserted;
    the check is simply that the whole pipeline executes.
    """
    layers = [Linear(10, 64), ReLU(), Linear(64, 2), Sigmoid()]
    net = Network(layers)

    # Draw inputs before targets so the random stream is consumed in a fixed order.
    inputs = np.random.randn(32, 10)
    targets = np.random.randn(32, 2)

    criterion = MSE()
    optimizer = SGD(0.001, 0.001)

    predictions = net(inputs)
    # The loss value itself is not needed; the call is made so that
    # ``criterion.grad`` is available for the backward pass below.
    criterion(predictions, targets)
    net.backward(criterion.grad)
    optimizer.step(net)
from tensor import Tensor
from optimizer import SGD
from layer import MSELoss, Linear, Tanh, Sigmoid
from model import Sequential
import numpy as np

# Toy example of using the Tensor class: fit a tiny three-layer network to
# four two-bit inputs and print the loss after each of ten SGD steps.
np.random.seed(0)

data = Tensor(
    np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
    requires_grad=True,
)
target = Tensor(
    np.array([[0], [1], [0], [1]]),
    requires_grad=True,
)

# Each layer's weight matrix is itself a Tensor object.
layers = [
    Linear(2, 3),
    Tanh(),
    Linear(3, 3),
    Tanh(),
    Linear(3, 1),
]
model = Sequential(*layers)

optim = SGD(parameters=model.get_parameters(), lr=0.1)
criterion = MSELoss()

for _ in range(10):
    pred = model(data)
    loss = criterion(pred, target)
    # Seed back-propagation with a gradient of ones shaped like the loss.
    seed_grad = Tensor(np.ones_like(loss.data), is_grad=True)
    loss.backward(seed_grad)
    optim.step()
    print(loss.data)

print(
    "------------------------------------------------------------------------")
mini_batch_size = 100
optimizer1 = SGD(model1.param(), lr=lr)
optimizer2 = SGD(model2.param(), lr=lr)

print("#" * 50)
print("Training model 1")

# Train model 1.
# ``narrow`` raises when start + length runs past the end of the tensor, so
# the final batch is clamped to the samples that remain.  The epoch loss is
# averaged over the real number of batches (ceiling division); when the sample
# count is a multiple of the batch size this equals the previous divisor.
nb_train_samples = train_input.size(0)
nb_batches = -(-nb_train_samples // mini_batch_size)
for e in range(nb_epochs):
    sum_loss = 0
    for b in range(0, nb_train_samples, mini_batch_size):
        batch_len = min(mini_batch_size, nb_train_samples - b)
        output, loss = model1.forward(train_input.narrow(0, b, batch_len),
                                      train_target.narrow(0, b, batch_len))
        # Clear stale gradients before the backward pass fills them in.
        optimizer1.zero_grad()
        grad = model1.backward()
        sum_loss = sum_loss + loss.item()
        optimizer1.step()
    # '\r' keeps the progress report on a single, continuously updated line.
    print("Iteration {0:}: loss = {1:.3f}".format(e+1, sum_loss / nb_batches),
          end='\r', flush=True)
# Mean per-batch loss of the last epoch.
loss_train1 = sum_loss / nb_batches
print()
print("#" * 50)

# Test model 1
output_test1, loss_test1 = model1.forward(test_input, test_target)
nb_err_test1 = compute_nb_errors(output_test1, test_target)
# Print results
class BackPropLearner(SupervisedLearner):
    """Feed-forward network trained with per-sample backpropagation.

    Labels are one-hot encoded, a fraction of the data is held out for
    validation, and training stops early once the validation loss has
    failed to improve for ``threshold`` epochs (or ``max_epochs`` is hit).
    The weights with the best validation loss are the ones kept.
    """

    def __init__(self):
        # Optimiser settings, handed to SGD(lr, momentum) in train().
        self.lr = .01
        self.momentum = .9
        # Network shape: ``n_layers`` hidden layers of ``hidden_dim`` units.
        self.n_layers = 1
        self.hidden_dim = 8
        # Fraction of the samples held out as the validation set.
        self.val_split = .2
        # One-hot label encoder; created lazily by prep_data().
        self.encoder = None
        # Early stopping: tolerated non-improving epochs, hard epoch cap and
        # relative slack applied to the best validation loss.
        self.threshold = 50
        self.max_epochs = 3000
        self.allowance = .0
        # Activation pairs handed to LayerList.add_layer
        # (presumably (function, derivative) -- confirm against LayerList).
        self.hidden_activation = (sigmoid, anti_sigmoid)
        # self.output_activation = (softmax, anti_sigmoid)
        self.output_activation = (sigmoid, anti_sigmoid)
        # self.loss_function = cross_entropy
        # Signed error (target - output); score() squares it to report MSE.
        self.loss_function = lambda z, t: t - z
        self.opt = None
        self.layers = None

    def train(self, features, labels):
        """Fit the network and keep the weights with the best validation loss.

        Args:
            features: feature matrix exposing ``cols`` and ``to_numpy()``.
            labels: label matrix exposing ``value_count(0)`` and ``data``.

        Training can be interrupted with Ctrl-C; the best weights seen so far
        are still restored and the validation curves are still plotted.
        """
        in_dim = features.cols
        out_dim = labels.value_count(0)
        full_x, full_y = self.prep_data(features, labels)
        train_x, train_y, val_x, val_y = self.split_data(
            full_x, full_y, self.val_split)
        self.layers = self.init_layers(in_dim, out_dim)
        best_weights = deepcopy(self.layers)
        self.opt = SGD(self.lr, self.momentum)

        train_losses = []
        train_accuracies = []
        val_losses = []
        val_accuracies = []
        lowest_loss = np.inf
        stagnant_rounds = 0
        n_epochs = 0
        try:
            # max_epochs bounds the run even if the loss keeps improving.
            while n_epochs < self.max_epochs:
                train_x, train_y = self.shuffle(train_x, train_y)
                self.run_epoch(train_x, train_y)
                train_loss, train_accuracy = self.score(train_x, train_y)
                val_loss, val_accuracy = self.score(val_x, val_y)
                train_losses.append(train_loss)
                train_accuracies.append(train_accuracy)
                val_losses.append(val_loss)
                val_accuracies.append(val_accuracy)
                print(f"EPOCH {n_epochs}")
                print(f"Train:\t{train_losses[-1]}\t{train_accuracies[-1]}")
                print(f"Val: \t{val_losses[-1]}\t{val_accuracies[-1]}")
                print()
                # Written as a product rather than
                # ``lowest + allowance * lowest``: with allowance == 0 and
                # lowest == inf the sum form evaluates to inf + nan == nan,
                # every comparison is then False and no improvement is ever
                # recorded.
                if val_loss < lowest_loss * (1 + self.allowance):
                    lowest_loss = val_loss
                    best_weights = deepcopy(self.layers)
                # NOTE(review): the counter is cumulative -- it is not reset
                # when the loss improves.  Confirm this is intended.
                elif stagnant_rounds < self.threshold:
                    stagnant_rounds += 1
                else:
                    break
                n_epochs += 1
        except KeyboardInterrupt:
            # Manual interruption is treated as a normal early stop.
            pass
        finally:
            self.layers = best_weights
        self._plot_validation_history(val_losses, val_accuracies)

    def _plot_validation_history(self, val_losses, val_accuracies):
        """Plot validation loss and accuracy per epoch on twin y-axes."""
        fig, ax1 = plt.subplots()
        ax1.set_title("Iris Validation Set Loss vs Accuracy")
        color = 'tab:red'
        ax1.set_xlabel('Epochs')
        ax1.set_ylabel('Loss (MSE)', color=color)
        ax1.plot(val_losses, color=color)
        ax1.tick_params(axis='y', labelcolor=color)
        # Second axes sharing the same x-axis; the x-label is set on ax1.
        ax2 = ax1.twinx()
        color = 'tab:blue'
        ax2.set_ylabel('Accuracy', color=color)
        ax2.plot(val_accuracies, color=color)
        ax2.tick_params(axis='y', labelcolor=color)
        # Otherwise the right y-label is slightly clipped.
        fig.tight_layout()
        plt.show()

    def shuffle(self, a, b):
        """Return ``a`` and ``b`` as arrays, shuffled with the same order."""
        temp = list(zip(a, b))
        np.random.shuffle(temp)
        new_a, new_b = zip(*temp)
        return np.array(new_a), np.array(new_b)

    def prep_data(self, features, labels):
        """Return ``(instances, one_hot_targets)`` for the given matrices.

        The encoder is created on first use and re-fitted on every call.
        """
        instances = features.to_numpy()
        if not self.encoder:
            self.encoder = OneHotEncoder(sparse=False, categories='auto')
        targets = self.encoder.fit_transform(labels.data)
        return instances, targets

    def split_data(self, x, y, split):
        """Randomly split ``x``/``y`` into training and held-out parts.

        Returns:
            ``(train_x, train_y, held_x, held_y)`` where the held-out part
            contains ``int(len(x) * split)`` samples.
        """
        n_samples = int(len(x) * split)
        rand_indices = np.random.permutation(range(len(x)))
        held_idx = rand_indices[:n_samples]
        kept_idx = rand_indices[n_samples:]
        return x[kept_idx], y[kept_idx], x[held_idx], y[held_idx]

    def score(self, X, Y):
        """Return ``(mean squared error, accuracy)`` over the given samples."""
        losses = []
        accuracy_count = 0
        for x, y in zip(X, Y):
            logits = self.layers.forward(x)
            loss = self.loss_function(logits, y)
            accuracy_count += np.argmax(y) == np.argmax(logits)
            losses.append(loss**2)
        return np.mean(losses), accuracy_count / len(Y)

    def run_epoch(self, X, Y):
        """Do one forward/backward/update step per sample.

        Returns the mean of the raw (signed) per-sample errors.
        """
        losses = []
        for x, y in zip(X, Y):
            logits = self.layers.forward(x)
            loss = self.loss_function(logits, y)
            losses.append(loss)
            self.layers.backward(loss)
            self.opt.step(self.layers)
        return np.mean(losses)

    def init_layers(self, in_dim, out_dim):
        """Build ``n_layers`` hidden layers followed by the output layer."""
        layers = LayerList()
        prev_dim = in_dim
        for _ in range(self.n_layers):
            layers.add_layer(prev_dim, self.hidden_dim,
                             self.hidden_activation)
            prev_dim = self.hidden_dim
        # With no hidden layers the output layer connects to the input.
        layers.add_layer(prev_dim, out_dim, self.output_activation)
        return layers

    def predict(self, features, labels):
        """Predict the label for one instance.

        ``labels`` is an output parameter: it is emptied and the decoded
        prediction is appended to it.
        """
        self.in_training = False
        del labels[:]
        pred = self.layers.forward(np.array(features))
        # One-hot vector for the strongest output unit.  argmax is computed
        # once and takes the first maximum on ties.
        winner = int(np.argmax(pred))
        one_hot = [[1 if i == winner else 0 for i in range(len(pred))]]
        decoded = self.encoder.inverse_transform(one_hot)
        labels.append(decoded[0][0])
def _print_banner(model_desc, loss_desc, optimizer_desc):
    """Print the header block that introduces one training run."""
    rule = '*************************************************************************'
    for _ in range(5):
        print(rule)
    print(model_desc)
    print(loss_desc)
    print(optimizer_desc)
    print(rule)
    print('Training')
    print(rule)


def _same_class(pred, label):
    """Return True when ``pred`` and ``label`` have the same argmax index."""
    _, pred_cat = torch.max(pred, 0)
    _, label_cat = torch.max(label, 0)
    return bool(pred_cat == label_cat)


def _is_nan(value):
    """Return True when ``value`` compares as neither >= 0 nor < 0 (NaN)."""
    return (not value >= 0) and (not value < 0)


def _train_one_epoch(model, criterion, optimizer, features, labels, batch_size):
    """Train for one pass over the data.

    Returns ``(loss_sum, num_correct, last_loss_grad)``; ``last_loss_grad`` is
    the loss gradient of the final sample (``None`` for an empty data set).
    """
    loss_sum = 0.
    num_correct = 0
    last_loss_grad = None
    n_samples = len(features)
    for start in range(0, n_samples, batch_size):
        # Clamp the final batch: narrow() rejects ranges past the end.
        length = min(batch_size, n_samples - start)
        batch_features = features.narrow(0, start, length)
        batch_labels = labels.narrow(0, start, length)
        # Clear parameter gradients once per batch so that the per-sample
        # backward passes below can accumulate.
        optimizer.zero_grad()
        for i in range(length):
            feature = batch_features[i]
            label = batch_labels[i]
            # Forward pass to compute the loss.
            pred = model.forward(feature)
            loss_sum += criterion.forward(pred, label)
            if _same_class(pred, label):
                num_correct += 1
            # Gradient of the loss, then back-propagate through the model.
            last_loss_grad = criterion.backward(pred, label)
            model.backward(last_loss_grad)
        # Update parameters from the accumulated gradients.
        optimizer.step()
    return loss_sum, num_correct, last_loss_grad


def _evaluate(model, criterion, features, labels):
    """Forward-only pass; returns ``(loss_sum, num_correct)``."""
    loss_sum = 0.
    num_correct = 0
    for i in range(len(features)):
        pred = model.forward(features[i])
        loss_sum += criterion.forward(pred, labels[i])
        if _same_class(pred, labels[i]):
            num_correct += 1
    return loss_sum, num_correct


def _report_epoch(epoch, total_epochs, train_loss, train_acc, test_loss, test_acc):
    """Print the one-line summary for a finished epoch."""
    print("Epoch: {}/{}..".format(epoch, total_epochs),
          "Training Loss: {:.4f}..".format(train_loss),
          "Training Accuracy: {:.4f}..".format(train_acc),
          "Validation/Test Loss: {:.4f}..".format(test_loss),
          "Validation/Test Accuracy: {:.4f}..".format(test_acc),
          )


def _plot_classification(model, test_features, test_targets, title):
    """Scatter the test points, coloured by true class or 2 if misclassified."""
    point_colours = []
    # Iterate over the actual test-set size rather than a fixed count.
    for i in range(len(test_features)):
        pred = model.forward(test_features[i])
        _, pred_cat = torch.max(pred, 0)
        if test_targets[i].int() == pred_cat.int():
            point_colours.append(int(test_targets[i]))
        else:
            point_colours.append(2)
    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=point_colours)
    axes.set_title(title)
    plt.show()


def _build_model_2():
    """Create a fresh ``(model, optimizer, criterion)`` triple for Model 2."""
    design = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), SeLU(),
              Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
    model = Sequential(design)
    optimizer = Adam(model, lr=1e-3)
    criterion = CrossEntropy()
    return model, optimizer, criterion


def main():
    """Train and visualise two classifiers on freshly generated data.

    Model 1 uses MSE loss with SGD for 50 epochs; Model 2 uses cross entropy
    with Adam for 25 epochs and is re-initialised from scratch whenever the
    loss gradient turns into NaN.  After each run the test points are plotted,
    coloured by class when classified correctly.
    """
    # Generate data and translate labels.
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)
    n_train = len(train_features)
    n_test = len(test_features)

    # ---- Model 1 ---------------------------------------------------------
    # NOTE(review): the printed description omits the Dropout layer that the
    # design below contains; confirm which of the two is intended.
    _print_banner('Model: Linear + ReLU + Linear +ReLU + Linear + ReLU + Linear + Tanh',
                  'Loss: MSE',
                  'Optimizer: SGD')
    my_model_design_1 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                         Linear(25, 25), ReLU(), Linear(25, 2), Tanh()]
    my_model_1 = Sequential(my_model_design_1)
    optimizer_1 = SGD(my_model_1, lr=1e-3)
    criterion_1 = LossMSE()

    batch_size = 1
    for epoch in range(50):
        train_loss_sum, num_train_correct, _ = _train_one_epoch(
            my_model_1, criterion_1, optimizer_1,
            train_features, train_labels, batch_size)
        # Evaluate the current model on the testing set (forward pass only).
        test_loss_sum, num_test_correct = _evaluate(
            my_model_1, criterion_1, test_features, test_labels)
        _report_epoch(epoch+1, 50,
                      train_loss_sum/n_train, num_train_correct/n_train,
                      test_loss_sum/n_test, num_test_correct/n_test)

    _plot_classification(my_model_1, test_features, test_targets,
                         'Classification Performance of Model 1')

    # ---- Model 2 ---------------------------------------------------------
    _print_banner('Model: Linear + ReLU + Linear + Dropout+ SeLU + Linear + Dropout + ReLU + Linear + Sigmoid',
                  'Loss: Cross Entropy',
                  'Optimizer: Adam')
    my_model_2, optimizer_2, criterion_2 = _build_model_2()

    batch_size = 1
    epoch = 0
    while epoch < 25:
        train_loss_sum, num_train_correct, last_loss_grad = _train_one_epoch(
            my_model_2, criterion_2, optimizer_2,
            train_features, train_labels, batch_size)
        test_loss_sum, num_test_correct = _evaluate(
            my_model_2, criterion_2, test_features, test_labels)

        # In case of gradient explosion (NaN gradient), initialise the model
        # again and restart training.  The rebuild goes through the same
        # factory as the first build so the architecture stays the same.
        if last_loss_grad is not None and _is_nan(last_loss_grad[0]):
            epoch = 0
            my_model_2, optimizer_2, criterion_2 = _build_model_2()
            for _ in range(5):
                print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue

        _report_epoch(epoch+1, 25,
                      train_loss_sum/n_train, num_train_correct/n_train,
                      test_loss_sum/n_test, num_test_correct/n_test)
        epoch += 1

    _plot_classification(my_model_2, test_features, test_targets,
                         'Classification Performance of Model 2')
# Epoch loop of a training routine (the enclosing definition is not visible).
# For every epoch yielded by `progress_bar`:
#   * walk `x_train` / `y_train` in full batches of `batch_size` -- the
#     `offset + batch_size <= len(...)` condition skips a trailing partial
#     batch -- run `net` on each batch, add the batch loss divided by
#     `len(x_train) / batch_size` to `err`, back-propagate `loss.grad` and
#     call `optim.step(net)`;
#   * walk `x_val` / `y_val` the same way without a backward pass,
#     accumulating `val_err`;
#   * on even epochs publish the losses and the mean of `acc_val` on the
#     progress bar;
#   * append the mean of `acc_train` to `accuracies["train"]`.
# `acc_train` and `acc_val` are created before the epoch loop and never
# cleared here, so their means cover every epoch run so far.
# NOTE(review): the original line breaks and indentation are lost, so it is
# unclear whether the `acc_train.append` / `acc_val.append` calls sit inside
# or after their `while` loops -- confirm against the original file.
# NOTE(review): the `except RuntimeWarning` handler only prints; the
# statements that follow appear to reuse `pred` from an earlier batch (or an
# undefined name on the first batch) -- confirm the intended handling.
acc_val: list = [] for epoch in progress_bar: offset = 0 val_err = 0 err = 0 while offset + batch_size <= len(x_train): data = x_train[offset : offset + batch_size, :] label = y_train[offset : offset + batch_size, :] try: pred = net(data) except RuntimeWarning: print(f"Runtime warning on {offset}") err += loss(pred, label) / (len(x_train) / batch_size) g = net.backward(loss.grad) optim.step(net) offset += batch_size acc_train.append(accuracy_score(label.argmax(axis=1), pred.argmax(axis=1))) offset = 0 while offset + batch_size <= len(x_val): val_data = x_val[offset : offset + batch_size, :] val_label = y_val[offset : offset + batch_size] pred = net(val_data) val_err += loss(pred, val_label) / (len(x_val) / batch_size) offset += batch_size acc_val.append(accuracy_score(val_label.argmax(axis=1), pred.argmax(axis=1))) if (epoch) % 2 == 0: progress_bar.set_postfix( {"loss_train": err, "loss_val": val_err, "acc_val": np.mean(acc_val)} ) accuracies["train"].append(np.mean(acc_train))
max_iter = 10000

# batcher parameters
batch_size = 64

lenet = LeNet(layers)
lenet.save_model("../models/my_model.model")

#optimizer = SGDMomentum(lenet, **opt_params)
optimizer = SGD(lenet.parameters(), lr=0.1)

epochs = 10
# Ceiling division: number of mini-batches needed to cover the training set.
per_epoch = -(-xtrain.shape[0] // batch_size)
iter_cnt = 0

for epoch in range(epochs):
    for _ in tqdm(range(per_epoch)):
        optimizer.update_lr(iter_cnt)

        # Sample a mini-batch with replacement.
        rand_ix = np.random.randint(0, xtrain.shape[0], (batch_size,))
        batch_x = xtrain[rand_ix]
        batch_y = ytrain[rand_ix]

        # Flatten each sample to a vector before the forward pass.
        flat_x = batch_x.reshape((batch_size, -1))
        my_loss = lenet.forward(flat_x, batch_y)
        lenet.backward(my_loss)

        optimizer.step()
        optimizer.zero_grad()
        iter_cnt += 1

    print("Epoch {0} of {1} Done. Starting Testing".format(
        epoch + 1, epochs))
    start = time.time()
    test_loss = lenet.forward(xtest, ytest)
    elapsed = time.time() - start
    print("Testing Done. Took {0:.2f}s. Accuracy: {1:.4f}, Loss: {2:.2f}".format(
        elapsed, test_loss["acc"], test_loss["loss"]))

lenet.save_model("weights.npy")