def train_multi_layer_network(data):
    print()
    print("*****************start train multi layer network*****************")
    model = FullyConnectedNet([100, 100, 100, 100],
                              weight_scale=1e-2,
                              use_batchnorm=True)
    solver = Solver(model, data,
                    num_epochs=10,
                    batch_size=100,
                    update_rule="adam",
                    optim_config={"learning_rate": 0.001},
                    lr_decay=0.95,
                    verbose=True)
    solver.train()

    # Visualize training loss and train/val accuracy.
    plt.subplot(2, 1, 1)
    plt.title("Training loss")
    plt.plot(solver.loss_history, "o")
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    plt.plot([0.5] * len(solver.val_acc_history), "k--")
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()

    # Calling model.loss(X) without labels returns class scores.
    y_test_pred = np.argmax(model.loss(data["X_test"]), axis=1)
    y_val_pred = np.argmax(model.loss(data["X_val"]), axis=1)
    print("Validation set accuracy: ", (y_val_pred == data["y_val"]).mean())
    print("Test set accuracy: ", (y_test_pred == data["y_test"]).mean())
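# Usage sketch for the training functions in this file. Assumption: the
# standard cs231n helper get_CIFAR10_data lives in
# assignment2.cs231n.data_utils and returns a dict with X_train/y_train/
# X_val/y_val/X_test/y_test arrays; adjust the import path to your layout.
def demo_train():
    from assignment2.cs231n.data_utils import get_CIFAR10_data

    data = get_CIFAR10_data()
    for k, v in data.items():
        print("%s: " % k, v.shape)  # sanity-check the splits before training
    train_multi_layer_network(data)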
def visualize_dropout(data):
    # Train two identical nets, one with dropout and one without
    np.random.seed(231)
    num_train = 500
    small_data = {
        "X_train": data["X_train"][:num_train],
        "y_train": data["y_train"][:num_train],
        "X_val": data["X_val"],
        "y_val": data["y_val"],
    }
    solvers = {}
    dropout_choices = [0, 0.75]
    for dropout in dropout_choices:
        model = FullyConnectedNet([100], dropout=dropout)
        print(dropout)
        solver = Solver(model, small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule="adam",
                        optim_config={"learning_rate": 5e-4},
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of the two models
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history, "o",
                 label="%.2f dropout" % dropout)
    plt.title("Train accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(ncol=2, loc="lower right")

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history, "o",
                 label="%.2f dropout" % dropout)
    plt.title("Val accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(ncol=2, loc="lower right")

    plt.gcf().set_size_inches(15, 15)
    plt.show()
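# For reference, a sketch of the inverted-dropout forward pass that the
# dropout option above relies on. Assumption: p is the keep probability
# (cs231n versions differ on whether the constructor argument means keep
# or drop probability, so check your FullyConnectedNet).
def dropout_forward_sketch(x, p, mode):
    if mode == "train":
        # Zero each unit with probability 1 - p; dividing by p keeps the
        # expected activation unchanged, so test time needs no rescaling.
        mask = (np.random.rand(*x.shape) < p) / p
        return x * mask, mask
    return x, None  # test mode: identity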
def test_overfit_small_batch(self):
    num_train = 50
    data = self.data
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    learning_rate = 1e-2
    weight_scale = 6e-2
    model = FullyConnectedNet([100, 100, 100, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64)
    solver = Solver(model, small_data,
                    print_every=10,
                    num_epochs=20,
                    batch_size=25,
                    update_rule='sgd',
                    optim_config={'learning_rate': learning_rate})
    solver.train()

    plt.plot(solver.loss_history, 'o')
    plt.title('Training loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.show()
    return
def train_two_layer_network(data):
    print()
    print("*****************start train two layer network*****************")
    model = TwoLayerNet()
    solver = Solver(model, data,
                    update_rule="sgd",
                    optim_config={"learning_rate": 1e-3},
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=100)
    solver.train()

    plt.subplot(2, 1, 1)
    plt.title("Training loss")
    plt.plot(solver.loss_history, "o")
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    plt.plot([0.5] * len(solver.val_acc_history), "k--")
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()
def train_net(self):
    data = self.data
    # num_train spans the whole training set here, so small_data is in fact
    # the full data; pass it to the Solver so the slicing actually applies.
    num_train = data['X_train'].shape[0]
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)
    solver = Solver(model, small_data,
                    num_epochs=1,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True,
                    print_every=20)
    solver.train()

    # Visualize the learned first-layer convolutional filters.
    from assignment2.cs231n.vis_utils import visualize_grid
    grid = visualize_grid(model.params['W1'].transpose(0, 2, 3, 1))
    plt.imshow(grid.astype('uint8'))
    plt.axis('off')
    plt.gcf().set_size_inches(5, 5)
    plt.show()
    return
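# A minimal numpy-only stand-in for vis_utils.visualize_grid, sketched here
# in case the cs231n helper is unavailable; it tiles an (N, H, W, C) array
# of filters into one padded image, rescaling each filter to [0, 255].
def simple_filter_grid(Xs, padding=1):
    N, H, W, C = Xs.shape
    grid_size = int(np.ceil(np.sqrt(N)))
    grid = np.zeros((grid_size * (H + padding) - padding,
                     grid_size * (W + padding) - padding, C))
    for n in range(N):
        row, col = divmod(n, grid_size)
        y0, x0 = row * (H + padding), col * (W + padding)
        img = Xs[n]
        low, high = img.min(), img.max()
        # Per-filter rescaling makes low-contrast filters visible.
        grid[y0:y0 + H, x0:x0 + W] = 255.0 * (img - low) / (high - low + 1e-8)
    return grid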
def train_three_layer_network(data):
    print()
    print("*****************start train three layer network*****************")
    model = ThreeLayerConvNet(weight_scale=0.001,
                              hidden_dim=500,
                              reg=0.001,
                              filter_size=3,
                              num_filters=4)
    solver = Solver(model, data,
                    num_epochs=5,
                    batch_size=50,
                    update_rule="adam",
                    optim_config={"learning_rate": 1e-3},
                    verbose=True,
                    print_every=20)
    solver.train()

    plt.subplot(2, 1, 1)
    plt.title("Training loss")
    plt.plot(solver.loss_history, "o")
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    plt.plot([0.5] * len(solver.val_acc_history), "k--")
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()

    y_test_pred = np.argmax(model.loss(data["X_test"]), axis=1)
    y_val_pred = np.argmax(model.loss(data["X_val"]), axis=1)
    print("Validation set accuracy: ", (y_val_pred == data["y_val"]).mean())
    print("Test set accuracy: ", (y_test_pred == data["y_test"]).mean())
def compare_rmsprop_adam(self):
    num_train = 4000
    data = self.data
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    learning_rates = {'rmsprop': 1e-4, 'adam': 1e-3}
    for update_rule in ['adam', 'rmsprop']:
        print('running with', update_rule)
        model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)
        solver = Solver(model, small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': learning_rates[update_rule]},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()
        print()

    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')
    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')
    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)
        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)
        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
    return
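# For context, a sketch of the Adam update being compared above, written in
# the (w, dw, config) -> (next_w, config) shape that cs231n's Solver expects
# of an update rule. The hyperparameter names (beta1, beta2, epsilon, t, m, v)
# follow the common convention and are assumptions about your optim.py.
def adam_sketch(w, dw, config=None):
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)    # decay rate for the gradient average
    config.setdefault('beta2', 0.999)  # decay rate for the squared-gradient average
    config.setdefault('epsilon', 1e-8)
    config.setdefault('m', np.zeros_like(w))
    config.setdefault('v', np.zeros_like(w))
    config.setdefault('t', 0)

    config['t'] += 1
    config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dw
    config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * dw ** 2
    # Bias correction compensates for the zero-initialized moments.
    mt = config['m'] / (1 - config['beta1'] ** config['t'])
    vt = config['v'] / (1 - config['beta2'] ** config['t'])
    next_w = w - config['learning_rate'] * mt / (np.sqrt(vt) + config['epsilon'])
    return next_w, config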
def check_best_model(self):
    data = self.data
    # num_train = 400
    num_train = data['X_train'].shape[0]
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    dropout = 0.1
    model = FullyConnectedNet([100, 100, 100],
                              weight_scale=5e-2,
                              use_batchnorm=True,
                              dropout=dropout)
    update_rule = 'adam'
    learning_rate = 1e-3
    solver = Solver(model, small_data,
                    num_epochs=5,
                    batch_size=100,
                    update_rule=update_rule,
                    optim_config={'learning_rate': learning_rate},
                    verbose=True)
    solver.train()

    test_acc = solver.check_accuracy(self.X_test, self.y_test)
    print("test accuracy: {}".format(test_acc))

    # visualization
    plt.subplot(2, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')
    plt.plot(solver.loss_history, 'o', label='training loss')

    plt.subplot(2, 1, 2)
    plt.title('Training/validation accuracy')
    plt.xlabel('Epoch')
    plt.plot(solver.train_acc_history, '-o', label='train accuracy')
    plt.plot(solver.val_acc_history, '-o', label='validation accuracy')

    for i in [1, 2]:
        plt.subplot(2, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    # plt.show()
    return
def test_solver(self):
    # X_train, y_train, X_val, y_val, _, _ = self.get_CIFAR10_data()
    # data = {
    #     'X_train': X_train,
    #     'y_train': y_train,
    #     'X_val': X_val,
    #     'y_val': y_val}
    data = self.data
    input_dim = 3 * 32 * 32
    hidden_dim = 100
    num_classes = 10
    weight_scale = 1e-3
    reg = 0.0
    model = TwoLayerNet(input_dim=input_dim,
                        hidden_dim=hidden_dim,
                        num_classes=num_classes,
                        weight_scale=weight_scale,
                        reg=reg)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={'learning_rate': 1e-3},
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=100)
    solver.train()

    # Run this cell to visualize training loss and train / val accuracy
    plt.subplot(2, 1, 1)
    plt.title('Training loss')
    plt.plot(solver.loss_history, 'o')
    plt.xlabel('Iteration')

    plt.subplot(2, 1, 2)
    plt.title('Accuracy')
    plt.plot(solver.train_acc_history, '-o', label='train')
    plt.plot(solver.val_acc_history, '-o', label='val')
    plt.plot([0.5] * len(solver.val_acc_history), 'k--')
    plt.xlabel('Epoch')
    plt.legend(loc='lower right')
    plt.gcf().set_size_inches(15, 12)
    plt.show()
    return
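# The 'sgd' rule used by test_solver is plain gradient descent; a sketch in
# the same (w, dw, config) -> (next_w, config) shape as adam_sketch above
# (the exact optim.py signature is an assumption). Note that the Solver
# multiplies learning_rate by lr_decay at the end of every epoch, which is
# where the decaying schedule above comes from.
def sgd_sketch(w, dw, config=None):
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    next_w = w - config['learning_rate'] * dw  # step against the gradient
    return next_w, config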
def overfit_small_data(self):
    num_train = 100
    data = self.data
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    model = ThreeLayerConvNet(weight_scale=1e-2)
    solver = Solver(model, small_data,
                    num_epochs=10,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True,
                    print_every=1)
    solver.train()
    return
def weight_initialization_batch_norm(self):
    # Try training a very deep net with batchnorm
    data = self.data
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    bn_solvers = {}
    solvers = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     use_batchnorm=True)
        model = FullyConnectedNet(hidden_dims,
                                  weight_scale=weight_scale,
                                  use_batchnorm=False)
        bn_solver = Solver(bn_model, small_data,
                           num_epochs=10,
                           batch_size=50,
                           update_rule='adam',
                           optim_config={'learning_rate': 1e-3},
                           verbose=False,
                           print_every=200)
        bn_solver.train()
        bn_solvers[weight_scale] = bn_solver

        solver = Solver(model, small_data,
                        num_epochs=10,
                        batch_size=50,
                        update_rule='adam',
                        optim_config={'learning_rate': 1e-3},
                        verbose=False,
                        print_every=200)
        solver.train()
        solvers[weight_scale] = solver

    # Plot results of weight scale experiment
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []
    for ws in weight_scales:
        best_train_accs.append(max(solvers[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers[ws].train_acc_history))
        best_val_accs.append(max(solvers[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers[ws].val_acc_history))
        final_train_loss.append(np.mean(solvers[ws].loss_history[-100:]))
        bn_final_train_loss.append(np.mean(bn_solvers[ws].loss_history[-100:]))

    plt.subplot(3, 1, 1)
    plt.title('Best val accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best val accuracy')
    plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    plt.title('Best train accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best training accuracy')
    plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_train_accs, '-o', label='batchnorm')
    plt.legend()

    plt.subplot(3, 1, 3)
    plt.title('Final training loss vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Final training loss')
    plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_final_train_loss, '-o', label='batchnorm')
    plt.legend()

    plt.gcf().set_size_inches(10, 15)
    plt.show()
    return
def batch_norm_with_deep(self):
    # Try training a very deep net with batchnorm
    data = self.data
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    weight_scale = 2e-2
    bn_model = FullyConnectedNet(hidden_dims,
                                 weight_scale=weight_scale,
                                 use_batchnorm=True)
    model = FullyConnectedNet(hidden_dims,
                              weight_scale=weight_scale,
                              use_batchnorm=False)

    bn_solver = Solver(bn_model, small_data,
                       num_epochs=10,
                       batch_size=50,
                       update_rule='adam',
                       optim_config={'learning_rate': 1e-3},
                       verbose=True,
                       print_every=200)
    bn_solver.train()

    solver = Solver(model, small_data,
                    num_epochs=10,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True,
                    print_every=200)
    solver.train()

    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')
    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')
    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 1)
    plt.plot(solver.loss_history, 'o', label='baseline')
    plt.plot(bn_solver.loss_history, 'o', label='batchnorm')
    plt.subplot(3, 1, 2)
    plt.plot(solver.train_acc_history, '-o', label='baseline')
    plt.plot(bn_solver.train_acc_history, '-o', label='batchnorm')
    plt.subplot(3, 1, 3)
    plt.plot(solver.val_acc_history, '-o', label='baseline')
    plt.plot(bn_solver.val_acc_history, '-o', label='batchnorm')

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
    return
from assignment2.cs231n.solver import Solver
import h5py

# Load image data from HDF5.
h5f = h5py.File('img_data.h5', 'r')
X = h5f['dataset_1'][:]
h5f.close()
print(X.shape)

# load labels
# y = ??

data = {
    'X_train': X[8000:35117, :],
    'y_train': y[8000:35117],
    'X_val': X[3000:8000, :],
    'y_val': y[3000:8000]
}
num_inputs = 35126
input_dim = (3, 256, 256)
reg = 0.1
num_classes = 5
model = ThreeLayerConvNet(num_filters=5,
                          filter_size=5,
                          input_dim=input_dim,
                          hidden_dim=7,
                          num_classes=num_classes,
                          dtype=np.float64,
                          reg=reg)
solver = Solver(model, data,
                num_epochs=1,
                batch_size=5000,
                update_rule='adam',
                optim_config={'learning_rate': 1e-3},
                verbose=True,
                print_every=20)
solver.train()
def visualize_batch_normalization(data):
    print()
    print("*****************start visualizing batch normalization*****************")
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]
    num_train = 1000
    small_data = {
        "X_train": data["X_train"][:num_train],
        "y_train": data["y_train"][:num_train],
        "X_val": data["X_val"],
        "y_val": data["y_val"],
    }
    bn_solvers = {}
    solvers = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print("Running weight scale %d / %d" % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     use_batchnorm=True)
        model = FullyConnectedNet(hidden_dims,
                                  weight_scale=weight_scale,
                                  use_batchnorm=False)
        bn_solver = Solver(bn_model, small_data,
                           num_epochs=10,
                           batch_size=50,
                           update_rule="adam",
                           optim_config={"learning_rate": 1e-3},
                           verbose=False,
                           print_every=200)
        bn_solver.train()
        bn_solvers[weight_scale] = bn_solver

        solver = Solver(model, small_data,
                        num_epochs=10,
                        batch_size=50,
                        update_rule="adam",
                        optim_config={"learning_rate": 1e-3},
                        verbose=False,
                        print_every=200)
        solver.train()
        solvers[weight_scale] = solver

    # Plot results of weight scale experiment
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []
    for ws in weight_scales:
        best_train_accs.append(max(solvers[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers[ws].train_acc_history))
        best_val_accs.append(max(solvers[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers[ws].val_acc_history))
        final_train_loss.append(np.mean(solvers[ws].loss_history[-100:]))
        bn_final_train_loss.append(np.mean(bn_solvers[ws].loss_history[-100:]))

    plt.subplot(3, 1, 1)
    plt.title("Best val accuracy vs weight initialization scale")
    plt.xlabel("Weight initialization scale")
    plt.ylabel("Best val accuracy")
    plt.semilogx(weight_scales, best_val_accs, "-o", label="baseline")
    plt.semilogx(weight_scales, bn_best_val_accs, "-o", label="batchnorm")
    plt.legend(ncol=2, loc="lower right")

    plt.subplot(3, 1, 2)
    plt.title("Best train accuracy vs weight initialization scale")
    plt.xlabel("Weight initialization scale")
    plt.ylabel("Best training accuracy")
    plt.semilogx(weight_scales, best_train_accs, "-o", label="baseline")
    plt.semilogx(weight_scales, bn_best_train_accs, "-o", label="batchnorm")
    plt.legend()

    plt.subplot(3, 1, 3)
    plt.title("Final training loss vs weight initialization scale")
    plt.xlabel("Weight initialization scale")
    plt.ylabel("Final training loss")
    plt.semilogx(weight_scales, final_train_loss, "-o", label="baseline")
    plt.semilogx(weight_scales, bn_final_train_loss, "-o", label="batchnorm")
    plt.legend()
    plt.gca().set_ylim(1.0, 3.5)

    plt.gcf().set_size_inches(10, 15)
    plt.show()
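# A sketch of the training-time batchnorm forward pass that makes the deep
# nets above insensitive to weight_scale: each feature is standardized with
# batch statistics, then rescaled by learned gamma and shifted by beta.
# Running mean/variance tracking for test time is omitted for brevity.
def batchnorm_forward_sketch(x, gamma, beta, eps=1e-5):
    mu = x.mean(axis=0)                    # per-feature batch mean
    var = x.var(axis=0)                    # per-feature batch variance
    x_hat = (x - mu) / np.sqrt(var + eps)  # zero mean, unit variance
    return gamma * x_hat + beta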
def experiment_regularization(self):
    # Train two identical nets, one with dropout and one without
    data = self.data
    num_train = 500
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    dropout_choices = [0, 0.75]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], dropout=dropout)
        print(dropout)
        solver = Solver(model, small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule='adam',
                        optim_config={'learning_rate': 5e-4},
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of the two models
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history, 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history, 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 15)
    plt.show()
    return