def runModel_i(input_vector):
    """Train a two-layer network for one hyperparameter setting.

    ``input_vector`` is unpacked into exactly four values, in this order:
    hidden layer size, learning rate, number of epochs and regularization
    strength.  The hidden size and the epoch count are passed through
    ``int()`` before use, so they may be supplied as floats.

    The training/validation data (``X_train``, ``y_train``, ``X_val``,
    ``y_val``) and the helpers ``init_two_layer_model``, ``two_layer_net``
    and ``ClassifierTrainer`` are not parameters; they are looked up in the
    enclosing module scope.

    Returns the 4-tuple obtained from ``ClassifierTrainer.train``:
    ``(best_model, loss_history, train_acc_history, val_acc_history)``.
    The validation accuracy history is also printed.
    """
    hls, lr, ne, rs = input_vector

    # input size, hidden size, number of classes
    model = init_two_layer_model(32 * 32 * 3, int(hls), 10)
    trainer = ClassifierTrainer()

    # train() supplies all four results directly.
    best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
        X_train, y_train, X_val, y_val,
        model, two_layer_net,
        num_epochs=int(ne), reg=rs,
        momentum=0.9, learning_rate_decay=0.95,
        learning_rate=lr, verbose=True)

    print("Inside model I")
    print(val_acc_history)
    return best_model, loss_history, train_acc_history, val_acc_history
# Candidate values for an exhaustive grid search over the two-layer network's
# hyperparameters: every combination of the four lists below is trained once.
hidden_size = [600, 800, 1000, 1200, 1400, 2000]
training_epochs = [45]
regs = [1e-2, 1e-3, 5e-3, 1e-4, 5e-4, 2]
learning_rate = [1e-3, 1e-4, 5e-5, 1e-6, 5e-6]

# Best-result bookkeeping.  These are only initialised in this excerpt;
# presumably they are updated by code further down -- TODO confirm.
best = 0
bmodel = None
best_para = {}

for hs in hidden_size:
    for ep in training_epochs:
        for reg in regs:
            for lr in learning_rate:
                # One pre-formatted string instead of the Python 2 print
                # statement: valid syntax on Python 2 and 3 alike, and the
                # emitted text (including the doubled spaces) is unchanged.
                print("test on param hs : %s  ep:  %s  reg:  %s  lr: %s"
                      % (hs, ep, reg, lr))

                # input size, hidden size, number of classes
                model = init_two_layer_model(32 * 32 * 3, hs, 10)
                trainer = ClassifierTrainer()
                best_model, loss_history, train_acc, val_acc = trainer.train(
                    X_train, y_train, X_val, y_val,
                    model, two_layer_net,
                    num_epochs=ep, reg=reg,
                    momentum=0.9, learning_rate_decay=0.95,
                    learning_rate=lr, verbose=True)

                # Select the upper panel of a 2x1 figure grid.
                plt.subplot(2, 1, 1)
print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name]))) ############################################################################### # # Train the network # To train the network we will use SGD with Momentum. #Open the file `classifier_trainer.py` and familiarize yourself with the `ClassifierTrainer` #class. It performs optimization given an arbitrary cost function data, and model. #By default it uses vanilla SGD, which you need to implement. #First, run the optimization below using Vanilla SGD: from cs231n.classifier_trainer import ClassifierTrainer model = init_toy_model() trainer = ClassifierTrainer() # call the trainer to optimize the loss # Notice that we're using sample_batches=False, so we're performing Gradient Descent (no sampled batches of data) best_model, loss_history, _, _ = trainer.train(X, y, X, y, model, two_layer_net, reg=0.001, learning_rate=1e-1, momentum=0.0, learning_rate_decay=1, update='sgd', sample_batches=False, num_epochs=100,