import sys
import time

import numpy as np
import theano
import theano.tensor as T
import lasagne

# fplog, save_to_results_file, var_string, results_path, build_custom_mlp and
# iterate_minibatches are assumed to be defined elsewhere in this module
# (illustrative sketches of the last two appear after this function).


def train_simple_model(data=None, n_values=None, num_epochs=5, depth=10,
                       width=256, drop_in=0., drop_hid=0., batch_size=32,
                       learning_rate=0.01, valid_freq=100,
                       save_path='../results/', options_dict=None,
                       reload_model=None, num_targets=3):
    # TODO: eliminate data from this function. Instead refer to a filename
    #       for data.
    # TODO: Rewrite iterate_minibatches to iterate through a file.
    #       Either use numpy's mmap or csv.
    # TODO: load the first line of said file to get layer_shape.
    train, valid, test = data

    # X width, so the model knows how wide to make the first layer
    layer_shape = train[0].shape[1]

    # Prepare Theano variables for inputs and targets
    input_var = T.matrix('inputs', dtype='float32')
    # CG: num_targets is 1 or 3. int32 is fine because it only goes into
    # the output.
    target_var = []
    for i in range(num_targets):
        target_var.append(T.vector('target_%s' % i, dtype='int32'))

    # Create the neural network model.
    # CG: ignore mlp, maybe remove this whole switch.
    # CG 1: build network
    start_time = time.time()
    fplog("Building model and compiling functions...")
    network = build_custom_mlp(
        input_var, depth, width, drop_in, drop_hid, layer_shape,
        [[n_values['y_1']], [n_values['y_2']], [n_values['y_3']]])

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize: the summed cross-entropy over all targets.
    # CG 2: Make prediction
    prediction = []
    for n in network:
        prediction.append(lasagne.layers.get_output(n))
    loss = 0
    for p, t in zip(prediction, target_var):
        loss += lasagne.objectives.categorical_crossentropy(p, t)
    loss = loss.mean()

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use adadelta. The first
    # output layer contributes the full shared stack; every other head only
    # contributes its own output weights and biases (the last W and b).
    params = []
    for i, n in enumerate(network):
        if i == 0:
            p = lasagne.layers.get_all_params(n, trainable=True)
        else:
            p = lasagne.layers.get_all_params(n, trainable=True)[-2:]
        params += p
    updates = lasagne.updates.adadelta(loss, params,
                                       learning_rate=learning_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = []
    test_loss = []
    test_acc = []
    preds = []
    for n, t in zip(network, target_var):
        p = lasagne.layers.get_output(n, deterministic=True)
        test_prediction.append(p)
        l = lasagne.objectives.categorical_crossentropy(p, t)
        test_loss.append(l.mean())
        # As a bonus, also create an expression for the classification
        # accuracy:
        acc = T.mean(T.eq(T.argmax(p, axis=1), t),
                     dtype=theano.config.floatX)
        test_acc.append(acc)
        preds.append(theano.function([input_var], p))

    train_fn = theano.function([input_var] + target_var, loss,
                               updates=updates)
    val_fn = []
    for t, l, a in zip(target_var, test_loss, test_acc):
        val_fn.append(theano.function([input_var, t], [l, a]))

    history_train_errs = []
    history_valid_errs = []

    # Finally, launch the training loop.
    fplog("Starting training...")
    train_start = time.time()  # overall timer; start_time is reset per epoch
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(train, batch_size, shuffle=False):
            inputs, targets = batch
            train_err += train_fn(inputs, *targets)
            train_batches += 1
            if train_batches % valid_freq == 0:
                err = []
                acc = []
                for i in range(num_targets):
                    e, a = val_fn[i](inputs, targets[i])
                    err.append(e)
                    acc.append(a)
                history_train_errs.append([err, acc])
                save_to_results_file(var_string, results_path)
                # save parameter values as numpy arrays, matching the loading
                # snippet at the end of this function
                np.savez(save_path,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         options_dict=options_dict,
                         *lasagne.layers.get_all_param_values(network))

        # And a full pass over the validation data:
        val_err = np.zeros(num_targets)
        val_acc = np.zeros(num_targets)
        val_batches = 0
        for batch in iterate_minibatches(valid, batch_size, shuffle=False):
            inputs, targets = batch
            # calculate error and accuracy separately for each target
            for i in range(num_targets):
                e, a = val_fn[i](inputs, targets[i])
                val_err[i] += e
                val_acc[i] += a
            val_batches += 1

        params = lasagne.layers.get_all_params(network)
        if train_batches % valid_freq == 0:
            err = []
            acc = []
            for i in range(num_targets):
                e, a = val_fn[i](inputs, targets[i])
                err.append(e)
                acc.append(a)
            history_valid_errs.append([err, acc])
            fplog('saving...')
            np.savez(save_path,
                     history_train_errs=history_train_errs,
                     history_valid_errs=history_valid_errs,
                     options_dict=options_dict,
                     *lasagne.layers.get_all_param_values(network))

        # Then we fplog the results for this epoch:
        fplog("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        max_train = np.max(train_err / train_batches)
        min_train = np.min(train_err / train_batches)
        max_val = np.max(val_err / val_batches)
        min_val = np.min(val_err / val_batches)
        avg_val_acc = np.mean(val_acc / val_batches)
        fplog("  train_batches: %i" % train_batches)
        fplog("  val_batches: %i" % val_batches)
        fplog("  max training loss:\t\t{:.6f}".format(max_train))
        fplog("  min training loss:\t\t{:.6f}".format(min_train))
        fplog("  max validation loss:\t\t{:.6f}".format(max_val))
        fplog("  min validation loss:\t\t{:.6f}".format(min_val))
        fplog("  avg validation accuracy:\t\t{:.2f} %".format(
            avg_val_acc * 100))

    end_time = time.time()
    fplog("The code ran for %d epochs, with %f sec/epoch" %
          (num_epochs, (end_time - train_start) / (1. * num_epochs)))

    # After training, we compute and fplog the test error:
    test_err = np.zeros(num_targets)
    test_acc = np.zeros(num_targets)
    test_batches = 0
    test_preds = []
    y_test_list = test[1:]  # omit X_test; only want y1, y2, y3
    for i in range(num_targets):
        # start empty; predictions are appended batch by batch below
        test_preds.append(np.zeros(0, dtype=y_test_list[i].dtype))
    for batch in iterate_minibatches(test, batch_size, shuffle=False):
        inputs, targets = batch
        for i in range(num_targets):
            e, a = val_fn[i](inputs, targets[i])
            pred_prob = preds[i](inputs)
            pred = pred_prob.argmax(axis=1)
            test_preds[i] = np.append(test_preds[i], pred)
            test_err[i] += e
            test_acc[i] += a
        test_batches += 1

    test_acc_pct = []
    max_err = np.max(test_err / test_batches)
    min_err = np.min(test_err / test_batches)
    avg_acc = np.mean(test_acc / test_batches)
    max_acc = np.max(test_acc / test_batches)
    min_acc = np.min(test_acc / test_batches)
    fplog("Final results:")
    fplog("  max test loss:\t\t\t{:.6f}".format(max_err))
    fplog("  min test loss:\t\t\t{:.6f}".format(min_err))
    # calculate test accuracy per target
    for i in range(len(test_acc)):
        test_acc_pct.append(test_acc[i] / test_batches)
        fplog("  test accuracy " + str(i) +
              ":\t\t{:.2f} %".format(test_acc_pct[i] * 100))
    fplog("  mean test accuracy:\t\t{:.2f} %".format(avg_acc * 100))

    params = lasagne.layers.get_all_params(network)
    # Optionally, you could now dump the network weights to a file like this:
    np.savez(save_path,
             train_err=train_err / train_batches,
             valid_err=val_err / val_batches,
             test_err=test_err / test_batches,
             test_acc=test_acc_pct,
             history_train_errs=history_train_errs,
             history_valid_errs=history_valid_errs,
             predictions=test_preds,
             options_dict=options_dict,
             *lasagne.layers.get_all_param_values(network))
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    #     lasagne.layers.set_all_param_values(network, param_values)
    return params, test_preds
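# build_custom_mlp is not defined in this section. The sketch below is a
# hypothetical reconstruction inferred from how it is called above: a shared
# stack of hidden layers with one softmax output layer per target, returned
# as a list (which is why get_all_params(head)[-2:] above picks out each
# extra head's own W and b). The real implementation may differ.
def build_custom_mlp_sketch(input_var, depth, width, drop_in, drop_hid,
                            layer_shape, output_shapes):
    net = lasagne.layers.InputLayer(shape=(None, layer_shape),
                                    input_var=input_var)
    if drop_in:
        net = lasagne.layers.dropout(net, p=drop_in)
    for _ in range(depth):
        net = lasagne.layers.DenseLayer(
            net, width, nonlinearity=lasagne.nonlinearities.rectify)
        if drop_hid:
            net = lasagne.layers.dropout(net, p=drop_hid)
    # one softmax head per target, all sharing the hidden stack; each entry
    # of output_shapes is a one-element list holding that target's class count
    return [lasagne.layers.DenseLayer(
                net, shape[0], nonlinearity=lasagne.nonlinearities.softmax)
            for shape in output_shapes]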
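# iterate_minibatches is likewise defined elsewhere. A minimal sketch,
# assuming each split is a tuple (X, y_1, ..., y_k) of aligned numpy arrays,
# matching how batches are unpacked in the loops above:
def iterate_minibatches_sketch(split, batch_size, shuffle=False):
    X, ys = split[0], split[1:]
    indices = np.arange(len(X))
    if shuffle:
        np.random.shuffle(indices)
    # drop the final ragged batch; the training loop assumes full batches
    for start in range(0, len(X) - batch_size + 1, batch_size):
        excerpt = indices[start:start + batch_size]
        yield X[excerpt], [y[excerpt] for y in ys]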
# data, n_values and main are assumed to be defined elsewhere in the module.
params, preds = train_simple_model(
    data=data, n_values=n_values, num_epochs=5, depth=10, width=256,
    batch_size=32, learning_rate=0.01, valid_freq=100,
    save_path='../results/simple_mlp/', options_dict=None,
    reload_model=None, num_targets=3)


if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        fplog("Trains a neural network on MNIST using Lasagne.")
        fplog("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0])
        fplog()
        fplog("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),")
        fplog("       'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP")
        fplog("       with DEPTH hidden layers of WIDTH units, DROP_IN")
        fplog("       input dropout and DROP_HID hidden dropout,")
        fplog("       'cnn' for a simple Convolutional Neural Network (CNN).")
        fplog("EPOCHS: number of training epochs to perform (default: 500)")
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['model'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)
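# One possible shape for the file-based iteration TODO at the top of
# train_simple_model: memory-map the feature matrix so minibatches are read
# from disk on demand. The file layout (raw float32, row-major, with the
# shape known up front) is an assumption for illustration only.
def iterate_minibatches_from_file_sketch(x_path, y_arrays, n_rows, n_cols,
                                         batch_size):
    X = np.memmap(x_path, dtype='float32', mode='r', shape=(n_rows, n_cols))
    for start in range(0, n_rows - batch_size + 1, batch_size):
        stop = start + batch_size
        # copy the slice into RAM before handing it to the Theano function
        yield np.asarray(X[start:stop]), [y[start:stop] for y in y_arrays]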