def __init__(self, **kwargs): """Initialization. Args: debug (kwargs): boolean indicating debug mode """ self.debug = Debug("debug" in kwargs and kwargs["debug"]) self.graph = []
import argparse

import numpy as np

# Project-local modules are assumed to be in scope here: Debug, timer,
# CifarDataset, LinearLayer, ReluLayer, SoftMaxLayer, CrossEntropyLoss,
# Objective, ErrorRate, MomentumSolver, and Monitor all come from this
# repository's own code.


def main():
    # set random seed
    np.random.seed(13141)

    # debug mode
    debug_mode = False
    dbg = Debug(debug_mode)

    # parse arguments
    parser = argparse.ArgumentParser(
        description='Train and test neural network on cifar dataset.')
    parser.add_argument('experiment_name',
                        help='used for outputting log files')
    parser.add_argument('--num_hidden_units', type=int,
                        help='number of hidden units')
    parser.add_argument('--learning_rate', type=float,
                        help='learning rate for solver')
    parser.add_argument('--momentum_mu', type=float,
                        help='mu for momentum solver')
    parser.add_argument('--mini_batch_size', type=int,
                        help='mini batch size')
    parser.add_argument('--num_epoch', type=int,
                        help='number of epochs')
    args = parser.parse_args()

    # experiment name
    experiment_name = args.experiment_name
    iter_log_file = "logs/{0}_iter_log.txt".format(experiment_name)
    epoch_log_file = "logs/{0}_epoch_log.txt".format(experiment_name)

    # load data
    print("Loading dataset...")
    timer.begin("dataset")
    DATASET_PATH = 'cifar-2class-py2/cifar_2class_py2.p'
    data = CifarDataset()
    data.load(DATASET_PATH)
    print("Loaded dataset in {0:.2f}s.".format(timer.getElapsed("dataset")))

    # get data stats
    num_training = data.get_num_train()
    num_test = data.get_num_test()
    input_dim = data.get_data_dim()

    # hyperparameters (fall back to defaults when a flag is not given)
    num_hidden_units = (50 if args.num_hidden_units is None
                        else args.num_hidden_units)
    learning_rate = 0.01 if args.learning_rate is None else args.learning_rate
    momentum_mu = 0.6 if args.momentum_mu is None else args.momentum_mu
    mini_batch_size = (64 if args.mini_batch_size is None
                       else args.mini_batch_size)
    num_epoch = ((500 if not debug_mode else 1)
                 if args.num_epoch is None else args.num_epoch)

    # print hyperparameters
    print("num_hidden_units: {0}".format(num_hidden_units))
    print("learning_rate: {0}".format(learning_rate))
    print("momentum_mu: {0}".format(momentum_mu))
    print("mini_batch_size: {0}".format(mini_batch_size))
    print("num_epoch: {0}".format(num_epoch))

    # network: linear -> relu -> linear -> softmax
    net = Sequential(debug=debug_mode)
    net.add(LinearLayer(input_dim, num_hidden_units))
    net.add(ReluLayer())
    net.add(LinearLayer(num_hidden_units, 2))
    net.add(SoftMaxLayer())
    print("{0}\n".format(net))

    # loss
    loss = CrossEntropyLoss()

    # error metrics
    training_objective = Objective(loss)
    test_objective = Objective(loss)
    errorRate = ErrorRate()
    print("Loss function: {0}\n".format(loss))

    # solver
    solver = MomentumSolver(lr=learning_rate, mu=momentum_mu)

    # training loop
    monitor = Monitor()
    monitor.createSession(iter_log_file, epoch_log_file)
    cum_iter = 0
    for epoch in range(num_epoch):
        print("Training epoch {0}...".format(epoch))
        timer.begin("epoch")

        # training
        for iter, batch in enumerate(data.get_train_batches(mini_batch_size)):
            if iter > 1 and debug_mode:
                break
            timer.begin("iter")

            # get batch
            (x, target) = batch
            batch_size = x.shape[2]

            # forward
            z = net.forward(x)
            dbg.disp("\toutput: {0}".format(z))
            dbg.disp("\toutput shape: {0}".format(z.shape))

            # loss (computed here only for debug display)
            if debug_mode:
                l = loss.forward(z, target)
                dbg.disp("\tloss: {0}".format(l))
                dbg.disp("\tloss shape: {0}".format(l.shape))

            # backward through the loss
            gradients = loss.backward(z, target)
            dbg.disp("\tgradients: {0}".format(gradients))
            dbg.disp("\tgradients shape: {0}".format(gradients.shape))

            # backward through the network
            grad_x = net.backward(x, gradients)
            dbg.disp("\tgrad_x: {0}".format(grad_x))
            dbg.disp("\tgrad_x shape: {0}".format(grad_x.shape))

            # update parameters
            net.updateParams(solver)

            # metrics and timing
            loss_avg = training_objective.compute(z, target)
            elapsed = timer.getElapsed("iter")

            # logging
            print("\t[iter {0}]\tloss: {1}\telapsed: {2}".format(
                iter, loss_avg, elapsed))
            monitor.recordIteration(cum_iter, loss_avg, elapsed)
            cum_iter += 1

        # evaluation on test set
        target = data.get_test_labels()
        x = data.get_test_data()
        output = net.forward(x)
        loss_avg_test = test_objective.compute(output, target)
        error_rate_test = errorRate.compute(output, target)

        # evaluation on training set
        target = data.get_train_labels()
        x = data.get_train_data()
        output = net.forward(x)
        loss_avg_train = training_objective.compute(output, target)
        error_rate_train = errorRate.compute(output, target)

        # timing
        elapsed = timer.getElapsed("epoch")

        # logging
        print("End of epoch:\ttest objective: {0}\ttrain objective: {1}".format(
            loss_avg_test, loss_avg_train))
        print("\t\ttest error rate: {0}\ttrain error rate: {1}".format(
            error_rate_test, error_rate_train))
        print("Finished epoch {1} in {0:.2f}s.\n".format(elapsed, epoch))
        monitor.recordEpoch(epoch, loss_avg_train, loss_avg_test,
                            error_rate_train, error_rate_test, elapsed)

    monitor.finishSession()
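# A hypothetical command-line invocation of main() (the train.py filename and
# the flag values are illustrative, not from the source; only the positional
# experiment_name is required, and every flag falls back to the defaults set
# above):
#
#   python train.py my_experiment --num_hidden_units 100 \
#       --learning_rate 0.005 --momentum_mu 0.9 \
#       --mini_batch_size 32 --num_epoch 100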
class Sequential: """Sequential is a type of network container that organizes modules in a sequential network. Assumes working with consistent data format for all inputs and outputs: mxnxb numpy array b refers to batch m,n are arbitrary """ def __init__(self, **kwargs): """Initialization. Args: debug (kwargs): boolean indicating debug mode """ self.debug = Debug("debug" in kwargs and kwargs["debug"]) self.graph = [] def size(self): """Returns the number of layers in the network. Returns: number of layers """ return len(self.graph) def get(self, index): """Get the layer at position index. Args: index: index to query Returns: layer object """ return self.graph[index] def add(self, layer): """Add a layer to the end of the network. Args: layer: layer object """ self.graph.append(layer) self.debug.disp("Added layer: [{0}] {1}".format(len(self.graph)-1, layer)) def remove(self, index=None): """Remove a layer at the end of the network or at the specified index. Args: index: index to remove layer, otherwise remove last layer """ if index is None: self.graph.pop() else: self.graph.pop(index) self.debug.disp("Removed layer from end: {0}".format(layer)) def insert(self, layer, index): """Insert a layer at the specified index. Args: layer: layer object index: index to insert """ self.graph.insert(layer, index) self.debug.disp("Inserted layer: [{0}] {1}".format(index, layer)) def forward(self, x): """Performs a forward pass. Calls forward() on all layers from in a forward sequence. This is basically inference. Args: x: input data Returns: result of forward pass (last layer) """ self.debug.disp("[forward] Running forward pass...\n") self.debug.disp("[forward] Initial Input={0}\n".format(x)) self.debug.disp("[forward] Initial Input Shape={0}\n".format(x.shape)) z = x for index, layer in enumerate(self.graph): self.debug.disp("[forward] [{0}] {1}".format(index, layer)) self.debug.disp("[forward] Input=\n\t\t{0}".format(z)) self.debug.disp("[forward] Input Shape=\n\t\t{0}".format(z.shape)) z = layer.forward(z) self.debug.disp("[forward] Output=\n\t\t{0}\n".format(z)) self.debug.disp("[forward] Output Shape=\n\t\t{0}\n".format(z.shape)) self.debug.disp("[forward] Final Output={0}\n".format(z)) self.debug.disp("[forward] Final Output Shape={0}\n".format(z.shape)) self.debug.disp("[forward] Done with forward pass.") return z def backward(self, x, grad): """Performs a backward pass. Calls backward() on all layers in a backward sequence. Does not update weights! This is basically computing gradients in advance for backpropagation. Also, forward() MUST BE CALLED BEFORE backward()! Args: x: input dataset grad: output gradient (computed from loss function) Returns: gradient w.r.t. 
to input """ self.debug.disp("[backward] Running backward pass...\n") self.debug.disp("[backward] Initial Input x={0}\n".format(x)) self.debug.disp("[backward] Initial Input x shape={0}\n".format(x.shape)) self.debug.disp("[backward] Initial Input grad={0}\n".format(grad)) self.debug.disp("[backward] Initial Input grad shape={0}\n".format(grad.shape)) g = grad for index, layer in enumerate(reversed(self.graph)): self.debug.disp("[backward] [{0}] {1}".format(index, layer)) self.debug.disp("[backward] Input=\n\t\t{0}".format(g)) self.debug.disp("[backward] Input Shape=\n\t\t{0}".format(g.shape)) g = layer.backward(g) self.debug.disp("[backward] Output=\n\t\t{0}\n".format(g)) self.debug.disp("[backward] Output Shape=\n\t\t{0}\n".format(g.shape)) self.debug.disp("[backward] Final Output={0}\n".format(g)) self.debug.disp("[backward] Final Output Shape={0}\n".format(g.shape)) self.debug.disp("[backward] Done with backward pass.") return g def updateParams(self, solver): """Update the parameters of the network. Backpropagation is basically backward() followed by updateParams(). backward() MUST BE CALLED BEFORE updateParams()! Args: solver: solver object for updating weights """ self.debug.disp("[updateParams] Updating network params...\n") for index, layer in enumerate(reversed(self.graph)): self.debug.disp("[updateParams] [{0}] {1}".format(index, layer)) layer.updateParams(solver) self.debug.disp("\n[updateParams] Done updating params.") def __str__(self): string = "Sequential Network: " for index, layer in enumerate(self.graph): string += "\n\t[{0}] {1}".format(index, layer) return string