def test_weight_init(self):
    """Train one FCNet per weight-init scheme and optionally compare them.

    Every model receives the same hyperparameters and differs only in its
    ``weight_init`` argument.  All models are constructed first and trained
    afterwards; each trained solver is stored under its scheme name so the
    comparison plot can tell the runs apart.
    """
    print("======== TestFCNet.test_weight_init:")
    dataset = load_data(self.data_dir, self.verbose)

    # Restrict every split to its first `num_train` entries.
    num_train = 1500
    split_names = ('X_train', 'y_train', 'X_val', 'y_val')
    small_data = {name: dataset[name][:num_train] for name in split_names}

    # Constructor arguments shared by every model in the comparison.
    net_kwargs = {
        'input_dim': 32 * 32 * 3,
        'hidden_dims': [100] * 9,
        'weight_scale': 2e-2,
        'reg': 2e-2,
    }

    # Solver settings shared by every run.
    learning_rate = 1e-3
    batch_size = 50
    update_rule = 'adam'

    schemes = ['gauss', 'gauss_sqrt2', 'xavier']

    # Phase 1: build every model before any training starts.
    models = {
        scheme: fcnet.FCNet(weight_init=scheme, **net_kwargs)
        for scheme in schemes
    }

    # Phase 2: train the models one after another, in scheme order.
    trained = {}
    for scheme, net in models.items():
        if self.verbose:
            print(net)
        runner = solver.Solver(
            net,
            small_data,
            print_every=self.print_every,
            num_epochs=self.num_epochs,
            batch_size=batch_size,  # previously 25
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rate},
        )
        runner.train()
        trained[str(scheme)] = runner

    if self.draw_plots:
        _, axes = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(axes, trained)
        plt.show()

    print("======== TestFCNet.test_weight_init: <END> ")
def train_xavier(verbose=True, draw_plots=False):
    """Train one ConvNetLayer per weight-init scheme on the loaded dataset.

    Each trained solver is saved to ``<scheme>-solver-<epochs>-epochs.pkl``
    and collected in a dict keyed by ``<repr(model)>-<scheme>``.

    Args:
        verbose: When truthy, print each model before it is trained.
        draw_plots: Plotting happens only when this is exactly ``True``.
    """
    dataset = load_data('datasets/cifar-10-batches-py')

    # Hyperparameters shared by all models.
    # The model constructor is always called with verbose=True, independent
    # of this function's `verbose` argument.
    net_kwargs = {
        'input_dim': (3, 32, 32),
        'hidden_dims': [256, 256],
        'num_filters': [16, 32, 64],
        'weight_scale': 1e-3,
        'reg': 2e-2,
        'verbose': True,
    }

    # Solver settings shared by all runs.
    learning_rate = 1e-3
    num_epochs = 600
    batch_size = 50
    update_rule = 'adam'

    schemes = ['gauss', 'gauss_sqrt', 'xavier']

    # Build every model up front, then train them in scheme order.
    models = {
        scheme: convnet.ConvNetLayer(weight_init=scheme, **net_kwargs)
        for scheme in schemes
    }

    trained = {}
    for scheme, net in models.items():
        if verbose:
            print(net)
        runner = solver.Solver(
            net,
            dataset,
            print_every=10,
            num_epochs=num_epochs,
            batch_size=batch_size,
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rate},
        )
        runner.train()
        runner.save('%s-solver-%d-epochs.pkl' % (scheme, num_epochs))
        trained['%s-%s' % (repr(net), scheme)] = runner

    # Identity comparison: only the literal True enables plotting.
    if draw_plots is True:
        _, axes = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(axes, trained)
        plt.show()
def overfit():
    """Train a fresh ConvNetLayer on increasingly large leading data slices.

    For every size in the schedule a new model and solver are created, the
    solver is trained on the first ``size`` entries of each split, and the
    solver is stored under ``'size_<n>'``.  All runs are plotted together at
    the end.
    """
    dataset = data_utils.get_CIFAR10_data('datasets/cifar-10-batches-py')

    # Hand-picked hyperparameters; these were not found by a search.
    reg = 1e-2
    weight_scale = 2e-3
    learning_rate = 1e-3

    # Training schedule (an earlier schedule was [50, 100, 150, 200]).
    num_epochs = 40
    train_sizes = [200, 400, 800, 1000, 1500]
    split_names = ('X_train', 'y_train', 'X_val', 'y_val')

    runs = {}
    for size in train_sizes:
        subset = {name: dataset[name][:size] for name in split_names}
        net = convnet.ConvNetLayer(
            hidden_dims=[256],
            num_filters=[16],
            filter_size=5,
            reg=reg,
            weight_scale=weight_scale,
        )
        runner = solver.Solver(
            net,
            subset,
            num_epochs=num_epochs,
            optim_config={'learning_rate': learning_rate},
        )
        print("Overfitting on %d examples in %d epochs using the following network" % (size, num_epochs))
        print(net)
        runner.train()
        runs['size_%d' % size] = runner

    # Plot every run on a shared training figure.
    _, axes = vis_solver.get_train_fig()
    vis_solver.plot_solver_compare(axes, runs)
    plt.show()
# NOTE(review): the statements down to `plot_solver_compare` are the tail of
# a definition whose header is outside the visible source; `ax`, `path`,
# `fname`, `epoch_num`, `prefix` and `solver_dict` are bound before this
# point.  Presumably this is the body of `ex_vis_solver_compare` -- confirm.
for p in path:
    for f in fname:
        # Checkpoint location: [<prefix>/]<path entry>/<name>_epoch_<n>.pkl
        epoch_str = '_epoch_%d.pkl' % epoch_num
        parts = [str(p), str(f) + str(epoch_str)]
        if prefix is not None:
            parts.insert(0, str(prefix))
        cname = '/'.join(parts)

        # The solver is created empty and populated from the checkpoint.
        solv = solver.Solver(None, None)
        solv.load_checkpoint(cname)
        # Keyed by name only: a later path entry replaces an earlier one
        # that used the same name.
        solver_dict[f] = solv

vis_solver.plot_solver_compare(ax, solver_dict)


if __name__ == "__main__":
    # Checkpoint directories and the network names stored in each of them.
    prefix = "/home/kreshnik/Documents/compucon/machine-learning/models"
    cpath = [
        "conv-net-train-2017-11-15-01",
        "conv-net-train-2017-11-15-02",
    ]
    cname = [
        'c16-fc256-fc10-net',
        'c16-c32-fc256-fc10-net',
        'c16-c32-c64-fc256-fc256-fc10-net',
        'c16-c32-c64-c128-fc256-fc256-fc10-net',
    ]

    # Solver comparison at epoch 100 on the training figure.
    solv_fig, solv_ax = vis_solver.get_train_fig()
    ex_vis_solver_compare(solv_ax, cpath, cname, 100, prefix)

    # Sequence plot on the weight figure, called with (1, 100) and step=10.
    w_fig, w_ax = vis_solver.get_weight_fig()
    ex_plot_sequence(w_ax, cpath, cname, (1, 100), prefix=prefix, step=10)

    plt.show()
def test_3layer_nets(self):
    """Train a ThreeLayerConvNet and a ConvNetLayer with matching settings.

    Both models are built with the same filter size, filter count, hidden
    width, weight scale and regularisation, trained on the first 50 entries
    of each split, and optionally plotted side by side.
    """
    print("\n======== TestSolverCompare.test_3layer_nets:")
    dataset = load_data(self.data_dir, self.verbose)

    # Restrict every split to its first `num_train` entries.
    num_train = 50
    split_names = ('X_train', 'y_train', 'X_val', 'y_val')
    small_data = {name: dataset[name][:num_train] for name in split_names}

    # Architecture settings applied to both models.
    num_filters = 32
    hidden_dims = 100
    shared_kwargs = {
        'filter_size': 7,
        'weight_scale': 1e-2,
        'reg': 0.0,
    }

    # Solver settings.
    learning_rate = 1e-3
    batch_size = 50
    # NOTE(review): `update_rule` is assigned but never passed to the Solver
    # below -- confirm whether the solver default is intended here.
    update_rule = 'adam'

    # TODO : repeat this comparison with and without Xavier initialisation.

    from pymllib.classifiers import convnet

    l3_net = convnet.ThreeLayerConvNet(
        hidden_dim=hidden_dims,
        num_filters=num_filters,
        **shared_kwargs
    )
    if self.verbose:
        print("L3 net:")
        print(l3_net)

    # The layered model takes its sizes as single-element lists.
    fc_net = convnet.ConvNetLayer(
        hidden_dims=[hidden_dims],
        num_filters=[num_filters],
        **shared_kwargs
    )

    candidates = {'l3_net': l3_net, 'fc_net': fc_net}
    trained = {}
    for label, net in candidates.items():
        # The solver is always verbose here, independent of self.verbose.
        runner = solver.Solver(
            net,
            small_data,
            optim_config={'learning_rate': learning_rate},
            num_epochs=self.num_epochs,
            batch_size=batch_size,
            print_every=self.print_every,
            verbose=True,
        )
        runner.train()
        trained[label] = runner

    # Plot both runs on a shared training figure.
    if self.draw_plots:
        _, axes = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(axes, trained)
        plt.show()

    print("======== TestSolverCompare.test_3layer_nets : <END> ")