Example #1
    def test_weight_init(self):
        print("======== TestFCNet.test_weight_init:")

        dataset = load_data(self.data_dir, self.verbose)
        num_train = 1500
        small_data = {
            'X_train': dataset['X_train'][:num_train],
            'y_train': dataset['y_train'][:num_train],
            'X_val': dataset['X_val'][:num_train],
            'y_val': dataset['y_val'][:num_train]
        }
        input_dim = 32 * 32 * 3
        hidden_dims = [100, 100, 100, 100, 100, 100, 100, 100, 100]
        weight_scale = 2e-2
        reg = 2e-2
        learning_rate = 1e-3
        batch_size = 50
        update_rule = 'adam'
        weight_init = ['gauss', 'gauss_sqrt2', 'xavier']

        model_dict = {}
        for w in weight_init:
            model = fcnet.FCNet(input_dim=input_dim,
                                hidden_dims=hidden_dims,
                                weight_scale=weight_scale,
                                reg=reg,
                                weight_init=w)
            model_dict[w] = model
        solver_dict = {}

        for k, m in model_dict.items():
            if self.verbose:
                print(m)

            solv = solver.Solver(
                m,
                small_data,
                print_every=self.print_every,
                num_epochs=self.num_epochs,
                batch_size=batch_size,  # previously 25
                update_rule=update_rule,
                optim_config={'learning_rate': learning_rate})
            solv.train()
            #skey = '%s-%s' % (m.__repr__(), k)
            skey = '%s' % k
            solver_dict[skey] = solv

        if self.draw_plots:
            fig, ax = vis_solver.get_train_fig()
            vis_solver.plot_solver_compare(ax, solver_dict)
            #vis_solver.plot_solver(ax, solv)
            plt.show()

        print("======== TestFCNet.test_weight_init: <END> ")
Example #2
def train_xavier(verbose=True, draw_plots=False):

    data_dir = 'datasets/cifar-10-batches-py'
    dataset = load_data(data_dir)

    # Hyperparams
    input_dim = (3, 32, 32)
    hidden_dims = [256, 256]
    num_filters = [16, 32, 64]
    reg = 2e-2
    weight_scale = 1e-3
    learning_rate = 1e-3
    num_epochs = 600
    batch_size = 50
    update_rule = 'adam'

    weight_init = ['gauss', 'gauss_sqrt', 'xavier']
    model_dict = {}

    for w in weight_init:
        model = convnet.ConvNetLayer(input_dim=input_dim,
                                     hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     weight_scale=weight_scale,
                                     weight_init=w,
                                     reg=reg,
                                     verbose=True)
        model_dict[w] = model

    solver_dict = {}

    for k, m in model_dict.items():
        if verbose:
            print(m)
        solv = solver.Solver(m,
                             dataset,
                             print_every=10,
                             num_epochs=num_epochs,
                             batch_size=batch_size,
                             update_rule=update_rule,
                             optim_config={'learning_rate': learning_rate})
        solv.train()
        fname = '%s-solver-%d-epochs.pkl' % (k, int(num_epochs))
        solv.save(fname)
        skey = '%s-%s' % (m.__repr__(), k)
        solver_dict[skey] = solv

    # Plot results
    if draw_plots:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        plt.show()
Example #3
def ex_vis_solver_compare(ax, path, fname, epoch_num, prefix=None):
    """
    EX_VIS_SOLVER_COMPARE
    Visualize a series of solver results superimposed on a single plot.
    Each solver checkpoint is read in turn and plotted on a single
    graph. The legend is created using the __repr__() result for each
    solver object.

    Inputs
        ax:
            A matplotlib axes onto which to draw the visualization
        path:
            Directory containing solver files. This may be a list of multiple
            directories, in which case the method iterates over each of them in turn.
        fname:
            The name of a given solver file, without the '_epoch_%d.pkl' suffix
        epoch_num:
            Which epoch to load.
        prefix:
            A prefix that is prepended to the filename. This allows, for example, a
            group of subfolders to be traversed that all have the same root.
            Default = None
    """

    # Helper function for loading solver objects
    def load_solver(fname):
        solv = solver.Solver(None, None)
        solv.load_checkpoint(fname)
        return solv

    # Check input arguments
    if type(path) is not list:
        path = [path]

    if type(fname) is not list:
        fname = [fname]

    # Iterate over all files and build the comparison plot
    solver_dict = {}
    for p in path:
        for f in fname:
            epoch_str = '_epoch_%d.pkl' % epoch_num
            if prefix is not None:
                cname = str(prefix) + '/' + str(p) + '/' + str(f) + str(
                    epoch_str)
            else:
                cname = str(p) + '/' + str(f) + str(epoch_str)
            solv = load_solver(cname)
            solver_dict[f] = solv
            #vis_solver.plot_model_first_layer(ax, solv.model, cname)
    vis_solver.plot_solver_compare(ax, solver_dict)
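
A usage sketch for ex_vis_solver_compare, following the plotting pattern used in the other examples; the checkpoint directories, file prefix, epoch number, and prefix value below are placeholders, not paths from the original code.

# Hypothetical call: compare two saved runs at epoch 100.
fig, ax = vis_solver.get_train_fig()
ex_vis_solver_compare(ax,
                      path=['gauss', 'xavier'],
                      fname='solver',
                      epoch_num=100,
                      prefix='checkpoints')
plt.show()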
Example #4
def overfit():
    # Data
    dataset = data_utils.get_CIFAR10_data('datasets/cifar-10-batches-py')
    # Hyperparameters
    # for now we just use some random params, not found by search
    reg = 1e-2
    weight_scale = 2e-3
    learning_rate = 1e-3
    # Training parameters
    num_epochs = 40
    #train_sizes = [50, 100, 150, 200]
    train_sizes = [200, 400, 800, 1000, 1500]

    solv_dict = {}
    for size in train_sizes:
        overfit_data = {
            'X_train': dataset['X_train'][:size],
            'y_train': dataset['y_train'][:size],
            'X_val':   dataset['X_val'][:size],
            'y_val':   dataset['y_val'][:size]
        }
        model = convnet.ConvNetLayer(hidden_dims=[256],
                                     num_filters=[16],
                                     filter_size=5,
                                     reg=reg,
                                     weight_scale=weight_scale)
        solv = solver.Solver(model,
                             overfit_data,
                             num_epochs=num_epochs,
                             optim_config={'learning_rate': learning_rate})
        print("Overfitting on %d examples in %d epochs using the following network" % (size, num_epochs))
        print(model)
        solv.train()
        dkey = 'size_%d' % size
        solv_dict[dkey] = solv
        # Check that we can actually overfit
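        # A hedged sketch of such a check (left commented out): it assumes the
        # Solver records per-epoch training accuracy in train_acc_history,
        # which is not shown in this example and may not match the actual API.
        # if max(solv.train_acc_history) < 0.9:
        #     print("Warning: could not overfit %d training examples" % size)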

    # Plot the results
    fig, ax = vis_solver.get_train_fig()
    vis_solver.plot_solver_compare(ax, solv_dict)
    plt.show()
Example #5
    def test_3layer_nets(self):
        print("\n======== TestSolverCompare.test_3layer_nets:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50
        small_data = {
            'X_train': dataset['X_train'][:num_train],
            'y_train': dataset['y_train'][:num_train],
            'X_val': dataset['X_val'][:num_train],
            'y_val': dataset['y_val'][:num_train]
        }

        filter_size = 7
        num_filters = 32
        hidden_dims = 100
        weight_scale = 1e-2
        learning_rate = 1e-3
        reg = 0.0
        batch_size = 50
        update_rule = 'adam'

        # TODO : Save this for a Xavier test
        #for i in range(2):
        #    if i == 0:
        #        use_xavier = False
        #    else:
        #        use_xavier = True

        from pymllib.classifiers import convnet

        l3_net = convnet.ThreeLayerConvNet(hidden_dim=hidden_dims,
                                           num_filters=num_filters,
                                           filter_size=filter_size,
                                           weight_scale=weight_scale,
                                           reg=reg)
        if self.verbose:
            print("L3 net:")
            print(l3_net)
        fc_net = convnet.ConvNetLayer(hidden_dims=[hidden_dims],
                                      num_filters=[num_filters],
                                      filter_size=filter_size,
                                      weight_scale=weight_scale,
                                      reg=reg)

        model_dict = {'l3_net': l3_net, 'fc_net': fc_net}
        solver_dict = {}
        for k, m in model_dict.items():
            solv = solver.Solver(m,
                                 small_data,
                                 optim_config={'learning_rate': learning_rate},
                                 num_epochs=self.num_epochs,
                                 batch_size=batch_size,
                                 print_every=self.print_every,
                                 verbose=True)
            solv.train()
            solver_dict[k] = solv

        # Make some plots
        if self.draw_plots:
            fig, ax = vis_solver.get_train_fig()
            vis_solver.plot_solver_compare(ax, solver_dict)
            plt.show()

        print("======== TestSolverCompare.test_3layer_nets : <END> ")