Пример #1
0
def main(main_params):

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset='mnist_subset.json')
    N_train, d = Xtrain.shape
    N_val, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining MLP ###
    """
    MLP for a 10-class classification problem on MNIST.
    The network structure is input --> linear --> relu --> dropout --> linear --> softmax_cross_entropy loss
    the hidden_layer size (num_L1) is 1000
    the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1000
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 10
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['L1'] = dnn_misc.linear_layer(input_D=d, output_D=num_L1)
    model['nonlinear1'] = dnn_misc.relu()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L2'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0

        for i in range(int(np.floor(N_train / minibatch_size))):

            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) *
                                                  minibatch_size])

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=True)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)

            ### backward ###
            grad_a2 = model['loss'].backward(a2, y)
            grad_d1 = model['L2'].backward(d1, grad_a2)
            grad_h1 = model['drop1'].backward(h1, grad_d1)
            grad_a1 = model['nonlinear1'].backward(a1, grad_h1)
            grad_x = model['L1'].backward(x, grad_a1)

            ### gradient_update ###
            for module_name, module in model.items():

                # check if a module has learnable parameters
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        g = module.gradient[key] + _lambda * module.params[key]

                        if _alpha > 0.0:
                            momentum[module_name + '_' +
                                     key] = _alpha * momentum[
                                         module_name + '_' +
                                         key] - _learning_rate * g
                            module.params[key] += momentum[module_name + '_' +
                                                           key]

                        else:
                            module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):

            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            loss = model['loss'].forward(a2, y)
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a2) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):

            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            a1 = model['L1'].forward(x)
            h1 = model['nonlinear1'].forward(a1)
            d1 = model['drop1'].forward(h1, is_train=False)
            a2 = model['L2'].forward(d1)
            val_acc += np.sum(predict_label(a2) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    json.dump(
        {
            'train': train_acc_record,
            'val': val_acc_record
        },
        open(
            'MLP_lr' + str(main_params['learning_rate']) + '_m' +
            str(main_params['alpha']) + '_w' + str(main_params['lambda']) +
            '_d' + str(main_params['dropout_rate']) + '.json', 'w'))

    print('Finish running!')
    return
Пример #2
0
def main(main_params):
    """
        Search TODO for those parts you need to complete.
        Please follow the step indicated in TODO (step) to complete this script from step = 1 to step = 5.
    """

    ### set the random seed ###
    np.random.seed(int(main_params['random_seed']))

    ### data processing ###
    Xtrain, Ytrain, Xval, Yval, _, _ = data_loader_mnist(
        dataset='mnist_subset.json')
    N_train, d, _, _ = Xtrain.shape
    N_val, _, _, _ = Xval.shape

    trainSet = DataSplit(Xtrain, Ytrain)
    valSet = DataSplit(Xval, Yval)

    ### building/defining CNN ###
    """
        In this script, we are going to build a CNN for a 10-class classification problem on MNIST.
        The network structure is input --> convolution --> relu --> max pooling --> convolution --> relu --> max pooling --> flatten --> dropout --> linear --> softmax_cross_entropy loss
        the hidden_layer size (num_L1) is 1225
        the output_layer size (num_L2) is 10
    """
    model = dict()
    num_L1 = 1225
    num_L2 = 10

    # experimental setup
    num_epoch = int(main_params['num_epoch'])
    minibatch_size = int(main_params['minibatch_size'])

    # optimization setting: _alpha for momentum, _lambda for weight decay
    _learning_rate = float(main_params['learning_rate'])
    _step = 30
    _alpha = float(main_params['alpha'])
    _lambda = float(main_params['lambda'])
    _dropout_rate = float(main_params['dropout_rate'])

    # create objects (modules) from the module classes
    model['C1'] = dnn_misc.conv_layer(num_input=d,
                                      num_output=25,
                                      filter_len=5,
                                      stride=1)
    model['nonlinear1'] = dnn_misc.relu()
    model['M1'] = dnn_misc.max_pool(max_len=2, stride=2)

    ################################################################################
    # TODO (1): Understand the new modules to be included (compared to dnn_cnn.py) #
    # You do not need to modify any thing here.                                    #
    ################################################################################

    model['C2'] = dnn_misc.conv_layer(num_input=25,
                                      num_output=25,
                                      filter_len=3,
                                      stride=1)
    model['nonlinear2'] = dnn_misc.relu()
    model['M2'] = dnn_misc.max_pool(max_len=2, stride=2)

    ################################################################################
    #                            End of TODO (1)                                   #
    ################################################################################

    model['F1'] = dnn_misc.flatten_layer()
    model['drop1'] = dnn_misc.dropout(r=_dropout_rate)
    model['L1'] = dnn_misc.linear_layer(input_D=num_L1, output_D=num_L2)
    model['loss'] = dnn_misc.softmax_cross_entropy()

    # create variables for momentum
    if _alpha > 0.0:
        momentum = dnn_misc.add_momentum(model)
    else:
        momentum = None

    train_acc_record = []
    val_acc_record = []

    ### run training and validation ###
    for t in range(num_epoch):
        print('At epoch ' + str(t + 1))
        if (t % _step == 0) and (t != 0):
            _learning_rate = _learning_rate * 0.1

        idx_order = np.random.permutation(N_train)

        train_acc = 0.0
        train_loss = 0.0
        train_count = 0

        val_acc = 0.0
        val_count = 0

        for i in range(int(np.floor(N_train / minibatch_size))):

            # get a mini-batch of data
            x, y = trainSet.get_example(idx_order[i * minibatch_size:(i + 1) *
                                                  minibatch_size])

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (2): Connect the three modules for the forward pass                     #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the forward pass.                                                       #
            # Please make sure to connect them with m1 and m2, the input and output of the #
            # previous and the next modules, respectively                                  #
            ################################################################################

            # TODO (2)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            ################################################################################
            #                            End of TODO (2)                                   #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=True)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)

            ### backward ###
            grad_a1 = model['loss'].backward(a1, y)
            grad_d1 = model['L1'].backward(d1, grad_a1)
            grad_f1 = model['drop1'].backward(f1, grad_d1)
            grad_m2 = model['F1'].backward(m2, grad_f1)

            ################################################################################
            # TODO (3): Connect the three modules for the backward pass                    #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the backward pass.                                                      #
            # Please make sure to connect them with grad_m2 and grad_m1, the input and     #
            # output of the previous and the next modules, respectively                    #
            # Please pay attention to the number of arguments in the backward pass.        #
            ################################################################################

            # TODO (3)
            grad_h2 = model['M2'].backward(h2, grad_m2)
            grad_c2 = model['nonlinear2'].backward(c2, grad_h2)
            grad_m1 = model['C2'].backward(m1, grad_c2)

            ################################################################################
            #                            End of TODO (3)                                   #
            ################################################################################

            grad_h1 = model['M1'].backward(h1, grad_m1)
            grad_c1 = model['nonlinear1'].backward(c1, grad_h1)
            grad_x = model['C1'].backward(x, grad_c1)

            ### gradient_update ###
            for module_name, module in model.items():

                # check if a module has learnable parameters
                if hasattr(module, 'params'):
                    for key, _ in module.params.items():
                        g = module.gradient[key] + _lambda * module.params[key]

                        if _alpha > 0.0:
                            momentum[module_name + '_' +
                                     key] = _alpha * momentum[
                                         module_name + '_' +
                                         key] - _learning_rate * g
                            module.params[key] += momentum[module_name + '_' +
                                                           key]

                        else:
                            module.params[key] -= _learning_rate * g

        ### Computing training accuracy and obj ###
        for i in range(int(np.floor(N_train / minibatch_size))):

            x, y = trainSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (4): Connect the three modules for the forward pass                     #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the forward pass.                                                       #
            # Please make sure to connect them with m1 and m2, the input and output of the #
            # previous and the next modules, respectively                                  #
            ################################################################################

            # TODO (4)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            ################################################################################
            #                            End of TODO (4)                                   #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            loss = model['loss'].forward(a1, y)
            train_loss += len(y) * loss
            train_acc += np.sum(predict_label(a1) == y)
            train_count += len(y)

        train_loss = train_loss / train_count
        train_acc = train_acc / train_count
        train_acc_record.append(train_acc)

        print('Training loss at epoch ' + str(t + 1) + ' is ' +
              str(train_loss))
        print('Training accuracy at epoch ' + str(t + 1) + ' is ' +
              str(train_acc))

        ### Computing validation accuracy ###
        for i in range(int(np.floor(N_val / minibatch_size))):

            x, y = valSet.get_example(
                np.arange(i * minibatch_size, (i + 1) * minibatch_size))

            ### forward ###
            c1 = model['C1'].forward(x)
            h1 = model['nonlinear1'].forward(c1)
            m1 = model['M1'].forward(h1)

            ################################################################################
            # TODO (5): Connect the three modules for the forward pass                     #
            # model['C2']                                                                  #
            # model['nonlinear2']                                                          #
            # model['M2']                                                                  #
            # into the forward pass.                                                       #
            # Please make sure to connect them with m1 and m2, the input and output of the #
            # previous and the next modules, respectively                                  #
            ################################################################################

            # TODO (5)
            c2 = model['C2'].forward(m1)
            h2 = model['nonlinear2'].forward(c2)
            m2 = model['M2'].forward(h2)

            ################################################################################
            #                            End of TODO (5)                                   #
            ################################################################################

            f1 = model['F1'].forward(m2)
            d1 = model['drop1'].forward(f1, is_train=False)
            a1 = model['L1'].forward(d1)
            val_acc += np.sum(predict_label(a1) == y)
            val_count += len(y)

        val_acc = val_acc / val_count
        val_acc_record.append(val_acc)

        print('Validation accuracy at epoch ' + str(t + 1) + ' is ' +
              str(val_acc))

    # save file
    json.dump(
        {
            'train': train_acc_record,
            'val': val_acc_record
        },
        open(
            'CNN2_lr' + str(main_params['learning_rate']) + '_m' +
            str(main_params['alpha']) + '_w' + str(main_params['lambda']) +
            '_d' + str(main_params['dropout_rate']) + '.json', 'w'))

    print('Finish running!')
    return
Пример #3
0
import numpy as np
import dnn_misc


np.random.seed(123)

# example data
X = np.random.normal(0, 1, (5, 3))


# example modules
check_linear = dnn_misc.linear_layer(input_D = 3, output_D = 2)
check_relu = dnn_misc.relu()
check_dropout = dnn_misc.dropout(r = 0.5)


# check_linear.forward
hat_X = check_linear.forward(X)
ground_hat_X = np.array([[ 0.42525407, -0.2120611 ],
 [ 0.15174804, -0.36218431],
 [ 0.20957104, -0.57861084],
 [ 0.03460477, -0.35992763],
 [-0.07256568,  0.1385197 ]])

if (hat_X.shape[0] != 5) or (hat_X.shape[1] != 2):
    print('Wrong output dimension of linear.forward')
else:
    max_relative_diff = np.amax(np.abs(ground_hat_X - hat_X) / (ground_hat_X + 1e-8))
    print('max_diff_output: ' + str(max_relative_diff))
    if max_relative_diff >= 1e-7:
        print('linear.forward might be wrong')
Пример #4
0
    def test_linear(self):
        np.random.seed(123)
        # example data
        X = np.random.normal(0, 1, (5, 3))

        # example modules
        check_linear = dnn_misc.linear_layer(input_D=3, output_D=2)
        check_relu = dnn_misc.relu()
        check_dropout = dnn_misc.dropout(r=0.5)

        # check_linear.forward
        hat_X = check_linear.forward(X)
        ground_hat_X = np.array([[0.42525407, -0.2120611],
                                 [0.15174804, -0.36218431],
                                 [0.20957104, -0.57861084],
                                 [0.03460477, -0.35992763],
                                 [-0.07256568, 0.1385197]])

        if (hat_X.shape[0] != 5) or (hat_X.shape[1] != 2):
            print('Wrong output dimension of linear.forward')
            self.fail()
        else:
            max_relative_diff = np.amax(np.abs(ground_hat_X - hat_X) / (ground_hat_X + 1e-8))
            print('max_diff_output: ' + str(max_relative_diff))
            if max_relative_diff >= 1e-7:
                print('linear.forward might be wrong')
                self.fail()
            else:
                print('linear.forward should be correct')
                pass
        # check_linear.backward
        grad_hat_X = np.random.normal(0, 1, (5, 2))
        grad_X = check_linear.backward(X, grad_hat_X)

        ground_grad_X = np.array([[-0.32766959, 0.13123228, -0.0470483],
                                  [0.22780188, -0.04838436, 0.04225799],
                                  [0.03115675, -0.32648556, -0.06550193],
                                  [-0.01895741, -0.21411292, -0.05212837],
                                  [-0.26923074, -0.78986304, -0.23870499]])

        ground_grad_W = np.array([[-0.27579345, -2.08570514],
                                  [4.52754775, -0.40995374],
                                  [-1.2049515, 1.77662551]])

        ground_grad_b = np.array([[-4.55094716, -2.51399667]])

        if (grad_X.shape[0] != 5) or (grad_X.shape[1] != 3):
            print('Wrong output dimension of linear.backward')
            self.fail()
        else:
            max_relative_diff_X = np.amax(np.abs(ground_grad_X - grad_X) / (ground_grad_X + 1e-8))
            print('max_diff_grad_X: ' + str(max_relative_diff_X))
            max_relative_diff_W = np.amax(np.abs(ground_grad_W - check_linear.gradient['W']) / (ground_grad_W + 1e-8))
            print('max_diff_grad_W: ' + str(max_relative_diff_W))
            max_relative_diff_b = np.amax(np.abs(ground_grad_b - check_linear.gradient['b']) / (ground_grad_b + 1e-8))
            print('max_diff_grad_b: ' + str(max_relative_diff_b))

            if (max_relative_diff_X >= 1e-7) or (max_relative_diff_W >= 1e-7) or (max_relative_diff_b >= 1e-7):
                print('linear.backward might be wrong')
                self.fail()
            else:
                print('linear.backward should be correct')
                pass