Exemplo n.º 1
0
def train_best_model():
    """Train a deep fully connected net on CIFAR-10 and report the results.

    Loads the splits from ``get_CIFAR10_data``, trains a ``FullyConnectedNet``
    with hidden sizes [600, 500, 400, 300, 200, 100] (``use_batchnorm=True``,
    ``dropout=0.25``, ``reg=1e-2``) using the ``'adam'`` update rule for
    30 epochs, prints the accuracy on the test split, and plots the loss and
    the train/validation accuracy histories recorded by the solver.

    Returns:
        None. Results are printed and shown in a matplotlib figure.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Fixed: this entry used to hold y_test (the labels) by mistake.
        'X_test': X_test,
        'y_test': y_test,
    }
    learning_rate = 3.1e-4
    weight_scale = 2.5e-2  # previously tried: 1e-5
    model = FullyConnectedNet([600, 500, 400, 300, 200, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64,
                              dropout=0.25,
                              use_batchnorm=True,
                              reg=1e-2)
    solver = Solver(model,
                    data,
                    print_every=500,
                    num_epochs=30,
                    batch_size=100,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': learning_rate,
                    },
                    lr_decay=0.9)

    solver.train()

    # Calling loss() without labels presumably returns per-class scores
    # (one row per sample) -- confirm against FullyConnectedNet.loss.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print('test acc: %f' % (acc))

    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history)
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, label='train')
    plt.plot(solver.val_acc_history, label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Clasification accuracy')
    # The curves are labelled above; without legend() the labels never show.
    plt.legend()
    plt.show()
Exemplo n.º 2
0
def solver_test():
    """Train a ``TwoLayerNet`` on CIFAR-10 with ``Solver`` and plot the run.

    Trains for 10 epochs with ``reg=1e-1``, a learning rate of 1e-3 and
    ``lr_decay=0.95``, prints the accuracy on the test split, then plots the
    per-iteration training loss and the per-epoch train/validation accuracy
    together with a dashed reference line at 0.5.

    Returns:
        None. Results are printed and shown in a matplotlib figure.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Fixed: this entry used to hold y_test (the labels) by mistake.
        'X_test': X_test,
        'y_test': y_test,
    }
    model = TwoLayerNet(reg=1e-1)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10,
                    batch_size=100,
                    print_every=100)

    solver.train()

    # Calling loss() without labels presumably returns per-class scores
    # (one row per sample) -- confirm against TwoLayerNet.loss.
    scores = model.loss(X_test)
    y_pred = np.argmax(scores, axis=1)
    acc = np.mean(y_pred == y_test)
    print("Test acc: {}".format(acc))

    # Visualize training loss and train/val accuracy.
    plt.subplot(2, 1, 1)
    plt.title('Training loss')
    plt.plot(solver.loss_history, 'o')
    plt.xlabel("Iteration")

    plt.subplot(2, 1, 2)
    plt.title("Accuracy")
    plt.plot(solver.train_acc_history, "-o", label="train")
    plt.plot(solver.val_acc_history, "-o", label="val")
    # Dashed horizontal reference line at 50% accuracy.
    plt.plot([0.5] * len(solver.val_acc_history), 'k--')
    plt.xlabel("Epoch")
    plt.legend(loc="lower right")
    plt.gcf().set_size_inches(15, 12)
    plt.show()
Exemplo n.º 3
0
def batchnorm_for_deep_networks():
    """Train two identical deep nets, one with and one without batchnorm.

    Both networks have five hidden layers of 100 units and share the same
    weight scale (2e-2). Each is trained on the first 1000 training examples
    for 10 epochs with the ``'adam'`` update rule; the batchnorm variant is
    trained first. Progress is reported by the solver (``verbose=True``).

    Returns:
        None.
    """
    # The test split is not needed for this comparison.
    X_train, y_train, X_val, y_val, _, _ = get_CIFAR10_data()

    subset_size = 1000
    layer_sizes = [100, 100, 100, 100, 100]
    init_scale = 2e-2

    subset = {
        'X_train': X_train[:subset_size],
        'y_train': y_train[:subset_size],
        'X_val': X_val,
        'y_val': y_val,
    }

    # Build both networks up front, batchnorm variant first, so that model
    # construction happens in the same order as before.
    net_with_bn = FullyConnectedNet(layer_sizes,
                                    weight_scale=init_scale,
                                    use_batchnorm=True)
    net_plain = FullyConnectedNet(layer_sizes,
                                  weight_scale=init_scale,
                                  use_batchnorm=False)

    def fit(net):
        """Create a solver for ``net`` with the shared settings and train it."""
        trainer = Solver(net,
                         subset,
                         num_epochs=10,
                         batch_size=50,
                         update_rule='adam',
                         optim_config={'learning_rate': 1e-3},
                         verbose=True,
                         print_every=200)
        trainer.train()

    fit(net_with_bn)
    fit(net_plain)
Exemplo n.º 4
0
def neural_network_with_rms_and_adam():
    """Compare the ``'adam'`` and ``'rmsprop'`` update rules on a small subset.

    For each update rule a fresh five-hidden-layer ``FullyConnectedNet`` is
    trained for 5 epochs on the first 4000 training examples, using a
    per-rule learning rate. The training loss, training accuracy and
    validation accuracy of both runs are then plotted in three stacked
    subplots.

    Returns:
        None. Results are shown in a matplotlib figure.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Fixed: this entry used to hold y_test (the labels) by mistake.
        'X_test': X_test,
        'y_test': y_test,
    }
    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    learning_rates = {'rmsprop': 1e-4, 'adam': 1e-3}
    for update_rule in ['adam', 'rmsprop']:
        print('running with ', update_rule)
        model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)

        solver = Solver(
            model,
            small_data,
            num_epochs=5,
            batch_size=100,
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rates[update_rule]},
            verbose=True)
        solvers[update_rule] = solver
        solver.train()

    # Set up titles and axis labels once per subplot.
    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    # Overlay the histories of every update rule on the shared subplots.
    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    plt.show()
Exemplo n.º 5
0
def sgd_momentum_test():
    """Check ``sgd_momentum`` numerically, then compare it with plain SGD.

    Part 1 runs one ``sgd_momentum`` step on fixed inputs and prints the
    relative error of the returned weights and of the stored velocity
    against hard-coded reference values.

    Part 2 trains a five-hidden-layer ``FullyConnectedNet`` on the first 4000
    training examples with both ``'sgd'`` and ``'sgd_momentum'`` and plots
    the training loss, training accuracy and validation accuracy of each.

    Returns:
        None. Results are printed and shown in a matplotlib figure.
    """
    N, D = 4, 5
    w = np.linspace(-0.4, 0.6, num=N * D).reshape(N, D)
    dw = np.linspace(-0.6, 0.4, num=N * D).reshape(N, D)
    v = np.linspace(0.6, 0.9, num=N * D).reshape(N, D)

    config = {'learning_rate': 1e-3, 'velocity': v}
    next_w, _ = sgd_momentum(w, dw, config=config)

    expected_next_w = np.asarray(
        [[0.1406, 0.20738947, 0.27417895, 0.34096842, 0.40775789],
         [0.47454737, 0.54133684, 0.60812632, 0.67491579, 0.74170526],
         [0.80849474, 0.87528421, 0.94207368, 1.00886316, 1.07565263],
         [1.14244211, 1.20923158, 1.27602105, 1.34281053, 1.4096]])
    expected_velocity = np.asarray(
        [[0.5406, 0.55475789, 0.56891579, 0.58307368, 0.59723158],
         [0.61138947, 0.62554737, 0.63970526, 0.65386316, 0.66802105],
         [0.68217895, 0.69633684, 0.71049474, 0.72465263, 0.73881053],
         [0.75296842, 0.76712632, 0.78128421, 0.79544211, 0.8096]])

    print("next_w error: {}".format(rel_error(next_w, expected_next_w)))
    # The velocity is read back from the config passed in above, so this
    # check presumably relies on sgd_momentum storing the updated velocity
    # in that same dict -- confirm against sgd_momentum.
    print("velocity error: {}".format(
        rel_error(expected_velocity, config['velocity'])))

    # Train a six-layer network with both SGD and SGD+momentum.
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Fixed: this entry used to hold y_test (the labels) by mistake.
        'X_test': X_test,
        'y_test': y_test,
    }
    num_train = 4000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    for update_rule in ['sgd', 'sgd_momentum']:
        print("Running with {}".format(update_rule))
        model = FullyConnectedNet([100, 100, 100, 100, 100], weight_scale=5e-2)
        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={'learning_rate': 1e-2},
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()

    # Set up titles and axis labels once per subplot.
    plt.subplot(3, 1, 1)
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(3, 1, 2)
    plt.title('Training accuracy')
    plt.xlabel('Epoch')

    plt.subplot(3, 1, 3)
    plt.title('Validation accuracy')
    plt.xlabel('Epoch')

    # Overlay the histories of every update rule on the shared subplots.
    for update_rule, solver in solvers.items():
        plt.subplot(3, 1, 1)
        plt.plot(solver.loss_history, 'o', label=update_rule)

        plt.subplot(3, 1, 2)
        plt.plot(solver.train_acc_history, '-o', label=update_rule)

        plt.subplot(3, 1, 3)
        plt.plot(solver.val_acc_history, '-o', label=update_rule)

    for i in [1, 2, 3]:
        plt.subplot(3, 1, i)
        plt.legend(loc='upper center', ncol=4)
    plt.gcf().set_size_inches(15, 15)
    # Fixed: the figure was built but never displayed.
    plt.show()
Exemplo n.º 6
0
def multilayer_network_test():
    """Overfit a 50-image CIFAR-10 subset with a five-layer network.

    The active experiment trains a ``FullyConnectedNet`` with four hidden
    layers of 100 units on the first 50 training examples for 20 epochs with
    plain ``'sgd'`` and plots the per-iteration training loss. Earlier
    experiments (gradient check, three-layer overfit, grid search) are kept
    below as disabled, commented-out code.

    Returns:
        None. Results are shown in a matplotlib figure.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        # Fixed: this entry used to hold y_test (the labels) by mistake.
        'X_test': X_test,
        'y_test': y_test,
    }
    # Initial loss and gradient check (disabled)
    # N, D, H1, H2, C = 2, 15, 20, 30, 10
    # X = np.random.randn(N, D)
    # y = np.random.randint(C, size=(N, ))

    # print(X.shape)
    # for reg in [0, 3.14]:
    #     print("Running check with reg={}".format(reg))
    #     model = FullyConnectedNet([H1, H2],
    #                               input_dim=D,
    #                               num_classes=C,
    #                               reg=reg,
    #                               weight_scale=5e-2,
    #                               dtype=np.float64)
    #     loss, grads = model.loss(X, y)
    #     print("Initial loss: {}".format(loss))

    #     for name in sorted(grads):
    #         f = lambda _: model.loss(X, y)[0]
    #         grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
    #         print("{} relative {}".format(name, rel_error(grad_num, grads[name])))

    # As another sanity check, make sure you can overfit a small dataset of
    # 50 images. First we will try a three-layer network with 100 units in
    # each hidden layer. You will need to tweak the learning rate and
    # initialization scale, but you should be able to overfit and achieve
    # 100% training accuracy within 20 epochs.
    num_train = 50
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    ##########################################################################
    # Three-layer network (disabled)
    # weight_scale = 5e-2
    # learning_rate = 1e-3

    # model = FullyConnectedNet([100, 100],
    #                         weight_scale=weight_scale,
    #                         dtype=np.float64)
    # solver = Solver(model,
    #                 small_data,
    #                 print_every=10,
    #                 num_epochs=20,
    #                 batch_size=25,
    #                 update_rule='sgd',
    #                 optim_config={'learning_rate': learning_rate})
    # solver.train()

    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Grid search (disabled)
    # NOTE(review): the comparison below keeps a solver only when it is WORSE
    # than the current best (and never fires from 0.0); it looks inverted and
    # should be reviewed before this code is re-enabled.
    # best_accurcy = 0.0
    # best_solver = None
    # weight_scale = np.linspace(1e-3, 1e-2, 10)
    # learing_rate = np.linspace(1e-4, 1e-2, 100)
    # for w in weight_scale:
    #     for l in learing_rate:
    #         print("Training with weight_scale {} and learning_rate {}".format(w, l))
    #         model = FullyConnectedNet([100, 100],
    #                                 weight_scale=w,
    #                                 dtype=np.float64)
    #         solver = Solver(model,
    #                         small_data,
    #                         print_every=10,
    #                         num_epochs=20,
    #                         batch_size=25,
    #                         update_rule='sgd',
    #                         optim_config={'learning_rate': l})
    #         solver.train()

    #         if best_accurcy > solver.best_train_acc:
    #             best_accurcy = solver.best_train_acc
    #             best_solver = solver

    # plt.plot(solver.loss_history, 'o')
    # plt.title('Training loss history')
    # plt.xlabel('Iteration')
    # plt.ylabel('Training loss')
    # plt.show()
    ##########################################################################

    ##########################################################################
    # Five-layer network (active experiment)
    learning_rate = 8e-4
    weight_scale = 1e-1
    model = FullyConnectedNet([100, 100, 100, 100],
                              weight_scale=weight_scale,
                              dtype=np.float64)
    solver = Solver(model,
                    small_data,
                    print_every=10,
                    num_epochs=20,
                    batch_size=25,
                    update_rule='sgd',
                    optim_config={'learning_rate': learning_rate})
    solver.train()
    plt.plot(solver.loss_history, 'o')
    plt.title('Training loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.show()
Exemplo n.º 7
0
def regularization_expriment():
    """Compare several dropout settings on a 500-example training subset.

    For every value in ``dropout_choices`` a ``FullyConnectedNet`` with a
    single hidden layer of 500 units is trained for 25 epochs with the
    ``'adam'`` update rule. The per-epoch training and validation accuracy
    of all runs are then plotted, one curve per dropout value.

    NOTE(review): whether the ``dropout`` argument is a keep probability or
    a drop probability depends on ``FullyConnectedNet`` -- confirm there.

    Returns:
        None. Results are shown in a matplotlib figure.
    """
    num_train = 500
    X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
        # Fixed: this entry used to hold y_test (the labels) by mistake.
        'X_test': X_test,
        'y_test': y_test,
    }

    solvers = {}
    dropout_choices = [0, 0.25, 0.5, 0.75, 0.8, 0.9, 0.99]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], weight_scale=5e-2, dropout=dropout)

        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule="adam",
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot the train and validation accuracy histories of every model.
    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 'o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 15)
    plt.show()
Exemplo n.º 8
0
def three_layer_convnet_test():
    """Overfit a ``ThreeLayerConvNet`` on 100 CIFAR-10 training images.

    The image arrays are re-ordered with ``transpose(0, 3, 1, 2)`` before
    training (presumably channels-last to channels-first -- confirm against
    ``get_CIFAR10_data``). The net is trained for 20 epochs with the
    ``'adam'`` update rule, then the per-iteration loss and the per-epoch
    train/validation accuracy are plotted. Earlier sanity checks are kept
    below as disabled, commented-out code.

    Returns:
        None. Results are shown in a matplotlib figure.
    """
    # --- Disabled: initial loss check -------------------------------------
    # model = ThreeLayerConvNet()
    #
    # N = 50
    # X = np.random.randn(N, 3, 32, 32)
    # y = np.random.randint(10, size=N)
    #
    # loss, grads = model.loss(X, y)
    # print('Initial loss (no regularization): {}'.format(loss))
    #
    # model.reg = 0.5
    # loss, grads = model.loss(X, y)
    # print("Initial loss(with regularization: {}".format(loss))
    #
    # --- Disabled: gradient check -----------------------------------------
    # num_inputs = 2
    # input_dim = (3, 16, 16)
    # reg = 0.0
    # num_classes = 10
    # X = np.random.randn(num_inputs, *input_dim)
    # y = np.random.randint(num_classes, size=num_inputs)
    #
    # model = ThreeLayerConvNet(num_filters=3, filter_size=3,
    #                         input_dim=input_dim, hidden_dim=7,
    #                         dtype=np.float64)
    # loss, grads = model.loss(X, y)
    # for param_name in sorted(grads):
    #     f = lambda _: model.loss(X, y)[0]
    #     param_grad_num = eval_numerical_gradient(f, model.params[param_name], verbose=False, h=1e-6)
    #     e = rel_error(param_grad_num, grads[param_name])
    #     print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

    # --- Active: overfit a small subset -----------------------------------
    # The test split is not needed here.
    X_train, y_train, X_val, y_val, _, _ = get_CIFAR10_data()

    subset_size = 100
    axis_order = (0, 3, 1, 2)
    subset = {
        'X_train': X_train[:subset_size].transpose(*axis_order),
        'y_train': y_train[:subset_size],
        'X_val': X_val.transpose(*axis_order),
        'y_val': y_val,
    }

    net = ThreeLayerConvNet(weight_scale=1e-2)
    trainer = Solver(net,
                     subset,
                     num_epochs=20,
                     batch_size=50,
                     update_rule='adam',
                     optim_config={'learning_rate': 4e-4},
                     verbose=True,
                     print_every=1)
    trainer.train()

    # Top panel: loss per iteration.
    plt.subplot(2, 1, 1)
    plt.plot(trainer.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')

    # Bottom panel: train curve first, then validation, matching the legend.
    plt.subplot(2, 1, 2)
    for accuracy_curve in (trainer.train_acc_history, trainer.val_acc_history):
        plt.plot(accuracy_curve, '-o')
    plt.legend(['train', 'val'], loc='upper left')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.show()