def check_layernorm_backward():
    print_formatted('Layernorm backward', 'bold', 'blue')

    np.random.seed(231)
    N, D = 4, 5
    x = 5 * np.random.randn(N, D) + 12
    gamma = np.random.randn(D)
    beta = np.random.randn(D)
    dout = np.random.randn(N, D)

    fx = lambda x: layernorm_forward(x, gamma, beta)[0]
    fg = lambda a: layernorm_forward(x, a, beta)[0]
    fb = lambda b: layernorm_forward(x, gamma, b)[0]

    dx_num = evaluate_numerical_gradient_array(fx, x, dout)
    da_num = evaluate_numerical_gradient_array(fg, gamma.copy(), dout)
    db_num = evaluate_numerical_gradient_array(fb, beta.copy(), dout)

    _, cache = layernorm_forward(x, gamma, beta)
    dx, dgamma, dbeta = layernorm_backward(dout, cache)

    print('(You should expect to see relative errors between 1e-12 and 1e-8)')
    print('dx error: ', relative_error(dx_num, dx))
    print('dgamma error: ', relative_error(da_num, dgamma))
    print('dbeta error: ', relative_error(db_num, dbeta))
    print()
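# The checks in this file lean on two helpers. Below is a minimal sketch of
# what they are assumed to do (the real implementations may differ in
# detail): relative_error is a max elementwise relative difference, and
# evaluate_numerical_gradient_array estimates a gradient for an array-valued
# f via centered finite differences. Assumes numpy is imported as np, as
# elsewhere in this file.
def _relative_error_sketch(x, y):
    # Guard the denominator so identical arrays give 0, not nan.
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))

def _numerical_gradient_array_sketch(f, x, df, h=1e-5):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h  # evaluate f at x + h along this coordinate
        pos = f(x).copy()
        x[ix] = old - h  # and at x - h
        neg = f(x).copy()
        x[ix] = old  # restore
        # Contract the centered difference against the upstream gradient df.
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad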
def check_spatial_groupnorm_backward():
    print_formatted('Spatial groupnorm backward', 'bold', 'blue')

    np.random.seed(231)
    N, C, H, W = 2, 6, 4, 5
    G = 2
    x = 5 * np.random.randn(N, C, H, W) + 12
    gamma = np.random.randn(1, C, 1, 1)
    beta = np.random.randn(1, C, 1, 1)
    dout = np.random.randn(N, C, H, W)

    gn_param = {}
    fx = lambda x: spatial_groupnorm_forward(x, gamma, beta, G, gn_param)[0]
    fg = lambda a: spatial_groupnorm_forward(x, a, beta, G, gn_param)[0]
    fb = lambda b: spatial_groupnorm_forward(x, gamma, b, G, gn_param)[0]

    dx_num = evaluate_numerical_gradient_array(fx, x, dout)
    da_num = evaluate_numerical_gradient_array(fg, gamma, dout)
    db_num = evaluate_numerical_gradient_array(fb, beta, dout)

    _, cache = spatial_groupnorm_forward(x, gamma, beta, G, gn_param)
    dx, dgamma, dbeta = spatial_groupnorm_backward(dout, cache)

    print('(You should expect to see relative errors between 1e-12 and 1e-7)')
    print('dx error: ', relative_error(dx_num, dx))
    print('dgamma error: ', relative_error(da_num, dgamma))
    print('dbeta error: ', relative_error(db_num, dbeta))
    print()
def check_dropout_fc_net():
    print_formatted('Fully connected net with dropout', 'bold', 'blue')

    np.random.seed(231)
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    print('Relative errors should be around e-6 or less.')
    print('For dropout=1 it is fine if the W2 error is on the order of e-5.')
    print()

    for dropout in [1, 0.75, 0.5]:
        print('Running check with dropout = ', dropout)
        model = FullyConnectedNet(input_dim=D,
                                  hidden_dims=[H1, H2],
                                  num_classes=C,
                                  weight_scale=5e-2,
                                  dropout=dropout,
                                  seed=123)

        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = evaluate_numerical_gradient(f,
                                                   model.params[name],
                                                   verbose=False,
                                                   h=1e-5)
            print('%s relative error: %.2e' %
                  (name, relative_error(grad_num, grads[name])))

        print()
def overfit_small_data(plot=False):
    print_formatted('Overfitting small data', 'stage')

    num_train = 50
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    weight_scale = 3e-2
    learning_rate = 1e-3
    update_rule = 'adam'

    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=[100, 100],
                              num_classes=10,
                              weight_scale=weight_scale)
    solver = Solver(model,
                    small_data,
                    update_rule=update_rule,
                    optim_config={'learning_rate': learning_rate},
                    lr_decay=0.95,
                    num_epochs=20,
                    batch_size=25,
                    print_every=10)
    solver.train()

    if plot:
        plot_stats('loss',
                   solvers={'fc_net': solver},
                   filename='overfitting_loss_history.png')
def check_spatial_batchnorm_forward_train_time():
    print_formatted('Train time spatial batchnorm forward', 'bold', 'blue')

    np.random.seed(231)
    N, C, H, W = 2, 3, 4, 5
    x = 4 * np.random.randn(N, C, H, W) + 10

    print('Before spatial batch normalization:')
    print('  Shape: ', x.shape)
    print('  Means: ', x.mean(axis=(0, 2, 3)))
    print('  Stds: ', x.std(axis=(0, 2, 3)))
    print()

    gamma, beta = np.ones(C), np.zeros(C)
    bn_param = {'mode': 'train'}
    out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
    print('After spatial batch normalization:')
    print('(Means should be close to 0 and stds close to 1)')
    print('  Shape: ', out.shape)
    print('  Means: ', out.mean(axis=(0, 2, 3)))
    print('  Stds: ', out.std(axis=(0, 2, 3)))
    print()

    gamma, beta = np.asarray([3, 4, 5]), np.asarray([6, 7, 8])
    out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
    print('After spatial batch normalization (nontrivial gamma, beta):')
    print(
        '(Means should be close to beta [6, 7, 8] and stds close to gamma [3, 4, 5])'
    )
    print('  Shape: ', out.shape)
    print('  Means: ', out.mean(axis=(0, 2, 3)))
    print('  Stds: ', out.std(axis=(0, 2, 3)))
    print()
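# A minimal sketch of how spatial_batchnorm_forward can be built on top of a
# working vanilla batchnorm_forward (an assumption about this codebase, but
# the math is forced by the check above): per-channel statistics over
# (N, H, W) are exactly batchnorm statistics over a reshaped (N*H*W, C)
# matrix.
def _spatial_batchnorm_forward_sketch(x, gamma, beta, bn_param):
    N, C, H, W = x.shape
    # Move channels last and flatten everything else into the batch axis.
    x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)
    out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
    return out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2), cache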
def check_spatial_norms():
    print_formatted('Check spatial norms', 'stage')
    check_spatial_batchnorm_forward_train_time()
    check_spatial_batchnorm_forward_test_time()
    check_spatial_batchnorm_backward()
    check_spatial_groupnorm_forward()
    check_spatial_groupnorm_backward()
def conv_net_overfitting(plot=False):
    print_formatted('Overfitting small data with convnet', 'stage')

    np.random.seed(231)

    num_train = 100
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }
    small_data['X_train'] = small_data['X_train'].reshape(
        (small_data['X_train'].shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)
    small_data['X_val'] = small_data['X_val'].reshape(
        (small_data['X_val'].shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)

    model = ThreeLayerConvNet(weight_scale=1e-2)

    solver = Solver(model,
                    small_data,
                    num_epochs=15,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    print_every=1)
    solver.train()

    if plot:
        plot_stats('loss',
                   'train_val_acc',
                   solvers={'convnet': solver},
                   filename='convnet_overfitting.png')
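# The flattened CIFAR-10 rows above are reshaped as height x width x channel
# and then transposed with (0, 3, 1, 2) into the (N, C, H, W) layout the conv
# layers expect. A tiny sanity check of that conversion on dummy data:
def _nhwc_to_nchw_demo():
    dummy = np.zeros((2, 3072))
    nchw = dummy.reshape(2, 32, 32, 3).transpose(0, 3, 1, 2)
    assert nchw.shape == (2, 3, 32, 32)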
def train_conv_net():
    print_formatted('Conv net', 'stage')

    data = {
        'X_train':
        X_train.reshape((X_train.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2),
        'y_train':
        y_train,
        'X_val':
        X_val.reshape((X_val.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2),
        'y_val':
        y_val,
    }

    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)

    solver = Solver(model,
                    data,
                    num_epochs=1,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    print_every=20,
                    checkpoint_name='convnet')
    solver.train()
def check_batchnorm():
    print_formatted('Batchnorm checks', 'stage')
    check_batchnorm_forward_train_time()
    check_batchnorm_forward_test_time()
    check_batchnorm_backward()
    check_batchnorm_backward_alt()
    check_batchnorm_fc_net()
def check_spatial_groupnorm_forward():
    print_formatted('Spatial groupnorm forward', 'bold', 'blue')

    np.random.seed(231)
    N, C, H, W = 2, 6, 4, 5
    G = 2
    x = 4 * np.random.randn(N, C, H, W) + 10
    x_g = x.reshape((N * G, -1))

    print('Before spatial group normalization:')
    print('  Shape: ', x.shape)
    print('  Means: ', x_g.mean(axis=1))
    print('  Stds: ', x_g.std(axis=1))
    print()

    gamma, beta = np.ones((1, C, 1, 1)), np.zeros((1, C, 1, 1))
    bn_param = {'mode': 'train'}

    out, _ = spatial_groupnorm_forward(x, gamma, beta, G, bn_param)
    out_g = out.reshape((N * G, -1))
    print('After spatial group normalization:')
    print('(Means should be close to 0 and stds close to 1)')
    print('  Shape: ', out.shape)
    print('  Means: ', out_g.mean(axis=1))
    print('  Stds: ', out_g.std(axis=1))
    print()
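# A minimal sketch of spatial_groupnorm_forward matching the statistics
# checked above: split each sample's channels into G groups, normalize each
# group of (C//G)*H*W values independently, then apply the per-channel gamma
# and beta. The cache layout is an assumption.
def _spatial_groupnorm_forward_sketch(x, gamma, beta, G, gn_param):
    eps = gn_param.get('eps', 1e-5)
    N, C, H, W = x.shape
    x_g = x.reshape(N * G, -1)
    mu = x_g.mean(axis=1, keepdims=True)
    var = x_g.var(axis=1, keepdims=True)
    x_hat = ((x_g - mu) / np.sqrt(var + eps)).reshape(N, C, H, W)
    out = gamma * x_hat + beta  # gamma, beta broadcast from (1, C, 1, 1)
    return out, (x_hat, gamma, mu, var, eps, G)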
def check_batchnorm_forward_test_time():
    print_formatted('Test time batchnorm forward', 'bold', 'blue')

    np.random.seed(231)
    N, D1, D2, D3 = 200, 50, 60, 3
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)

    bn_param = {'mode': 'train'}
    gamma = np.ones(D3)
    beta = np.zeros(D3)

    for t in range(50):
        X = np.random.randn(N, D1)
        a = np.maximum(0, X.dot(W1)).dot(W2)
        batchnorm_forward(a, gamma, beta, bn_param)

    bn_param['mode'] = 'test'
    X = np.random.randn(N, D1)
    a = np.maximum(0, X.dot(W1)).dot(W2)
    a_norm, _ = batchnorm_forward(a, gamma, beta, bn_param)

    print('After batch normalization (test-time):')
    print('(Means should be near 0 and stds near 1)')
    print_mean_std(a_norm, axis=0)
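# A minimal sketch of batchnorm_forward covering both modes exercised above:
# train mode normalizes with batch statistics and updates exponential running
# averages, test mode reuses the running averages. The bn_param field names
# follow the common convention and are an assumption here.
def _batchnorm_forward_sketch(x, gamma, beta, bn_param):
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)
    D = x.shape[1]
    running_mean = bn_param.get('running_mean', np.zeros(D))
    running_var = bn_param.get('running_var', np.zeros(D))
    if mode == 'train':
        mu, var = x.mean(axis=0), x.var(axis=0)
        x_hat = (x - mu) / np.sqrt(var + eps)
        running_mean = momentum * running_mean + (1 - momentum) * mu
        running_var = momentum * running_var + (1 - momentum) * var
    else:
        x_hat = (x - running_mean) / np.sqrt(running_var + eps)
        var = running_var
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var
    # Cache what a simple backward pass would need.
    return gamma * x_hat + beta, (x_hat, gamma, var, eps)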
def check_batchnorm_backward():
    print_formatted('Batchnorm backward', 'bold', 'blue')

    np.random.seed(231)
    N, D = 4, 5
    x = 5 * np.random.randn(N, D) + 12
    gamma = np.random.randn(D)
    beta = np.random.randn(D)
    dout = np.random.randn(N, D)

    bn_param = {'mode': 'train'}
    fx = lambda x: batchnorm_forward(x, gamma, beta, bn_param)[0]
    fg = lambda a: batchnorm_forward(x, a, beta, bn_param)[0]
    fb = lambda b: batchnorm_forward(x, gamma, b, bn_param)[0]

    dx_num = evaluate_numerical_gradient_array(fx, x, dout)
    da_num = evaluate_numerical_gradient_array(fg, gamma, dout)
    db_num = evaluate_numerical_gradient_array(fb, beta, dout)

    _, cache = batchnorm_forward(x, gamma, beta, bn_param)
    dx, dgamma, dbeta = batchnorm_backward(dout, cache)

    print('(You should expect to see relative errors between 1e-13 and 1e-8)')
    print('dx error: ', relative_error(dx_num, dx))
    print('dgamma error: ', relative_error(da_num, dgamma))
    print('dbeta error: ', relative_error(db_num, dbeta))
    print()
def check_batchnorm_forward_train_time():
    print_formatted('Train time batchnorm forward', 'bold', 'blue')

    np.random.seed(231)
    N, D1, D2, D3 = 200, 50, 60, 3
    X = np.random.randn(N, D1)
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)
    a = np.maximum(0, X.dot(W1)).dot(W2)

    print('Before batch normalization:')
    print_mean_std(a, axis=0)

    gamma = np.ones((D3, ))
    beta = np.zeros((D3, ))
    a_norm, _ = batchnorm_forward(a, gamma, beta, {'mode': 'train'})
    print('After batch normalization (gamma=1, beta=0)')
    print('(Means should be close to 0 and stds close to 1)')
    print_mean_std(a_norm, axis=0)

    gamma = np.asarray([1.0, 2.0, 3.0])
    beta = np.asarray([11.0, 12.0, 13.0])
    a_norm, _ = batchnorm_forward(a, gamma, beta, {'mode': 'train'})
    print('After batch normalization (gamma=', gamma, ', beta=', beta, ')')
    print('(Now means should be close to beta and stds close to gamma)')
    print_mean_std(a_norm, axis=0)
def check_layernorm_forward():
    print_formatted('Layernorm forward', 'bold', 'blue')

    np.random.seed(231)
    N, D1, D2, D3 = 4, 50, 60, 3
    X = np.random.randn(N, D1)
    W1 = np.random.randn(D1, D2)
    W2 = np.random.randn(D2, D3)
    a = np.maximum(0, X.dot(W1)).dot(W2)

    print('Before layer normalization:')
    print_mean_std(a, axis=1)

    gamma = np.ones(D3)
    beta = np.zeros(D3)
    print('After layer normalization (gamma=1, beta=0)')
    print('(Means should be close to 0 and stds close to 1)')
    a_norm, _ = layernorm_forward(a, gamma, beta)
    print_mean_std(a_norm, axis=1)

    gamma = np.asarray([3.0, 3.0, 3.0])
    beta = np.asarray([5.0, 5.0, 5.0])
    print('After layer normalization (gamma=', gamma, ', beta=', beta, ')')
    print('(Now means should be close to beta and stds close to gamma)')
    a_norm, _ = layernorm_forward(a, gamma, beta)
    print_mean_std(a_norm, axis=1)
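# A minimal sketch of layernorm_forward: the same normalization as batchnorm
# but with statistics taken per sample (axis=1) rather than per feature, so
# nothing depends on the batch and no train/test distinction is needed.
def _layernorm_forward_sketch(x, gamma, beta, eps=1e-5):
    mu = x.mean(axis=1, keepdims=True)
    var = x.var(axis=1, keepdims=True)
    x_hat = (x - mu) / np.sqrt(var + eps)
    return gamma * x_hat + beta, (x_hat, gamma, var, eps)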
def check_batchnorm_backward_alt():
    print_formatted('Batchnorm backward alt', 'bold', 'blue')

    np.random.seed(231)
    N, D = 100, 500
    x = 5 * np.random.randn(N, D) + 12
    gamma = np.random.randn(D)
    beta = np.random.randn(D)
    dout = np.random.randn(N, D)

    bn_param = {'mode': 'train'}
    out, cache = batchnorm_forward(x, gamma, beta, bn_param)

    t1 = time.time()
    dx1, dgamma1, dbeta1 = batchnorm_backward(dout, cache)
    t2 = time.time()
    dx2, dgamma2, dbeta2 = batchnorm_backward_alt(dout, cache)
    t3 = time.time()

    print('dx difference: ', relative_error(dx1, dx2))
    print('dgamma difference: ', relative_error(dgamma1, dgamma2))
    print('dbeta difference: ', relative_error(dbeta1, dbeta2))
    print(
        'batchnorm_backward_alt is %.2f times faster than batchnorm_backward' %
        ((t2 - t1) / (t3 - t2)))
    print()
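# The speedup measured above comes from collapsing the staged backward pass
# into one closed-form expression. A sketch of the usual simplification,
# assuming the cache holds (x_hat, gamma, var, eps) as in the forward
# sketches in this file:
def _batchnorm_backward_alt_sketch(dout, cache):
    x_hat, gamma, var, eps = cache
    N = dout.shape[0]
    dbeta = dout.sum(axis=0)
    dgamma = (dout * x_hat).sum(axis=0)
    # One-shot dx, obtained by pushing dout through x_hat, var and mu and
    # collecting terms.
    dx = gamma / (N * np.sqrt(var + eps)) * (
        N * dout - dbeta - x_hat * dgamma)
    return dx, dgamma, dbeta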
def compare_update_rules(plot=False):
    print_formatted('Update rules', 'stage')

    num_train = 4000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    learning_rates = {
        'sgd': 1e-2,
        'sgd_momentum': 1e-2,
        'nesterov_momentum': 1e-2,
        'adagrad': 1e-4,
        'rmsprop': 1e-4,
        'adam': 1e-3
    }
    solvers = {}

    for update_rule in [
            'sgd', 'sgd_momentum', 'nesterov_momentum', 'adagrad', 'rmsprop',
            'adam'
    ]:
        print_formatted('running with ' + update_rule, 'bold', 'blue')
        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[100] * 5,
                                  num_classes=10,
                                  weight_scale=5e-2)

        solver = Solver(model,
                        small_data,
                        num_epochs=5,
                        batch_size=100,
                        update_rule=update_rule,
                        optim_config={
                            'learning_rate': learning_rates[update_rule],
                        },
                        verbose=True)
        solvers[update_rule] = solver
        solver.train()
        print()

    if plot:
        plot_stats('loss',
                   'train_acc',
                   'val_acc',
                   solvers=solvers,
                   filename='update_rules_comparison.png')
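# For reference, a minimal sketch of the adam update used as the default rule
# above (bias-corrected first and second moment estimates; the config keys
# are the usual names and an assumption here):
def _adam_sketch(w, dw, config):
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)
    config.setdefault('beta2', 0.999)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('m', np.zeros_like(w))
    config.setdefault('v', np.zeros_like(w))
    config.setdefault('t', 0)
    config['t'] += 1
    config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dw
    config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * dw**2
    m_hat = config['m'] / (1 - config['beta1'] ** config['t'])
    v_hat = config['v'] / (1 - config['beta2'] ** config['t'])
    next_w = w - config['learning_rate'] * m_hat / (np.sqrt(v_hat) +
                                                    config['epsilon'])
    return next_w, config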
def check_dropout_backward():
    print_formatted('Dropout backward', 'bold', 'blue')

    np.random.seed(231)
    x = np.random.randn(10, 10) + 10
    dout = np.random.randn(*x.shape)

    dropout_param = {'mode': 'train', 'p': 0.2, 'seed': 123}
    out, cache = dropout_forward(x, dropout_param)
    dx = dropout_backward(dout, cache)
    dx_num = evaluate_numerical_gradient_array(
        lambda xx: dropout_forward(xx, dropout_param)[0], x, dout)

    print('(Relative error should be around e-10 or less)')
    print('dx relative error: ', relative_error(dx, dx_num))
    print()
def check_dropout_forward():
    print_formatted('Dropout forward', 'bold', 'blue')

    np.random.seed(231)
    x = np.random.randn(500, 500) + 10

    for p in [0.25, 0.4, 0.7]:
        out, _ = dropout_forward(x, {'mode': 'train', 'p': p})
        out_test, _ = dropout_forward(x, {'mode': 'test', 'p': p})

        print('Running tests with p = ', p)
        print('Mean of input: ', x.mean())
        print('Mean of train-time output: ', out.mean())
        print('Mean of test-time output: ', out_test.mean())
        print('Fraction of train-time output set to zero: ', (out == 0).mean())
        print('Fraction of test-time output set to zero: ',
              (out_test == 0).mean())
        print()
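# The numbers printed above are consistent with inverted dropout where p is
# the keep probability (elsewhere in this file dropout=1 means "no dropout").
# A minimal sketch under that assumption:
def _dropout_forward_sketch(x, dropout_param):
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])
    if mode == 'train':
        # Keep each unit with probability p, rescaling so the expected
        # activation matches test time.
        mask = (np.random.rand(*x.shape) < p) / p
        out = x * mask
    else:
        mask = None
        out = x  # test time is the identity under inverted dropout
    return out, (dropout_param, mask)

def _dropout_backward_sketch(dout, cache):
    dropout_param, mask = cache
    return dout * mask if dropout_param['mode'] == 'train' else dout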
def visualize_convnet_filters():
    print_formatted('Visualizing convnet filters', 'stage')

    with open('convnet_epoch_1.pkl', 'rb') as f:
        checkpoint = pickle.load(f)
    W1 = checkpoint['model'].params['W1'].transpose(0, 2, 3, 1)
    N, H, W, C = W1.shape
    grid_size = int(ceil(sqrt(N)))

    for i in range(N):
        img = W1[i]
        low, high = np.min(img), np.max(img)
        rgb_img = 255 * (img - low) / (high - low)
        plt.subplot(grid_size, grid_size, i + 1)
        plt.imshow(rgb_img.astype('uint8'))
        plt.axis('off')

    plt.gcf().set_size_inches(10, 10)
    plt.savefig('plots/convnet_filters.png')
def check_spatial_batchnorm_forward_test_time():
    print_formatted('Test time spatial batchnorm forward', 'bold', 'blue')

    np.random.seed(231)
    N, C, H, W = 10, 4, 11, 12
    bn_param = {'mode': 'train'}
    gamma = np.ones(C)
    beta = np.zeros(C)
    for t in range(50):
        x = 2.3 * np.random.randn(N, C, H, W) + 13
        spatial_batchnorm_forward(x, gamma, beta, bn_param)
    bn_param['mode'] = 'test'
    x = 2.3 * np.random.randn(N, C, H, W) + 13
    a_norm, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)

    print('After spatial batch normalization (test-time):')
    print('(Means should be near 0 and stds near 1)')
    print('  means: ', a_norm.mean(axis=(0, 2, 3)))
    print('  stds: ', a_norm.std(axis=(0, 2, 3)))
    print()
def train_two_layer(plot=False):
    print_formatted('Two layer net', 'stage')

    model = TwoLayerNet(input_dim=3072, hidden_dim=100, num_classes=10)
    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }
    solver = Solver(model,
                    data,
                    num_epochs=1,
                    print_every=100,
                    batch_size=100,
                    lr_decay=0.95)
    solver.train()

    if plot:
        plot_stats('loss',
                   'train_val_acc',
                   solvers={'two_layer_net': solver},
                   filename='two_layer_net_stats.png')
def train_with_layernorm(plot=False):
    print_formatted('Layer normalization', 'stage')

    hidden_dims = [100, 100, 100, 100, 100]
    weight_scale = 2e-2

    num_train = 1000
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    print_formatted('without layernorm', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=hidden_dims,
                              num_classes=10,
                              weight_scale=weight_scale)
    solver = Solver(model,
                    small_data,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    num_epochs=10,
                    batch_size=50,
                    print_every=20)
    solver.train()
    print()

    print_formatted('with layernorm', 'bold', 'blue')
    ln_model = FullyConnectedNet(input_dim=3072,
                                 hidden_dims=hidden_dims,
                                 num_classes=10,
                                 weight_scale=weight_scale,
                                 normalization='layernorm')
    ln_solver = Solver(ln_model,
                       small_data,
                       update_rule='adam',
                       optim_config={
                           'learning_rate': 1e-3,
                       },
                       num_epochs=10,
                       batch_size=50,
                       print_every=20)
    ln_solver.train()

    if plot:
        plot_stats('loss',
                   'train_acc',
                   'val_acc',
                   solvers={
                       'baseline': solver,
                       'with_norm': ln_solver
                   },
                   filename='layernorm.png')
def check_batchnorm_fc_net():
    print_formatted('Fully connected net with batchnorm', 'bold', 'blue')

    np.random.seed(231)
    N, D, H1, H2, C = 2, 15, 20, 30, 10
    X = np.random.randn(N, D)
    y = np.random.randint(C, size=(N, ))

    print('Relative errors for W should be between 1e-10 and 1e-4.')
    print('Relative errors for b should be between 1e-10 and 1e-8.')
    print(
        'Relative errors for gammas and betas should be between 1e-9 and 1e-8.')
    print()

    for reg in [0, 3.14]:
        print('Running check with reg = ', reg)
        model = FullyConnectedNet(input_dim=D,
                                  hidden_dims=[H1, H2],
                                  num_classes=C,
                                  weight_scale=5e-2,
                                  reg=reg,
                                  normalization='batchnorm')

        loss, grads = model.loss(X, y)
        print('Initial loss: ', loss)

        for name in sorted(grads):
            f = lambda _: model.loss(X, y)[0]
            grad_num = evaluate_numerical_gradient(f,
                                                   model.params[name],
                                                   verbose=False,
                                                   h=1e-5)
            print('%s relative error: %.2e' %
                  (name, relative_error(grad_num, grads[name])))

        if reg == 0: print()
def train_with_dropout(plot=False):
    print_formatted('Dropout', 'stage')

    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': X_train[:num_train],
        'y_train': y_train[:num_train],
        'X_val': X_val,
        'y_val': y_val,
    }

    solvers = {}
    dropout_choices = [1, 0.25]
    for dropout in dropout_choices:
        if dropout == 1:
            print_formatted('without dropout, p = 1', 'bold', 'blue')
        else:
            print_formatted('with dropout, p = %.2f' % dropout, 'bold', 'blue')

        model = FullyConnectedNet(input_dim=3072,
                                  hidden_dims=[500],
                                  num_classes=10,
                                  dropout=dropout)

        solver = Solver(model,
                        small_data,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        num_epochs=25,
                        batch_size=100,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

        if dropout == 1: print()

    if plot:
        plot_stats('train_acc',
                   'val_acc',
                   solvers={
                       '1.00 dropout': solvers[1],
                       '0.25 dropout': solvers[0.25]
                   },
                   filename='dropout.png')
def train_best_fc_model(plot=False):
    print_formatted('Best fully connected net', 'stage')

    hidden_dims = [100, 100, 100]
    weight_scale = 2e-2
    num_epochs = 10
    dropout = 1

    data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
    }

    print_formatted('training', 'bold', 'blue')
    model = FullyConnectedNet(input_dim=3072,
                              hidden_dims=hidden_dims,
                              num_classes=10,
                              weight_scale=weight_scale,
                              normalization='batchnorm',
                              dropout=dropout)
    solver = Solver(model,
                    data,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    num_epochs=num_epochs,
                    batch_size=50,
                    print_every=100)
    solver.train()
    print()

    if plot: plot_stats('loss', 'train_val_acc', solvers={'best_fc': solver})

    print_formatted('evaluating', 'bold', 'blue')
    y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
    y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
    print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
    print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())
def check_layernorm():
    print_formatted('Layernorm checks', 'stage')
    check_layernorm_forward()
    check_layernorm_backward()
    check_layernorm_fc_net()
def check_dropout():
    print_formatted('Dropout checks', 'stage')
    check_dropout_forward()
    check_dropout_backward()
    check_dropout_fc_net()
from batchnorm_checks import check_batchnorm
from layernorm_checks import check_layernorm
from dropout_checks import check_dropout
from conv_checks import check_conv
import numpy as np
import pickle
from math import ceil, sqrt
import matplotlib.pyplot as plt
from spatial_norms_checks import check_spatial_norms
# Assumed: print_formatted and load_CIFAR10_sample live in a local utilities
# module; adjust this import to match the actual project layout.
from utils import print_formatted, load_CIFAR10_sample
''' Hyperparameters '''

subtract_mean = True
normalize = False
''' Data '''

print_formatted('Load data', 'stage')
X_train, y_train, X_val, y_val, X_test, y_test = load_CIFAR10_sample(
    'datasets/cifar-10-batches-py',
    num_train=49000,
    num_val=1000,
    num_test=10000,
    mean_subtr=subtract_mean,
    norm=normalize)
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_val shape:', X_val.shape)
print('y_val shape:', y_val.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
''' Actions '''