def main():
    # step 1: load the data, transform as needed
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
Example #2
def main():
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Ytrain_ind = y2indicator(Ytrain)

    Xtest = rearrange(test['X'])
    Ytest = test['y'].flatten() - 1
    del test
    Ytest_ind = y2indicator(Ytest)

    # Gradient descent parameters
    max_iter = 6
    print_period = 10
    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    # Limit samples since input will always have to be same size
    # Could also do N = N // batch_sz * batch_sz
    Xtrain = Xtrain[:73000, ]
    Ytrain = Ytrain[:73000]
    Xtest = Xtest[:26000, ]
    Ytest = Ytest[:26000]
    Ytest_ind = Ytest_ind[:26000, ]

    # Initial weights
    M = 500
    K = 10
    poolsz = (2, 2)

    # W1_shape = (filter_width, filter_height,
    #             num_color_channels, num_feature_maps)
    W1_shape = (5, 5, 3, 20)
    W1_init = init_filter(W1_shape, poolsz)
    # One bias per output feature map
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32)

    # W2_shape = (filter_width, filter_height,
    #             old_num_feature_maps, num_feature_maps)
    W2_shape = (5, 5, 20, 50)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)

    # Vanilla ANN weights
    W3_init = np.random.randn(W2_shape[-1] * 8 * 8,
                              M) / np.sqrt(W2_shape[-1] * 8 * 8 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)

    # Define variables and expressions
    # Using None as the first shape element takes up too much RAM
    X = tf.placeholder(tf.float32, shape=(batch_sz, 32, 32, 3), name='X')
    T = tf.placeholder(tf.float32, shape=(batch_sz, K), name='T')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))
    W4 = tf.Variable(W4_init.astype(np.float32))
    b4 = tf.Variable(b4_init.astype(np.float32))

    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    Z2r = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    Z3 = tf.nn.relu(tf.matmul(Z2r, W3) + b3)
    Yish = tf.matmul(Z3, W4) + b4

    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=Yish, labels=T))

    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99,
                                         momentum=0.9).minimize(cost)

    # Use this to calculate the error rate
    predict_op = tf.argmax(Yish, 1)

    t0 = datetime.now()
    LL = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        for i in range(max_iter):
            for j in range(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ]
                Ybatch = Ytrain_ind[j * batch_sz:(j * batch_sz + batch_sz), ]

                if len(Xbatch) == batch_sz:
                    session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                    if j % print_period == 0:
                        # Due to RAM limitations we need to have a
                        # fixed size input, so we have this ugly total
                        # cost and prediction computation.
                        test_cost = 0
                        prediction = np.zeros(len(Xtest))
                        for k in range(len(Xtest) // batch_sz):
                            Xtestbatch = Xtest[k * batch_sz:(k * batch_sz +
                                                             batch_sz), ]
                            Ytestbatch = Ytest_ind[k * batch_sz:(k * batch_sz +
                                                                 batch_sz), ]
                            test_cost += session.run(cost,
                                                     feed_dict={
                                                         X: Xtestbatch,
                                                         T: Ytestbatch
                                                     })
                            prediction[k*batch_sz:(k*batch_sz + batch_sz)] = session.run(
                                predict_op, feed_dict={X: Xtestbatch})
                        err = error_rate(prediction, Ytest)
                        print("Cost / err at iteration i = %d,\
                              j = %d: %.3f / %.3f" % (i, j, test_cost, err))
                        LL.append(test_cost)
    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(LL)
    plt.show()
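
The helpers convpool and init_filter are called above but never defined in these snippets. A minimal sketch that is consistent with the shapes used here (channels-last filters of shape (filter_height, filter_width, in_maps, out_maps), and 'SAME' padding so the 32x32 input becomes 16x16 and then 8x8, matching the W3 input size of num_maps * 8 * 8). The scaling inside init_filter is an assumption, not necessarily the original formula.

import numpy as np
import tensorflow as tf


def convpool(X, W, b):
    # convolution + bias, then 2x2 max-pooling and a ReLU
    conv_out = tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')
    conv_out = tf.nn.bias_add(conv_out, b)
    pool_out = tf.nn.max_pool(conv_out, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')
    return tf.nn.relu(pool_out)


def init_filter(shape, poolsz):
    # small random filters scaled roughly by fan-in (assumed scheme)
    w = np.random.randn(*shape) / np.sqrt(np.prod(shape[:-1]))
    return w.astype(np.float32)
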
def main():
    # step 1: load the data, transform as needed
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Ytrain_ind = y2indicator(Ytrain)

    Xtest  = rearrange(test['X'])
    Ytest  = test['y'].flatten() - 1
    del test
    Ytest_ind  = y2indicator(Ytest)


    max_iter = 8
    print_period = 10

    lr = np.float32(0.00001)
    reg = np.float32(0.01)
    mu = np.float32(0.99)

    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    M = 500
    K = 10
    poolsz = (2, 2)

    # after conv will be of dimension 32 - 5 + 1 = 28
    # after downsample 28 / 2 = 14
    W1_shape = (20, 3, 5, 5) # (num_feature_maps, num_color_channels, filter_width, filter_height)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[0], dtype=np.float32) # one bias per output feature map

    # after conv will be of dimension 14 - 5 + 1 = 10
    # after downsample 10 / 2 = 5
    W2_shape = (50, 20, 5, 5) # (num_feature_maps, old_num_feature_maps, filter_width, filter_height)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[0], dtype=np.float32)

    # vanilla ANN weights
    W3_init = np.random.randn(W2_shape[0]*5*5, M) / np.sqrt(W2_shape[0]*5*5 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)


    # step 2: define theano variables and expressions
    X = T.tensor4('X', dtype='float32')
    Y = T.matrix('T')
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')
    W3 = theano.shared(W3_init.astype(np.float32), 'W3')
    b3 = theano.shared(b3_init, 'b3')
    W4 = theano.shared(W4_init.astype(np.float32), 'W4')
    b4 = theano.shared(b4_init, 'b4')

    # momentum changes
    dW1 = theano.shared(np.zeros(W1_init.shape, dtype=np.float32), 'dW1')
    db1 = theano.shared(np.zeros(b1_init.shape, dtype=np.float32), 'db1')
    dW2 = theano.shared(np.zeros(W2_init.shape, dtype=np.float32), 'dW2')
    db2 = theano.shared(np.zeros(b2_init.shape, dtype=np.float32), 'db2')
    dW3 = theano.shared(np.zeros(W3_init.shape, dtype=np.float32), 'dW3')
    db3 = theano.shared(np.zeros(b3_init.shape, dtype=np.float32), 'db3')
    dW4 = theano.shared(np.zeros(W4_init.shape, dtype=np.float32), 'dW4')
    db4 = theano.shared(np.zeros(b4_init.shape, dtype=np.float32), 'db4')

    # forward pass
    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3)
    pY = T.nnet.softmax( Z3.dot(W4) + b4)

    # define the cost function and prediction
    params = (W1, b1, W2, b2, W3, b3, W4, b4)
    reg_cost = reg * sum((param*param).sum() for param in params)
    cost = -(Y * T.log(pY)).sum() + reg_cost
    prediction = T.argmax(pY, axis=1)

    # step 3: training expressions and functions
    update_W1 = W1 + mu*dW1 - lr*T.grad(cost, W1)
    update_b1 = b1 + mu*db1 - lr*T.grad(cost, b1)
    update_W2 = W2 + mu*dW2 - lr*T.grad(cost, W2)
    update_b2 = b2 + mu*db2 - lr*T.grad(cost, b2)
    update_W3 = W3 + mu*dW3 - lr*T.grad(cost, W3)
    update_b3 = b3 + mu*db3 - lr*T.grad(cost, b3)
    update_W4 = W4 + mu*dW4 - lr*T.grad(cost, W4)
    update_b4 = b4 + mu*db4 - lr*T.grad(cost, b4)

    # update weight changes
    update_dW1 = mu*dW1 - lr*T.grad(cost, W1)
    update_db1 = mu*db1 - lr*T.grad(cost, b1)
    update_dW2 = mu*dW2 - lr*T.grad(cost, W2)
    update_db2 = mu*db2 - lr*T.grad(cost, b2)
    update_dW3 = mu*dW3 - lr*T.grad(cost, W3)
    update_db3 = mu*db3 - lr*T.grad(cost, b3)
    update_dW4 = mu*dW4 - lr*T.grad(cost, W4)
    update_db4 = mu*db4 - lr*T.grad(cost, b4)

    train = theano.function(
        inputs=[X, Y],
        updates=[
            (W1, update_W1),
            (b1, update_b1),
            (W2, update_W2),
            (b2, update_b2),
            (W3, update_W3),
            (b3, update_b3),
            (W4, update_W4),
            (b4, update_b4),
            (dW1, update_dW1),
            (db1, update_db1),
            (dW2, update_dW2),
            (db2, update_db2),
            (dW3, update_dW3),
            (db3, update_db3),
            (dW4, update_dW4),
            (db4, update_db4),
        ],
    )

    # create another function for this because we want it over the whole dataset
    get_prediction = theano.function(
        inputs=[X, Y],
        outputs=[cost, prediction],
    )

    t0 = datetime.now()
    LL = []
    for i in range(max_iter):
        for j in range(n_batches):
            Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
            Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),]

            train(Xbatch, Ybatch)
            if j % print_period == 0:
                cost_val, prediction_val = get_prediction(Xtest, Ytest_ind)
                err = error_rate(prediction_val, Ytest)
                print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, cost_val, err))
                LL.append(cost_val)
    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(LL)
    plt.show()

    # visualize W1 (20, 3, 5, 5)
    W1_val = W1.get_value()
    grid = np.zeros((8*5, 8*5))
    m = 0
    n = 0
    for i in range(20):
        for j in range(3):
            filt = W1_val[i,j]
            grid[m*5:(m+1)*5,n*5:(n+1)*5] = filt
            m += 1
            if m >= 8:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W1")
    plt.show()

    # visualize W2 (50, 20, 5, 5)
    W2_val = W2.get_value()
    grid = np.zeros((32*5, 32*5))
    m = 0
    n = 0
    for i in range(50):
        for j in range(20):
            filt = W2_val[i,j]
            grid[m*5:(m+1)*5,n*5:(n+1)*5] = filt
            m += 1
            if m >= 32:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W2")
    plt.show()
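
The Theano versions above call convpool and init_filter with channels-first filters of shape (out_maps, in_maps, filter_h, filter_w). A possible definition consistent with the comments (valid convolution: 32 -> 28 -> 14 and 14 -> 10 -> 5 after 2x2 pooling); it assumes a Theano version where pool_2d takes ws=, and the activation and scaling choices are assumptions rather than the original code.

import numpy as np
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.signal.pool import pool_2d


def convpool(X, W, b, poolsize=(2, 2)):
    conv_out = conv2d(input=X, filters=W)  # border_mode='valid' by default
    pooled = pool_2d(conv_out, ws=poolsize, ignore_border=True)
    # one bias per output feature map, broadcast over batch and spatial dims
    return T.maximum(pooled + b.dimshuffle('x', 0, 'x', 'x'), 0)


def init_filter(shape, poolsz):
    # small random filters scaled roughly by fan-in (assumed scheme)
    w = np.random.randn(*shape) / np.sqrt(np.prod(shape[1:]))
    return w.astype(np.float32)
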
def main():
    # step 1: load the data, transform as needed
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)

    Xtest  = rearrange(test['X'])
    Ytest  = test['y'].flatten() - 1
    del test


    max_iter = 6
    print_period = 10

    lr = np.float32(1e-2)
    mu = np.float32(0.99)

    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    M = 500
    K = 10
    poolsz = (2, 2)

    # after conv will be of dimension 32 - 5 + 1 = 28
    # after downsample 28 / 2 = 14
    W1_shape = (20, 3, 5, 5) # (num_feature_maps, num_color_channels, filter_width, filter_height)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[0], dtype=np.float32) # one bias per output feature map

    # after conv will be of dimension 14 - 5 + 1 = 10
    # after downsample 10 / 2 = 5
    W2_shape = (50, 20, 5, 5) # (num_feature_maps, old_num_feature_maps, filter_width, filter_height)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[0], dtype=np.float32)

    # vanilla ANN weights
    W3_init = np.random.randn(W2_shape[0]*5*5, M) / np.sqrt(W2_shape[0]*5*5 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)


    # step 2: define theano variables and expressions
    X = T.tensor4('X', dtype='float32')
    Y = T.ivector('T')
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')
    W3 = theano.shared(W3_init.astype(np.float32), 'W3')
    b3 = theano.shared(b3_init, 'b3')
    W4 = theano.shared(W4_init.astype(np.float32), 'W4')
    b4 = theano.shared(b4_init, 'b4')

    # forward pass
    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3)
    pY = T.nnet.softmax( Z3.dot(W4) + b4)

    # define the cost function and prediction
    cost = -(T.log(pY[T.arange(Y.shape[0]), Y])).mean()
    prediction = T.argmax(pY, axis=1)

    # step 3: training expressions and functions
    params = [W1, b1, W2, b2, W3, b3, W4, b4]

    # momentum changes
    dparams = [
        theano.shared(
            np.zeros_like(
                p.get_value(),
                dtype=np.float32
            )
        ) for p in params
    ]

    updates = []
    grads = T.grad(cost, params)
    for p, dp, g in zip(params, dparams, grads):
        dp_update = mu*dp - lr*g
        p_update = p + dp_update

        updates.append((dp, dp_update))
        updates.append((p, p_update))

    train = theano.function(
        inputs=[X, Y],
        updates=updates,
    )

    # create another function for this because we want it over the whole dataset
    get_prediction = theano.function(
        inputs=[X, Y],
        outputs=[cost, prediction],
    )

    t0 = datetime.now()
    costs = []
    for i in range(max_iter):
        Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
        for j in range(n_batches):
            Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
            Ybatch = Ytrain[j*batch_sz:(j*batch_sz + batch_sz),]

            train(Xbatch, Ybatch)
            if j % print_period == 0:
                cost_val, prediction_val = get_prediction(Xtest, Ytest)
                err = error_rate(prediction_val, Ytest)
                print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, cost_val, err))
                costs.append(cost_val)
    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(costs)
    plt.show()
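
error_rate is used by every example but not defined in any of them. A minimal sketch, assuming both arguments are 1-D arrays of class ids:

import numpy as np


def error_rate(p, t):
    # fraction of predictions that do not match the targets
    return np.mean(p != t)
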
Example #5
def main():
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    Xtrain = rearrange(train['X'])
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Ytrain = train['y'].flatten() - 1
    # print len(Ytrain)
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)

    Xtest = rearrange(test['X'])
    Ytest = test['y'].flatten() - 1
    del test

    # gradient descent params
    max_iter = 6  # epoch
    print_period = 10
    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    # limit samples since input will always have to be same size
    # you could also just do N = N // batch_sz * batch_sz
    Xtrain = Xtrain[:73000, ]
    Ytrain = Ytrain[:73000]
    Xtest = Xtest[:26000, ]
    Ytest = Ytest[:26000]

    # initial weights
    M = 500  # hidden units of ANN
    K = 10  # number of classes
    poolsz = (2, 2)

    # output is (N, 32, 32, 3), (#images, height, width, #color)
    # (filter_width, filter_height, num_color_channels, num_feature_maps)
    W1_shape = (5, 5, 3, 20)
    W1_init = init_filter(W1_shape, poolsz)
    # one bias per output feature map (20 biases)
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32)

    # (filter_width, filter_height, num_feature_maps_in, num_feature_maps_out)
    W2_shape = (5, 5, 20, 50)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)  # 50 biases

    # vanilla ANN weights
    # final shape of each feature map is (8, 8): 32 -> 16 -> 8 after two 2x2 poolings
    W3_init = np.random.randn(W2_shape[-1] * 8 * 8,
                              M) / np.sqrt(W2_shape[-1] * 8 * 8 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)

    # define variables and expressions
    # using None as the first shape element takes up too much RAM unfortunately
    X = tf.placeholder(tf.float32, shape=(batch_sz, 32, 32, 3), name='X')
    T = tf.placeholder(tf.int32, shape=(batch_sz, ), name='T')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))
    W4 = tf.Variable(W4_init.astype(np.float32))
    b4 = tf.Variable(b4_init.astype(np.float32))

    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    # output is (N, h, w, #feature maps)
    # reshape: Z2_shape[0]: #images
    Z2r = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    Z3 = tf.nn.relu(tf.matmul(Z2r, W3) + b3)
    # logits
    Yish = tf.matmul(Z3, W4) + b4

    cost = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=Yish, labels=T))

    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99,
                                         momentum=0.9).minimize(cost)

    # we'll use this to calculate the error rate
    predict_op = tf.argmax(Yish, 1)

    t0 = datetime.now()
    LL = []
    W1_val = None
    W2_val = None
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        for i in range(max_iter):
            for j in range(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ]
                Ybatch = Ytrain[j * batch_sz:(j * batch_sz + batch_sz), ]

                if len(Xbatch) == batch_sz:
                    session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                    if j % print_period == 0:
                        # due to RAM limitations we need to have a fixed size input
                        # so as a result, we have this ugly total cost and prediction computation
                        test_cost = 0
                        prediction = np.zeros(len(Xtest))
                        for k in range(len(Xtest) // batch_sz):
                            Xtestbatch = Xtest[k * batch_sz:(k * batch_sz +
                                                             batch_sz), ]
                            Ytestbatch = Ytest[k * batch_sz:(k * batch_sz +
                                                             batch_sz), ]
                            test_cost += session.run(cost,
                                                     feed_dict={
                                                         X: Xtestbatch,
                                                         T: Ytestbatch
                                                     })
                            prediction[k*batch_sz:(k*batch_sz + batch_sz)] = session.run(
                                predict_op, feed_dict={X: Xtestbatch})

                        err = error_rate(prediction, Ytest)
                        print(
                            "Cost / err at iteration i=%d, j=%d: %.3f / %.3f" %
                            (i, j, test_cost, err))
                        LL.append(test_cost)

        W1_val = W1.eval()
        W2_val = W2.eval()

    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(LL)
    plt.show()

    W1_val = W1_val.transpose(3, 2, 0, 1)
    W2_val = W2_val.transpose(3, 2, 0, 1)

    # visualize W1 (20, 3, 5, 5)
    # W1_val = W1.get_value()
    grid = np.zeros((8 * 5, 8 * 5))
    m = 0
    n = 0
    for i in range(20):
        for j in range(3):
            filt = W1_val[i, j]
            grid[m * 5:(m + 1) * 5, n * 5:(n + 1) * 5] = filt
            m += 1
            if m >= 8:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W1")
    plt.show()

    # visualize W2 (50, 20, 5, 5)
    # W2_val = W2.get_value()
    grid = np.zeros((32 * 5, 32 * 5))
    m = 0
    n = 0
    for i in range(50):
        for j in range(20):
            filt = W2_val[i, j]
            grid[m * 5:(m + 1) * 5, n * 5:(n + 1) * 5] = filt
            m += 1
            if m >= 32:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W2")
    plt.show()
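
shuffle(X, Y) is likewise undefined here; it behaves like sklearn.utils.shuffle, permuting both arrays with the same random order. A NumPy stand-in, as a sketch:

import numpy as np


def shuffle(X, Y):
    # shuffle two arrays with a single shared permutation
    idx = np.random.permutation(len(Y))
    return X[idx], Y[idx]
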
Example #6
def main():
    # step 1: load the data, transform as needed
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)

    Xtest = rearrange(test['X'])
    Ytest = test['y'].flatten() - 1
    del test

    max_iter = 6
    print_period = 10

    lr = np.float32(1e-2)
    mu = np.float32(0.99)

    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    M = 500
    K = 10
    poolsz = (2, 2)

    # after conv will be of dimension 32 - 5 + 1 = 28
    # after downsample 28 / 2 = 14
    W1_shape = (
        20, 3, 5, 5
    )  # (num_feature_maps, num_color_channels, filter_width, filter_height)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[0],
                       dtype=np.float32)  # one bias per output feature map

    # after conv will be of dimension 14 - 5 + 1 = 10
    # after downsample 10 / 2 = 5
    W2_shape = (
        50, 20, 5, 5
    )  # (num_feature_maps, old_num_feature_maps, filter_width, filter_height)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[0], dtype=np.float32)

    # vanilla ANN weights
    W3_init = np.random.randn(W2_shape[0] * 5 * 5,
                              M) / np.sqrt(W2_shape[0] * 5 * 5 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)

    # step 2: define theano variables and expressions
    X = T.tensor4('X', dtype='float32')
    Y = T.ivector('T')
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')
    W3 = theano.shared(W3_init.astype(np.float32), 'W3')
    b3 = theano.shared(b3_init, 'b3')
    W4 = theano.shared(W4_init.astype(np.float32), 'W4')
    b4 = theano.shared(b4_init, 'b4')

    # forward pass
    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3)
    pY = T.nnet.softmax(Z3.dot(W4) + b4)

    # define the cost function and prediction
    cost = -(T.log(pY[T.arange(Y.shape[0]), Y])).mean()
    prediction = T.argmax(pY, axis=1)

    # step 3: training expressions and functions
    params = [W1, b1, W2, b2, W3, b3, W4, b4]

    # momentum changes
    dparams = [
        theano.shared(np.zeros_like(p.get_value(), dtype=np.float32))
        for p in params
    ]

    updates = []
    grads = T.grad(cost, params)
    for p, dp, g in zip(params, dparams, grads):
        dp_update = mu * dp - lr * g
        p_update = p + dp_update

        updates.append((dp, dp_update))
        updates.append((p, p_update))

    train = theano.function(
        inputs=[X, Y],
        updates=updates,
    )

    # create another function for this because we want it over the whole dataset
    get_prediction = theano.function(
        inputs=[X, Y],
        outputs=[cost, prediction],
    )

    t0 = datetime.now()
    costs = []
    for i in range(max_iter):
        Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
        for j in range(n_batches):
            Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ]
            Ybatch = Ytrain[j * batch_sz:(j * batch_sz + batch_sz), ]

            train(Xbatch, Ybatch)
            if j % print_period == 0:
                cost_val, prediction_val = get_prediction(Xtest, Ytest)
                err = error_rate(prediction_val, Ytest)
                print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" %
                      (i, j, cost_val, err))
                costs.append(cost_val)
    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(costs)
    plt.show()
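
The Theano examples also call a free-standing relu(); older Theano releases had no T.nnet.relu, so it was commonly defined by hand. A minimal stand-in:

import theano.tensor as T


def relu(a):
    # elementwise max(a, 0)
    return T.maximum(a, 0)
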
Example #7
def rearrange(X):
    # input is (32, 32, 3, N)
    # output is (N, 32, 32, 3)
    # N = X.shape[-1]
    # out = np.zeros((N, 32, 32, 3), dtype=np.float32)
    # for i in xrange(N):
    #     for j in xrange(3):
    #         out[i, :, :, j] = X[:, :, j, i]
    # return out / 255
    return (X.transpose(3, 0, 1, 2) / 255).astype(np.float32)


# In[3]:

train, test = get_data()

# Need to scale! don't leave as 0..255
# Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
# So flatten it and make it 0..9
# Also need indicator matrix for cost calculation
Xtrain = rearrange(train['X'])
Ytrain = train['y'].flatten() - 1
# print len(Ytrain)
del train
Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
Ytrain_ind = y2indicator(Ytrain)

Xtest = rearrange(test['X'])
Ytest = test['y'].flatten() - 1
del test
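
get_data() and y2indicator() come from the surrounding project and are not shown. Judging from the comments (MATLAB-style 1..10 labels, 32x32x3 images in train['X'] / train['y']), the data is in SVHN-style .mat files; the file names below are assumptions, and y2indicator is the usual one-hot encoder used by the cross-entropy cost.

import numpy as np
from scipy.io import loadmat


def get_data():
    # assumed SVHN file names; adjust paths as needed
    train = loadmat('train_32x32.mat')
    test = loadmat('test_32x32.mat')
    return train, test


def y2indicator(y):
    # N x K one-hot indicator matrix from integer labels 0..K-1
    N = len(y)
    K = len(set(y))
    ind = np.zeros((N, K), dtype=np.float32)
    ind[np.arange(N), y] = 1
    return ind
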
Example #8
def main():
    # step 1: load the data, transform as needed
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Ytrain_ind = y2indicator(Ytrain)

    Xtest = rearrange(test['X'])
    Ytest = test['y'].flatten() - 1
    del test
    Ytest_ind = y2indicator(Ytest)

    max_iter = 8
    print_period = 10

    lr = np.float32(0.00001)
    reg = np.float32(0.01)
    mu = np.float32(0.99)

    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    M = 500
    K = 10
    poolsz = (2, 2)

    # after conv will be of dimension 32 - 5 + 1 = 28
    # after downsample 28 / 2 = 14
    W1_shape = (
        20, 3, 5, 5
    )  # (num_feature_maps, num_color_channels, filter_width, filter_height)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[0],
                       dtype=np.float32)  # one bias per output feature map

    # after conv will be of dimension 14 - 5 + 1 = 10
    # after downsample 10 / 2 = 5
    W2_shape = (
        50, 20, 5, 5
    )  # (num_feature_maps, old_num_feature_maps, filter_width, filter_height)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[0], dtype=np.float32)

    # vanilla ANN weights
    W3_init = np.random.randn(W2_shape[0] * 5 * 5,
                              M) / np.sqrt(W2_shape[0] * 5 * 5 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)

    # step 2: define theano variables and expressions
    X = T.tensor4('X', dtype='float32')
    Y = T.matrix('T')
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')
    W3 = theano.shared(W3_init.astype(np.float32), 'W3')
    b3 = theano.shared(b3_init, 'b3')
    W4 = theano.shared(W4_init.astype(np.float32), 'W4')
    b4 = theano.shared(b4_init, 'b4')

    # momentum changes
    dW1 = theano.shared(np.zeros(W1_init.shape, dtype=np.float32), 'dW1')
    db1 = theano.shared(np.zeros(b1_init.shape, dtype=np.float32), 'db1')
    dW2 = theano.shared(np.zeros(W2_init.shape, dtype=np.float32), 'dW2')
    db2 = theano.shared(np.zeros(b2_init.shape, dtype=np.float32), 'db2')
    dW3 = theano.shared(np.zeros(W3_init.shape, dtype=np.float32), 'dW3')
    db3 = theano.shared(np.zeros(b3_init.shape, dtype=np.float32), 'db3')
    dW4 = theano.shared(np.zeros(W4_init.shape, dtype=np.float32), 'dW4')
    db4 = theano.shared(np.zeros(b4_init.shape, dtype=np.float32), 'db4')

    # forward pass
    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3)
    pY = T.nnet.softmax(Z3.dot(W4) + b4)

    # define the cost function and prediction
    params = (W1, b1, W2, b2, W3, b3, W4, b4)
    reg_cost = reg * sum((param * param).sum() for param in params)
    cost = -(Y * T.log(pY)).sum() + reg_cost
    prediction = T.argmax(pY, axis=1)

    # step 3: training expressions and functions
    update_W1 = W1 + mu * dW1 - lr * T.grad(cost, W1)
    update_b1 = b1 + mu * db1 - lr * T.grad(cost, b1)
    update_W2 = W2 + mu * dW2 - lr * T.grad(cost, W2)
    update_b2 = b2 + mu * db2 - lr * T.grad(cost, b2)
    update_W3 = W3 + mu * dW3 - lr * T.grad(cost, W3)
    update_b3 = b3 + mu * db3 - lr * T.grad(cost, b3)
    update_W4 = W4 + mu * dW4 - lr * T.grad(cost, W4)
    update_b4 = b4 + mu * db4 - lr * T.grad(cost, b4)

    # update weight changes
    update_dW1 = mu * dW1 - lr * T.grad(cost, W1)
    update_db1 = mu * db1 - lr * T.grad(cost, b1)
    update_dW2 = mu * dW2 - lr * T.grad(cost, W2)
    update_db2 = mu * db2 - lr * T.grad(cost, b2)
    update_dW3 = mu * dW3 - lr * T.grad(cost, W3)
    update_db3 = mu * db3 - lr * T.grad(cost, b3)
    update_dW4 = mu * dW4 - lr * T.grad(cost, W4)
    update_db4 = mu * db4 - lr * T.grad(cost, b4)

    train = theano.function(
        inputs=[X, Y],
        updates=[
            (W1, update_W1),
            (b1, update_b1),
            (W2, update_W2),
            (b2, update_b2),
            (W3, update_W3),
            (b3, update_b3),
            (W4, update_W4),
            (b4, update_b4),
            (dW1, update_dW1),
            (db1, update_db1),
            (dW2, update_dW2),
            (db2, update_db2),
            (dW3, update_dW3),
            (db3, update_db3),
            (dW4, update_dW4),
            (db4, update_db4),
        ],
    )

    # create another function for this because we want it over the whole dataset
    get_prediction = theano.function(
        inputs=[X, Y],
        outputs=[cost, prediction],
    )

    t0 = datetime.now()
    LL = []
    for i in range(max_iter):
        for j in range(n_batches):
            Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ]
            Ybatch = Ytrain_ind[j * batch_sz:(j * batch_sz + batch_sz), ]

            train(Xbatch, Ybatch)
            if j % print_period == 0:
                cost_val, prediction_val = get_prediction(Xtest, Ytest_ind)
                err = error_rate(prediction_val, Ytest)
                print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" %
                      (i, j, cost_val, err))
                LL.append(cost_val)
    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(LL)
    plt.show()

    # visualize W1 (20, 3, 5, 5)
    W1_val = W1.get_value()
    grid = np.zeros((8 * 5, 8 * 5))
    m = 0
    n = 0
    for i in range(20):
        for j in range(3):
            filt = W1_val[i, j]
            grid[m * 5:(m + 1) * 5, n * 5:(n + 1) * 5] = filt
            m += 1
            if m >= 8:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W1")
    plt.show()

    # visualize W2 (50, 20, 5, 5)
    W2_val = W2.get_value()
    grid = np.zeros((32 * 5, 32 * 5))
    m = 0
    n = 0
    for i in range(50):
        for j in range(20):
            filt = W2_val[i, j]
            grid[m * 5:(m + 1) * 5, n * 5:(n + 1) * 5] = filt
            m += 1
            if m >= 32:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W2")
    plt.show()
def main():
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    # print len(Ytrain)
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)

    Xtest  = rearrange(test['X'])
    Ytest  = test['y'].flatten() - 1
    del test

    # gradient descent params
    max_iter = 6
    print_period = 10
    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    # limit samples since input will always have to be same size
    # you could also just do N = N // batch_sz * batch_sz
    Xtrain = Xtrain[:73000,]
    Ytrain = Ytrain[:73000]
    Xtest = Xtest[:26000,]
    Ytest = Ytest[:26000]
    # print "Xtest.shape:", Xtest.shape
    # print "Ytest.shape:", Ytest.shape

    # initial weights
    M = 500
    K = 10
    poolsz = (2, 2)

    W1_shape = (5, 5, 3, 20) # (filter_width, filter_height, num_color_channels, num_feature_maps)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32) # one bias per output feature map

    W2_shape = (5, 5, 20, 50) # (filter_width, filter_height, old_num_feature_maps, num_feature_maps)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)

    # vanilla ANN weights
    W3_init = np.random.randn(W2_shape[-1]*8*8, M) / np.sqrt(W2_shape[-1]*8*8 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)


    # define variables and expressions
    # using None as the first shape element takes up too much RAM unfortunately
    X = tf.placeholder(tf.float32, shape=(batch_sz, 32, 32, 3), name='X')
    T = tf.placeholder(tf.int32, shape=(batch_sz,), name='T')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))
    W4 = tf.Variable(W4_init.astype(np.float32))
    b4 = tf.Variable(b4_init.astype(np.float32))

    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    Z2r = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    Z3 = tf.nn.relu( tf.matmul(Z2r, W3) + b3 )
    Yish = tf.matmul(Z3, W4) + b4

    cost = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=Yish,
            labels=T
        )
    )

    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99, momentum=0.9).minimize(cost)

    # we'll use this to calculate the error rate
    predict_op = tf.argmax(Yish, 1)

    t0 = datetime.now()
    LL = []
    W1_val = None
    W2_val = None
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        for i in range(max_iter):
            for j in range(n_batches):
                Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
                Ybatch = Ytrain[j*batch_sz:(j*batch_sz + batch_sz),]

                if len(Xbatch) == batch_sz:
                    session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                    if j % print_period == 0:
                        # due to RAM limitations we need to have a fixed size input
                        # so as a result, we have this ugly total cost and prediction computation
                        test_cost = 0
                        prediction = np.zeros(len(Xtest))
                        for k in range(len(Xtest) // batch_sz):
                            Xtestbatch = Xtest[k*batch_sz:(k*batch_sz + batch_sz),]
                            Ytestbatch = Ytest[k*batch_sz:(k*batch_sz + batch_sz),]
                            test_cost += session.run(cost, feed_dict={X: Xtestbatch, T: Ytestbatch})
                            prediction[k*batch_sz:(k*batch_sz + batch_sz)] = session.run(
                                predict_op, feed_dict={X: Xtestbatch})
                        err = error_rate(prediction, Ytest)
                        print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err))
                        LL.append(test_cost)

        W1_val = W1.eval()
        W2_val = W2.eval()
    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(LL)
    plt.show()


    W1_val = W1_val.transpose(3, 2, 0, 1)
    W2_val = W2_val.transpose(3, 2, 0, 1)


    # visualize W1 (20, 3, 5, 5)
    # W1_val = W1.get_value()
    grid = np.zeros((8*5, 8*5))
    m = 0
    n = 0
    for i in range(20):
        for j in range(3):
            filt = W1_val[i,j]
            grid[m*5:(m+1)*5,n*5:(n+1)*5] = filt
            m += 1
            if m >= 8:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W1")
    plt.show()

    # visualize W2 (50, 20, 5, 5)
    # W2_val = W2.get_value()
    grid = np.zeros((32*5, 32*5))
    m = 0
    n = 0
    for i in range(50):
        for j in range(20):
            filt = W2_val[i,j]
            grid[m*5:(m+1)*5,n*5:(n+1)*5] = filt
            m += 1
            if m >= 32:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title("W2")
    plt.show()
Example #10
def main():
    train, test = get_data()
    Xtrain = rearrange(train['X'])
    # train['y'] has shape (N,1) and vals ranging 1:10; need shape (N,) and ranging 0:9
    Ytrain = train['y'].flatten() - 1
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Ytrain_ind = y2indicator(Ytrain)

    Xtest = rearrange(test['X'])
    Ytest = test['y'].flatten() - 1
    del test
    Ytest_ind = y2indicator(Ytest)

    # grad. desc. params
    max_iter = 20
    print_period = 10
    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    # make num samples divisible by batch_sz so all batches are same sz
    Xtrain = Xtrain[:73000, ]
    Ytrain = Ytrain[:73000]
    Xtest = Xtest[:26000, ]
    Ytest = Ytest[:26000]
    Ytest_ind = Ytest_ind[:26000, ]

    # initial weights
    M = 500  # neurons in final layer
    K = 10  # num classes
    pool_sz = (2, 2)

    W1_shape = (
        5, 5, 3, 20
    )  # filter shape (width, height, num_color_channel, num_feature_maps(or filters))
    W1_init = init_filter(W1_shape,
                          pool_sz)  # pass in pool_sz for normalization
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32)

    W2_shape = (5, 5, 20, 50)
    W2_init = init_filter(W2_shape, pool_sz)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)

    # vanilla NN weights
    # the 8*8 factor is the result of two convpool layers: 32x32 -> 16x16 -> 8x8
    W3_init = np.random.randn(W2_shape[-1] * 8 * 8, M) / np.sqrt(
        W2_shape[-1] * 8 * 8 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)

    X = tf.placeholder(tf.float32, shape=(batch_sz, 32, 32, 3), name='X')
    T = tf.placeholder(tf.float32, shape=(batch_sz, K), name='T')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))
    W4 = tf.Variable(W4_init.astype(np.float32))
    b4 = tf.Variable(b4_init.astype(np.float32))

    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    # reshape data to feed into the ANN layer
    Z2r = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    Z3 = tf.nn.relu(tf.matmul(Z2r, W3) + b3)
    Yish = tf.matmul(Z3, W4) + b4

    cost = tf.reduce_sum(  # sums all elements in matrix
        tf.nn.softmax_cross_entropy_with_logits(
            # computes softmax with logits and labels
            logits=Yish,
            labels=T))

    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99,
                                         momentum=0.9).minimize(cost)

    predict_op = tf.argmax(Yish, 1)

    t0 = datetime.now()
    LL = []

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        for i in range(max_iter):
            for j in range(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ]
                Ybatch = Ytrain_ind[j * batch_sz:(j * batch_sz + batch_sz)]

                if len(Xbatch) == batch_sz:
                    sess.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                    if j % print_period == 0:
                        # due to RAM limitations we need to have a fixed size input
                        # as a result, we need this total cost and prediction computation
                        test_cost = 0
                        prediction = np.zeros(len(Xtest))
                        # since tf var X is expecting input of batch_sz, need to loop through Xtest
                        # in iterations of batch_sz
                        for k in range(len(Xtest) // batch_sz):
                            Xtestbatch = Xtest[k * batch_sz:(k * batch_sz +
                                                             batch_sz), ]
                            Ytestbatch = Ytest_ind[k * batch_sz:(k * batch_sz +
                                                                 batch_sz)]
                            test_cost += sess.run(cost,
                                                  feed_dict={
                                                      X: Xtestbatch,
                                                      T: Ytestbatch
                                                  })
                            prediction[k*batch_sz:(k*batch_sz + batch_sz)] = sess.run(
                                predict_op, feed_dict={X: Xtestbatch})
                        err = error_rate(prediction, Ytest)
                        print(
                            "Cost / err at iteration i=%d, j=%d: %.3f / %.3f" %
                            (i, j, test_cost, err))
                        LL.append(test_cost)
        print("Elapsed time:", (datetime.now() - t0))
        plt.plot(LL)
        plt.show()

def rearrange(X):
    # input is (32, 32, 3, N)
    # output is (N, 32, 32, 3)
    # N = X.shape[-1]
    # out = np.zeros((N, 32, 32, 3), dtype=np.float32)
    # for i in xrange(N):
    #     for j in xrange(3):
    #         out[i, :, :, j] = X[:, :, j, i]
    # return out / 255
    return (X.transpose(3, 0, 1, 2) / 255).astype(np.float32)



train, test = get_data()

# Need to scale! don't leave as 0..255
# Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
# So flatten it and make it 0..9
# Also need indicator matrix for cost calculation
Xtrain = rearrange(train['X'])
Ytrain = train['y'].flatten() - 1
# print len(Ytrain)
del train
Xtrain, Ytrain = shuffle(Xtrain, Ytrain)

Xtest  = rearrange(test['X'])
Ytest  = test['y'].flatten() - 1
del test
Example #12
def main():
    train, test = get_data()

    X_train = rearrange(train['X'])
    t_train = train['y'].flatten() - 1
    del train
    X_train, t_train = shuffle(X_train, t_train)
    X_test = rearrange(test['X'])
    t_test = test['y'].flatten() - 1
    del test

    # Gradient-descent parameters
    epochs = 6
    print_period = 10
    N = X_train.shape[0]
    batch_size = 500
    nb_batches = N // batch_size

    # Limit samples since input will always have to be same size
    # we could have done: N = N // batch_size * batch_size
    X_train = X_train[:73000, ]
    t_train = t_train[:73000]
    X_test = X_test[:26000, ]
    t_test = t_test[:26000]

    # Initial weights
    M = 500
    K = 10
    pool_size = (2, 2)
    # W*H*C1*features_map
    W0_shape = (5, 5, 3, 20)
    W0_init = init_filter(W0_shape, pool_size)
    b0_init = np.zeros(W0_shape[-1], dtype=np.float32)

    W1_shape = (5, 5, 20, 50)
    W1_init = init_filter(W1_shape, pool_size)
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32)
    # ANN weights
    W2_init = np.random.randn(W1_shape[-1] * 8 * 8,
                              M) / np.sqrt(W1_shape[-1] * 8 * 8 + M)
    b2_init = np.zeros(M)
    W3_init = np.random.randn(M, K) / np.sqrt(M + K)
    b3_init = np.zeros(K)

    # tf environment
    X_pl = tf.placeholder(tf.float32, shape=(batch_size, 32, 32, 3), name='X')
    t_pl = tf.placeholder(tf.int32, shape=(batch_size, ), name='t')
    W0 = tf.Variable(W0_init.astype(np.float32))
    b0 = tf.Variable(b0_init.astype(np.float32))
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))

    # tf training environment
    A1 = convpool(X_pl, W0, b0)
    A2 = convpool(A1, W1, b1)
    A2_shape = A2.get_shape().as_list()
    A2r = tf.reshape(A2, [A2_shape[0], np.prod(A2_shape[1:])])
    A3 = tf.nn.relu(tf.matmul(A2r, W2) + b2)
    Z4 = tf.matmul(A3, W3) + b3
    J = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=t_pl, logits=Z4))
    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99,
                                         momentum=0.9).minimize(J)

    # tf test environment
    y = tf.argmax(Z4, 1)

    # TRAIN & TEST
    t0 = datetime.now()
    tests_costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(epochs):
            for batch_id in range(nb_batches):
                X_train_batch = X_train[batch_id * batch_size:(batch_id + 1) *
                                        batch_size, ]
                t_train_batch = t_train[batch_id * batch_size:(batch_id + 1) *
                                        batch_size, ]
                if len(X_train_batch) == batch_size:
                    sess.run(train_op,
                             feed_dict={
                                 X_pl: X_train_batch,
                                 t_pl: t_train_batch
                             })
                    if batch_id % print_period == 0:
                        # due to RAM limitations we need to have a fixed input
                        # We took the size of a batch for the placeholder
                        # as a result we have this ugly total cost and prediction computation
                        j_test = 0
                        y_test = np.zeros(len(X_test))
                        for batch_test_id in range(len(X_test) // batch_size):
                            X_test_batch = X_test[
                                batch_test_id*batch_size:(batch_test_id + 1)*batch_size, ]
                            t_test_batch = t_test[
                                batch_test_id*batch_size:(batch_test_id + 1)*batch_size]
                            j_test += sess.run(J,
                                               feed_dict={
                                                   X_pl: X_test_batch,
                                                   t_pl: t_test_batch
                                               })
                            y_test[batch_test_id*batch_size:(batch_test_id + 1)*batch_size] = sess.run(
                                y, feed_dict={X_pl: X_test_batch})
                        tests_costs.append(j_test)
                        acc = accuracy(y_test, t_test)
                        print(
                            'Epoch {} batch_id {}: validation cost: {} - accuracy = {}%'
                            .format(epoch, batch_id, j_test, acc * 100))
    # W0_val = W0.eval()
    # W1_val = W1.eval()
    print('Elapsed time: {}'.format(datetime.now() - t0))
    #plt.plot(tests_costs)
    #plt.show()
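
This variant reports accuracy rather than an error rate; the accuracy helper is not shown. A minimal sketch, assuming 1-D arrays of class ids:

import numpy as np


def accuracy(y, t):
    # fraction of predictions that match the targets
    return np.mean(y == t)
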
Example #13
def main():
    train, test = get_data()

    # Need to scale! don't leave as 0..255
    # Y is a N x 1 matrix with values 1..10 (MATLAB indexes by 1)
    # So flatten it and make it 0..9
    # Also need indicator matrix for cost calculation
    Xtrain = rearrange(train['X'])
    Ytrain = train['y'].flatten() - 1
    # print len(Ytrain)
    del train
    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Ytrain_ind = y2indicator(Ytrain)

    Xtest = rearrange(test['X'])
    Ytest = test['y'].flatten() - 1
    del test
    Ytest_ind = y2indicator(Ytest)

    # gradient descent params
    max_iter = 6
    print_period = 10
    N = Xtrain.shape[0]
    batch_sz = 500
    n_batches = N // batch_sz

    Xtrain = Xtrain[:73000, ]
    Ytrain = Ytrain[:73000, ]
    Xtest = Xtest[:26000, ]
    Ytest = Ytest[:26000, ]

    M = 500
    K = 10
    poolsz = (2, 2)

    # W1
    # (filter_width, filter_height, num_color_channels, num_feature_maps)
    W1_shape = (5, 5, 3, 20)
    W1_init = init_filter(W1_shape, poolsz)
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32)

    # W2
    # (filter_width, filter_height, old_num_feature_maps, num_feature_maps)
    W2_shape = (5, 5, 20, 50)
    W2_init = init_filter(W2_shape, poolsz)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)

    # W3. FeedForward Network
    W3_init = np.random.randn(W2_shape[-1] * 8 * 8,
                              M) / np.sqrt(W2_shape[-1] * 8 * 8 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)

    # Tensorflow variables
    # using None as the first shape element takes up too much RAM unfortunately

    # Init X
    X = tf.placeholder(tf.float32,
                       shape=(batch_sz, Xtrain.shape[1], Xtrain.shape[2],
                              Xtrain.shape[3]))
    # Init T
    T = tf.placeholder(tf.float32, shape=(batch_sz, K), name='T')
    # Init Weights and Biases
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))
    W4 = tf.Variable(W4_init.astype(np.float32))
    b4 = tf.Variable(b4_init.astype(np.float32))
    # FeedForward operation
    Z1 = convpool(X, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    # Reshape to [N, W*H*C]
    Z2r = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    # Output of the 2nd convpool layer, flattened
    # and multiplied with the FeedForward layer
    Z3 = tf.nn.relu(tf.matmul(Z2r, W3) + b3)
    # Output of FF layer, multiplied with Y output layer
    Yish = tf.matmul(Z3, W4) + b4

    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=Yish, labels=T))

    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99,
                                         momentum=0.9).minimize(cost)

    # we'll use this to calculate the error rate
    predict_op = tf.argmax(Yish, 1)

    t0 = datetime.now()
    LL = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        for i in range(max_iter):
            for j in range(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ]
                Ybatch = Ytrain_ind[j * batch_sz:(j * batch_sz + batch_sz), ]

                if len(Xbatch) == batch_sz:
                    session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                    if j % print_period == 0:
                        # due to RAM limitations we need to have a fixed size input
                        # so as a result, we have this ugly total cost and prediction computation
                        test_cost = 0
                        prediction = np.zeros(len(Xtest))
                        for k in range(len(Xtest) // batch_sz):
                            Xtestbatch = Xtest[k * batch_sz:(k * batch_sz +
                                                             batch_sz), ]
                            Ytestbatch = Ytest_ind[k * batch_sz:(k * batch_sz +
                                                                 batch_sz), ]
                            # Accumulate test cost here
                            test_cost += session.run(cost,
                                                     feed_dict={
                                                         X: Xtestbatch,
                                                         T: Ytestbatch
                                                     })
                            # Only assign part of the prediction
                            prediction[k*batch_sz:(k*batch_sz + batch_sz)] = session.run(
                                predict_op, feed_dict={X: Xtestbatch, T: Ytestbatch})
                        err = error_rate(prediction, Ytest)
                        print(
                            "Cost / err at iteration i=%d, j=%d: %.3f / %.3f" %
                            (i, j, test_cost, err))
                        LL.append(test_cost)
    print("Elapsed time:", (datetime.now() - t0))
    plt.plot(LL)
    plt.show()
Example #14
File: cnn_tf.py  Project: ZUOYANGDING/dpl
def main():
    train, test = get_data()
    train_X = rearrange(train['X'])
    train_Y = train['y'].flatten() - 1
    train_X, train_Y = shuffle(train_X, train_Y)
    test_X = rearrange(test['X'])
    test_Y = test['y'].flatten() - 1
    del train
    del test

    max_iter = 6
    print_period = 10
    N = train_X.shape[0]
    batch_sz = 500
    num_batch = N // batch_sz
    train_X = train_X[:73000, ]
    train_Y = train_Y[:73000]
    test_X = test_X[:26000, ]
    test_Y = test_Y[:26000]

    #init weights and placeholders
    M = 500
    K = 10
    W1_shape = (5, 5, 3, 20)
    W1_init = init_filter(W1_shape)
    b1_init = np.zeros(W1_shape[-1], dtype=np.float32)
    W2_shape = (5, 5, 20, 50)
    W2_init = init_filter(W2_shape)
    b2_init = np.zeros(W2_shape[-1], dtype=np.float32)

    W3_init = np.random.randn(W2_shape[-1] * 8 * 8,
                              M) / np.sqrt(W2_shape[-1] * 8 * 8 + M)
    b3_init = np.zeros(M, dtype=np.float32)
    W4_init = np.random.randn(M, K) / np.sqrt(M + K)
    b4_init = np.zeros(K, dtype=np.float32)

    inputs = tf.placeholder(tf.float32,
                            shape=[batch_sz, 32, 32, 3],
                            name='inputs')
    labels = tf.placeholder(tf.int32, shape=[
        batch_sz,
    ], name='labels')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))
    W4 = tf.Variable(W4_init.astype(np.float32))
    b4 = tf.Variable(b4_init.astype(np.float32))

    #forward
    Z1 = convpool(inputs, W1, b1)
    Z2 = convpool(Z1, W2, b2)
    Z2_shape = Z2.get_shape().as_list()
    Z2_re = tf.reshape(Z2, [Z2_shape[0], np.prod(Z2_shape[1:])])
    Z3 = tf.nn.relu(tf.matmul(Z2_re, W3) + b3)
    logits = tf.matmul(Z3, W4) + b4

    #init functions
    cost = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=labels))
    train_op = tf.train.RMSPropOptimizer(0.0001, decay=0.99,
                                         momentum=0.9).minimize(cost)
    predict_op = tf.argmax(logits, axis=1)

    costs = []
    W1_value = None
    W2_value = None
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(max_iter):
            shuffle_X, shuffle_Y = shuffle(train_X, train_Y)
            for j in range(num_batch):
                x = shuffle_X[j * batch_sz:(j * batch_sz + batch_sz), ]
                y = shuffle_Y[j * batch_sz:(j * batch_sz + batch_sz), ]

                if len(x) == batch_sz:
                    session.run(train_op, feed_dict={inputs: x, labels: y})
                    if j % print_period == 0:
                        test_cost = 0
                        prediction = np.zeros(len(test_X))
                        for k in range(len(test_X) // batch_sz):
                            Xtestbatch = test_X[k * batch_sz:(k * batch_sz +
                                                              batch_sz), ]
                            Ytestbatch = test_Y[k * batch_sz:(k * batch_sz +
                                                              batch_sz), ]
                            test_cost += session.run(cost,
                                                     feed_dict={
                                                         inputs: Xtestbatch,
                                                         labels: Ytestbatch
                                                     })
                            prediction[k*batch_sz:(k*batch_sz + batch_sz)] = session.run(
                                predict_op, feed_dict={inputs: Xtestbatch})
                        err = error_rate(prediction, test_Y)
                        costs.append(test_cost)
                        print(
                            "Cost / err at iteration i=%d, j=%d: %.3f / %.3f" %
                            (i, j, test_cost, err))
        W1_value = W1.eval()
        W2_value = W2.eval()
    plt.plot(costs)
    plt.show()

    W1_value = W1_value.transpose(3, 2, 0, 1)
    W2_value = W2_value.transpose(3, 2, 0, 1)

    # input 3 channels, output 20 channels: 60 filter slices on an 8*8=64 grid, leaving the final 4 cells empty
    grid = np.zeros((8 * 5, 8 * 5))
    m = 0
    n = 0
    for i in range(20):
        for j in range(3):
            grid[m * 5:(m + 1) * 5, n * 5:(n + 1) * 5] = W1_value[i, j]
            m += 1
            if m >= 8:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title('W1')
    plt.show()

    # input 20 channels, output 50 channels: 1000 filter slices on a 32*32=1024 grid, leaving the final 24 cells empty
    grid = np.zeros((32 * 5, 32 * 5))
    m = 0
    n = 0
    for i in range(50):
        for j in range(20):
            grid[m * 5:(m + 1) * 5, n * 5:(n + 1) * 5] = W2_value[i, j]
            m += 1
            if m >= 32:
                m = 0
                n += 1
    plt.imshow(grid, cmap='gray')
    plt.title('W2')
    plt.show()
Example #15
def main():
	train, test = get_data()
	train_X = rearrange(train['X'])
	train_Y = train['y'].flatten()-1
	train_X, train_Y = shuffle(train_X, train_Y)
	test_X = rearrange(test['X'])
	test_Y = test['y'].flatten()-1

	max_iter = 6
	print_period = 10
	lr = np.float32(0.0001)
	mu = np.float32(0.99)
	decay = np.float32(0.9)
	eps = np.float32(1e-10)
	reg = np.float32(0.01)
	N = train_X.shape[0]
	batch_sz = 500
	num_batch = N // batch_sz
	M = 500
	K = 10
	poolsz = (2, 2)

	W1_shape = (20, 3, 5, 5) #(num_feature_maps, num_color_channels, filter_width, filter_height)
	W1_init = init_filter(W1_shape, poolsz)
	b1_init = np.zeros(W1_shape[0], dtype=np.float32)

	W2_shape = (50, 20, 5, 5) #(num_feature_maps, old_num_feature_maps, filter_width, filter_height)
	W2_init = init_filter(W2_shape, poolsz)
	b2_init = np.zeros(W2_shape[0], dtype=np.float32)

	#ANN
	W3_init = np.random.randn(W2_shape[0]*5*5, M) / np.sqrt(W2_shape[0]*5*5 + M)
	b3_init = np.zeros(M, dtype=np.float32)
	W4_init = np.random.randn(M, K) / np.sqrt(M+K)
	b4_init = np.zeros(K, dtype=np.float32)

	#init theano variables
	X = T.tensor4('X', dtype='float32')
	Y = T.ivector('T')
	W1 = theano.shared(W1_init, 'W1')
	b1 = theano.shared(b1_init, 'b1')
	W2 = theano.shared(W2_init, 'W2')
	b2 = theano.shared(b2_init, 'b2')
	W3 = theano.shared(W3_init.astype(np.float32), 'W3')
	b3 = theano.shared(b3_init, 'b3')
	W4 = theano.shared(W4_init.astype(np.float32), 'W4')
	b4 = theano.shared(b4_init, 'b4')

	#forward
	Z1 = convpool(X, W1, b1)
	Z2 = convpool(Z1, W2, b2)
	Z3 = relu(Z2.flatten(ndim=2).dot(W3) + b3)
	pY = T.nnet.softmax(Z3.dot(W4) + b4)
	
	#test & prediction functions
	params = [W1, b1, W2, b2, W3, b3, W4, b4]
	rcost = reg * sum((p*p).sum() for p in params)
	cost = -(T.log(pY[T.arange(Y.shape[0]), Y])).mean() + rcost
	prediction = T.argmax(pY, axis=1)
	momentum = [theano.shared(
		np.zeros_like(p.get_value(), dtype=np.float32)) for p in params]
	catchs = [theano.shared(
		np.ones_like(p.get_value(), dtype=np.float32)) for p in params]
	
	#RMSProp
	updates = []
	grads = T.grad(cost, params)
	for p, g, m, c in zip(params, grads, momentum, catchs):
		updates_c = decay*c + (np.float32(1.0)-decay)*g*g
		updates_m = mu*m - lr*g / T.sqrt(updates_c + eps)
		updates_p = p + updates_m

		updates.append([c, updates_c])
		updates.append([m, updates_m])
		updates.append([p, updates_p])

	#init functions
	train_op = theano.function(inputs=[X, Y], updates=updates)
	prediction_op = theano.function(inputs=[X, Y], outputs=[cost, prediction])

	costs= []
	for i in range(max_iter):
		shuffle_X, shuffle_Y = shuffle(train_X, train_Y)
		for j in range(num_batch):
			x = shuffle_X[j*batch_sz : (j*batch_sz+batch_sz), :]
			y = shuffle_Y[j*batch_sz : (j*batch_sz+batch_sz)]

			train_op(x, y)
			if j % print_period == 0:
				cost_val, p_val = prediction_op(test_X, test_Y)
				e = error_rate(p_val, test_Y)
				costs.append(cost_val)
				print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, cost_val, e))
	plt.plot(costs)
	plt.show()
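
For reference, the cache/momentum arithmetic in the update loop above, written for a single NumPy parameter as a sketch (lr, mu, decay, eps are the scalars defined at the top of the example):

import numpy as np


def rmsprop_momentum_step(p, dp, cache, grad, lr, mu, decay, eps):
    cache = decay * cache + (1 - decay) * grad * grad   # running average of squared gradients
    dp = mu * dp - lr * grad / np.sqrt(cache + eps)     # momentum applied to the RMS-scaled step
    p = p + dp
    return p, dp, cache
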