Пример #1
0
def test_hardrelu_values():
    """Check that kayak.HardReLU matches an elementwise max(x, 0).

    Runs NUM_TRIALS rounds on freshly drawn 6x5 Gaussian matrices, with the
    random generator seeded so the inputs are reproducible.
    """
    npr.seed(1)

    for _ in xrange(NUM_TRIALS):
        raw_input = npr.randn(6, 5)
        relu_node = kayak.HardReLU(kayak.Parameter(raw_input))

        # The rectifier may never emit a negative entry ...
        assert np.all(relu_node.value >= 0.0)
        # ... and must agree exactly with numpy's reference computation.
        assert np.all(np.maximum(raw_input, 0.0) == relu_node.value)
Пример #2
0
def test_hardrelu_grad():
    npr.seed(2)

    # Needs to be small due to non-differentiability.
    epsilon = 1e-6

    for ii in xrange(NUM_TRIALS):
        np_X = npr.randn(6, 5)
        X = kayak.Parameter(np_X)
        Y = kayak.HardReLU(X)
        Z = kayak.MatSum(Y)

        Z.value
        assert np.all(Z.grad(X) >= 0.0)
        print "CHECKGRAD: ", ii, kayak.util.checkgrad(X, Z, epsilon)
        assert kayak.util.checkgrad(X, Z, epsilon) < MAX_GRAD_DIFF
def kayak_mlp(X, y):
    """
    Kayak implementation of a mlp with relu hidden layers and dropout
    """
    # Create a batcher object.
    batcher = kayak.Batcher(batch_size, X.shape[0])

    # count number of rows and columns
    num_examples, num_features = np.shape(X)

    X = kayak.Inputs(X, batcher)
    T = kayak.Targets(y, batcher)

    # ----------------------------- first hidden layer -------------------------------

    # set up weights for our input layer
    # use the same scheme as our numpy mlp
    input_range = 1.0 / num_features**(1 / 2)
    weights_1 = kayak.Parameter(0.1 * np.random.randn(X.shape[1], layer1_size))
    bias_1 = kayak.Parameter(0.1 * np.random.randn(1, layer1_size))

    # linear combination of weights and inputs
    hidden_1_input = kayak.ElemAdd(kayak.MatMult(X, weights_1), bias_1)

    # apply activation function to hidden layer
    hidden_1_activation = kayak.HardReLU(hidden_1_input)

    # apply a dropout for regularization
    hidden_1_out = kayak.Dropout(hidden_1_activation,
                                 layer1_dropout,
                                 batcher=batcher)

    # ----------------------------- output layer -----------------------------------

    weights_out = kayak.Parameter(0.1 * np.random.randn(layer1_size, 9))
    bias_out = kayak.Parameter(0.1 * np.random.randn(1, 9))

    # linear combination of layer2 output and output weights
    out = kayak.ElemAdd(kayak.MatMult(hidden_1_out, weights_out), bias_out)

    # apply activation function to output
    yhat = kayak.SoftMax(out)

    # ----------------------------- loss function -----------------------------------

    loss = kayak.MatAdd(kayak.MatSum(kayak.L2Loss(yhat, T)),
                        kayak.L2Norm(weights_1, layer1_l2))

    # Use momentum for the gradient-based optimization.
    mom_grad_W1 = np.zeros(weights_1.shape)
    mom_grad_W2 = np.zeros(weights_out.shape)

    # Loop over epochs.
    plot_loss = np.ones((iterations, 2))
    for epoch in xrange(iterations):

        # Track the total loss.
        total_loss = 0.0

        for batch in batcher:
            # Compute the loss of this minibatch by asking the Kayak
            # object for its value and giving it reset=True.
            total_loss += loss.value

            # Now ask the loss for its gradient in terms of the
            # weights and the biases -- the two things we're trying to
            # learn here.
            grad_W1 = loss.grad(weights_1)
            grad_B1 = loss.grad(bias_1)
            grad_W2 = loss.grad(weights_out)
            grad_B2 = loss.grad(bias_out)

            # Use momentum on the weight gradients.
            mom_grad_W1 = momentum * mom_grad_W1 + (1.0 - momentum) * grad_W1
            mom_grad_W2 = momentum * mom_grad_W2 + (1.0 - momentum) * grad_W2

            # Now make the actual parameter updates.
            weights_1.value -= learn_rate * mom_grad_W1
            bias_1.value -= learn_rate * grad_B1
            weights_out.value -= learn_rate * mom_grad_W2
            bias_out.value -= learn_rate * grad_B2

        # save values into table to print learning curve at the end of trianing
        plot_loss[epoch, 0] = epoch
        plot_loss[epoch, 1] = total_loss
        print epoch, total_loss

    #pyplot.plot(plot_loss[:,0], plot_loss[:,1], linewidth=2.0)
    #pyplot.show()

    def compute_predictions(x):
        X.data = x
        batcher.test_mode()
        return yhat.value

    return compute_predictions
Пример #4
0
# Script fragment: builds a two-layer Kayak network with dropout and an
# elementwise L2 loss.  X, N, P, D, H1 and batch_size are defined outside
# this fragment.

# Random Gaussian targets of shape (N, P).
Y = npr.randn(N, P)

# The batcher is shared by the data nodes and by both dropout nodes below.
batcher = kayak.Batcher(batch_size, N)

# Build network.
kyk_inputs = kayak.Inputs(X, batcher)

# Labels.
kyk_targets = kayak.Targets(Y, batcher)

# First layer weights and biases, drawn from a standard normal.
kyk_W1 = kayak.Parameter(npr.randn(D, H1))
kyk_B1 = kayak.Parameter(npr.randn(1, H1))

# First layer weight mult plus biases, then the HardReLU nonlinearity,
# then dropout with drop probability 0.5.
kyk_H1 = kayak.Dropout(kayak.HardReLU(
    kayak.ElemAdd(kayak.MatMult(kyk_inputs, kyk_W1), kyk_B1)),
                       drop_prob=0.5,
                       batcher=batcher)

# Second layer weights and bias, drawn from a standard normal.
kyk_W2 = kayak.Parameter(npr.randn(H1, P))
kyk_B2 = kayak.Parameter(npr.randn(1, P))

# Second layer: weight mult plus biases, HardReLU, then dropout with drop
# probability 0.5.  Note that the network output itself is rectified and
# subject to dropout.
kyk_out = kayak.Dropout(kayak.HardReLU(
    kayak.ElemAdd(kayak.MatMult(kyk_H1, kyk_W2), kyk_B2)),
                        drop_prob=0.5,
                        batcher=batcher)

# Elementwise L2 loss between the network output and the targets (not yet
# reduced to a scalar here).
kyk_el_loss = kayak.L2Loss(kyk_out, kyk_targets)
Пример #5
0
def train(inputs, targets):
    # Create a batcher object.
    batcher = kayak.Batcher(batch_size, inputs.shape[0])

    # Inputs and targets need access to the batcher.
    X = kayak.Inputs(inputs, batcher)
    T = kayak.Targets(targets, batcher)

    # First-layer weights and biases, with random initializations.
    W1 = kayak.Parameter(0.1 * npr.randn(inputs.shape[1], layer1_sz))
    B1 = kayak.Parameter(0.1 * npr.randn(1, layer1_sz))

    # First hidden layer: ReLU + Dropout
    H1 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult(X, W1), B1)),
                       layer1_dropout,
                       batcher=batcher)

    # Second-layer weights and biases, with random initializations.
    W2 = kayak.Parameter(0.1 * npr.randn(layer1_sz, layer2_sz))
    B2 = kayak.Parameter(0.1 * npr.randn(1, layer2_sz))

    # Second hidden layer: ReLU + Dropout
    H2 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult(H1, W2),
                                                    B2)),
                       layer2_dropout,
                       batcher=batcher)

    # Output layer weights and biases, with random initializations.
    W3 = kayak.Parameter(0.1 * npr.randn(layer2_sz, 10))
    B3 = kayak.Parameter(0.1 * npr.randn(1, 10))

    # Output layer.
    Y = kayak.LogSoftMax(kayak.ElemAdd(kayak.MatMult(H2, W3), B3))

    # The training loss is negative multinomial log likelihood.
    loss = kayak.MatSum(kayak.LogMultinomialLoss(Y, T))

    # Use momentum for the gradient-based optimization.
    mom_grad_W1 = np.zeros(W1.shape)
    mom_grad_W2 = np.zeros(W2.shape)
    mom_grad_W3 = np.zeros(W3.shape)

    # Loop over epochs.
    for epoch in xrange(10):

        # Track the total loss.
        total_loss = 0.0

        # Loop over batches -- using batcher as iterator.
        for batch in batcher:
            # Compute the loss of this minibatch by asking the Kayak
            # object for its value and giving it reset=True.
            total_loss += loss.value

            # Now ask the loss for its gradient in terms of the
            # weights and the biases -- the two things we're trying to
            # learn here.
            grad_W1 = loss.grad(W1)
            grad_B1 = loss.grad(B1)
            grad_W2 = loss.grad(W2)
            grad_B2 = loss.grad(B2)
            grad_W3 = loss.grad(W3)
            grad_B3 = loss.grad(B3)

            # Use momentum on the weight gradients.
            mom_grad_W1 = momentum * mom_grad_W1 + (1.0 - momentum) * grad_W1
            mom_grad_W2 = momentum * mom_grad_W2 + (1.0 - momentum) * grad_W2
            mom_grad_W3 = momentum * mom_grad_W3 + (1.0 - momentum) * grad_W3

            # Now make the actual parameter updates.
            W1.value -= learn_rate * mom_grad_W1
            B1.value -= learn_rate * grad_B1
            W2.value -= learn_rate * mom_grad_W2
            B2.value -= learn_rate * grad_B2
            W3.value -= learn_rate * mom_grad_W3
            B3.value -= learn_rate * grad_B3

        print epoch, total_loss

    # After we've trained, we return a sugary little function handle
    # that makes things easy.  Basically, what we're doing here is
    # handing the output object (not the loss!) a dictionary where the
    # key is the Kayak input object 'X' (that is the features being
    # used here for logistic regression) and the value in that
    # dictionary is being determined by the argument to the lambda
    # expression.  The point here is that we wind up with a function
    # handle the can be called with a numpy object and it produces the
    # target values for novel data, using the parameters we just learned.

    def compute_predictions(x):
        X.data = x
        batcher.test_mode()
        return Y.value

    return compute_predictions