def test_vector_value():
    npr.seed(3)

    for ii in xrange(NUM_TRIALS):
        np_pred = npr.randn(10, 1)
        np_targ = npr.randn(10, 1)
        pred = kayak.Parameter(np_pred)
        targ = kayak.Targets(np_targ)
        out = kayak.L2Loss(pred, targ)

        assert close_float(out.value, np.sum((np_pred - np_targ)**2))
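# The L2Loss tests in this section rely on module-level fixtures that are not
# shown here: the numpy/numpy.random/kayak imports, a NUM_TRIALS constant, and
# a close_float helper. A minimal sketch of those assumed fixtures follows; the
# tolerance and trial count are illustrative guesses, not the project's actual
# values:

import numpy as np
import numpy.random as npr
import kayak

NUM_TRIALS = 10  # assumed; the real value lives in the test module

def close_float(a, b, tol=1e-6):
    # Elementwise absolute-difference comparison, matching how the
    # asserts use it on both scalars and arrays.
    return np.abs(a - b) < tol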
def __init__(self, maxnum, reduced_dims):
    self.threshold = 1e-2
    dummyword = np.zeros((maxnum, 1))
    W1 = np.random.randn(reduced_dims, maxnum) * 0.1
    W2 = np.random.randn(maxnum, reduced_dims) * 0.1
    self.input = ky.Parameter(dummyword)
    self.W1 = ky.Parameter(W1)
    self.W2 = ky.Parameter(W2)
    self.output = ky.MatMult(self.W1, self.input)
    self.recons = ky.MatMult(self.W2, self.output)
    self.loss = ky.MatSum(ky.L2Loss(self.recons, self.input))
    # self.totloss = ky.MatAdd(self.loss, ky.L2Norm(self.W2, weight=1e-2), ky.L2Norm(self.W1, weight=1e-2))
    self.totloss = self.loss
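# The __init__ above only wires up the reconstruction graph; nothing is
# trained yet. A hedged usage sketch, assuming this method belongs to a class
# named Autoencoder (the class name is not shown in the snippet) and that
# word_vectors is an iterable of (maxnum, 1) arrays -- both names are
# hypothetical:

ae = Autoencoder(maxnum=1000, reduced_dims=50)  # hypothetical class name
learn_rate = 1e-2
for word_vec in word_vectors:  # assumed iterable of (maxnum, 1) arrays
    ae.input.value = word_vec
    # Compute both gradients before updating, so the second gradient is
    # taken at the same parameter values as the first.
    g1 = ae.totloss.grad(ae.W1)
    g2 = ae.totloss.grad(ae.W2)
    ae.W1.value -= learn_rate * g1
    ae.W2.value -= learn_rate * g2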
def test_matrix_grad():
    npr.seed(6)

    for ii in xrange(NUM_TRIALS):
        np_pred = npr.randn(10, 20)
        np_targ = npr.randn(10, 20)
        pred = kayak.Parameter(np_pred)
        targ = kayak.Targets(np_targ)
        out = kayak.L2Loss(pred, targ)

        assert np.all(close_float(out.grad(pred), 2 * (np_pred - np_targ)))
        assert kayak.util.checkgrad(pred, out) < 1e-6
def test_scalar_value():
    npr.seed(1)

    for ii in xrange(NUM_TRIALS):
        np_pred = npr.randn()
        np_targ = npr.randn()
        pred = kayak.Parameter(np_pred)
        targ = kayak.Targets(np_targ)
        out = kayak.L2Loss(pred, targ)

        # Verify that a scalar is reproduced.
        assert close_float(out.value, (np_pred - np_targ)**2)
def test_matrix_value_1():
    npr.seed(5)

    for ii in xrange(NUM_TRIALS):
        np_pred = npr.randn(10, 20)
        np_targ = npr.randn(10, 20)
        pred = kayak.Parameter(np_pred)
        targ = kayak.Targets(np_targ)
        out = kayak.L2Loss(pred, targ)

        print out.value, np.sum((np_pred - np_targ)**2)
        assert close_float(out.value, np.sum((np_pred - np_targ)**2))
def test_scalar_grad():
    npr.seed(2)

    for ii in xrange(NUM_TRIALS):
        np_pred = npr.randn()
        np_targ = npr.randn()
        pred = kayak.Parameter(np_pred)
        targ = kayak.Targets(np_targ)
        out = kayak.L2Loss(pred, targ)

        assert close_float(out.grad(pred), 2 * (np_pred - np_targ))
        assert kayak.util.checkgrad(pred, out) < 1e-6
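# kayak.util.checkgrad measures how far the analytic gradient is from a
# finite-difference estimate. For the scalar case tested above, the idea
# reduces to a few lines; this is a sketch of the concept only, not
# checkgrad's actual implementation:

def scalar_fd_check(param, out, eps=1e-4):
    # Centered finite-difference estimate of d(out)/d(param).
    analytic = float(out.grad(param))
    orig = param.value
    param.value = orig + eps
    f_plus = float(out.value)
    param.value = orig - eps
    f_minus = float(out.value)
    param.value = orig  # restore the original value
    numeric = (f_plus - f_minus) / (2.0 * eps)
    return abs(analytic - numeric) / max(abs(numeric), 1.0)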
def test_matrix_value_2():
    npr.seed(7)

    for ii in xrange(NUM_TRIALS):
        np_pred = npr.randn(10, 20)
        np_targ = npr.randn(10, 20)
        pred = kayak.Parameter(np_pred)
        targ = kayak.Targets(np_targ)
        out = kayak.L2Loss(pred, targ, axis=0)

        print out.value, np.sum((np_pred - np_targ)**2, axis=0)
        assert np.all(close_float(out.value, np.sum((np_pred - np_targ)**2, axis=0)))
def kayak_mlp(X, y):
    """
    Kayak implementation of an MLP with a ReLU hidden layer and dropout.
    """
    # Create a batcher object.
    batcher = kayak.Batcher(batch_size, X.shape[0])

    # Count the number of rows and columns.
    num_examples, num_features = np.shape(X)

    X = kayak.Inputs(X, batcher)
    T = kayak.Targets(y, batcher)

    # ----------------------------- first hidden layer -------------------------------

    # Set up weights for the input layer, using the same scheme as the numpy MLP.
    # (input_range is kept for parity with the numpy version; the initialization
    # below uses a fixed 0.1 scale.)
    input_range = 1.0 / num_features ** 0.5
    weights_1 = kayak.Parameter(0.1 * np.random.randn(X.shape[1], layer1_size))
    bias_1 = kayak.Parameter(0.1 * np.random.randn(1, layer1_size))

    # Linear combination of weights and inputs.
    hidden_1_input = kayak.ElemAdd(kayak.MatMult(X, weights_1), bias_1)

    # Apply the activation function to the hidden layer.
    hidden_1_activation = kayak.HardReLU(hidden_1_input)

    # Apply dropout for regularization.
    hidden_1_out = kayak.Dropout(hidden_1_activation, layer1_dropout, batcher=batcher)

    # ----------------------------- output layer -----------------------------------

    weights_out = kayak.Parameter(0.1 * np.random.randn(layer1_size, 9))
    bias_out = kayak.Parameter(0.1 * np.random.randn(1, 9))

    # Linear combination of the hidden-layer output and the output weights.
    out = kayak.ElemAdd(kayak.MatMult(hidden_1_out, weights_out), bias_out)

    # Apply the activation function to the output.
    yhat = kayak.SoftMax(out)

    # ----------------------------- loss function -----------------------------------

    loss = kayak.MatAdd(kayak.MatSum(kayak.L2Loss(yhat, T)),
                        kayak.L2Norm(weights_1, layer1_l2))

    # Use momentum for the gradient-based optimization.
    mom_grad_W1 = np.zeros(weights_1.shape)
    mom_grad_W2 = np.zeros(weights_out.shape)

    # Loop over epochs.
    plot_loss = np.ones((iterations, 2))
    for epoch in xrange(iterations):

        # Track the total loss.
        total_loss = 0.0

        for batch in batcher:
            # Compute the loss of this minibatch by asking the Kayak
            # object for its value.
            total_loss += loss.value

            # Now ask the loss for its gradient in terms of the
            # weights and the biases -- the things we're trying to
            # learn here.
            grad_W1 = loss.grad(weights_1)
            grad_B1 = loss.grad(bias_1)
            grad_W2 = loss.grad(weights_out)
            grad_B2 = loss.grad(bias_out)

            # Use momentum on the weight gradients.
            mom_grad_W1 = momentum * mom_grad_W1 + (1.0 - momentum) * grad_W1
            mom_grad_W2 = momentum * mom_grad_W2 + (1.0 - momentum) * grad_W2

            # Now make the actual parameter updates.
            weights_1.value -= learn_rate * mom_grad_W1
            bias_1.value -= learn_rate * grad_B1
            weights_out.value -= learn_rate * mom_grad_W2
            bias_out.value -= learn_rate * grad_B2

        # Save values into a table to print the learning curve at the end of training.
        plot_loss[epoch, 0] = epoch
        plot_loss[epoch, 1] = total_loss
        print epoch, total_loss

    #pyplot.plot(plot_loss[:,0], plot_loss[:,1], linewidth=2.0)
    #pyplot.show()

    def compute_predictions(x):
        X.data = x
        batcher.test_mode()
        return yhat.value

    return compute_predictions
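# kayak_mlp trains the network as a side effect and returns a prediction
# closure. A usage sketch; X_train, y_train, and X_test are assumed arrays,
# and the module-level hyperparameters (batch_size, layer1_size,
# layer1_dropout, layer1_l2, learn_rate, momentum, iterations) must already
# be defined:

predict = kayak_mlp(X_train, y_train)
probs = predict(X_test)             # SoftMax outputs, one row per example
labels = np.argmax(probs, axis=1)   # hard class decisions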
Y = np.dot(X, true_W) + 0.1 * npr.randn(N, P)

kyk_batcher = kayak.Batcher(batch_size, N)

# Build network.
kyk_inputs = kayak.Inputs(X, kyk_batcher)

# Labels.
kyk_targets = kayak.Targets(Y, kyk_batcher)

# Weights.
W = 0.01 * npr.randn(D, P)
kyk_W = kayak.Parameter(W)

# Linear layer.
kyk_out = kayak.MatMult(kyk_inputs, kyk_W)

# Elementwise loss.
kyk_el_loss = kayak.L2Loss(kyk_out, kyk_targets)

# Sum the losses.
kyk_loss = kayak.MatSum(kyk_el_loss)

for ii in xrange(100):
    for batch in kyk_batcher:
        loss = kyk_loss.value
        print loss, np.sum((kyk_W.value - true_W)**2)
        grad = kyk_loss.grad(kyk_W)
        kyk_W.value -= learn * grad