示例#1
0
	def train(self, dev_train, dev_lbl):
		"""Run minibatch SGD with momentum over the training set.

		dev_train -- device (cudamat) matrix of training inputs.
		dev_lbl   -- device (cudamat) matrix of training targets.

		Assumes one example per column (``slice`` extracts a column
		range) -- TODO confirm against the data-loading code.
		Runs ``self.num_epochs`` passes of ``self.num_batches``
		minibatches each, printing the running misclassification rate
		and elapsed time after every epoch.
		"""

		# Train neural network.
		start_time = time.time()
		for epoch in range(self.num_epochs):
		    print "Epoch " + str(epoch + 1)
		    err = []

		    for batch in range(self.num_batches):
		        # get current minibatch (columns [batch*bs, (batch+1)*bs))
		        inp = dev_train.slice(batch*self.batch_size,(batch + 1)*self.batch_size)
		        target = dev_lbl.slice(batch*self.batch_size,(batch + 1)*self.batch_size)

		        # forward pass: h = sigmoid(w_w1^T * inp + w_b1)
		        cm.dot(self.w_w1.T, inp, target = self.h)

		        self.h.add_col_vec(self.w_b1)
		        self.h.apply_sigmoid()

		        # output layer: out = sigmoid(w_w2^T * h + w_b2)
		        cm.dot(self.w_w2.T, self.h, target = self.out)

		        self.out.add_col_vec(self.w_b2)
		        self.out.apply_sigmoid()

		        # back prop errors; after this self.out holds (out - target) in place
		        self.out.subtract(target) # compute error

		        # gradients for w_w2 and w_b2; beta=momentum folds the previous
		        # update into the new one: wu = momentum*wu + gradient
		        self.wu_w2.add_dot(self.h, self.out.T, beta = self.momentum)
		        self.wu_b2.add_sums(self.out, axis = 1, beta = self.momentum)

		        # compute delta = w_w2 * (out - target): error at the hidden layer
		        cm.dot(self.w_w2, self.out, target = self.delta)

		        # delta = delta * h * (1 - h)  (sigmoid derivative, in place)
		        cl.mult_by_sigmoid_deriv(self.delta, self.h)

		        # gradients for w_w1 and w_b1
		        self.wu_w1.add_dot(inp, self.delta.T, beta = self.momentum)
		        self.wu_b1.add_sums(self.delta, axis = 1, beta = self.momentum)

		        # update weights: w -= (epsilon / batch_size) * accumulated update
		        self.w_w1.subtract_mult(self.wu_w1, self.epsilon/self.batch_size)
		        self.w_b1.subtract_mult(self.wu_b1, self.epsilon/self.batch_size)
		        self.w_w2.subtract_mult(self.wu_w2, self.epsilon/self.batch_size)
		        self.w_b2.subtract_mult(self.wu_b2, self.epsilon/self.batch_size)

		        # calculate error on current minibatch: self.out holds
		        # (out - target), so |error| > 0.5 marks a misclassification
		        err.append(np.abs(self.out.asarray())>0.5)

		    print "Training misclassification rate: " + str(np.mean(err))
		    print "Time: " + str(time.time() - start_time)
示例#2
0
def test_mult_by_sigmoid_deriv():
    """mult_by_sigmoid_deriv(targets, acts) must do targets *= acts*(1-acts) in place."""
    m = 256
    n = 128
    c_targets = np.array(np.random.randn(m, n)*10, dtype=np.float32, order='F')
    c_acts = np.array(np.random.rand(m, n), dtype=np.float32, order='F')

    g_targets = cm.CUDAMatrix(c_targets)
    g_acts = cm.CUDAMatrix(c_acts)

    # CPU reference: scale targets by the sigmoid derivative a*(1-a)
    c_targets = c_targets * c_acts * (1. - c_acts)
    cl.mult_by_sigmoid_deriv(g_targets, g_acts)

    # Bug fix: compare the GPU *targets* against the CPU reference. The original
    # compared c_acts vs g_acts (which the kernel does not modify), leaving the
    # computed c_targets unused and the test vacuous.
    assert np.max(np.abs(c_targets - g_targets.asarray())) < 10**-2, "Error in cudamat.learn.mult_by_sigmoid_deriv exceeded threshold"
示例#3
0
def test_mult_by_sigmoid_deriv():
    """mult_by_sigmoid_deriv(targets, acts) must do targets *= acts*(1-acts) in place."""
    m = 256
    n = 128
    c_targets = np.array(np.random.randn(m, n) * 10,
                         dtype=np.float32,
                         order='F')
    c_acts = np.array(np.random.rand(m, n), dtype=np.float32, order='F')

    g_targets = cm.CUDAMatrix(c_targets)
    g_acts = cm.CUDAMatrix(c_acts)

    # CPU reference: scale targets by the sigmoid derivative a*(1-a)
    c_targets = c_targets * c_acts * (1. - c_acts)
    cl.mult_by_sigmoid_deriv(g_targets, g_acts)

    # Bug fix: compare the GPU *targets* against the CPU reference. The original
    # compared c_acts vs g_acts (which the kernel does not modify), leaving the
    # computed c_targets unused and the test vacuous.
    assert np.max(
        np.abs(c_targets - g_targets.asarray())
    ) < 10**-2, "Error in cudamat.learn.mult_by_sigmoid_deriv exceeded threshold"
示例#4
0
        out.add_col_vec(w_b2)
        out.apply_sigmoid()

        # back prop errors; after this, out holds (out - target) in place
        out.subtract(target) # compute error

        # gradients for w_w2 and w_b2 (accumulated into wu_*)
        wu_w2.add_dot(h, out.T)
        wu_b2.add_sums(out, axis = 1)

        # compute delta = w_w2 * (out - target): error at the hidden layer
        cm.dot(w_w2, out, target = delta)

        # delta = delta * h * (1 - h)  (sigmoid derivative, applied in place)
        cl.mult_by_sigmoid_deriv(delta, h)

        # gradients for w_w1 and w_b1
        wu_w1.add_dot(inp, delta.T)
        wu_b1.add_sums(delta, axis = 1)

        # update weights: w -= (epsilon / batch_size) * accumulated gradient
        w_w1.subtract_mult(wu_w1, epsilon/batch_size)
        w_b1.subtract_mult(wu_b1, epsilon/batch_size)
        w_w2.subtract_mult(wu_w2, epsilon/batch_size)
        w_b2.subtract_mult(wu_b2, epsilon/batch_size)

        # calculate error on current minibatch: out holds (out - target),
        # so |error| > 0.5 marks a misclassification
        err.append(np.abs(out.asarray())>0.5)

    print "Training misclassification rate: " + str(np.mean(err))
示例#5
0
        out.add_col_vec(w_b2)
        out.apply_sigmoid()

        # back prop errors; after this, out holds (out - target) in place
        out.subtract(target)  # compute error

        # gradients for w_w2 and w_b2; beta=momentum folds the previous
        # update into the new one: wu = momentum*wu + gradient
        wu_w2.add_dot(h, out.T, beta=momentum)
        wu_b2.add_sums(out, axis=1, beta=momentum)

        # compute delta = w_w2 * (out - target): error at the hidden layer
        cm.dot(w_w2, out, target=delta)

        # delta = delta * h * (1 - h)  (sigmoid derivative, applied in place)
        cl.mult_by_sigmoid_deriv(delta, h)

        # gradients for w_w1 and w_b1
        wu_w1.add_dot(inp, delta.T, beta=momentum)
        wu_b1.add_sums(delta, axis=1, beta=momentum)

        # update weights: w -= (epsilon / batch_size) * accumulated update
        w_w1.subtract_mult(wu_w1, epsilon / batch_size)
        w_b1.subtract_mult(wu_b1, epsilon / batch_size)
        w_w2.subtract_mult(wu_w2, epsilon / batch_size)
        w_b2.subtract_mult(wu_b2, epsilon / batch_size)

        # calculate error on current minibatch: out holds (out - target),
        # so |error| > 0.5 marks a misclassification
        err.append(np.abs(out.asarray()) > 0.5)

    print("Training misclassification rate: %f" % np.mean(err))