def train(self, dev_train, dev_lbl):
    """Train the network with minibatch SGD plus momentum.

    Runs ``self.num_epochs`` passes over ``self.num_batches`` minibatches of
    ``self.batch_size`` examples each, printing the training misclassification
    rate and elapsed time after every epoch.

    Parameters
    ----------
    dev_train : cudamat.CUDAMatrix
        Device matrix of training inputs, one example per column.
    dev_lbl : cudamat.CUDAMatrix
        Device matrix of training targets, one example per column.
    """
    start_time = time.time()
    for epoch in range(self.num_epochs):
        # NOTE: print statements converted to print() calls for consistency
        # with the rest of the file (Python 3 compatible).
        print("Epoch " + str(epoch + 1))
        err = []

        for batch in range(self.num_batches):
            # get current minibatch (views into the device matrices, no copy)
            inp = dev_train.slice(batch*self.batch_size, (batch + 1)*self.batch_size)
            target = dev_lbl.slice(batch*self.batch_size, (batch + 1)*self.batch_size)

            # forward pass: h = sigmoid(w_w1' * inp + w_b1)
            cm.dot(self.w_w1.T, inp, target=self.h)
            self.h.add_col_vec(self.w_b1)
            self.h.apply_sigmoid()

            # out = sigmoid(w_w2' * h + w_b2)
            cm.dot(self.w_w2.T, self.h, target=self.out)
            self.out.add_col_vec(self.w_b2)
            self.out.apply_sigmoid()

            # back prop errors: out now holds (out - target)
            self.out.subtract(target)  # compute error

            # gradients for w_w2 and w_b2; beta scales the previous update,
            # so the wu_* buffers accumulate a momentum term
            self.wu_w2.add_dot(self.h, self.out.T, beta=self.momentum)
            self.wu_b2.add_sums(self.out, axis=1, beta=self.momentum)

            # compute delta for the hidden layer
            cm.dot(self.w_w2, self.out, target=self.delta)

            # delta = delta * h * (1 - h)
            cl.mult_by_sigmoid_deriv(self.delta, self.h)

            # gradients for w_w1 and w_b1
            self.wu_w1.add_dot(inp, self.delta.T, beta=self.momentum)
            self.wu_b1.add_sums(self.delta, axis=1, beta=self.momentum)

            # update weights (learning rate normalized by minibatch size)
            self.w_w1.subtract_mult(self.wu_w1, self.epsilon/self.batch_size)
            self.w_b1.subtract_mult(self.wu_b1, self.epsilon/self.batch_size)
            self.w_w2.subtract_mult(self.wu_w2, self.epsilon/self.batch_size)
            self.w_b2.subtract_mult(self.wu_b2, self.epsilon/self.batch_size)

            # calculate error on current minibatch: |out - target| > 0.5
            # marks a misclassified output unit
            err.append(np.abs(self.out.asarray()) > 0.5)

        print("Training misclassification rate: " + str(np.mean(err)))
        print("Time: " + str(time.time() - start_time))
def test_mult_by_sigmoid_deriv():
    """Check cudamat.learn.mult_by_sigmoid_deriv against a NumPy reference.

    mult_by_sigmoid_deriv(targets, acts) multiplies ``targets`` in place by
    the sigmoid derivative ``acts * (1 - acts)``; ``acts`` is left untouched.
    """
    m = 256
    n = 128
    # Fortran order matches cudamat's column-major device layout.
    c_targets = np.array(np.random.randn(m, n)*10, dtype=np.float32, order='F')
    c_acts = np.array(np.random.rand(m, n), dtype=np.float32, order='F')

    g_targets = cm.CUDAMatrix(c_targets)
    g_acts = cm.CUDAMatrix(c_acts)

    # CPU reference result.
    c_targets = c_targets * c_acts * (1. - c_acts)
    # GPU computation; writes the product into g_targets in place.
    cl.mult_by_sigmoid_deriv(g_targets, g_acts)

    # BUG FIX: the kernel writes into `targets`, so the result to verify is
    # g_targets against the c_targets reference. The previous assert compared
    # c_acts with g_acts, which the operation never modifies, so the test
    # passed trivially without checking anything.
    assert np.max(np.abs(c_targets - g_targets.asarray())) < 10**-2, "Error in cudamat.learn.mult_by_sigmoid_deriv exceeded threshold"
def test_mult_by_sigmoid_deriv():
    """Check cudamat.learn.mult_by_sigmoid_deriv against a NumPy reference.

    mult_by_sigmoid_deriv(targets, acts) multiplies ``targets`` in place by
    the sigmoid derivative ``acts * (1 - acts)``; ``acts`` is left untouched.

    NOTE(review): this redefines the function of the same name above and
    shadows it — one copy should probably be removed.
    """
    m = 256
    n = 128
    # Fortran order matches cudamat's column-major device layout.
    c_targets = np.array(np.random.randn(m, n) * 10, dtype=np.float32, order='F')
    c_acts = np.array(np.random.rand(m, n), dtype=np.float32, order='F')

    g_targets = cm.CUDAMatrix(c_targets)
    g_acts = cm.CUDAMatrix(c_acts)

    # CPU reference result.
    c_targets = c_targets * c_acts * (1. - c_acts)
    # GPU computation; writes the product into g_targets in place.
    cl.mult_by_sigmoid_deriv(g_targets, g_acts)

    # BUG FIX: the kernel writes into `targets`, so the result to verify is
    # g_targets against the c_targets reference. The previous assert compared
    # c_acts with g_acts, which the operation never modifies, so the test
    # passed trivially without checking anything.
    assert np.max(
        np.abs(c_targets - g_targets.asarray())
    ) < 10**-2, "Error in cudamat.learn.mult_by_sigmoid_deriv exceeded threshold"
# One plain-SGD step of the two-layer sigmoid network (script-level variant of
# the `train` method's inner loop, without momentum on the gradient buffers).
# NOTE(review): this is a fragment — inp, target, h, out, delta, the weight /
# gradient matrices (w_*, wu_*), err, epsilon and batch_size must all be
# defined earlier in the enclosing script.
out.add_col_vec(w_b2)
out.apply_sigmoid()

# back prop errors: out now holds (out - target)
out.subtract(target) # compute error

# gradients for w_w2 and w_b2 (no beta argument: previous gradients are
# overwritten rather than decayed, i.e. momentum = 0)
wu_w2.add_dot(h, out.T)
wu_b2.add_sums(out, axis = 1)

# compute delta for the hidden layer
cm.dot(w_w2, out, target = delta)

# delta = delta * h * (1 - h)
cl.mult_by_sigmoid_deriv(delta, h)

# gradients for w_w1 and w_b1
wu_w1.add_dot(inp, delta.T)
wu_b1.add_sums(delta, axis = 1)

# update weights (learning rate normalized by minibatch size)
w_w1.subtract_mult(wu_w1, epsilon/batch_size)
w_b1.subtract_mult(wu_b1, epsilon/batch_size)
w_w2.subtract_mult(wu_w2, epsilon/batch_size)
w_b2.subtract_mult(wu_b2, epsilon/batch_size)

# calculate error on current minibatch: |out - target| > 0.5 marks a
# misclassified output unit
err.append(np.abs(out.asarray())>0.5)

print "Training misclassification rate: " + str(np.mean(err))
# One SGD-with-momentum step of the two-layer sigmoid network (script-level
# variant of the `train` method's inner loop; beta=momentum scales the previous
# contents of the wu_* buffers so they accumulate a momentum term).
# NOTE(review): this is a fragment — inp, target, h, out, delta, the weight /
# gradient matrices (w_*, wu_*), err, momentum, epsilon and batch_size must
# all be defined earlier in the enclosing script.
out.add_col_vec(w_b2)
out.apply_sigmoid()

# back prop errors: out now holds (out - target)
out.subtract(target) # compute error

# gradients for w_w2 and w_b2 (beta decays the previous gradient -> momentum)
wu_w2.add_dot(h, out.T, beta=momentum)
wu_b2.add_sums(out, axis=1, beta=momentum)

# compute delta for the hidden layer
cm.dot(w_w2, out, target=delta)

# delta = delta * h * (1 - h)
cl.mult_by_sigmoid_deriv(delta, h)

# gradients for w_w1 and w_b1
wu_w1.add_dot(inp, delta.T, beta=momentum)
wu_b1.add_sums(delta, axis=1, beta=momentum)

# update weights (learning rate normalized by minibatch size)
w_w1.subtract_mult(wu_w1, epsilon / batch_size)
w_b1.subtract_mult(wu_b1, epsilon / batch_size)
w_w2.subtract_mult(wu_w2, epsilon / batch_size)
w_b2.subtract_mult(wu_b2, epsilon / batch_size)

# calculate error on current minibatch: |out - target| > 0.5 marks a
# misclassified output unit
err.append(np.abs(out.asarray()) > 0.5)

print("Training misclassification rate: %f" % np.mean(err))