# Softmax regression trained with minibatch gradient descent on the GPU.
# Assumes: X and t hold the minibatches of inputs and one-hot targets,
# w and m are the weight matrix and its momentum buffer, and batch_size = 128.
b = gpu.zeros((1, 10))
mb = gpu.zeros((1, 10))
alpha = 0.1
momentum = 0.5
momentum_type = 1
for epoch in xrange(200):
    for i in xrange(X.shape[0]):
        if momentum_type == 1:
            # Use Nesterov momentum to train the weights.
            n = w + (m * momentum)          # look-ahead weights
            nb = b + (mb * momentum)
            out = gpu.softmax(gpu.dot(X[i], n) + nb)
            gradb = gpu.dot(gpu.ones((1, batch_size)), out - t[i])
            grad = gpu.dot(X[i].T, out - t[i])
            m = m * momentum - (alpha * grad / 128.)
            mb = mb * momentum - (alpha * gradb / 128.)
            w += m
            b += mb
        elif momentum_type == 2:
            # Use classic momentum to train the weights.
            out = gpu.softmax(gpu.dot(X[i], w) + b)
            gradb = gpu.dot(gpu.ones((1, batch_size)), out - t[i])
            grad = gpu.dot(X[i].T, out - t[i])
            m = m * momentum - (alpha * grad / 128.)
            mb = mb * momentum - (alpha * gradb / 128.)
            w += m
            b += mb
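For reference, the two branches above implement the standard classic and Nesterov momentum updates. With learning rate $\alpha$ (alpha), momentum coefficient $\mu$ (momentum), velocity $v$ (the buffers m and mb), and minibatch gradient $\nabla f$:

Classic momentum:
$$v_{t+1} = \mu v_t - \alpha \nabla f(w_t), \qquad w_{t+1} = w_t + v_{t+1}$$

Nesterov momentum:
$$v_{t+1} = \mu v_t - \alpha \nabla f(w_t + \mu v_t), \qquad w_{t+1} = w_t + v_{t+1}$$

The only difference is that Nesterov momentum evaluates the gradient at the look-ahead point $w_t + \mu v_t$, which is exactly the role of n and nb in the first branch.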
# Two-layer neural network with dropout, Nesterov momentum, and L2 weight decay.
# Assumes: w1, w2, b1, b2 and their momentum buffers m1, m2, mb1, mb2 are initialized
# elsewhere; d02/d05 are pools of pre-sampled dropout masks; L2 is the weight decay
# coefficient; time_softmax accumulates profiling time (requires import time, numpy as np).
train = np.zeros(epochs, dtype=np.float32)   # per-epoch training error
cv = np.zeros(epochs, dtype=np.float32)      # per-epoch cross-validation error
for epoch in range(epochs):
    for i in xrange(batches):
        # Nesterov accelerated gradient: look-ahead weights (2.2 sec)
        n1 = w1 + (m1 * momentum)
        n2 = w2 + (m2 * momentum)
        nb1 = b1 + (mb1 * momentum)
        nb2 = b2 + (mb2 * momentum)
        # dropout and activations (7.1 sec)
        z0 = X[i] * d02[rng.randint(0, 75)]
        z1 = (gpu.dot(z0, n1) + nb1).logistic() * d05[rng.randint(0, 75)]
        t0 = time.time()
        feedforward = gpu.softmax(gpu.dot(z1, n2) + nb2)
        time_softmax += time.time() - t0     # softmax (0.48 sec)
        # gradients (6 sec)
        e1 = feedforward - t[i]
        grad2 = gpu.dot(z1.T, e1)
        grad1 = gpu.dot(z0.T, gpu.dot(e1, n2.T) * z1 * (1 - z1))  # backprop through the dropped-out input z0
        gradb2 = gpu.dot(gpu.ones((1, batch_size)), e1)
        gradb1 = gpu.dot(gpu.ones((1, batch_size)), gpu.dot(e1, n2.T) * z1 * (1 - z1))
        # momentum and weight updates (7.4 sec)
        m1 = (momentum * m1) - ((grad1 + n1 * L2) * alpha / (batch_size * 1.0))
        m2 = (momentum * m2) - ((grad2 + n2 * L2) * alpha / (batch_size * 1.0))
        mb1 = (momentum * mb1) - ((gradb1 + nb1 * L2) * alpha / (batch_size * 1.0))
        mb2 = (momentum * mb2) - ((gradb2 + nb2 * L2) * alpha / (batch_size * 1.0))
        w1 = w1 + m1
        w2 = w2 + m2
        b1 = b1 + mb1
        b2 = b2 + mb2
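The dropout mask pools d02 and d05 are indexed with rng.randint(0, 75) above but are not defined in this snippet. Below is a minimal sketch of how such a pool of pre-sampled masks could be built; the gnumpy import, the layer sizes, and the 20%/50% drop rates suggested by the names d02/d05 are assumptions, not taken from the original code.

import numpy as np
import gnumpy as gpu                      # assumed: the gpu module used above is gnumpy

rng = np.random.RandomState(1234)         # also supplies rng.randint(0, 75) in the training loop
batch_size = 128                          # assumed minibatch size
n_input, n_hidden = 784, 1024             # hypothetical layer sizes
# 75 pre-sampled binary masks per layer: d02 keeps ~80% of the inputs,
# d05 keeps ~50% of the hidden units.
d02 = [gpu.garray(rng.binomial(1, 0.8, (batch_size, n_input))) for _ in xrange(75)]
d05 = [gpu.garray(rng.binomial(1, 0.5, (batch_size, n_hidden))) for _ in xrange(75)]

Sampling the masks once and reusing them avoids generating a fresh random matrix on every minibatch, at the cost of the masks repeating after a while.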