def move_robot():
    """Steer the robot from the module-level tracking globals.

    Reads ``x_deviation``, ``y_max`` and ``tolerance``.  When the horizontal
    deviation is inside the tolerance band the robot either stops (``y_max``
    above 0.9) or drives forward; otherwise it turns left and/or right for a
    duration obtained from ``get_delay`` and then stops.
    """
    global x_deviation, y_max, tolerance

    if abs(x_deviation) < tolerance:
        # Horizontally aligned: stop when y_max exceeds 0.9, else advance.
        if y_max > 0.9:
            ut.red_light("ON")
            ut.stop()
            print("reached person...........")
            return
        ut.red_light("OFF")
        ut.forward()
        print("moving robot ...FORWARD....!!!!!!!!!!!!!!")
        return

    ut.red_light("OFF")

    # The two checks are deliberately independent (not if/elif): the globals
    # are re-read for the second check after the first turn has finished.
    if x_deviation >= tolerance:
        pause = get_delay(x_deviation)
        ut.left()
        time.sleep(pause)
        ut.stop()
        print("moving robot ...Left....<<<<<<<<<<")

    if x_deviation <= -1 * tolerance:
        pause = get_delay(x_deviation)
        ut.right()
        time.sleep(pause)
        ut.stop()
        print("moving robot ...Right....>>>>>>>>")
def benchmark_pca():
    """Train a 10-class logistic regression on the transformed data and plot it.

    Loads the data with ``get_transformed_data``, one-hot encodes the labels,
    runs 200 full-batch update steps, prints the training cost and test error
    every 10 steps, then plots the train/test loss curves and the test error.
    """
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    def to_indicator(labels, n_rows):
        # One row per sample with a single 1 in the column named by its label.
        encoded = np.zeros((n_rows, 10))
        for row in range(n_rows):
            encoded[row, labels[row]] = 1
        return encoded

    N, D = Xtrain.shape
    Ytrain_ind = to_indicator(Ytrain, N)
    Ytest_ind = to_indicator(Ytest, len(Ytest))

    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    train_costs = []
    test_costs = []
    test_errors = []

    # D = 300 -> error = 0.07
    lr = 0.0001
    reg = 0.01
    for step in range(200):
        train_probs = forward(Xtrain, W, b)
        train_cost = cost(train_probs, Ytrain_ind)
        train_costs.append(train_cost)

        test_probs = forward(Xtest, W, b)
        test_costs.append(cost(test_probs, Ytest_ind))

        test_error = error_rate(test_probs, Ytest)
        test_errors.append(test_error)

        # Parameters are updated after the metrics for this step are recorded.
        W += lr * (gradW(Ytrain_ind, train_probs, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, train_probs) - reg * b)
        if step % 10 == 0:
            print("Cost at iteration %d: %.6f" % (step, train_cost))
            print("Error rate:", test_error)

    final_probs = forward(Xtest, W, b)
    print("Final error rate:", error_rate(final_probs, Ytest))

    steps = range(len(train_costs))
    plt.plot(steps, train_costs, label='train loss')
    plt.plot(steps, test_costs, label='test loss')
    plt.title('Loss')
    plt.legend()
    plt.show()

    plt.plot(test_errors)
    plt.title('Error')
    plt.show()
def benchmark_full():
    """Train a 10-class logistic regression on the normalized data and plot it.

    Loads the data with ``get_normalized_data``, converts the labels with
    ``y2indicator``, runs 500 full-batch update steps, prints the training
    cost and test error every 10 steps, then plots both loss curves and the
    test error curve.
    """
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()
    print("Performing logistic regression...")
    # lr = LogisticRegression(solver='lbfgs')

    # Convert Ytrain and Ytest to (N x K) matrices of indicator variables.
    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    train_costs = []
    test_costs = []
    test_errors = []

    # Tuning notes kept from the original author:
    #   reg = 1
    #   learning rate 0.0001 is too high, 0.00005 is also too high
    #   0.00003 / 2000 iterations => 0.363 error, -7630 cost
    #   0.00004 / 1000 iterations => 0.295 error, -7902 cost
    #   0.00004 / 2000 iterations => 0.321 error, -7528 cost
    #   reg = 0.1, still around 0.31 error
    #   reg = 0.01, still around 0.31 error
    lr = 0.00004
    reg = 0.01
    for step in range(500):
        train_probs = forward(Xtrain, W, b)
        train_cost = cost(train_probs, Ytrain_ind)
        train_costs.append(train_cost)

        test_probs = forward(Xtest, W, b)
        test_costs.append(cost(test_probs, Ytest_ind))

        test_error = error_rate(test_probs, Ytest)
        test_errors.append(test_error)

        # Parameters are updated after the metrics for this step are recorded.
        W += lr * (gradW(Ytrain_ind, train_probs, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, train_probs) - reg * b)
        if step % 10 == 0:
            print("Cost at iteration %d: %.6f" % (step, train_cost))
            print("Error rate:", test_error)

    final_probs = forward(Xtest, W, b)
    print("Final error rate:", error_rate(final_probs, Ytest))

    steps = range(len(train_costs))
    plt.plot(steps, train_costs, steps, test_costs)
    plt.show()
    plt.plot(test_errors)
    plt.show()
def sgd_batch():
    """
    Use util functions to run the logistic classification with mini-batch
    gradient updates.

    Loads the data with ``get_transformed_digit``, trains for 500 epochs over
    shuffled mini-batches of 500 samples, records the test cost after every
    batch update and prints the final test error rate.

    Fix over the previous version: the mini-batch slice was indexed by the
    epoch counter instead of the batch counter, so every batch of an epoch
    was the same slice (and an empty one once the epoch index exceeded the
    number of batches).
    """
    X_train, Y_train, X_test, Y_test = get_transformed_digit()
    N, D = X_train.shape
    yindi_train = y2indicator(Y_train)
    yindi_test = y2indicator(Y_test)
    M = yindi_test.shape[1]

    W = np.random.rand(D, M)
    b = np.random.rand(M)
    cost_test = []

    eta = 1e-4
    penalty = 1e-2
    batch_size = 500
    batch_num = N // batch_size

    for _ in range(500):
        X_shuffle, Y_train_shuffle = shuffle(X_train, yindi_train)
        for ii in range(batch_num):
            # Slice by the batch index so each batch covers a distinct window.
            start = ii * batch_size
            stop = start + batch_size
            x_tem = X_shuffle[start:stop]
            y_tem = Y_train_shuffle[start:stop]

            y_fit = forward(x=x_tem, w=W, b=b)
            W += eta * (deri_w(t_matrix=y_tem, y_matrix=y_fit, x=x_tem) - penalty * W)
            b += eta * (deri_b(t_matrix=y_tem, y_matrix=y_fit) - penalty * b)

            p_y_test = forward(x=X_test, w=W, b=b)
            cost_test_tem = cost(y_matrix=p_y_test, t_matrix=yindi_test)
            cost_test.append(cost_test_tem)

            if ii % 100 == 0:
                error_tem = error_rate(y_matrix=p_y_test, target=Y_test)
                print("the error rate in "+str(ii)+" iteration is :"+str(error_tem))

    p_y_final = forward(x=X_test, w=W, b=b)
    error_final = error_rate(y_matrix=p_y_final, target=Y_test)
    print("the final error rate is "+str(error_final))
def move_robot():
    """Move the robot along the axis with the larger deviation and log the command.

    Reads the globals ``x_deviation``, ``y_deviation`` and ``tolerance``.
    When both deviations are inside the tolerance band the robot stops;
    otherwise it pulses left/right (x dominates) or forward/back (otherwise)
    for a duration from ``get_delay``.  The chosen command and delay are
    written to ``arr_track_data[4]`` and ``arr_track_data[5]``.
    """
    global x_deviation, y_deviation, tolerance, arr_track_data

    print("moving robot .............!!!!!!!!!!!!!!")
    print(x_deviation, y_deviation, tolerance, arr_track_data)

    on_target = abs(x_deviation) < tolerance and abs(y_deviation) < tolerance
    if on_target:
        command = "Stop"
        pause = 0
        ut.stop()
        ut.red_light("ON")
    else:
        ut.red_light("OFF")
        horizontal_dominates = abs(x_deviation) > abs(y_deviation)
        if horizontal_dominates:
            # Independent checks (not if/elif), as in the original.
            if x_deviation >= tolerance:
                command = "Move Left"
                pause = get_delay(x_deviation, 'l')
                ut.left()
                time.sleep(pause)
                ut.stop()
            if x_deviation <= -1 * tolerance:
                command = "Move Right"
                pause = get_delay(x_deviation, 'r')
                ut.right()
                time.sleep(pause)
                ut.stop()
        else:
            if y_deviation >= tolerance:
                command = "Move Forward"
                pause = get_delay(y_deviation, 'f')
                ut.forward()
                time.sleep(pause)
                ut.stop()
            if y_deviation <= -1 * tolerance:
                command = "Move Backward"
                pause = get_delay(y_deviation, 'b')
                ut.back()
                time.sleep(pause)
                ut.stop()

    arr_track_data[4] = command
    arr_track_data[5] = pause
def update(self, dt):
    """Advance the fruit by ``dt``: rotate it, then act on its current stage.

    IN_HAND: follow the arm via ``move_to_hand``.
    FLYING: integrate position and gravity, discard the fruit when it leaves
    the screen, and switch to EATEN when it is close enough to a closed
    monkey mouth.
    EATEN: shrink until the size factor reaches zero, then discard.
    """
    core.Model.update(self, dt)
    self.rotation += self.rot_inc * dt

    if self.stage == Fruit.IN_HAND:
        self.move_to_hand()

    elif self.stage == Fruit.FLYING:
        self.x += self.speed[0] * dt
        self.y += self.speed[1] * dt
        fallen_speed = self.speed[1] + conf.gravity * dt
        self.speed = (self.speed[0], fallen_speed)

        # check if out of screen
        past_left_edge = self.x < -conf.fruit.dimensions[0]/2
        if past_left_edge or self.y > conf.scene_height + conf.fruit.dimensions[1]/2:
            self.game_scene.create_yousuck()
            self.dont_keep()

        # check if should be eaten
        if self.monkey.state == Monkey.CLOSED:
            mouth_anchor = (self.monkey.x, self.monkey.y - conf.monkey.dimensions[1])
            mouth_x, mouth_y = util.forward(
                mouth_anchor,
                conf.monkey.mouth_tweak.amount,
                conf.monkey.mouth_tweak.direction
            )
            # Squared distance; compared against the configured threshold.
            squared_gap = (self.x - mouth_x)**2 + (self.y - mouth_y)**2
            if squared_gap <= conf.collision.fruit_monkey:
                self.stage = Fruit.EATEN
                self.rot_inc *= conf.fruit.rot_inc_extra

    elif self.stage == Fruit.EATEN:
        self.size_factor -= conf.fruit.shrink * dt
        if self.size_factor <= 0:
            self.size_factor = 0
            self.dont_keep()
def train(self, X, Y, activation=1, lr=10e-7, reg=10e-7, epoch=10):
    """Fit the two-layer network with mini-batch gradient steps and momentum.

    Args:
        X: 2-D training inputs; ``X.shape`` gives (samples, features).
        Y: training targets, converted to an indicator matrix by ``tar2ind``.
        activation: forwarded unchanged to ``forward`` and the derivative
            helpers.
        lr: learning rate applied to each gradient term.
        reg: regularisation strength forwarded to the derivative helpers.
        epoch: NOTE(review): currently unused - the loop below always runs
            200 passes.  Left as-is so existing callers keep their training
            length; confirm whether this parameter should drive the loop.

    Side effects:
        Sets ``self.W1``, ``self.b1``, ``self.W2`` and ``self.b2`` and prints
        the cost and classification rate every 10th batch.

    Change over the previous version: the per-batch indicator matrix is now a
    slice of the one built from the full target vector, so its column count
    always matches the ``K`` used to size ``W2``/``b2`` (re-encoding each
    batch could disagree when a batch lacks some class, depending on how
    ``tar2ind`` sizes its output).
    """
    N, D = X.shape  # number of samples, dimensionality of the data
    batch_size = 500
    n_batches = N // batch_size

    # One-hot encode the full target vector once; batches slice into it.
    full_ind = tar2ind(Y)
    _, K = full_ind.shape

    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)  # input -> hidden weights
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)  # hidden -> output weights
    self.b1 = np.random.randn(self.M)
    self.b2 = np.random.randn(K)

    # Momentum accumulators for each parameter.
    dW2 = 0
    db2 = 0
    dW1 = 0
    db1 = 0
    mu = 0.9  # momentum coefficient
    cost = []

    for _ in range(0, 200):
        for i in range(0, n_batches):
            start = i * batch_size
            stop = start + batch_size
            X_tr = X[start:stop, :]
            Y_tr = Y[start:stop, ]
            ind = full_ind[start:stop, :]

            output, hidden = forward(X_tr, activation, self.W1, self.b1,
                                     self.W2, self.b2)

            # Backpropagation.  Order matters: W1/b1 gradients are computed
            # with the already-updated W2, as in the original.
            dW2 = mu * dW2 + lr * (derivative_W2(ind, output, hidden, reg, self.W2))
            self.W2 = self.W2 + dW2

            db2 = mu * db2 + lr * (derivative_b2(ind, output, reg, self.b2))
            self.b2 = self.b2 + db2

            dW1 = mu * dW1 + lr * (derivative_W1(
                ind, output, hidden, self.W2, X_tr, activation, reg, self.W1))
            self.W1 = self.W1 + dW1

            db1 = mu * db1 + lr * (derivative_b1(
                ind, output, hidden, self.W2, activation, reg, self.b1))
            self.b1 = self.b1 + db1

            c = cross_entropy(ind, output)
            cost.append(c)

            if i % 10 == 0:
                result = np.argmax(output, axis=1)
                r = classification_rate(Y_tr, result)
                print("iteration:- ", i, "cost:- ", c,
                      "classification rate:- ", r)
def move_robot():
    """Steer toward the target using ``x_deviation``/``y_max`` and log the command.

    When the horizontal deviation is inside the tolerance band the robot stops
    if ``1 - y_max`` is below 0.1, otherwise it drives forward.  Outside the
    band it turns left and/or right for a duration from ``get_delay``.  The
    chosen command and delay are written to ``arr_track_data[4]`` and
    ``arr_track_data[5]``.
    """
    global x_deviation, y_max, tolerance, arr_track_data

    print("moving robot .............!!!!!!!!!!!!!!")
    print(x_deviation, tolerance, arr_track_data)

    gap_to_bottom = 1 - y_max  # distance from bottom of the frame
    centred = abs(x_deviation) < tolerance

    if not centred:
        ut.red_light("OFF")
        # Independent checks (not if/elif), as in the original.
        if x_deviation >= tolerance:
            command = "Move Left"
            pause = get_delay(x_deviation)
            ut.left()
            time.sleep(pause)
            ut.stop()
        if x_deviation <= -1 * tolerance:
            command = "Move Right"
            pause = get_delay(x_deviation)
            ut.right()
            time.sleep(pause)
            ut.stop()
    else:
        pause = 0
        if gap_to_bottom < 0.1:
            command = "Stop"
            ut.red_light("ON")
            ut.stop()
        else:
            command = "forward"
            ut.red_light("OFF")
            ut.forward()

    arr_track_data[4] = command
    arr_track_data[5] = pause
def render(self, screen):
    """Draw every model as a rotated, scaled copy of this view's image.

    For each model the image is rotated by ``model.rotation``, scaled by the
    configured width/height factors, centred on the model's scaled position,
    shifted by half the arm length along the rotation via ``util.forward``
    and blitted onto ``screen``.
    """
    HasImageView.render(self, screen)

    for model in self.models:
        screen_x = int(model.x * conf.factor_width)
        screen_y = int(model.y * conf.factor_height)

        sprite = pygame.transform.rotate(self.image, model.rotation)
        bounds = sprite.get_rect()
        target_size = (
            int(bounds.width * conf.factor_width),
            int(bounds.height * conf.factor_height),
        )
        sprite = pygame.transform.smoothscale(sprite, target_size)

        # Offset by the sprite's centre so the image is centred on the point.
        centre = sprite.get_rect().center
        screen_x -= centre[0]
        screen_y -= centre[1]

        # move (x, y) to the edge of the arm
        half_arm = conf.arm.dimensions[0] / 2
        draw_x, draw_y = util.forward((screen_x, screen_y), half_arm, model.rotation)
        screen.blit(sprite, (draw_x, draw_y))
def merge_empty(self,other):
    """
    Merges this occupied region with an empty region, resolving
    potentially ambiguous corners

    The side on which ``other`` attaches is found by looking up
    ``other.center()`` in ``self.region``.  If that side still has open
    entries, they are handed to the neighbouring sides whose corners are
    flagged ambiguous; anything left over raises
    ``TopologicalImpossibility``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    side = self.region.index(other.center())
    self.region.merge(other)
    # Neighbouring sides, as returned by the util helpers.
    prev_side = util.backward(side)
    next_side = util.forward(side)
    if self.open[side]:
        if self.ambi[side]:
            # Move the first open entry of this side to the end of the
            # previous side's list and clear the ambiguity flag.
            self.open[prev_side].append(self.open[side].pop(0))
            self.ambi[side] = False
        if self.ambi[next_side]:
            # Move the last open entry of this side to the front of the next
            # side's list.  NOTE(review): pop() raises IndexError if the
            # branch above already emptied the list - presumably callers
            # guarantee enough entries; verify.
            self.open[next_side].insert(0, self.open[side].pop())
            self.ambi[next_side] = False
        if self.open[side]:
            # Entries remain that no ambiguous corner could absorb.
            raise TopologicalImpossibility()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Each variant trains a fresh 10-class logistic regression on the data from
    ``get_transformed_data`` for 50 epochs, records the test cost, prints
    progress every 10 epochs plus the elapsed time, and finally all three
    cost curves are plotted on a common [0, 1] x-axis.
    """
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    started = datetime.now()
    for epoch in range(50):
        train_probs = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, train_probs, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, train_probs) - reg*b)

        test_probs = forward(Xtest, W, b)
        test_cost = cost(test_probs, Ytest_ind)
        LL.append(test_cost)

        test_error = error_rate(test_probs, Ytest)
        if epoch % 10 == 0:
            print("Cost at iteration %d: %.6f" % (epoch, test_cost))
            print("Error rate:", test_error)
    final_probs = forward(Xtest, W, b)
    print("Final error rate:", error_rate(final_probs, Ytest))
    print("Elapsted time for full GD:", datetime.now() - started)

    # 2. stochastic
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01
    started = datetime.now()
    for epoch in range(50):  # takes very long since we're computing cost for 41k samples
        shuffled_X, shuffled_Y = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            sample_x = shuffled_X[n, :].reshape(1, D)
            sample_y = shuffled_Y[n, :].reshape(1, 10)
            sample_probs = forward(sample_x, W, b)

            W += lr*(gradW(sample_y, sample_probs, sample_x) - reg*W)
            b += lr*(gradb(sample_y, sample_probs) - reg*b)

            test_probs = forward(Xtest, W, b)
            test_cost = cost(test_probs, Ytest_ind)
            LL_stochastic.append(test_cost)

        test_error = error_rate(test_probs, Ytest)
        if epoch % 10 == 0:
            print("Cost at iteration %d: %.6f" % (epoch, test_cost))
            print("Error rate:", test_error)
    final_probs = forward(Xtest, W, b)
    print("Final error rate:", error_rate(final_probs, Ytest))
    print("Elapsted time for SGD:", datetime.now() - started)

    # 3. batch
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz
    started = datetime.now()
    for epoch in range(50):
        shuffled_X, shuffled_Y = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            lo = j*batch_sz
            hi = j*batch_sz + batch_sz
            batch_x = shuffled_X[lo:hi, :]
            batch_y = shuffled_Y[lo:hi, :]
            batch_probs = forward(batch_x, W, b)

            W += lr*(gradW(batch_y, batch_probs, batch_x) - reg*W)
            b += lr*(gradb(batch_y, batch_probs) - reg*b)

            test_probs = forward(Xtest, W, b)
            test_cost = cost(test_probs, Ytest_ind)
            LL_batch.append(test_cost)

        test_error = error_rate(test_probs, Ytest)
        if epoch % 10 == 0:
            print("Cost at iteration %d: %.6f" % (epoch, test_cost))
            print("Error rate:", test_error)
    final_probs = forward(Xtest, W, b)
    print("Final error rate:", error_rate(final_probs, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - started)

    # The curves have different lengths, so each is spread over [0, 1].
    for curve, label in ((LL, "full"),
                         (LL_stochastic, "stochastic"),
                         (LL_batch, "batch")):
        plt.plot(np.linspace(0, 1, len(curve)), curve, label=label)
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Uses the first 300 columns of the data from ``get_transformed_data``,
    standardises them, holds out the last 1000 rows as the test set, trains
    three fresh 10-class logistic regressions and plots their test-cost
    curves on a common [0, 1] x-axis.

    Fixes over the previous version:
    * ``xrange`` replaced by ``range`` - the block already uses the
      ``print()`` function, and ``xrange`` does not exist on Python 3.
    * ``N / batch_sz`` replaced by floor division so the batch count is an
      integer usable as a loop bound on Python 3.
    * The print-interval divisors are clamped to at least 1 so a tiny data
      set cannot trigger a modulo-by-zero.
    * The test error is only computed on iterations that print it.
    """
    X, Y, _, _ = get_transformed_data()
    # First 300 factors
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full gradient descent
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("FULL Cost a iteration %d: %.6f" % (i, ll))
            print("FULL Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("FULL Final error rate", error_rate(p_y, Ytest))
    print("FULL GD time", (datetime.now() - t0))

    # 2. Stochastic gradient descent
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01
    sgd_print_every = max(N // 2, 1)
    t0 = datetime.now()
    for i in range(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # Only the first 500 shuffled samples are visited.
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % sgd_print_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("STOCHASTIC Cost a iteration %d: %.6f" % (i, ll))
                print("STOCHASTIC Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("STOCHASTIC Final error rate", error_rate(p_y, Ytest))
    print("STOCHASTIC GD time", (datetime.now() - t0))

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz
    batch_print_every = max(n_batches // 2, 1)
    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:((j + 1) * batch_sz), :]
            y = tmpY[j * batch_sz:((j + 1) * batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % batch_print_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("BATCH Cost a iteration %d: %.6f" % (i, ll))
                print("BATCH Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("BATCH Final error rate", error_rate(p_y, Ytest))
    print("BATCH GD time", (datetime.now() - t0))

    # The curves have different lengths, so each is spread over [0, 1].
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label='full')
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label='stochastic')
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label='batch')
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Uses the first 300 columns of the data from ``get_transformed_data``,
    standardises them, holds out the last 1000 rows as the test set, trains
    three fresh 10-class logistic regressions and plots their test-cost
    curves on a common [0, 1] x-axis.

    Ported from Python 2 so it matches the other ``main`` variants in this
    file: ``print`` statements became ``print()`` calls, ``xrange`` became
    ``range`` and the batch count uses floor division (a float is not a
    valid loop bound on Python 3).  The print-interval divisors are clamped
    to at least 1 to avoid a modulo-by-zero on tiny data sets.
    """
    # get PCA transformed data
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]  # the first 300 features

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    # Initial weights are kept small by dividing by 28.
    # NOTE(review): the original comment called this "proportional to the
    # square root of the dimensionality"; 28 is sqrt(784), while D is at most
    # 300 here - confirm the intended scale.
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        # Forward pass on the test set so its cost can be recorded and plotted.
        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            # calculate the error rate on every 10 iterations
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01
    sgd_print_every = max(N // 2, 1)
    t0 = datetime.now()
    for i in range(1):  # takes very long since we're computing cost for 41k samples
        # On each pass, shuffle the training data together with the labels.
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # Only go through 500 samples because it is slow.
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            # reshape x into a 2 dimensional matrix
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            # forward pass to get the output
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % sgd_print_every == 0:
                # calculate the error rate once for every N/2 samples
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz
    batch_print_every = max(n_batches // 2, 1)
    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            # get the current batch's input and targets
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            # forward pass to get the output predictions
            p_y = forward(x, W, b)

            # gradient step
            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % batch_print_every == 0:
                # print error rate at every (number of batches)/2 iterations
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    # The curves have different lengths, so each is spread over [0, 1].
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    """Compare three mini-batch optimisers on the same two-layer network.

    1. plain mini-batch gradient descent
    2. mini-batch gradient descent with momentum
    3. mini-batch gradient descent with Nesterov momentum

    All three add an L2 term (``reg``) to the gradients and start from the
    same initial weights (copies saved in ``W1_0`` etc.).  Test loss is
    sampled every ``print_period`` batches and the three curves are plotted
    together at the end.

    NOTE(review): statement nesting was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    # compare 3:
    # 1. batch SGD
    # 2. batch SGD with momentum
    # 3. batch SGD with Nesterov momentum
    # all with L2 regularization
    print_period = 10
    X, Y = get_normalized_data()
    lr = 0.00004
    reg = 0.01

    # The last 1000 rows are held out as the test set.
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]
    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 300
    K = 10
    W1 = np.random.randn(D, M) / 28
    b1 = np.zeros(M)
    W2 = np.random.randn(M, K) / np.sqrt(M)
    b2 = np.zeros(K)

    # Keep pristine copies so every optimiser starts from the same point;
    # the updates below modify W1/b1/W2/b2 in place.
    W1_0 = W1.copy()
    b1_0 = b1.copy()
    W2_0 = W2.copy()
    b2_0 = b2.copy()

    # regular batch gradient descent
    epochs = 30
    tr_costs = []
    errors_batch = []
    losses_test = []
    batch_size = 500
    number_batches = int(N // batch_size)
    #max_iter = 30

    # 1.
    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # Order matters: W2 is updated first, so the W1/b1 gradients are
            # evaluated with the already-updated W2.
            W2 -= lr * (derivative_w2(z_tr, ytr, ytr_pred) + reg * W2)
            b2 -= lr * (derivative_b2(ytr, ytr_pred) + reg * b2)
            W1 -= lr * (derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1)
            b1 -= lr * (derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1)

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))
                e = error_rate(yte_pred, Ytest)
                errors_batch.append(e)
                print("Error rate:", e)
                # Training cost uses the predictions made before this update.
                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)
    #plt.plot(tr_costs, label='tr_costs')
    plt.plot(losses_test, label='losses_test')
    #plt.plot(errors_batch, label='errors_batch')
    # plt.show()
    # print("tr_costs", tr_costs)
    print("Final error rate:", error_rate(pY, Ytest))

    # 2.
    # Restart from the saved initial weights.
    W1 = W1_0.copy()
    b1 = b1_0.copy()
    W2 = W2_0.copy()
    b2 = b2_0.copy()

    # batch gradient descent with momentum
    tr_costs_momentum = []
    errors_batch_momentum = []
    losses_test_momentum = []

    # momentum coefficient
    mu = 0.9
    # Velocity accumulators, one per parameter.
    dW1 = 0
    dW2 = 0
    db1 = 0
    db2 = 0
    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients (all computed before any parameter is changed)
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # update velocity
            dW2 = mu * dW2 - lr * gW2
            db2 = mu * db2 - lr * gb2
            dW1 = mu * dW1 - lr * gW1
            db1 = mu * db1 - lr * gb1

            # update
            W2 += dW2
            W1 += dW1
            b2 += db2
            b1 += db1

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_momentum.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))
                e = error_rate(yte_pred, Ytest)
                errors_batch_momentum.append(e)
                print("Error rate:", e)
                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_momentum.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)
    #plt.plot(tr_costs_momentum, label='tr_costs momentum')
    plt.plot(losses_test_momentum, label='losses_test momentum')
    #plt.plot(errors_batch, label='errors_batch')
    # plt.show()
    # print("tr_costs", errors_batch_momentum)
    print("Final error rate:", error_rate(pY, Ytest))

    # 3.
    # Restart from the saved initial weights.
    W1 = W1_0.copy()
    b1 = b1_0.copy()
    W2 = W2_0.copy()
    b2 = b2_0.copy()

    # batch gradient descent with Nesterov momentum
    tr_costs_nesterov = []
    errors_batch_nesterov = []
    losses_test_nesterov = []

    # momentum coefficient
    mu = 0.9
    vW1 = 0
    vW2 = 0
    vb1 = 0
    vb2 = 0
    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # update velocity
            vW2 = mu * vW2 - lr * gW2
            vb2 = mu * vb2 - lr * gb2
            vW1 = mu * vW1 - lr * gW1
            vb1 = mu * vb1 - lr * gb1

            # update: the step applies the momentum term on top of the
            # freshly updated velocity (mu * v_new - lr * g), which is what
            # distinguishes this section from section 2.
            W2 += mu * vW2 - lr * gW2
            W1 += mu * vW1 - lr * gW1
            b2 += mu * vb2 - lr * gb2
            b1 += mu * vb1 - lr * gb1

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_nesterov.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))
                e = error_rate(yte_pred, Ytest)
                errors_batch_nesterov.append(e)
                print("Error rate:", e)
                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_nesterov.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)
    #plt.plot(tr_costs_nesterov, label='tr_costs_nesterov')
    plt.plot(losses_test_nesterov, label='losses_test_nesterov')
    #plt.plot(errors_batch_nesterov, label='errors_batch')
    # All three test-loss curves share one figure.
    plt.legend()
    plt.show()
    # print("tr_costs_nesterov", errors_batch_momentum)
    print("Final error rate nesterov:", error_rate(pY, Ytest))
def main():
    """Train the two-layer network with an Adam-style optimiser and plot test loss.

    Each mini-batch step keeps exponentially decayed averages of the
    gradients (``m*``) and of the squared gradients (``v*``), divides both by
    a bias-correction term that depends on the step counter ``t``, and
    updates the parameters with ``lr0 * m_hat / sqrt(v_hat + epsilon)``.
    Test loss is sampled every ``print_period`` batches.

    NOTE(review): statement nesting was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    # The header comment below was carried over from a sibling script; only
    # one optimiser is run in this function.
    # compare 3:
    # 1. batch SGD
    # 2. batch SGD with momentum
    # 3. batch SGD with Nesterov momentum
    # all with L2 regularization
    X, Y = get_normalized_data()

    # The last 1000 rows are held out as the test set.
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]
    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 300
    K = 10

    ###################### IMPORTANT PARAMETER ####################
    # Step counter for bias correction.  It must start at 1: with t = 0 both
    # correction terms (1 - beta**t) would be zero.
    t = 1
    ###############################################################

    epochs = 20
    print_period = 10
    lr0 = 0.001
    reg = 0.01
    epsilon = 1e-8  # NOTE: not the same as 10e-8, which equals 1e-7
    beta1 = 0.9  # mu = 0.9
    beta2 = 0.999  # decay = 0.999
    batch_size = 500
    number_batches = int(N // batch_size)

    W1 = np.random.randn(D, M) / 28
    b1 = np.zeros(M)
    W2 = np.random.randn(M, K) / np.sqrt(M)
    b2 = np.zeros(K)

    tr_costs_momentum = []
    errors_batch_momentum = []
    losses_test_momentum = []

    # First-moment (m*) and second-moment (v*) accumulators.
    mW2 = 0
    mW1 = 0
    mb2 = 0
    mb1 = 0
    vW1 = 0
    vW2 = 0
    vb1 = 0
    vb2 = 0
    # The *_hat values are always reassigned inside the loop before use;
    # these initialisations only pre-declare the names.
    mW2_hat = 0
    mW1_hat = 0
    mb2_hat = 0
    mb1_hat = 0
    vW1_hat = 0
    vW2_hat = 0
    vb1_hat = 0
    vb2_hat = 0

    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # update momentum (decayed average of the gradients)
            mW2 = beta1 * mW2 + (1 - beta1) * gW2
            mW1 = beta1 * mW1 + (1 - beta1) * gW1
            mb2 = beta1 * mb2 + (1 - beta1) * gb2
            mb1 = beta1 * mb1 + (1 - beta1) * gb1

            # update velocity (decayed average of the squared gradients)
            vW2 = beta2 * vW2 + (1 - beta2) * gW2 * gW2
            vb2 = beta2 * vb2 + (1 - beta2) * gb2 * gb2
            vW1 = beta2 * vW1 + (1 - beta2) * gW1 * gW1
            vb1 = beta2 * vb1 + (1 - beta2) * gb1 * gb1

            # bias correction
            correction1 = (1 - beta1**t)
            mW2_hat = mW2 / correction1
            mW1_hat = mW1 / correction1
            mb2_hat = mb2 / correction1
            mb1_hat = mb1 / correction1
            correction2 = (1 - beta2**t)
            vW2_hat = vW2 / correction2
            vW1_hat = vW1 / correction2
            vb2_hat = vb2 / correction2
            vb1_hat = vb1 / correction2

            # update t: advanced once per mini-batch, after the corrections
            # for this step have been computed
            t += 1

            # update (epsilon is added inside the square root here)
            W2 -= lr0 * (mW2_hat / np.sqrt(vW2_hat + epsilon))
            W1 -= lr0 * (mW1_hat / np.sqrt(vW1_hat + epsilon))
            b2 -= lr0 * (mb2_hat / np.sqrt(vb2_hat + epsilon))
            b1 -= lr0 * (mb1_hat / np.sqrt(vb1_hat + epsilon))

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_momentum.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))
                e = error_rate(yte_pred, Ytest)
                errors_batch_momentum.append(e)
                print("Error rate:", e)
                # Training cost uses the predictions made before this update.
                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_momentum.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)
    #plt.plot(tr_costs_momentum, label='tr_costs momentum')
    plt.plot(losses_test_momentum, label='losses_test momentum RMS')
    #plt.plot(errors_batch, label='errors_batch')
    # plt.show()
    # print("tr_costs", errors_batch_momentum)
    print("Final error rate:", error_rate(pY, Ytest))
    plt.legend()
    plt.show()
def predict(self, X, activation=1):
    """Return, for each row of ``X``, the index of the largest network output.

    Runs ``forward`` with the stored weights and biases and takes the argmax
    of its first return value along axis 1.
    """
    parameters = (self.W1, self.b1, self.W2, self.b2)
    scores, _hidden = forward(X, activation, *parameters)
    predicted = np.argmax(scores, axis=1)
    return predicted
def move_to_hand(self):
    """Place the fruit in the arm's palm.

    Starts at the arm's position, moves the arm's length along its rotation,
    then applies the configured fruit tweak (amount and direction offset).
    """
    arm = self.arm
    tweak = conf.arm.fruit_tweak

    # to the edge of arm
    edge_x, edge_y = util.forward(
        (arm.x, arm.y), conf.arm.dimensions[0], arm.rotation)

    # move a bit in the tweaking's direction
    self.x, self.y = util.forward(
        (edge_x, edge_y), tweak.amount, arm.rotation + tweak.direction)
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Command-line arguments (all required):
        sys.argv[1]: learning rate (float)
        sys.argv[2]: regularisation strength (float)
        sys.argv[3]: mini-batch size (int)

    Each variant trains a fresh ``K``-class logistic regression for 50
    epochs on the data from ``get_pca_normalized_data``, records the test
    cost, prints progress every 10 epochs, and the three cost curves are
    plotted on a common [0, 1] x-axis.

    Fixes over the previous version:
    * The regularisation term had the wrong sign.  With
      ``W -= lr * (grad - reg * W)`` the weights are multiplied by
      ``(1 + lr * reg)`` each step, i.e. they grow; an L2 penalty must shrink
      them, so the update is now ``W -= lr * (grad + reg * W)``.
    * The sample targets are reshaped with ``K`` instead of a literal 10.
    * The accuracy value is no longer stored in a variable called ``err``,
      and the always-true ``epoch % 1 == 0`` guard is gone.
    """
    X_train, X_test, t_train, t_test = get_pca_normalized_data()
    print("Performing multi-class logistic regression...\n")

    N, D = X_train.shape
    K = 10
    T_train = T_indicator(t_train)
    T_test = T_indicator(t_test)

    lr = float(sys.argv[1])
    reg = float(sys.argv[2])
    batch_size = int(sys.argv[3])

    ######## 1. FULL GRADIENT DESCENT ########
    print('Full Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_full = []
    t0 = datetime.now()
    for epoch in range(50):
        Y_train = forward(X_train, W, b)

        W -= lr * (gradW(T_train, Y_train, X_train) + reg * W)
        b -= lr * (gradb(T_train, Y_train) + reg * b)

        Y_test = forward(X_test, W, b)
        j_test = J(T_test, Y_test)
        J_test_full.append(j_test)

        if epoch % 10 == 0:
            acc = accuracy(predict(Y_test), t_test)
            print("Epoch {}:\tcost: {}\taccuracy: {}".format(
                epoch, round(j_test, 4), acc))
    Y_test = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test), t_test))
    print("Elapsted time for full GD: {}\n".format(datetime.now() - t0))

    ######## 2. STOCHASTIC GRADIENT DESCENT ########
    print('Stochastic Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_stochastic = []
    t0 = datetime.now()
    for epoch in range(50):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpT = shuffle(X_train, T_train)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n, :].reshape(1, D)
            t = tmpT[n, :].reshape(1, K)
            Y_train = forward(x, W, b)

            W -= lr * (gradW(t, Y_train, x) + reg * W)
            b -= lr * (gradb(t, Y_train) + reg * b)

            Y_test = forward(X_test, W, b)
            j_test = J(T_test, Y_test)
            J_test_stochastic.append(j_test)

        if epoch % 10 == 0:
            acc = accuracy(predict(Y_test), t_test)
            print("Epoch {}:\tcost: {}\taccuracy: {}".format(
                epoch, round(j_test, 4), acc))
    Y_test_final = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test_final), t_test))
    print("Elapsted time for SGD: {}\n".format(datetime.now() - t0))

    ######## 3. BATCH GRADIENT DESCENT ########
    print('Batch Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_batch = []
    nb_batches = N // batch_size
    t0 = datetime.now()
    for epoch in range(50):
        tmpX, tmpT = shuffle(X_train, T_train)
        for batch_index in range(nb_batches):
            start = batch_index * batch_size
            stop = start + batch_size
            x = tmpX[start:stop, :]
            t = tmpT[start:stop, :]
            Y_train = forward(x, W, b)

            W -= lr * (gradW(t, Y_train, x) + reg * W)
            b -= lr * (gradb(t, Y_train) + reg * b)

            Y_test = forward(X_test, W, b)
            j_test = J(T_test, Y_test)
            J_test_batch.append(j_test)

        if epoch % 10 == 0:
            acc = accuracy(predict(Y_test), t_test)
            print("Epoch {}\tcost: {}\taccuracy: {}".format(
                epoch, round(j_test, 4), acc))
    Y_test_final = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test_final), t_test))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    ######## PLOTS ########
    # The curves have different lengths, so each is spread over [0, 1].
    x1 = np.linspace(0, 1, len(J_test_full))
    plt.plot(x1, J_test_full, label="full")
    x2 = np.linspace(0, 1, len(J_test_stochastic))
    plt.plot(x2, J_test_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(J_test_batch))
    plt.plot(x3, J_test_batch, label="batch")
    plt.legend()
    #plt.savefig('full_vs_stoch_vs_batch_lr={}_reg={}_batch_size={}.png'.format(lr, reg, batch_size))
    plt.show()
threshold = 0.05 # stopping criteria for i, (im, file_name) in enumerate(dataset_loader): im = im.cuda() # Prepare hints, mask, and get current classification data, target = util.get_colorization_data(im, opt, model, classifier) opt.target = opt.target if opt.targeted else target optimizer = torch.optim.Adam( [data['hints'].requires_grad_(), data['mask'].requires_grad_()], lr=opt.lr, betas=(0.9, 0.999)) prev_diff = 0 for itr in range(opt.num_iter): out_rgb, y = util.forward(model, classifier, opt, data) val, idx, labels = util.compute_class(opt, y) loss = util.compute_loss(opt, y, criterion) print(f'[{itr+1}/{opt.num_iter}] Loss: {loss:.3f} Labels: {labels}') optimizer.zero_grad() loss.backward() optimizer.step() print("%.5f" % (loss.item())) diff = val[0] - val[1] if opt.targeted: if idx[0] == opt.target and diff > threshold and ( diff - prev_diff).abs() < 1e-3: break else:
def main():
    """Train softmax logistic regression with mini-batch gradient descent.

    Loads the transformed data, keeps the first 300 columns, standardizes them,
    holds out the last 1000 rows as a test set, trains for 50 epochs on batches
    of 500 rows, and plots the test cost recorded after every batch update.

    The full-batch and per-sample variants were commented out in this version
    of the script, so only the mini-batch curve exists and only it is plotted
    (the previous plotting code referenced the never-assigned ``LL`` and
    ``LL_stochastic`` lists).
    """
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    # Integer division: range() rejects the float that `/` yields on Python 3.
    n_batches = N // batch_sz
    # Report roughly twice per epoch; never let the modulus become zero.
    report_every = max(1, n_batches // 2)

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j + 1) * batch_sz, :]
            y = tmpY[j * batch_sz:(j + 1) * batch_sz, :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            # Test cost is tracked after every update so the curve shows
            # progress per step rather than per epoch.
            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % report_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Trains the same softmax logistic-regression model three times (once per
    update scheme) for ``epoch`` passes each, prints progress every tenth pass,
    reports the final test error and elapsed time of every run, and finally
    plots the three test-cost curves on a common, length-normalised x axis.

    ``epoch`` is read from module scope.
    """
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    n_rows, n_cols = Xtrain.shape
    train_ind = y2indicator(Ytrain)
    test_ind = y2indicator(Ytest)

    # Hyper-parameters are the same for all three runs.
    step = 0.0001
    l2 = 0.01

    # ---- 1. Full GD: one update per pass over the whole training set ----
    # 28 is the square root of the raw image dimensionality (28 * 28 = 784).
    weights = np.random.randn(n_cols, 10) / 28
    bias = np.zeros(10)
    loss_batch = []
    started = datetime.now()
    for it in range(epoch):
        probs = forward(Xtrain, weights, bias)
        weights += step * (gradW(train_ind, probs, Xtrain) - l2 * weights)
        bias += step * (gradb(train_ind, probs) - l2 * bias)

        test_probs = forward(Xtest, weights, bias)
        current = cost(test_probs, test_ind)
        loss_batch.append(current)

        err = error_rate(test_probs, Ytest)
        if it % 10 == 0:
            print("Cost at iteration %d: %.6f" % (it, current))
            print("Error rate:", err)
    probs = forward(Xtest, weights, bias)
    print("Final error rate:", error_rate(probs, Ytest))
    print("Elapsted time for full GD:", datetime.now() - started)
    print("=======================================================")

    # ---- 2. Stochastic GD: one update per training sample ----
    weights = np.random.randn(n_cols, 10) / 28
    bias = np.zeros(10)
    loss_stochastic = []
    started = datetime.now()
    for it in range(epoch):
        shuffled_x, shuffled_t = shuffle(Xtrain, train_ind)
        # Slow: the test cost is evaluated after every single-sample update.
        for row in range(n_rows):
            sample = shuffled_x[row, :].reshape(1, n_cols)
            target = shuffled_t[row, :].reshape(1, 10)
            probs = forward(sample, weights, bias)
            weights += step * (gradW(target, probs, sample) - l2 * weights)
            bias += step * (gradb(target, probs) - l2 * bias)

            test_probs = forward(Xtest, weights, bias)
            sgd_cost = cost(test_probs, test_ind)
            loss_stochastic.append(sgd_cost)

        err = error_rate(test_probs, Ytest)
        if it % 10 == 0:
            print("Cost at iteration %d: %.6f" % (it, sgd_cost))
            print("Error rate:", err)
    probs = forward(Xtest, weights, bias)
    print("Final error rate:", error_rate(probs, Ytest))
    print("Elapsted time for SGD:", datetime.now() - started)
    print("=======================================================")

    # ---- 3. Mini-batch GD: one update per block of 500 samples ----
    weights = np.random.randn(n_cols, 10) / 28
    bias = np.zeros(10)
    loss_mini_batch = []
    chunk = 500
    n_chunks = n_rows // chunk
    started = datetime.now()
    for it in range(epoch):
        shuffled_x, shuffled_t = shuffle(Xtrain, train_ind)
        for k in range(n_chunks):
            lo = k * chunk
            hi = lo + chunk
            xb = shuffled_x[lo:hi, :]
            tb = shuffled_t[lo:hi, :]
            probs = forward(xb, weights, bias)
            weights += step * (gradW(tb, probs, xb) - l2 * weights)
            bias += step * (gradb(tb, probs) - l2 * bias)

            test_probs = forward(Xtest, weights, bias)
            current = cost(test_probs, test_ind)
            loss_mini_batch.append(current)

        err = error_rate(test_probs, Ytest)
        if it % 10 == 0:
            print("Cost at iteration %d: %.6f" % (it, current))
            print("Error rate:", err)
    probs = forward(Xtest, weights, bias)
    print("Final error rate:", error_rate(probs, Ytest))
    print("Elapsted time for mini-batch GD:", datetime.now() - started)

    # Plot graph: each curve is stretched onto [0, 1] so runs with different
    # numbers of recorded points can be overlaid.
    curves = (
        (loss_batch, "full(batch) GD"),
        (loss_stochastic, "stochastic GD"),
        (loss_mini_batch, "mini-batch GD"),
    )
    for series, name in curves:
        plt.plot(np.linspace(0, 1, len(series)), series, label=name)
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Runs softmax logistic regression three times on the transformed data:
    200 full-batch updates, 500 single-sample updates, and 50 epochs of
    500-row mini-batches. The test cost after every update is recorded and the
    three curves are plotted on a common, length-normalised x axis.

    Fixes relative to the previous revision: costs are printed with six
    decimals (the old ``%6.f`` format rounded them to integers), the reporting
    intervals use integer arithmetic, the mini-batch run is no longer labelled
    as SGD in its timing message, and the class count comes from ``M`` instead
    of a repeated literal.
    """
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print('logistic regression')

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 10      # number of output classes
    scale = 28  # passed to initwb together with the layer sizes

    # 1. full gradient descent
    W, b = initwb(D, M, scale)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        P_Y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, P_Y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, P_Y) - reg * b)

        P_Y_test = forward(Xtest, W, b)
        ll = cost(P_Y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(P_Y_test, Ytest)
            print("cost at iter: %d: %.6f" % (i, ll))
            print("error rate: ", err, "\n")
    P_Y = forward(Xtest, W, b)
    print("final error: ", error_rate(P_Y, Ytest))
    print("elapsed time for full GD: ", datetime.now() - t0)

    # 2. stochastic
    W, b = initwb(D, M, scale)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01
    # Integer interval; max() keeps the modulus non-zero for tiny data sets.
    sgd_report_every = max(1, N // 2)

    t0 = datetime.now()
    for i in range(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # Only the first 500 shuffled samples are used: the test cost is
        # evaluated after every single update, which is expensive.
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, M)
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)

            P_Y_test = forward(Xtest, W, b)
            ll = cost(P_Y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % sgd_report_every == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("error rate: ", err)
    P_Y = forward(Xtest, W, b)
    print("error rate: ", error_rate(P_Y, Ytest))
    print("elapsed time for SGD: ", datetime.now() - t0)

    # 3. batch
    W, b = initwb(D, M, scale)
    LL_batch = []
    lr = 0.001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz
    batch_report_every = max(1, n_batches // 2)

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)

            P_Y_test = forward(Xtest, W, b)
            ll = cost(P_Y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % batch_report_every == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("error rate: ", err)
    P_Y = forward(Xtest, W, b)
    print("error rate: ", error_rate(P_Y, Ytest))
    print("elapsed time for batch GD: ", datetime.now() - t0)

    # The runs record different numbers of points, so each curve is stretched
    # onto [0, 1] before being overlaid.
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent and save the plots.

    Keeps the first 300 columns of the transformed data, standardizes them,
    holds out the last 1000 rows for testing, and trains softmax logistic
    regression with each of the three update schemes. After every run the
    test-cost curve is written to a PNG file and shown; a final figure overlays
    all three curves.

    Each figure is saved *before* ``plt.show()``: once the window is closed the
    figure is released, and a later ``savefig`` writes an empty image.
    """
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize the data:
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print('Performing logistic regression...')
    Xtrain, Ytrain = X[:-1000, :], Y[:-1000]
    Xtest, Ytest = X[-1000:, :], Y[-1000:]
    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)
    K = len(set(Y))  # number of distinct labels

    np.random.seed()

    # 1. Full Gradient Descent:
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LL = []  # a storage for costs
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    t0 = datetime.now()
    print('utilizing full GD...')
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (grad_W(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (grad_b(Ytrain_ind, p_y).sum(axis=0) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)

        if i % 10 == 0:
            error = error_rate(p_y_test, Ytest)
            print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))
    dt1 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LL)
    plt.title('Cost for full GD')
    plt.savefig('Cost_full_GD.png')
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for full GD:', dt1)

    # 2. Stochastic Gradient Descent
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LLstochastic = []  # a storage for costs
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    # Report about twice per epoch; max() keeps the modulus non-zero.
    sgd_report_every = max(1, N // 2)
    t0 = datetime.now()
    print('utilizing stochastic GD...')
    for i in range(25):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # Every training sample is visited, and the test cost is evaluated
        # after each single-sample update, so this run is slow.
        for n in range(N):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, K)
            p_y = forward(x, W, b)

            W += lr * (grad_W(y, p_y, x) - reg * W)
            b += lr * (grad_b(y, p_y).sum(axis=0) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LLstochastic.append(ll)

            if n % sgd_report_every == 0:
                error = error_rate(p_y_test, Ytest)
                print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))
    dt2 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LLstochastic)
    plt.title('Cost for stochastic GD')
    plt.savefig('Cost_stochastic_GD.png')
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for stochastic GD:', dt2)

    # 3. Batch Gradient Descent:
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LLbatch = []
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    batch_size = 500
    n_batches = N // batch_size
    batch_report_every = max(1, n_batches // 2)
    t0 = datetime.now()
    print('utilizing batch GD...')
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_size:batch_size * (j + 1), :]
            y = tmpY[j * batch_size:batch_size * (j + 1), :]
            p_y = forward(x, W, b)

            W += lr * (grad_W(y, p_y, x) - reg * W)
            b += lr * (grad_b(y, p_y).sum(axis=0) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LLbatch.append(ll)

            if j % batch_report_every == 0:
                error = error_rate(p_y_test, Ytest)
                print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))
    dt3 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LLbatch)
    plt.title('Cost for batch GD')
    plt.savefig('Cost_batch_GD.png')
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for batch GD', dt3)

    # plot all costs together; each curve is stretched onto [0, 1] because the
    # runs record different numbers of points.
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label='full')
    x2 = np.linspace(0, 1, len(LLstochastic))
    plt.plot(x2, LLstochastic, label='stochastic')
    x3 = np.linspace(0, 1, len(LLbatch))
    plt.plot(x3, LLbatch, label='batch')
    plt.legend()
    plt.savefig('Costs_together.png')
    plt.show()
def main():
    """Compare RMSProp with and without momentum on a one-hidden-layer network.

    Two runs start from the same initial weights:

    1. mini-batch gradient descent with RMSProp step scaling;
    2. the same, with the scaled step additionally smoothed by momentum.

    Both use L2 regularization (folded into the gradients). The test cost is
    sampled every ``print_period`` batches and the two curves are plotted
    together.

    Fixes relative to the previous revision:

    * the RMSProp cache is an exponential moving average
      (``cache = decay * cache + ...``); the first run used ``+=`` on top of
      that expression, which makes the cache grow without bound;
    * the step divides by ``sqrt(cache) + epsilon`` using true division; the
      old code used floor division (``//``), which quantises the step to whole
      numbers, and omitted the square root;
    * the L2 term is applied once (it is already part of each gradient).
    """
    X, Y = get_normalized_data()

    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 300  # hidden units
    K = 10   # output classes
    epochs = 20
    print_period = 10
    lr0 = 0.0004
    reg = 0.01
    epsilon = 10e-10  # keeps the RMSProp denominator away from zero
    decay = 0.999
    batch_size = 500
    number_batches = int(N // batch_size)

    W1 = np.random.randn(D, M) / 28
    b1 = np.zeros(M)
    W2 = np.random.randn(M, K) / np.sqrt(M)
    b2 = np.zeros(K)

    # Snapshot the initialisation so the second run starts from the same point.
    W1_0 = W1.copy()
    b1_0 = b1.copy()
    W2_0 = W2.copy()
    b2_0 = b2.copy()

    # 1. RMSProp only
    cache_W2 = 1
    cache_W1 = 1
    cache_b2 = 1
    cache_b1 = 1

    tr_costs = []
    errors_batch = []
    losses_test = []
    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients (L2 term included); all are computed before any
            # parameter is changed so they refer to the same weights
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # RMSProp: moving average of the squared gradients
            cache_W2 = decay * cache_W2 + (1 - decay) * gW2 * gW2
            cache_W1 = decay * cache_W1 + (1 - decay) * gW1 * gW1
            cache_b2 = decay * cache_b2 + (1 - decay) * gb2 * gb2
            cache_b1 = decay * cache_b1 + (1 - decay) * gb1 * gb1

            W2 -= lr0 * gW2 / (np.sqrt(cache_W2) + epsilon)
            b2 -= lr0 * gb2 / (np.sqrt(cache_b2) + epsilon)
            W1 -= lr0 * gW1 / (np.sqrt(cache_W1) + epsilon)
            b1 -= lr0 * gb1 / (np.sqrt(cache_b1) + epsilon)

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch.append(e)
                print("Error rate:", e)

                # Training cost of the batch as predicted before this update.
                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)
    plt.plot(losses_test, label='losses_test RMS')
    print("Final error rate:", error_rate(pY, Ytest))

    # 2. RMSProp with momentum, restarted from the saved initial weights
    W1 = W1_0.copy()
    b1 = b1_0.copy()
    W2 = W2_0.copy()
    b2 = b2_0.copy()

    tr_costs_momentum = []
    errors_batch_momentum = []
    losses_test_momentum = []

    # momentum coefficient
    mu = 0.8

    cache_W2 = 1
    cache_W1 = 1
    cache_b2 = 1
    cache_b1 = 1

    # velocities
    dW1 = 0
    dW2 = 0
    db1 = 0
    db2 = 0
    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients (L2 term included)
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # NOTE(review): the original author was unsure whether the
            # momentum/velocity combination below is right - confirm.
            # RMSProp
            cache_W2 = decay * cache_W2 + (1 - decay) * gW2 * gW2
            cache_W1 = decay * cache_W1 + (1 - decay) * gW1 * gW1
            cache_b2 = decay * cache_b2 + (1 - decay) * gb2 * gb2
            cache_b1 = decay * cache_b1 + (1 - decay) * gb1 * gb1

            # gradients rescaled by the root of the running squared average
            cW2 = gW2 / (np.sqrt(cache_W2) + epsilon)
            cb2 = gb2 / (np.sqrt(cache_b2) + epsilon)
            cW1 = gW1 / (np.sqrt(cache_W1) + epsilon)
            cb1 = gb1 / (np.sqrt(cache_b1) + epsilon)

            # update velocity
            dW2 = mu * dW2 + (1 - mu) * lr0 * cW2
            db2 = mu * db2 + (1 - mu) * lr0 * cb2
            dW1 = mu * dW1 + (1 - mu) * lr0 * cW1
            db1 = mu * db1 + (1 - mu) * lr0 * cb1

            # update
            W2 -= dW2
            W1 -= dW1
            b2 -= db2
            b1 -= db1

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_momentum.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch_momentum.append(e)
                print("Error rate:", e)

                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_momentum.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)
    plt.plot(losses_test_momentum, label='losses_test momentum RMS')
    print("Final error rate:", error_rate(pY, Ytest))
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent against wall-clock time.

    The full-batch run goes first; its total duration (``max_dt``) and its mean
    time per iteration (``avg_interval_dt``) then act as the time budget and
    the loss-sampling interval for the stochastic and mini-batch runs. Every
    recorded point is an ``[elapsed_seconds, test_loss]`` pair, and the three
    curves are plotted with elapsed seconds on the x axis.
    """
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / np.sqrt(D)
    W0 = W.copy() # save for later: runs 2 and 3 restart from these weights
    b = np.zeros(10)
    test_losses_full = []
    lr = 0.9
    reg = 0.
    t0 = datetime.now()
    last_dt = 0
    intervals = []
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        # gradients are divided by the number of training rows
        gW = gradW(Ytrain_ind, p_y, Xtrain) / N
        gb = gradb(Ytrain_ind, p_y) / N

        W += lr*(gW - reg*W)
        b += lr*(gb - reg*b)

        p_y_test = forward(Xtest, W, b)
        test_loss = cost(p_y_test, Ytest_ind)
        dt = (datetime.now() - t0).total_seconds()

        # save these: dt2 is the duration of this iteration alone
        dt2 = dt - last_dt
        last_dt = dt
        intervals.append(dt2)

        test_losses_full.append([dt, test_loss])
        if (i + 1) % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)

    # save the max time so we don't surpass it in subsequent iterations
    max_dt = dt
    avg_interval_dt = np.mean(intervals)

    # 2. stochastic
    W = W0.copy()
    b = np.zeros(10)
    test_losses_sgd = []
    lr = 0.001
    reg = 0.

    t0 = datetime.now()
    last_dt_calculated_loss = 0
    done = False
    for i in range(50): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(N):
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            gW = gradW(y, p_y, x)
            gb = gradb(y, p_y)

            W += lr*(gW - reg*W)
            b += lr*(gb - reg*b)

            dt = (datetime.now() - t0).total_seconds()
            dt2 = dt - last_dt_calculated_loss

            # sample the test loss only once more than avg_interval_dt seconds
            # have passed since the previous sample
            if dt2 > avg_interval_dt:
                last_dt_calculated_loss = dt
                p_y_test = forward(Xtest, W, b)
                test_loss = cost(p_y_test, Ytest_ind)
                test_losses_sgd.append([dt, test_loss])

            # time to quit: the full-GD time budget is used up
            if dt > max_dt:
                done = True
                break
        if done:
            break

        # (i + 1) % 1 is always 0, so this prints after every completed epoch;
        # test_loss is the most recently sampled value, not a fresh evaluation
        if (i + 1) % 1 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)

    # 3. mini-batch
    W = W0.copy()
    b = np.zeros(10)
    test_losses_batch = []
    batch_sz = 500
    lr = 0.08
    reg = 0.
    # ceil keeps the final, possibly shorter, batch
    n_batches = int(np.ceil(N / batch_sz))

    t0 = datetime.now()
    last_dt_calculated_loss = 0
    done = False
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j*batch_sz:(j + 1)*batch_sz,:]
            y = tmpY[j*batch_sz:(j + 1)*batch_sz,:]
            p_y = forward(x, W, b)

            # divide by the actual slice length: the last batch may be short
            current_batch_sz = len(x)
            gW = gradW(y, p_y, x) / current_batch_sz
            gb = gradb(y, p_y) / current_batch_sz

            W += lr*(gW - reg*W)
            b += lr*(gb - reg*b)

            dt = (datetime.now() - t0).total_seconds()
            dt2 = dt - last_dt_calculated_loss

            # same time-based sampling rule as in the stochastic run
            if dt2 > avg_interval_dt:
                last_dt_calculated_loss = dt
                p_y_test = forward(Xtest, W, b)
                test_loss = cost(p_y_test, Ytest_ind)
                test_losses_batch.append([dt, test_loss])

            # time to quit
            if dt > max_dt:
                done = True
                break
        if done:
            break

        if (i + 1) % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for mini-batch GD:", datetime.now() - t0)

    # convert to numpy arrays so the time and loss columns can be sliced
    test_losses_full = np.array(test_losses_full)
    test_losses_sgd = np.array(test_losses_sgd)
    test_losses_batch = np.array(test_losses_batch)

    plt.plot(test_losses_full[:,0], test_losses_full[:,1], label="full")
    plt.plot(test_losses_sgd[:,0], test_losses_sgd[:,1], label="sgd")
    plt.plot(test_losses_batch[:,0], test_losses_batch[:,1], label="mini-batch")
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Keeps the first 300 columns of the transformed data, standardizes them,
    holds out the last 1000 rows for testing, and trains softmax logistic
    regression with 50 full-batch updates, 500 single-sample updates, and 50
    epochs of 500-row mini-batches. The stochastic curve is shown on its own
    after the second run; all three curves are overlaid at the end.

    The block is written so that it parses and behaves the same on Python 2
    and Python 3: ``print`` is always called with a single pre-formatted
    string, ``range`` replaces ``xrange``, and counts use ``//`` so they stay
    integers.
    """
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. Full GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []  # test cost after every iteration
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("The lost sequence is given as: %s" % (LL,))
    print("Final error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsted time for full GD: %s" % (datetime.now() - t0,))

    # 2. Stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01
    # Integer interval; max() keeps the modulus non-zero for tiny data sets.
    sgd_report_every = max(1, N // 2)

    t0 = datetime.now()
    for i in range(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # Only 500 samples: the test cost is evaluated after every update.
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % sgd_report_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("Final error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsed time for SGD: %s" % (datetime.now() - t0,))

    # Intermediate look at the stochastic curve alone.
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    plt.legend()
    plt.show()
    print(LL)

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz
    batch_report_every = max(1, n_batches // 2)

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % batch_report_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("Final error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsted time for batch GD: %s" % (datetime.now() - t0,))

    # The runs record different numbers of points, so each curve is stretched
    # onto [0, 1] before being overlaid.
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Keeps the first 300 columns of the transformed data, standardizes them
    (zero-variance columns are divided by 1 instead of 0), holds out the last
    1000 rows for testing, and trains softmax logistic regression with 200
    full-batch updates, 500 single-sample updates, and 50 epochs of 500-row
    mini-batches. The three test-cost curves are overlaid at the end.

    Fixes relative to the previous revision:

    * the final full-GD error rate is computed against the labels ``Ytest``;
      it used to compare the test predictions with themselves
      (``error_rate(pY, pYtest)``);
    * the block parses and behaves the same on Python 2 and Python 3
      (single-argument ``print`` calls, ``range``, ``//`` for counts).
    """
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    mu = X.mean(axis=0)
    std = X.std(axis=0)
    np.place(std, std == 0, 1)  # avoid dividing constant columns by zero
    X = (X - mu) / std

    Xtrain, Ytrain = X[:-1000], Y[:-1000]
    Xtest, Ytest = X[-1000:], Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # Full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    learning_rate = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        pY = forward(Xtrain, W, b)

        W -= learning_rate * (derivative_W(pY, Ytrain_ind, Xtrain) + reg * W)
        b -= learning_rate * (derivative_b(pY, Ytrain_ind) + reg * b)

        pYtest = forward(Xtest, W, b)
        ll = cost(pYtest, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(pYtest, Ytest)
            print("Cost at iter %d: %.6f" % (i, ll))
            print("Error rate: %s" % (err,))
    pY = forward(Xtest, W, b)
    print("Final error rate: %s" % (error_rate(pY, Ytest),))
    print("Elapsed time for full GD: %s" % (datetime.now() - t0,))

    # SGD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    learning_rate = 0.0001
    reg = 0.01
    # Integer interval; max() keeps the modulus non-zero for tiny data sets.
    sgd_report_every = max(1, N // 2)

    t0 = datetime.now()
    for i in range(1):  # one epoch
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # Only 500 samples: the test cost is evaluated after every update.
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W -= learning_rate * (derivative_W(p_y, y, x) + reg * W)
            b -= learning_rate * (derivative_b(p_y, y) + reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % sgd_report_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("Final Error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsed time for SGD: %s" % (datetime.now() - t0,))

    # Batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    learning_rate = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz
    batch_report_every = max(1, n_batches // 2)

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W -= learning_rate * (derivative_W(p_y, y, x) + reg * W)
            b -= learning_rate * (derivative_b(p_y, y) + reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % batch_report_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("Final Error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsed time for Batch GD: %s" % (datetime.now() - t0,))

    # The runs record different numbers of points, so each curve is stretched
    # onto [0, 1] before being overlaid.
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label='full')
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label='stochastic')
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label='batch')
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Keeps the first 300 columns of the transformed data, standardizes them,
    holds out the last 1000 rows for testing, and trains softmax logistic
    regression with 200 full-batch updates, 500 single-sample updates, and 50
    epochs of 500-row mini-batches. The test cost after every update is
    recorded and the three curves are overlaid at the end.

    The block is written so that it parses and behaves the same on Python 2
    and Python 3: ``print`` is always called with a single pre-formatted
    string, ``range`` replaces ``xrange``, and counts use ``//`` so they stay
    integers.
    """
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("Final error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsted time for full GD: %s" % (datetime.now() - t0,))

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01
    # Integer interval; max() keeps the modulus non-zero for tiny data sets.
    sgd_report_every = max(1, N // 2)

    t0 = datetime.now()
    for i in range(1):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % sgd_report_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("Final error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsted time for SGD: %s" % (datetime.now() - t0,))

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz
    batch_report_every = max(1, n_batches // 2)

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % batch_report_every == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate: %s" % (err,))
    p_y = forward(Xtest, W, b)
    print("Final error rate: %s" % (error_rate(p_y, Ytest),))
    print("Elapsted time for batch GD: %s" % (datetime.now() - t0,))

    # The runs record different numbers of points, so each curve is stretched
    # onto [0, 1] before being overlaid.
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    """Compare full, stochastic and mini-batch gradient descent.

    Trains the same softmax logistic-regression model three times for 50
    passes each (once per update scheme), prints progress every tenth pass,
    reports the final test error and elapsed time of every run, and plots the
    three test-cost curves on a common, length-normalised x axis.
    """
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    n_rows, n_cols = Xtrain.shape
    train_ind = y2indicator(Ytrain)
    test_ind = y2indicator(Ytest)

    # Hyper-parameters are the same for all three runs.
    step = 0.0001
    l2 = 0.01
    passes = 50

    # ---- 1. full: one update per pass over the whole training set ----
    weights = np.random.randn(n_cols, 10) / 28
    bias = np.zeros(10)
    full_costs = []
    started = datetime.now()
    for it in range(passes):
        probs = forward(Xtrain, weights, bias)
        weights += step * (gradW(train_ind, probs, Xtrain) - l2 * weights)
        bias += step * (gradb(train_ind, probs) - l2 * bias)

        test_probs = forward(Xtest, weights, bias)
        test_cost = cost(test_probs, test_ind)
        full_costs.append(test_cost)

        err = error_rate(test_probs, Ytest)
        if it % 10 == 0:
            print("Cost at iteration %d: %.6f" % (it, test_cost))
            print("Error rate:", err)
    probs = forward(Xtest, weights, bias)
    print("Final error rate:", error_rate(probs, Ytest))
    print("Elapsted time for full GD:", datetime.now() - started)

    # ---- 2. stochastic: one update per training sample ----
    weights = np.random.randn(n_cols, 10) / 28
    bias = np.zeros(10)
    sgd_costs = []
    started = datetime.now()
    for it in range(passes):
        shuffled_x, shuffled_t = shuffle(Xtrain, train_ind)
        # Capped at 500 samples per pass because the test cost is evaluated
        # after every single-sample update.
        for row in range(min(n_rows, 500)):
            sample = shuffled_x[row, :].reshape(1, n_cols)
            target = shuffled_t[row, :].reshape(1, 10)
            probs = forward(sample, weights, bias)
            weights += step * (gradW(target, probs, sample) - l2 * weights)
            bias += step * (gradb(target, probs) - l2 * bias)

            test_probs = forward(Xtest, weights, bias)
            test_cost = cost(test_probs, test_ind)
            sgd_costs.append(test_cost)

        err = error_rate(test_probs, Ytest)
        if it % 10 == 0:
            print("Cost at iteration %d: %.6f" % (it, test_cost))
            print("Error rate:", err)
    probs = forward(Xtest, weights, bias)
    print("Final error rate:", error_rate(probs, Ytest))
    print("Elapsted time for SGD:", datetime.now() - started)

    # ---- 3. batch: one update per block of 500 samples ----
    weights = np.random.randn(n_cols, 10) / 28
    bias = np.zeros(10)
    batch_costs = []
    chunk = 500
    n_chunks = n_rows // chunk
    started = datetime.now()
    for it in range(passes):
        shuffled_x, shuffled_t = shuffle(Xtrain, train_ind)
        for k in range(n_chunks):
            lo = k * chunk
            hi = lo + chunk
            xb = shuffled_x[lo:hi, :]
            tb = shuffled_t[lo:hi, :]
            probs = forward(xb, weights, bias)
            weights += step * (gradW(tb, probs, xb) - l2 * weights)
            bias += step * (gradb(tb, probs) - l2 * bias)

            test_probs = forward(Xtest, weights, bias)
            test_cost = cost(test_probs, test_ind)
            batch_costs.append(test_cost)

        err = error_rate(test_probs, Ytest)
        if it % 10 == 0:
            print("Cost at iteration %d: %.6f" % (it, test_cost))
            print("Error rate:", err)
    probs = forward(Xtest, weights, bias)
    print("Final error rate:", error_rate(probs, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - started)

    # Each curve is stretched onto [0, 1] so runs with different numbers of
    # recorded points can be overlaid.
    curves = (
        (full_costs, "full"),
        (sgd_costs, "stochastic"),
        (batch_costs, "batch"),
    )
    for series, name in curves:
        plt.plot(np.linspace(0, 1, len(series)), series, label=name)
    plt.legend()
    plt.show()
Ytrain_ind = y2indicator(Ytrain) Ytest_ind = y2indicator(Ytest) # 1. full W = np.random.randn(D, 10) / 28 b = np.zeros(10) LL = [] lr = 0.0001 reg = 0.01 t0 = datetime.now() <<<<<<< HEAD for i in xrange(200): ======= for i in range(200): >>>>>>> upstream/master p_y = forward(Xtrain, W, b) W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W) b += lr*(gradb(Ytrain_ind, p_y) - reg*b) p_y_test = forward(Xtest, W, b) ll = cost(p_y_test, Ytest_ind) LL.append(ll) if i % 10 == 0: err = error_rate(p_y_test, Ytest) <<<<<<< HEAD print "Cost at iteration %d: %.6f" % (i, ll) print "Error rate:", err p_y = forward(Xtest, W, b) print "Final error rate:", error_rate(p_y, Ytest)