Example #1
def move_robot():
    global x_deviation, y_max, tolerance
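    # person roughly centred horizontally: stop when very close (y_max > 0.9), otherwise drive forward;
    # if the person is off-centre, pivot left/right for a computed delay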
    
    if(abs(x_deviation)<tolerance):
        if(y_max>0.9):
            ut.red_light("ON")
            ut.stop()
            print("reached person...........")
    
        else:
            ut.red_light("OFF")
            ut.forward()
            print("moving robot ...FORWARD....!!!!!!!!!!!!!!")
    
    
    else:
        ut.red_light("OFF")
        if(x_deviation>=tolerance):
            delay1=get_delay(x_deviation)
                
            ut.left()
            time.sleep(delay1)
            ut.stop()
            print("moving robot ...Left....<<<<<<<<<<")
    
                
        if(x_deviation<=-1*tolerance):
            delay1=get_delay(x_deviation)
                
            ut.right()
            time.sleep(delay1)
            ut.stop()
            print("moving robot ...Right....>>>>>>>>")
def benchmark_pca():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
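    # build one-hot (N x 10) indicator matrices for the integer class labels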
    Ytrain_ind = np.zeros((N, 10))
    for i in range(N):
        Ytrain_ind[i, Ytrain[i]] = 1

    Ntest = len(Ytest)
    Ytest_ind = np.zeros((Ntest, 10))
    for i in range(Ntest):
        Ytest_ind[i, Ytest[i]] = 1

    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    LLtest = []
    CRtest = []

    # D = 300 -> error = 0.07
    lr = 0.0001
    reg = 0.01
    for i in range(200):
        p_y = forward(Xtrain, W, b)
        # print "p_y:", p_y
        ll = cost(p_y, Ytrain_ind)
        LL.append(ll)

        p_y_test = forward(Xtest, W, b)
        lltest = cost(p_y_test, Ytest_ind)
        LLtest.append(lltest)

        err = error_rate(p_y_test, Ytest)
        CRtest.append(err)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
        if i % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    iters = range(len(LL))
    plt.plot(iters, LL, label='train loss')
    plt.plot(iters, LLtest, label='test loss')
    plt.title('Loss')
    plt.legend()
    plt.show()
    plt.plot(CRtest)
    plt.title('Error')
    plt.show()
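benchmark_full below calls a y2indicator helper that plays the same role as the explicit indicator loops in benchmark_pca above. A minimal vectorized sketch, assuming it simply one-hot encodes integer labels (the real utility may differ):

import numpy as np

def y2indicator(y, K=10):
    # one-hot encode integer labels into an (N, K) indicator matrix,
    # equivalent to the explicit loops in benchmark_pca above
    y = np.asarray(y, dtype=int)
    ind = np.zeros((len(y), K))
    ind[np.arange(len(y)), y] = 1
    return ind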
def benchmark_full():
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()

    print("Performing logistic regression...")
    # lr = LogisticRegression(solver='lbfgs')

    # convert Ytrain and Ytest to (N x K) matrices of indicator variables
    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    LLtest = []
    CRtest = []

    # reg = 1
    # learning rate 0.0001 is too high, 0.00005 is also too high
    # 0.00003 / 2000 iterations => 0.363 error, -7630 cost
    # 0.00004 / 1000 iterations => 0.295 error, -7902 cost
    # 0.00004 / 2000 iterations => 0.321 error, -7528 cost

    # reg = 0.1, still around 0.31 error
    # reg = 0.01, still around 0.31 error
    lr = 0.00004
    reg = 0.01
    for i in range(500):
        p_y = forward(Xtrain, W, b)
        # print "p_y:", p_y
        ll = cost(p_y, Ytrain_ind)
        LL.append(ll)

        p_y_test = forward(Xtest, W, b)
        lltest = cost(p_y_test, Ytest_ind)
        LLtest.append(lltest)

        err = error_rate(p_y_test, Ytest)
        CRtest.append(err)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
        if i % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    iters = range(len(LL))
    plt.plot(iters, LL, iters, LLtest)
    plt.show()
    plt.plot(CRtest)
    plt.show()
Example #4
def sgd_batch():
    """
    use util functions to run the logistic classification with bp
    """
    
    X_train, Y_train, X_test, Y_test = get_transformed_digit()
    
    N,D = X_train.shape
    yindi_train = y2indicator(Y_train)
    yindi_test = y2indicator(Y_test)
    
    M = yindi_test.shape[1]
    
    W = np.random.rand(D,M)
    b = np.random.rand(M)
    
    cost_train = []
    cost_test = []
    error_test = []
    
    eta = 1e-4
    penalty = 1e-2

    batch_size = 500
    batch_num = N // batch_size
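    # number of full mini-batches per pass over the shuffled training data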

    #batch
    for i in range(500):
        X_shuffle,Y_train_shuffle = shuffle(X_train,yindi_train)
        for ii in range(int(batch_num)):
            # x_tem = X_shuffle[ii].reshape(1,D)
            # y_tem = Y_train_shuffle[ii].reshape(1,10)

            # slice the current mini-batch using the inner-loop index (ii), not the epoch index
            x_tem = X_shuffle[ii*batch_size:(ii+1)*batch_size]
            y_tem = Y_train_shuffle[ii*batch_size:(ii+1)*batch_size]

            y_fit = forward(x = x_tem,w=W,b=b)
            
            W += eta*(deri_w(t_matrix = y_tem, y_matrix = y_fit,x = x_tem)-penalty*W)
            b += eta*(deri_b(t_matrix = y_tem, y_matrix = y_fit)-penalty*b)

            p_y_test = forward(x = X_test,w=W,b=b)
            cost_test_tem = cost(y_matrix = p_y_test,t_matrix = yindi_test)
            cost_test.append(cost_test_tem)

            if ii % 100 == 0:
                error_tem = error_rate(y_matrix = p_y_test, target = Y_test)
                print("the error rate in "+str(ii)+" iteration is :"+str(error_tem))
    
    p_y_final = forward(x = X_test,w=W,b=b)
    error_final = error_rate(y_matrix = p_y_final, target = Y_test)
    print("the final error rate is "+str(error_final))
def move_robot():
    global x_deviation, y_deviation, tolerance, arr_track_data
    
    print("moving robot .............!!!!!!!!!!!!!!")
    print(x_deviation, y_deviation, tolerance, arr_track_data)
    
    if(abs(x_deviation)<tolerance and abs(y_deviation)<tolerance):
        cmd="Stop"
        delay1=0
        ut.stop()
        ut.red_light("ON")
    
    else:
        ut.red_light("OFF")
        if (abs(x_deviation)>abs(y_deviation)):
            if(x_deviation>=tolerance):
                cmd="Move Left"
                delay1=get_delay(x_deviation,'l')
                
                ut.left()
                time.sleep(delay1)
                ut.stop()
                
            if(x_deviation<=-1*tolerance):
                cmd="Move Right"
                delay1=get_delay(x_deviation,'r')
                
                ut.right()
                time.sleep(delay1)
                ut.stop()
        else:
            
            if(y_deviation>=tolerance):
                cmd="Move Forward"
                delay1=get_delay(y_deviation,'f')
                
                ut.forward()
                time.sleep(delay1)
                ut.stop()
                
            if(y_deviation<=-1*tolerance):
                cmd="Move Backward"
                delay1=get_delay(y_deviation,'b')
                
                ut.back()
                time.sleep(delay1)
                ut.stop()
    
    
    arr_track_data[4]=cmd
    arr_track_data[5]=delay1
Example #6
    def update(self, dt):
        core.Model.update(self, dt)
        self.rotation += self.rot_inc * dt

        if self.stage == Fruit.IN_HAND:
            self.move_to_hand()

        elif self.stage == Fruit.FLYING:
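            # projectile motion: integrate position, then apply gravity to the vertical speed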
            self.x += self.speed[0] * dt
            self.y += self.speed[1] * dt
            self.speed = (self.speed[0], self.speed[1] + conf.gravity * dt)

            # check if out of screen
            if self.x < -conf.fruit.dimensions[0]/2 or self.y > conf.scene_height + conf.fruit.dimensions[1]/2:
                self.game_scene.create_yousuck()
                self.dont_keep()

            # check if should be eaten
            if self.monkey.state == Monkey.CLOSED:
                x, y = util.forward( (self.monkey.x, self.monkey.y - conf.monkey.dimensions[1]), conf.monkey.mouth_tweak.amount, conf.monkey.mouth_tweak.direction )
                dist = (self.x - x)**2 + (self.y - y)**2

                if dist <= conf.collision.fruit_monkey:
                    self.stage = Fruit.EATEN
                    self.rot_inc *= conf.fruit.rot_inc_extra

        elif self.stage == Fruit.EATEN:
            self.size_factor -= conf.fruit.shrink * dt
            if self.size_factor <= 0:
                self.size_factor = 0
                self.dont_keep()
Example #7
    def train(self, X, Y, activation=1, lr=10e-7, reg=10e-7, epoch=10):
        N, D = X.shape  # dimensionality of our data
        batch_size = 500
        n_batches = int(N / batch_size)
        ind = tar2ind(Y)  # convert the target array into an indicator matrix using one-hot encoding
        _, K = ind.shape

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)  # input-to-hidden weights
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)  # hidden-to-output weights
        self.b1 = np.random.randn(self.M)
        self.b2 = np.random.randn(K)
        dW2 = 0
        db2 = 0
        dW1 = 0
        db1 = 0
        mu = 0.9  # Momentum
        decay_rate = 0.99

        cost = []
        for n in range(0, 200):
            #tempx , tempy = shuffle(X, ind)
            for i in range(0, n_batches):
                X_tr = X[i * batch_size:(i * batch_size + batch_size), :]
                Y_tr = Y[i * batch_size:(i * batch_size + batch_size), ]
                ind = tar2ind(Y_tr)
                output, hidden = forward(X_tr, activation, self.W1, self.b1,
                                         self.W2, self.b2)

                # backpropagation with momentum: velocity = mu*velocity + lr*gradient, then apply to the weights
                dW2 = mu * dW2 + lr * (derivative_W2(ind, output, hidden, reg,
                                                     self.W2))
                self.W2 = self.W2 + dW2
                db2 = mu * db2 + lr * (derivative_b2(ind, output, reg,
                                                     self.b2))
                self.b2 = self.b2 + db2
                dW1 = mu * dW1 + lr * (derivative_W1(
                    ind, output, hidden, self.W2, X_tr, activation, reg,
                    self.W1))
                self.W1 = self.W1 + dW1
                db1 = mu * db1 + lr * (derivative_b1(
                    ind, output, hidden, self.W2, activation, reg, self.b1))
                self.b1 = self.b1 + db1
                c = cross_entropy(ind, output)
                cost.append(c)

                if i % 10 == 0:
                    result = np.argmax(output, axis=1)
                    r = classification_rate(Y_tr, result)
                    print("iteration:- ", i, "cost:- ", c,
                          "classification rate:- ", r)
def move_robot():
    global x_deviation, y_max, tolerance, arr_track_data

    print("moving robot .............!!!!!!!!!!!!!!")
    print(x_deviation, tolerance, arr_track_data)

    y = 1 - y_max  #distance from bottom of the frame

    if (abs(x_deviation) < tolerance):
        delay1 = 0
        if (y < 0.1):
            cmd = "Stop"
            ut.red_light("ON")
            ut.stop()
        else:
            cmd = "forward"
            ut.red_light("OFF")
            ut.forward()

    else:
        ut.red_light("OFF")
        if (x_deviation >= tolerance):
            cmd = "Move Left"
            delay1 = get_delay(x_deviation)

            ut.left()
            time.sleep(delay1)
            ut.stop()

        if (x_deviation <= -1 * tolerance):
            cmd = "Move Right"
            delay1 = get_delay(x_deviation)

            ut.right()
            time.sleep(delay1)
            ut.stop()

    arr_track_data[4] = cmd
    arr_track_data[5] = delay1
Example #9
    def render(self, screen):
        HasImageView.render(self, screen)
        for model in self.models:
            x, y = model.x, model.y
            x = int(x * conf.factor_width)
            y = int(y * conf.factor_height)

            rotated_img = pygame.transform.rotate(self.image, model.rotation)
            dimensions = int(rotated_img.get_rect().width * conf.factor_width), int(rotated_img.get_rect().height * conf.factor_height)
            rotated_img = pygame.transform.smoothscale(rotated_img, dimensions)

            rot_rect = rotated_img.get_rect().center
            x -= rot_rect[0]
            y -= rot_rect[1]

            # move (x, y) to the edge of the arm
            half = conf.arm.dimensions[0] / 2
            x, y = util.forward((x, y), half, model.rotation)

            screen.blit(rotated_img, (x, y))
Example #10
    def merge_empty(self,other):
        """ Merges this occupied region with an empty region, resolving potentially ambiguous corners """

        side = self.region.index(other.center())

        self.region.merge(other)
        prev_side = util.backward(side)
        next_side = util.forward(side)

        if self.open[side]:        

            if self.ambi[side]:
                self.open[prev_side].append(self.open[side].pop(0))
                self.ambi[side] = False

            if self.ambi[next_side]:
                self.open[next_side].insert(0, self.open[side].pop())
                self.ambi[next_side] = False

            if self.open[side]:        
                raise TopologicalImpossibility()
Example #11
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)


    # 2. stochastic
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(50): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)): # shortcut so it won't take so long...
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)


    # 3. batch
    W = np.random.randn(D, 10) / np.sqrt(D)
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j*batch_sz:(j*batch_sz + batch_sz),:]
            y = tmpY[j*batch_sz:(j*batch_sz + batch_sz),:]
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)



    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Example #12
def main():
	X, Y, _, _  = get_transformed_data()
	#First 300 factors
	X = X[:,:300]


	# normalize X first
	mu  = X.mean(axis=0)
	std = X.std(axis=0)
	X = (X-mu) / std 


	print("Performing logistic regression...")	
	
	Xtrain = X[:-1000,]
	Ytrain = Y[:-1000]
	Xtest = X[-1000:,]
	Ytest = Y[-1000:]
	

	N, D = Xtrain.shape
	Ytrain_ind = y2indicator(Ytrain)
	Ytest_ind = y2indicator(Ytest)
	

	#1. full gradient descent
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL = [] 
	

	lr = 0.0001
	reg = 0.01 
	t0 = datetime.now()
	for i in range(200):
		p_y = forward(Xtrain, W, b)
	

		W+=  lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
		b+=  lr*(gradb(Ytrain_ind, p_y) - reg*b)
		
		p_y_test = forward(Xtest, W, b)
		ll = cost(p_y_test, Ytest_ind)
		LL.append(ll)


		err = error_rate(p_y_test, Ytest)
		
		if i % 10 ==0:
			print("FULL Cost a iteration %d: %.6f" %(i,ll))
			print("FULL Error rate:", err)

	p_y = forward(Xtest, W, b)				
	print("FULL Final error rate", error_rate(p_y, Ytest))
	print("FULL GD time", (datetime.now() - t0))
	
	#2. Stochastic gradient descent
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL_stochastic = [] 

	lr = 0.0001
	reg = 0.01 
	t0 = datetime.now()
	for i in range(1):
		tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
		
		for n in range(min(N,500)):
		
			x = tmpX[n, :].reshape(1, D)
			y = tmpY[n, :].reshape(1, 10)
			p_y = forward(x, W, b)

			
			W+=  lr*(gradW(y, p_y, x) - reg*W)
			b+=  lr*(gradb(y, p_y) - reg*b)
			p_y_test = forward(Xtest, W, b)
			ll = cost(p_y_test, Ytest_ind)
			LL_stochastic.append(ll)

			err = error_rate(p_y_test, Ytest)	
			if n % int(N/2) ==0:
				print("STOCHASTIC Cost a iteration %d: %.6f" %(i,ll))
				print("STOCHASTIC Error rate:", err)
	
	p_y = forward(Xtest, W, b)				
	print("STOCHASTIC Final error rate", error_rate(p_y, Ytest))
	print("STOCHASTIC GD time", (datetime.now() - t0))	


	#3. batch
	W = np.random.randn(D, 10) / 28
	b = np.zeros(10)
	LL_batch = [] 

	lr = 0.0001
	reg = 0.01 
	
	batch_sz = 500
	n_batches = N // batch_sz

	t0 = datetime.now()
	for i in range(50):
		tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
		
		for j in range(n_batches):
		
			x = tmpX[j*batch_sz:((j+1)*batch_sz), :]
			y = tmpY[j*batch_sz:((j+1)*batch_sz), :]
			p_y = forward(x, W, b)

			
			W+=  lr*(gradW(y, p_y, x) - reg*W)
			b+=  lr*(gradb(y, p_y) - reg*b)
			p_y_test = forward(Xtest, W, b)
			ll = cost(p_y_test, Ytest_ind)
			LL_batch.append(ll)

		
			
			if j % int(n_batches/2) ==0:
				err = error_rate(p_y_test, Ytest)		
				print("BATCH Cost a iteration %d: %.6f" %(i,ll))
				print("BATCH Error rate:", err)
	
	p_y = forward(Xtest, W, b)				
	print("BATCH Final error rate", error_rate(p_y, Ytest))
	print("BATCH GD time", (datetime.now() - t0))	


	

	x1 = np.linspace(0, 1, len(LL))
	plt.plot(x1, LL, label='full')
	x2 = np.linspace(0, 1, len(LL_stochastic))
	plt.plot(x2, LL_stochastic, label='stochastic')
	x3 = np.linspace(0, 1, len(LL_batch))
	plt.plot(x3, LL_batch, label='batch')
	plt.legend()
	plt.show()
def main():
    # get PCA transformed data
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]  # the first 300 features

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28  # small initial weights, scaled down by 28 = sqrt(784)
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        # do a forward pass on the test set so that we can calculate the cost on the test set and then plot that
        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:  # calculate the error rate on every 10 iterations
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(1):  # takes very long since we're computing cost for 41k samples
        # on each pass, we typically want to shuffle the training data and the labels together
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # we only go through 500 samples because it's slow
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            # reshape x into a 2 dimensional matrix
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            # forward pass to get the output
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N // 2) == 0:  # calculate the error rate once for every N/2 samples
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsed time for SGD:", datetime.now() - t0)

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            # get the current batches input and targets
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            # forward pass to get the output predictions
            p_y = forward(x, W, b)

            # Gradient descent
            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches // 2) == 0:  # print error rate every (number of batches)/2 iterations
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsed time for batch GD:", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():

    # compare 3:
    # 1. batch SGD
    # 2. batch SGD with momentum
    # 3. batch SGD with Nesterov momentum
    # all with L2 regularization

    print_period = 10

    X, Y = get_normalized_data()
    lr = 0.00004

    reg = 0.01

    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 300
    K = 10

    W1 = np.random.randn(D, M) / 28
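    # 28 = sqrt(784): the usual 1/sqrt(fan-in) scaling for the input-to-hidden weights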
    b1 = np.zeros(M)
    W2 = np.random.randn(M, K) / np.sqrt(M)
    b2 = np.zeros(K)

    W1_0 = W1.copy()
    b1_0 = b1.copy()
    W2_0 = W2.copy()
    b2_0 = b2.copy()

    # regular mini-batch gradient descent

    epochs = 30

    tr_costs = []
    errors_batch = []
    losses_test = []
    batch_size = 500
    number_batches = int(N // batch_size)

    #max_iter = 30
    # 1.
    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            W2 -= lr * (derivative_w2(z_tr, ytr, ytr_pred) + reg * W2)
            b2 -= lr * (derivative_b2(ytr, ytr_pred) + reg * b2)
            W1 -= lr * (derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1)
            b1 -= lr * (derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1)

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch.append(e)
                print("Error rate:", e)

                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)

    #plt.plot(tr_costs, label='tr_costs')
    plt.plot(losses_test, label='losses_test')
    #plt.plot(errors_batch, label='errors_batch')
#    plt.show()
#    print("tr_costs", tr_costs)
    print("Final error rate:", error_rate(pY, Ytest))

    # 2.
    W1 = W1_0.copy()
    b1 = b1_0.copy()
    W2 = W2_0.copy()
    b2 = b2_0.copy()

    # mini-batch gradient descent with momentum

    tr_costs_momentum = []
    errors_batch_momentum = []
    losses_test_momentum = []

    # momentum coefficient
    mu = 0.9

    dW1 = 0
    dW2 = 0
    db1 = 0
    db2 = 0

    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # update velocity
            dW2 = mu * dW2 - lr * gW2
            db2 = mu * db2 - lr * gb2
            dW1 = mu * dW1 - lr * gW1
            db1 = mu * db1 - lr * gb1

            # update
            W2 += dW2
            W1 += dW1
            b2 += db2
            b1 += db1
            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_momentum.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch_momentum.append(e)
                print("Error rate:", e)

                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_momentum.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)

    #plt.plot(tr_costs_momentum, label='tr_costs momentum')
    plt.plot(losses_test_momentum, label='losses_test momentum')
    #plt.plot(errors_batch, label='errors_batch')
    # plt.show()
#    print("tr_costs", errors_batch_momentum)
    print("Final error rate:", error_rate(pY, Ytest))


# 3.

    W1 = W1_0.copy()
    b1 = b1_0.copy()
    W2 = W2_0.copy()
    b2 = b2_0.copy()

    # mini-batch gradient descent with Nesterov momentum

    tr_costs_nesterov = []
    errors_batch_nesterov = []
    losses_test_nesterov = []

    # momentum coefficient
    mu = 0.9

    vW1 = 0
    vW2 = 0
    vb1 = 0
    vb2 = 0

    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # update velocity
            vW2 = mu * vW2 - lr * gW2
            vb2 = mu * vb2 - lr * gb2
            vW1 = mu * vW1 - lr * gW1
            vb1 = mu * vb1 - lr * gb1

            # update
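            # (reformulated Nesterov update: the parameter step uses mu*v_new - lr*g, a look-ahead on the velocity)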
            W2 += mu * vW2 - lr * gW2
            W1 += mu * vW1 - lr * gW1
            b2 += mu * vb2 - lr * gb2
            b1 += mu * vb1 - lr * gb1

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_nesterov.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch_nesterov.append(e)
                print("Error rate:", e)

                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_nesterov.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)

    #plt.plot(tr_costs_nesterov, label='tr_costs_nesterov')
    plt.plot(losses_test_nesterov, label='losses_test_nesterov')
    #plt.plot(errors_batch_nesterov, label='errors_batch')
    plt.legend()
    plt.show()
#    print("tr_costs_nesterov", errors_batch_momentum)
    print("Final error rate nesterov:", error_rate(pY, Ytest))
Example #15
def main():

    # compare 3:
    # 1. batch SGD
    # 2. batch SGD with momentum
    # 3. batch SGD with Nesterov momentum
    # all with L2 regularization

    X, Y = get_normalized_data()

    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 300
    K = 10
    # important parameter: Adam's time step t must start at 1 for the bias correction
    t = 1
    epochs = 20
    print_period = 10
    lr0 = 0.001
    reg = 0.01
    epsilon = 1e-8  # note: 1e-8, not 10e-8 (which would be 1e-7)

    beta1 = 0.9  # mu = 0.9
    beta2 = 0.999  # decay = 0.999
    batch_size = 500
    number_batches = int(N // batch_size)

    W1 = np.random.randn(D, M) / 28
    b1 = np.zeros(M)
    W2 = np.random.randn(M, K) / np.sqrt(M)
    b2 = np.zeros(K)

    tr_costs_momentum = []
    errors_batch_momentum = []
    losses_test_momentum = []

    # Adam first- and second-moment accumulators (and their bias-corrected versions)

    mW2 = 0
    mW1 = 0
    mb2 = 0
    mb1 = 0

    vW1 = 0
    vW2 = 0
    vb1 = 0
    vb2 = 0

    mW2_hat = 0
    mW1_hat = 0
    mb2_hat = 0
    mb1_hat = 0

    vW1_hat = 0
    vW2_hat = 0
    vb1_hat = 0
    vb2_hat = 0

    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # update momentum
            mW2 = beta1 * mW2 + (1 - beta1) * gW2
            mW1 = beta1 * mW1 + (1 - beta1) * gW1
            mb2 = beta1 * mb2 + (1 - beta1) * gb2
            mb1 = beta1 * mb1 + (1 - beta1) * gb1

            # update second moment (squared-gradient cache)
            vW2 = beta2 * vW2 + (1 - beta2) * gW2 * gW2
            vb2 = beta2 * vb2 + (1 - beta2) * gb2 * gb2
            vW1 = beta2 * vW1 + (1 - beta2) * gW1 * gW1
            vb1 = beta2 * vb1 + (1 - beta2) * gb1 * gb1

            # bias correction
            correction1 = (1 - beta1**t)
            mW2_hat = mW2 / correction1
            mW1_hat = mW1 / correction1
            mb2_hat = mb2 / correction1
            mb1_hat = mb1 / correction1

            correction2 = (1 - beta2**t)
            vW2_hat = vW2 / correction2
            vW1_hat = vW1 / correction2
            vb2_hat = vb2 / correction2
            vb1_hat = vb1 / correction2

            # update t !!!!!!!
            t += 1

            # update
            W2 -= lr0 * (mW2_hat / np.sqrt(vW2_hat + epsilon))
            W1 -= lr0 * (mW1_hat / np.sqrt(vW1_hat + epsilon))
            b2 -= lr0 * (mb2_hat / np.sqrt(vb2_hat + epsilon))
            b1 -= lr0 * (mb1_hat / np.sqrt(vb1_hat + epsilon))

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_momentum.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" %
                      (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch_momentum.append(e)
                print("Error rate:", e)

                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_momentum.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)

    #plt.plot(tr_costs_momentum, label='tr_costs momentum')
    plt.plot(losses_test_momentum, label='losses_test momentum RMS')
    #plt.plot(errors_batch, label='errors_batch')
    # plt.show()
    #    print("tr_costs", errors_batch_momentum)
    print("Final error rate:", error_rate(pY, Ytest))
    plt.legend()
    plt.show()
Example #16
 def predict(self, X, activation=1):
     output, _ = forward(X, activation, self.W1, self.b1, self.W2, self.b2)
     return np.argmax(output, axis=1)
Example #17
 def move_to_hand(self):
     """put the fruit inside the arm's palm"""
     x, y = self.arm.x, self.arm.y
     x, y = util.forward( (x, y), conf.arm.dimensions[0], self.arm.rotation) # to the edge of arm
     x, y = util.forward( (x, y), conf.arm.fruit_tweak.amount, self.arm.rotation + conf.arm.fruit_tweak.direction) # move a bit in the tweaking's direction
     self.x, self.y  = x, y
Example #18
def main():
    X_train, X_test, t_train, t_test = get_pca_normalized_data()
    print("Performing multi-class logistic regression...\n")

    N, D = X_train.shape
    K = 10
    T_train = T_indicator(t_train)
    T_test = T_indicator(t_test)

    lr = float(sys.argv[1])
    reg = float(sys.argv[2])
    batch_size = int(sys.argv[3])

    ######## 1. FULL GRADIENT DESCENT ########
    print('Full Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_full = []
    t0 = datetime.now()
    for epoch in range(50):
        Y_train = forward(X_train, W, b)
        W -= lr * (gradW(T_train, Y_train, X_train) - reg * W)
        b -= lr * (gradb(T_train, Y_train) - reg * b)

        Y_test = forward(X_test, W, b)
        j_test = J(T_test, Y_test)
        J_test_full.append(j_test)

        if epoch % 1 == 0:
            err = accuracy(predict(Y_test), t_test)
            if epoch % 10 == 0:
                print("Epoch {}:\tcost: {}\taccuracy: {}".format(
                    epoch, round(j_test, 4), err))
    Y_test = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test), t_test))
    print("Elapsted time for full GD: {}\n".format(datetime.now() - t0))

    ######## 2. STOCHASTIC GRADIENT DESCENT ########
    print('Stochastic Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_stochastic = []
    t0 = datetime.now()
    for epoch in range(
            50):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpT = shuffle(X_train, T_train)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n, :].reshape(1, D)
            t = tmpT[n, :].reshape(1, 10)
            Y_train = forward(x, W, b)

            W -= lr * (gradW(t, Y_train, x) - reg * W)
            b -= lr * (gradb(t, Y_train) - reg * b)

            Y_test = forward(X_test, W, b)
            j_test = J(T_test, Y_test)
            J_test_stochastic.append(j_test)

        if epoch % 1 == 0:
            err = accuracy(predict(Y_test), t_test)
            if epoch % 10 == 0:
                print("Epoch {}:\tcost: {}\taccuracy: {}".format(
                    epoch, round(j_test, 4), err))
    Y_test_final = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test_final), t_test))
    print("Elapsted time for SGD: {}\n".format(datetime.now() - t0))

    ######## 3. BATCH GRADIENT DESCENT ########
    print('Batch Gradient Descent')
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    J_test_batch = []
    nb_batches = N // batch_size
    t0 = datetime.now()
    for epoch in range(50):
        tmpX, tmpT = shuffle(X_train, T_train)
        for batch_index in range(nb_batches):
            x = tmpX[batch_index * batch_size:(batch_index * batch_size +
                                               batch_size), :]
            t = tmpT[batch_index * batch_size:(batch_index * batch_size +
                                               batch_size), :]
            Y_train = forward(x, W, b)

            W -= lr * (gradW(t, Y_train, x) - reg * W)
            b -= lr * (gradb(t, Y_train) - reg * b)

            Y_test = forward(X_test, W, b)
            j_test = J(T_test, Y_test)
            J_test_batch.append(j_test)
        if epoch % 1 == 0:
            err = accuracy(predict(Y_test), t_test)
            if epoch % 10 == 0:
                print("Epoch {}\tcost: {}\taccuracy: {}".format(
                    epoch, round(j_test, 4), err))
    Y_test_final = forward(X_test, W, b)
    print("Final accuracy:", accuracy(predict(Y_test_final), t_test))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    ######## PLOTS ########
    x1 = np.linspace(0, 1, len(J_test_full))
    plt.plot(x1, J_test_full, label="full")
    x2 = np.linspace(0, 1, len(J_test_stochastic))
    plt.plot(x2, J_test_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(J_test_batch))
    plt.plot(x3, J_test_batch, label="batch")
    plt.legend()
    #plt.savefig('full_vs_stoch_vs_batch_lr={}_reg={}_batch_size={}.png'.format(lr, reg, batch_size))
    plt.show()
Example #19
threshold = 0.05  # stopping criteria

for i, (im, file_name) in enumerate(dataset_loader):
    im = im.cuda()

    # Prepare hints, mask, and get current classification
    data, target = util.get_colorization_data(im, opt, model, classifier)
    opt.target = opt.target if opt.targeted else target
    optimizer = torch.optim.Adam(
        [data['hints'].requires_grad_(), data['mask'].requires_grad_()],
        lr=opt.lr,
        betas=(0.9, 0.999))
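    # optimize the colour hints and mask directly (the classifier and colorization model weights stay fixed)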

    prev_diff = 0
    for itr in range(opt.num_iter):
        out_rgb, y = util.forward(model, classifier, opt, data)
        val, idx, labels = util.compute_class(opt, y)
        loss = util.compute_loss(opt, y, criterion)
        print(f'[{itr+1}/{opt.num_iter}] Loss: {loss:.3f} Labels: {labels}')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print("%.5f" % (loss.item()))

        diff = val[0] - val[1]

        if opt.targeted:
            if idx[0] == opt.target and diff > threshold and (
                    diff - prev_diff).abs() < 1e-3:
                break
        else:
Example #20
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print("Performing logistic regression...")

    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # # 1. full
    # W = np.random.randn(D, 10) / 28
    # b = np.zeros(10)
    # LL = []
    # lr = 0.0001
    # reg = 0.01
    # t0 = datetime.now()
    # for i in xrange(200):
    #     p_y = forward(Xtrain, W, b)
    #
    #     W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
    #     b += lr * (gradb(Ytrain_ind, p_y) - reg * b)
    #
    #
    #     p_y_test = forward(Xtest, W, b)
    #     ll = cost(p_y_test, Ytest_ind)
    #     LL.append(ll)
    #     if i % 10 == 0:
    #         err = error_rate(p_y_test, Ytest)
    #         print("Cost at iteration %d: %.6f" % (i, ll))
    #         print("Error rate:", err)
    # p_y = forward(Xtest, W, b)
    # print("Final error rate:", error_rate(p_y, Ytest))
    # print("Elapsted time for full GD:", datetime.now() - t0)
    #
    #
    # # 2. stochastic
    # W = np.random.randn(D, 10) / 28
    # b = np.zeros(10)
    # LL_stochastic = []
    # lr = 0.0001
    # reg = 0.01
    #
    # t0 = datetime.now()
    # for i in range(1): # takes very long since we're computing cost for 41k samples
    #     tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
    #     for n in range(min(N, 500)): # shortcut so it won't take so long...
    #         x = tmpX[n,:].reshape(1,D)
    #         y = tmpY[n,:].reshape(1,10)
    #         p_y = forward(x, W, b)
    #
    #         W += lr*(gradW(y, p_y, x) - reg*W)
    #         b += lr*(gradb(y, p_y) - reg*b)
    #
    #         p_y_test = forward(Xtest, W, b)
    #         ll = cost(p_y_test, Ytest_ind)
    #         LL_stochastic.append(ll)
    #
    #         if n % (N/2) == 0:
    #             err = error_rate(p_y_test, Ytest)
    #             print("Cost at iteration %d: %.6f" % (i, ll))
    #             print("Error rate:", err)
    # p_y = forward(Xtest, W, b)
    # print("Final error rate:", error_rate(p_y, Ytest))
    # print("Elapsted time for SGD:", datetime.now() - t0)
    #
    #
    # # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz  # integer division so range() below gets an int

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j + 1) * batch_sz, :]
            y = tmpY[j * batch_sz:(j + 1) * batch_sz, :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches // 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print("Cost at iteration %d: %.6f" % (i, ll))
                print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Example #21
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. Full GD
    W = np.random.randn(D, 10) / 28  # scale by square root of the dimensionality: 28 = sqrt(28 * 28) = sqrt(784)
    b = np.zeros(10)
    loss_batch = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
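    # NOTE: `epoch` (the number of training passes) is assumed to be defined at module level in the original script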

    for i in range(epoch):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        temp_loss = cost(p_y_test, Ytest_ind)
        loss_batch.append(temp_loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, temp_loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)
    print("=======================================================")

    # 2. Stochastic GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    loss_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()

    for i in range(
            epoch
    ):  # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)

        #for n in range(min(N, 500)): # shortcut so it won't take so long...
        for n in range(N):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            loss = cost(p_y_test, Ytest_ind)
            loss_stochastic.append(loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)
    print("=======================================================")

    # 3. Mini-batch GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    loss_mini_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()

    for i in range(epoch):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)

        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            temp_loss = cost(p_y_test, Ytest_ind)
            loss_mini_batch.append(temp_loss)

        if i % 1 == 0:
            err = error_rate(p_y_test, Ytest)
            if i % 10 == 0:
                print("Cost at iteration %d: %.6f" % (i, temp_loss))
                print("Error rate:", err)

    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for mini-batch GD:", datetime.now() - t0)

    # Plot graph
    x1 = np.linspace(0, 1, len(loss_batch))
    plt.plot(x1, loss_batch, label="full(batch) GD")
    x2 = np.linspace(0, 1, len(loss_stochastic))
    plt.plot(x2, loss_stochastic, label="stochastic GD")
    x3 = np.linspace(0, 1, len(loss_mini_batch))
    plt.plot(x3, loss_mini_batch, label="mini-batch GD")
    plt.legend()
    plt.show()
Example #22
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()

    print('logistic regression')

    # randomly assign weights

    N, D = Xtrain.shape

    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 10
    scale = 28
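    # M = 10 output classes; 28 = sqrt(784) scales the initial weights down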

    # full grad descent
    W, b = initwb(D, M, scale)

    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        P_Y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, P_Y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, P_Y) - reg * b)

        P_Y_test = forward(Xtest, W, b)
        ll = cost(P_Y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(P_Y_test, Ytest)
            print("cost at iter:  %d:  %.6f" % (i, ll))
            print("error rate:  ", err, "\n")

    P_Y = forward(Xtest, W, b)
    print("final error:  ", error_rate(P_Y, Ytest))
    print("elapsed time for full GD:  ", datetime.now() - t0)

    # 2.  Stochastic
    W, b = initwb(D, M, scale)

    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)

            P_Y_test = forward(Xtest, W, b)

            ll = cost(P_Y_test, Ytest_ind)

            LL_stochastic.append(ll)

            if n % (N / 2) == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d:  %6.f" % (i, ll))
                print("error rate:  ", err)

    P_Y = forward(Xtest, W, b)

    print("error rate:  ", error_rate(P_Y, Ytest))
    print("elapsed time for SGD:  ", datetime.now() - t0)

    # batch
    W, b = initwb(D, M, scale)

    LL_batch = []
    lr = 0.001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            P_Y = forward(x, W, b)

            W += lr * (gradW(y, P_Y, x) - reg * W)
            b += lr * (gradb(y, P_Y) - reg * b)
            P_Y_test = forward(Xtest, W, b)

            ll = cost(P_Y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches / 2) == 0:
                err = error_rate(P_Y_test, Ytest)
                print("Cost at iteration %d:  %6.f" % (i, ll))
                print("error rate:  ", err)
    P_Y = forward(Xtest, W, b)

    print("error rate:  ", error_rate(P_Y, Ytest))
    print("elapsed time for SGD:  ", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize the data:
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print('Performing logistic regression...')
    Xtrain, Ytrain = X[:-1000, :], Y[:-1000]
    Xtest, Ytest = X[-1000:, :], Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)
    K = len(set(Y))

    np.random.seed()

    # 1. Full Gradient Descend:
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LL = []  # a storage for costs
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    t0 = datetime.now()
    print('utilizing full GD...')
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr * (grad_W(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (grad_b(Ytrain_ind, p_y).sum(axis=0) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)

        if i % 10 == 0:
            error = error_rate(p_y_test, Ytest)
            print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))
    dt1 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LL)
    plt.title('Cost for full GD')
    plt.savefig('Cost_full_GD.png')  # save before show(), which clears the current figure
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for full GD:', dt1)

    # 2. Stochastic Gradient Descent
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LLstochastic = []  # a storage for costs
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    t0 = datetime.now()
    print('utilizing stochastic GD...')
    for i in range(25):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        # here we go through every sample in the shuffled training set (no 500-sample shortcut)
        for n in range(N):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, K)
            p_y = forward(x, W, b)

            W += lr * (grad_W(y, p_y, x) - reg * W)
            b += lr * (grad_b(y, p_y).sum(axis=0) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LLstochastic.append(ll)

            if n % (N // 2) == 0:
                error = error_rate(p_y_test, Ytest)
                print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))

    dt2 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LLstochastic)
    plt.title('Cost for stochastic GD')
    plt.savefig('Cost_stochastic_GD.png')
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for stochastic GD:', dt2)

    # 3. Batch Gradient Descent:
    W = np.random.randn(D, K) / np.sqrt(D)
    b = np.zeros(K)
    LLbatch = []
    lr = 0.0001  # learning rate
    reg = 0.01  # L2-regularization term
    batch_size = 500
    n_batches = N // batch_size
    t0 = datetime.now()
    print('utilizing batch GD...')
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_size:batch_size * (j + 1), :]
            y = tmpY[j * batch_size:batch_size * (j + 1), :]
            p_y = forward(x, W, b)

            W += lr * (grad_W(y, p_y, x) - reg * W)
            b += lr * (grad_b(y, p_y).sum(axis=0) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LLbatch.append(ll)

            if j % (n_batches // 2) == 0:
                error = error_rate(p_y_test, Ytest)
                print('i: %d, cost: %.6f, error: %.6f' % (i, ll, error))
    dt3 = datetime.now() - t0
    p_y_test = forward(Xtest, W, b)
    plt.plot(LLbatch)
    plt.title('Cost for batch GD')
    plt.savefig('Cost_batch_GD.png')
    plt.show()
    print('Final error rate:', error_rate(p_y_test, Ytest))
    print('Elapsed time for batch GD', dt3)

    # plot all costs together:
    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label='full')

    x2 = np.linspace(0, 1, len(LLstochastic))
    plt.plot(x2, LLstochastic, label='stochastic')

    x3 = np.linspace(0, 1, len(LLbatch))
    plt.plot(x3, LLbatch, label='batch')

    plt.legend()
    plt.savefig('Costs_together.png')
    plt.show()
def main():

    # compare 3:
    # 1. batch SGD
    # 2. batch SGD with momentum
    # 3. batch SGD with Nesterov momentum
    # all with L2 regularization

    X, Y = get_normalized_data()

    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    M = 300
    K = 10

    max_iter = 20
    epochs = 20
    print_period = 10
    lr0 = 0.0004
    reg = 0.01
    epsilon = 10e-10
    decay = 0.999
    batch_size = 500
    number_batches = int(N // batch_size)

    W1 = np.random.randn(D, M) / 28
    b1 = np.zeros(M)
    W2 = np.random.randn(M, K) / np.sqrt(M)
    b2 = np.zeros(K)

    W1_0 = W1.copy()
    b1_0 = b1.copy()
    W2_0 = W2.copy()
    b2_0 = b2.copy()

    cache_W2 = 1
    cache_W1 = 1
    cache_b2 = 1
    cache_b1 = 1

    tr_costs = []
    errors_batch = []
    losses_test = []

    # 1. plain mini-batch gradient descent with RMSProp
    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # # AdaGrad
            # cache_W2 += derivative_w2(z_tr, ytr, ytr_pred) * derivative_w2(z_tr, ytr, ytr_pred)
            # cache_W1 += derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) * derivative_w1(xtr, z_tr, ytr, ytr_pred, W2)
            # cache_b2 += derivative_b2(ytr, ytr_pred) * derivative_b2(ytr, ytr_pred)
            # cache_b1 += derivative_b1(z_tr, ytr, ytr_pred, W2) * derivative_b1(z_tr, ytr, ytr_pred, W2)

            # RMSProp
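            # standard form, for reference:
            #   cache = decay * cache + (1 - decay) * grad**2
            #   param -= lr * grad / (sqrt(cache) + eps)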
            cache_W2 = decay * cache_W2 + (1 - decay) * gW2 * gW2
            cache_W1 = decay * cache_W1 + (1 - decay) * gW1 * gW1
            cache_b2 = decay * cache_b2 + (1 - decay) * gb2 * gb2
            cache_b1 = decay * cache_b1 + (1 - decay) * gb1 * gb1

            # true division (not //); the L2 term is already included in the gradients above
            W2 -= lr0 * gW2 / (np.sqrt(cache_W2) + epsilon)
            b2 -= lr0 * gb2 / (np.sqrt(cache_b2) + epsilon)
            W1 -= lr0 * gW1 / (np.sqrt(cache_W1) + epsilon)
            b1 -= lr0 * gb1 / (np.sqrt(cache_b1) + epsilon)

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch.append(e)
                print("Error rate:", e)

                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)

    #plt.plot(tr_costs, label='tr_costs')
    plt.plot(losses_test, label='losses_test RMS')
    #plt.plot(errors_batch, label='errors_batch')
#    plt.show()
#    print("tr_costs", tr_costs)
    print("Final error rate:", error_rate(pY, Ytest))


    # 2. mini-batch gradient descent with RMSprop + momentum
    W1 = W1_0.copy()
    b1 = b1_0.copy()
    W2 = W2_0.copy()
    b2 = b2_0.copy()

    # weights were reset above so both optimizers start from the same initialization

    tr_costs_momentum = []
    errors_batch_momentum = []
    losses_test_momentum = []

    # momentum coefficient
    mu = 0.8
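    # velocity update used below: v = mu * v + (1 - mu) * lr * preconditioned_grad, then param -= v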

    cache_W2 = 1
    cache_W1 = 1
    cache_b2 = 1
    cache_b1 = 1

    dW1 = 0
    dW2 = 0
    db1 = 0
    db2 = 0

    cW1 = 0
    cW2 = 0
    cb1 = 0
    cb2 = 0

    for epoch in range(epochs):
        for j in range(number_batches):
            xtr = Xtrain[j * batch_size:(j * batch_size + batch_size), :]
            ytr = Ytrain_ind[j * batch_size:(j * batch_size + batch_size), :]
            ytr_pred, z_tr = forward(xtr, W1, b1, W2, b2)

            # gradients
            gW2 = derivative_w2(z_tr, ytr, ytr_pred) + reg * W2
            gb2 = derivative_b2(ytr, ytr_pred) + reg * b2
            gW1 = derivative_w1(xtr, z_tr, ytr, ytr_pred, W2) + reg * W1
            gb1 = derivative_b1(z_tr, ytr, ytr_pred, W2) + reg * b1

            # note: momentum and velocity may be mixed up here

            # RMSProp
            cache_W2 = decay * cache_W2 + (1 - decay) * gW2 * gW2
            cache_W1 = decay * cache_W1 + (1 - decay) * gW1 * gW1
            cache_b2 = decay * cache_b2 + (1 - decay) * gb2 * gb2
            cache_b1 = decay * cache_b1 + (1 - decay) * gb1 * gb1

            # preconditioned gradients: true division by sqrt(cache) + eps (not //)
            cW2 = gW2 / (np.sqrt(cache_W2) + epsilon)
            cb2 = gb2 / (np.sqrt(cache_b2) + epsilon)
            cW1 = gW1 / (np.sqrt(cache_W1) + epsilon)
            cb1 = gb1 / (np.sqrt(cache_b1) + epsilon)

            # update velocity
            dW2 = mu * dW2 + (1 - mu) * lr0 * cW2
            db2 = mu * db2 + (1 - mu) * lr0 * cb2
            dW1 = mu * dW1 + (1 - mu) * lr0 * cW1
            db1 = mu * db1 + (1 - mu) * lr0 * cb1

            # update
            W2 -= dW2
            W1 -= dW1
            b2 -= db2
            b1 -= db1

            if j % print_period == 0:
                yte_pred, _ = forward(Xtest, W1, b1, W2, b2)
                l = cost(yte_pred, Ytest_ind)
                losses_test_momentum.append(l)
                print("test set Cost at iteration epoch=%d, j=%d: %.6f" % (epoch, j, l))

                e = error_rate(yte_pred, Ytest)
                errors_batch_momentum.append(e)
                print("Error rate:", e)

                ctr = cost(ytr_pred, ytr)
                print("traning set cost", ctr)
                tr_costs_momentum.append(ctr)

    pY, _ = forward(Xtest, W1, b1, W2, b2)

    #plt.plot(tr_costs_momentum, label='tr_costs momentum')
    plt.plot(losses_test_momentum, label='losses_test momentum RMS')
    #plt.plot(errors_batch, label='errors_batch')
    # plt.show()
#    print("tr_costs", errors_batch_momentum)
    print("Final error rate:", error_rate(pY, Ytest))
    plt.legend()
    plt.show()
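
# For reference (not part of the original example): a minimal sketch of the
# two-layer helpers assumed above (forward returning (predictions, hidden
# activations), derivative_w2/b2/w1/b1, cost, error_rate). Signatures match
# the calls above; a sigmoid hidden layer is assumed here, but the real
# utility module may use tanh or relu and a different cost normalization.
import numpy as np

def forward(X, W1, b1, W2, b2):
    Z = 1 / (1 + np.exp(-(X.dot(W1) + b1)))          # sigmoid hidden layer (assumption)
    A = Z.dot(W2) + b2
    expA = np.exp(A - A.max(axis=1, keepdims=True))  # numerically stable softmax
    return expA / expA.sum(axis=1, keepdims=True), Z

def derivative_w2(Z, T, Y):
    return Z.T.dot(Y - T)                   # d(cross-entropy)/dW2

def derivative_b2(T, Y):
    return (Y - T).sum(axis=0)

def derivative_w1(X, Z, T, Y, W2):
    dZ = (Y - T).dot(W2.T) * Z * (1 - Z)    # backprop through the sigmoid
    return X.T.dot(dZ)

def derivative_b1(Z, T, Y, W2):
    dZ = (Y - T).dot(W2.T) * Z * (1 - Z)
    return dZ.sum(axis=0)

def cost(p_y, t):
    return -np.mean(t * np.log(p_y))

def error_rate(p_y, t):
    return np.mean(np.argmax(p_y, axis=1) != t)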
Exemplo n.º 25
0
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / np.sqrt(D)
    W0 = W.copy() # save for later
    b = np.zeros(10)
    test_losses_full = []
    lr = 0.9
    reg = 0.
    t0 = datetime.now()
    last_dt = 0
    intervals = []
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        gW = gradW(Ytrain_ind, p_y, Xtrain) / N
        gb = gradb(Ytrain_ind, p_y) / N

        W += lr*(gW - reg*W)
        b += lr*(gb - reg*b)

        p_y_test = forward(Xtest, W, b)
        test_loss = cost(p_y_test, Ytest_ind)
        dt = (datetime.now() - t0).total_seconds()

        # save these
        dt2 = dt - last_dt
        last_dt = dt
        intervals.append(dt2)

        test_losses_full.append([dt, test_loss])
        if (i + 1) % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)

    # save the max time so we don't surpass it in subsequent iterations
    max_dt = dt
    avg_interval_dt = np.mean(intervals)
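    # losses are recorded against wall-clock time (dt); the SGD and mini-batch
    # runs below stop once they reach full GD's elapsed time (max_dt), so all
    # three curves share the same time axis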


    # 2. stochastic
    W = W0.copy()
    b = np.zeros(10)
    test_losses_sgd = []
    lr = 0.001
    reg = 0.

    t0 = datetime.now()
    last_dt_calculated_loss = 0
    done = False
    for i in range(50): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(N):
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            gW = gradW(y, p_y, x)
            gb = gradb(y, p_y)

            W += lr*(gW - reg*W)
            b += lr*(gb - reg*b)

            dt = (datetime.now() - t0).total_seconds()
            dt2 = dt - last_dt_calculated_loss

            if dt2 > avg_interval_dt:
                last_dt_calculated_loss = dt
                p_y_test = forward(Xtest, W, b)
                test_loss = cost(p_y_test, Ytest_ind)
                test_losses_sgd.append([dt, test_loss])

            # time to quit
            if dt > max_dt:
                done = True
                break
        if done:
            break

        if (i + 1) % 1 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)


    # 3. mini-batch
    W = W0.copy()
    b = np.zeros(10)
    test_losses_batch = []
    batch_sz = 500
    lr = 0.08
    reg = 0.
    n_batches = int(np.ceil(N / batch_sz))
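    # np.ceil keeps a final partial batch; gradients are divided by
    # current_batch_sz below so the smaller last batch takes a comparable step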


    t0 = datetime.now()
    last_dt_calculated_loss = 0
    done = False
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j*batch_sz:(j + 1)*batch_sz,:]
            y = tmpY[j*batch_sz:(j + 1)*batch_sz,:]
            p_y = forward(x, W, b)

            current_batch_sz = len(x)
            gW = gradW(y, p_y, x) / current_batch_sz
            gb = gradb(y, p_y) / current_batch_sz

            W += lr*(gW - reg*W)
            b += lr*(gb - reg*b)

            dt = (datetime.now() - t0).total_seconds()
            dt2 = dt - last_dt_calculated_loss

            if dt2 > avg_interval_dt:
                last_dt_calculated_loss = dt
                p_y_test = forward(Xtest, W, b)
                test_loss = cost(p_y_test, Ytest_ind)
                test_losses_batch.append([dt, test_loss])

            # time to quit
            if dt > max_dt:
                done = True
                break
        if done:
            break

        if (i + 1) % 10 == 0:
            print("Cost at iteration %d: %.6f" % (i + 1, test_loss))
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for mini-batch GD:", datetime.now() - t0)


    # convert to numpy arrays
    test_losses_full = np.array(test_losses_full)
    test_losses_sgd = np.array(test_losses_sgd)
    test_losses_batch = np.array(test_losses_batch)

    
    plt.plot(test_losses_full[:,0], test_losses_full[:,1], label="full")
    plt.plot(test_losses_sgd[:,0], test_losses_sgd[:,1], label="sgd")
    plt.plot(test_losses_batch[:,0], test_losses_batch[:,1], label="mini-batch")
    plt.legend()
    plt.show()
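
# For reference (not part of the examples): a minimal sketch of the softmax
# helpers the logistic-regression examples assume (forward, cost, gradW, gradb,
# error_rate, y2indicator). Argument order matches the calls above, but the
# real utility module may differ in the cost's sign and normalization.
import numpy as np

def forward(X, W, b):
    A = X.dot(W) + b
    expA = np.exp(A - A.max(axis=1, keepdims=True))  # numerically stable softmax
    return expA / expA.sum(axis=1, keepdims=True)

def cost(p_y, t):
    return -np.mean(t * np.log(p_y))   # cross-entropy against indicator targets

def gradW(t, y, X):
    return X.T.dot(t - y)              # ascent direction, matching W += lr * (...)

def gradb(t, y):
    return (t - y).sum(axis=0)

def error_rate(p_y, t):
    return np.mean(np.argmax(p_y, axis=1) != t)

def y2indicator(y):
    ind = np.zeros((len(y), 10))
    ind[np.arange(len(y)), y.astype(int)] = 1
    return ind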
Exemplo n.º 26
0
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    # normalize X first

    print "Performing logistic regression..."
    Xtrain = X[:-1000, ]
    Ytrain = Y[:-1000]
    Xtest = X[-1000:, ]
    Ytest = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    #1. Full GD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    # the list of loss values recorded at each iteration

    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in xrange(50):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)

        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err

    p_y = forward(Xtest, W, b)
    print "The lost sequence is given as:", LL
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0

    #2. Stochastic

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()

    for i in xrange(1):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in xrange(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err

    p_y = forward(Xtest, W, b)

    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsed time for SGD:", datetime.now() - t0

    # x1 = np.linspace(0, 1, len(LL))
    # plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    plt.legend()
    plt.show()
    print LL

    #3. batch

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N / batch_sz

    t0 = datetime.now()

    for i in xrange(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in xrange(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)
            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)

            LL_batch.append(ll)
            if j % (n_batches / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Exemplo n.º 27
0
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    mu = X.mean(axis=0)
    std = X.std(axis=0)
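    # replace zero std (constant pixels) with 1 to avoid division by zero below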
    np.place(std, std == 0, 1)
    X = (X - mu) / std

    Xtrain, Ytrain = X[:-1000], Y[:-1000]
    Xtest, Ytest = X[-1000:], Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    #Full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    learning_rate = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in xrange(200):
        pY = forward(Xtrain, W, b)

        W -= learning_rate * (derivative_W(pY, Ytrain_ind, Xtrain) + reg * W)
        b -= learning_rate * (derivative_b(pY, Ytrain_ind) + reg * b)

        pYtest = forward(Xtest, W, b)
        ll = cost(pYtest, Ytest_ind)
        LL.append(ll)

        if i % 10 == 0:
            err = error_rate(pYtest, Ytest)
            print "Cost at iter %d: %.6f" % (i, ll)
            print "Error rate:", err

    pY = forward(Xtest, W, b)
    print "Final error rate:", error_rate(pY, pYtest)
    print "Elapsed time for full GD:", datetime.now() - t0

    #SGD
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    learning_rate = 0.0001
    reg = 0.01
    t0 = datetime.now()

    for i in xrange(1):  # one epoch
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in xrange(min(N, 500)):
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W -= learning_rate * (derivative_W(p_y, y, x) + reg * W)
            b -= learning_rate * (derivative_b(p_y, y) + reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final Error rate:", error_rate(p_y, Ytest)
    print "Elapsed time for SGD:", datetime.now() - t0

    #Batch

    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    learning_rate = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N / batch_sz
    t0 = datetime.now()

    for i in xrange(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in xrange(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W -= learning_rate * (derivative_W(p_y, y, x) + reg * W)
            b -= learning_rate * (derivative_b(p_y, y) + reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)

            if j % (n_batches / 2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final Error rate:", error_rate(p_y, Ytest)
    print "Elapsed time for Batch GD:", datetime.now() - t0

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label='full')
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label='stochastic')
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label='batch')
    plt.legend()
    plt.show()
Exemplo n.º 28
0
def main():
    X, Y, _, _ = get_transformed_data()
    X = X[:, :300]

    # normalize X first
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mu) / std

    print "Performing logistic regression..."
    Xtrain = X[:-1000,]
    Ytrain = Y[:-1000]
    Xtest  = X[-1000:,]
    Ytest  = Y[-1000:]

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in xrange(200):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print "Cost at iteration %d: %.6f" % (i, ll)
            print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for full GD:", datetime.now() - t0


    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in xrange(1): # takes very long since we're computing cost for 41k samples
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in xrange(min(N, 500)): # shortcut so it won't take so long...
            x = tmpX[n,:].reshape(1,D)
            y = tmpY[n,:].reshape(1,10)
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

            if n % (N/2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for SGD:", datetime.now() - t0


    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N / batch_sz

    t0 = datetime.now()
    for i in xrange(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in xrange(n_batches):
            x = tmpX[j*batch_sz:(j*batch_sz + batch_sz),:]
            y = tmpY[j*batch_sz:(j*batch_sz + batch_sz),:]
            p_y = forward(x, W, b)

            W += lr*(gradW(y, p_y, x) - reg*W)
            b += lr*(gradb(y, p_y) - reg*b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
            if j % (n_batches/2) == 0:
                err = error_rate(p_y_test, Ytest)
                print "Cost at iteration %d: %.6f" % (i, ll)
                print "Error rate:", err
    p_y = forward(Xtest, W, b)
    print "Final error rate:", error_rate(p_y, Ytest)
    print "Elapsted time for batch GD:", datetime.now() - t0



    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
def main():
    Xtrain, Xtest, Ytrain, Ytest = get_transformed_data()
    print("Performing logistic regression...")

    N, D = Xtrain.shape
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(50):
        p_y = forward(Xtrain, W, b)

        W += lr * (gradW(Ytrain_ind, p_y, Xtrain) - reg * W)
        b += lr * (gradb(Ytrain_ind, p_y) - reg * b)

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for full GD:", datetime.now() - t0)

    # 2. stochastic
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_stochastic = []
    lr = 0.0001
    reg = 0.01

    t0 = datetime.now()
    for i in range(50):  # slow: test-set cost is recomputed after every sample update
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for n in range(min(N, 500)):  # shortcut so it won't take so long...
            x = tmpX[n, :].reshape(1, D)
            y = tmpY[n, :].reshape(1, 10)
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_stochastic.append(ll)

        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for SGD:", datetime.now() - t0)

    # 3. batch
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL_batch = []
    lr = 0.0001
    reg = 0.01
    batch_sz = 500
    n_batches = N // batch_sz

    t0 = datetime.now()
    for i in range(50):
        tmpX, tmpY = shuffle(Xtrain, Ytrain_ind)
        for j in range(n_batches):
            x = tmpX[j * batch_sz:(j * batch_sz + batch_sz), :]
            y = tmpY[j * batch_sz:(j * batch_sz + batch_sz), :]
            p_y = forward(x, W, b)

            W += lr * (gradW(y, p_y, x) - reg * W)
            b += lr * (gradb(y, p_y) - reg * b)

            p_y_test = forward(Xtest, W, b)
            ll = cost(p_y_test, Ytest_ind)
            LL_batch.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))
    print("Elapsted time for batch GD:", datetime.now() - t0)

    x1 = np.linspace(0, 1, len(LL))
    plt.plot(x1, LL, label="full")
    x2 = np.linspace(0, 1, len(LL_stochastic))
    plt.plot(x2, LL_stochastic, label="stochastic")
    x3 = np.linspace(0, 1, len(LL_batch))
    plt.plot(x3, LL_batch, label="batch")
    plt.legend()
    plt.show()
Exemplo n.º 30
0
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    # 1. full
    W = np.random.randn(D, 10) / 28
    b = np.zeros(10)
    LL = []
    lr = 0.0001
    reg = 0.01
    t0 = datetime.now()
    for i in range(200):
        p_y = forward(Xtrain, W, b)

        W += lr*(gradW(Ytrain_ind, p_y, Xtrain) - reg*W)
        b += lr*(gradb(Ytrain_ind, p_y) - reg*b)
        

        p_y_test = forward(Xtest, W, b)
        ll = cost(p_y_test, Ytest_ind)
        LL.append(ll)
        if i % 10 == 0:
            err = error_rate(p_y_test, Ytest)
            print("Cost at iteration %d: %.6f" % (i, ll))
            print("Error rate:", err)
    p_y = forward(Xtest, W, b)
    print("Final error rate:", error_rate(p_y, Ytest))