def q_2_4():
    print("******RUNNING TITANIC DATA SET*****")

    data, test_data, feature_names, class_names = load_titanic_data()
    data = preprocess_titanic(data, True)

    perm = np.random.RandomState(seed=20).permutation((data.shape[0]))
    data = data[perm]
    data, valid = data[:800], data[800:]
    idy = data.shape[1] - 1

    type_map, categories_map = gen_maps(data)
    classifier = DecisionTree(type_map, categories_map)
    classifier.fit(data, 4, 10)
    train_predictions = classifier.predict(data)
    train_actual = extract_column(data, idy)
    valid_predictions = classifier.predict(valid)
    valid_actual = extract_column(valid, idy)

    print("Decision Tree training Accuracies:       ",
          error_rate(train_predictions, train_actual))
    print("Decision Tree Validation Accuracies:    ",
          error_rate(valid_predictions, valid_actual))

    classifier = RandomForest(300, 300, 2, type_map, categories_map, 20)
    classifier.fit(data, 10, 10)
    train_predictions = classifier.predict(data)
    train_actual = extract_column(data, idy)
    valid_predictions = classifier.predict(valid)
    valid_actual = extract_column(valid, idy)

    print("Random Forest training Accuracies:       ",
          error_rate(train_predictions, train_actual))
    print("Random Forest Validation Accuracies:    ",
          error_rate(valid_predictions, valid_actual))
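# Note: error_rate itself is not defined in these snippets. A minimal sketch,
# assuming it simply returns the fraction of mismatched predictions (which is
# consistent with the 1 - error_rate scoring used in later examples):
import numpy as np

def error_rate(targets, predictions):
    # fraction of predictions that disagree with the targets
    return np.mean(np.asarray(targets) != np.asarray(predictions))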
    def fit(self, X, Y, learning_rate=10e-7, reg=0, epochs=120000, show_fig=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape
        self.W = np.random.randn(D) / np.sqrt(D)
        self.b = 0

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            pY = self.forward(X)

            #gradient descent step
            self.W -= learning_rate*(np.dot(X.T, (pY - Y)) + reg*self.W)
            self.b -= learning_rate*((pY - Y).sum() + reg*self.b)

            if i % 20 == 0:
                pYvalid = self.forward(Xvalid)
                c = sigmoid_cost(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.round(pYvalid))
                print "Epoch: {}".format(i)
                if e < best_validation_error:
                    best_validation_error = e
        print "best validation error: {}".format(best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()
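# Note: sigmoid_cost is an assumed helper in the snippet above. A minimal
# sketch of it (and of the sigmoid it relies on) for a binary logistic model:
import numpy as np

def sigmoid(a):
    # logistic function
    return 1 / (1 + np.exp(-a))

def sigmoid_cost(T, Y):
    # binary cross-entropy between targets T and predicted probabilities Y
    return -(T * np.log(Y) + (1 - T) * np.log(1 - Y)).sum()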
Example #3
def main():

    df, X, y = preprocess_data()
    X_train, X_test, y_train, y_test = train_test_splitter(X=X, y=y, ratio=0.8)
    logistic_regressor = LogisticRegressor(alpha=0.05,
                                           c=0.01,
                                           T=1000,
                                           random_seed=0,
                                           intercept=True)
    losses = logistic_regressor.fit(X_train, y_train)
    plot_losses(losses=losses, savefig=True)

    train_error = error_rate(y_train, logistic_regressor.predict(X_train))
    test_error = error_rate(y_test, logistic_regressor.predict(X_test))

    print('Training Error Rate: %f' % train_error)
    print('Test Error Rate: %f' % test_error)
Example #4
def main():

    X, T = get_facialexpression(balance_ones=True)
    # X, T  = np.shuffle(X,T)
    label_map = [
        'Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ]
    # klass =3  error_rate=0.0
    # klass =4  error_rate=0.0
    # klass =5  error_rate=0.0
    # klass =0
    klass = 4
    N, D = X.shape
    X = np.concatenate(
        (np.ones((N, 1)), X),
        axis=1,
    )
    T = T.astype(np.int32)
    X = X.astype(np.float32)
    # Convert labels into a binary one-vs-rest target for the selected class
    T = class1detect(T, detect=klass)

    D += 1

    # params
    lr = 5e-7
    max_iteration = 150
    W = np.random.randn(D) / np.sqrt(D)
    cost = []
    error = []
    for i in xrange(max_iteration):
        Y = forward(W, X)
        cost.append(cross_entropy(T, Y))
        error.append(error_rate(T, Y))

        W += lr * X.T.dot(T - Y)

        if i % 5 == 0:
            print "i=%d\tcost=%.3f\terror=%.3f" % (i, cost[-1], error[-1])

    # print the final iteration's stats once the loop has finished
    print "i=%d\tcost=%.3f\terror=%.3f" % (i, cost[-1], error[-1])

    print "Final weight:", W
    print T
    print np.round(Y)

    plt.title('logistic regression ' + label_map[klass])
    plt.xlabel('iterations')
    plt.ylabel('cross entropy')
    plt.plot(cost)
    plt.show()

    plt.title('logistic regression ' + label_map[klass])
    plt.xlabel('iterations')
    plt.ylabel('error rate')
    plt.plot(error)
    plt.show()
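# Note: class1detect is an assumed helper. Presumably it converts the label
# vector into a one-vs-rest binary target for the chosen class, e.g.:
import numpy as np

def class1detect(T, detect):
    # 1 where the label equals the selected class, 0 elsewhere
    return (np.asarray(T) == detect).astype(np.int32)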
    def fit(self,
            X,
            Y,
            learning_rate=1e-6,
            reg=0,
            epochs=12000,
            show_figure=False):
        X, Y = shuffle(X, Y)

        Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
        X, Y = X[:-1000, :], Y[:-1000]

        K = len(set(Y))
        N, D = X.shape

        Yind_valid = np.zeros((1000, K), dtype=np.int32)
        Yind = np.zeros((N, K), dtype=np.int32)
        Yind_valid[np.arange(1000), Yvalid] = 1
        Yind[np.arange(N), Y] = 1

        self.W = np.random.randn(D, K) / np.sqrt(D + K)
        self.b = 0

        costs = []
        best_validation_error = 1
        for i in xrange(epochs):
            for j in xrange(N):
                xj = X[j, :].T
                yj = Y[j]

                yp = np.argmax((self.W.T).dot(xj), axis=0)

                # gradient descent step
                self.W[:, yj] += (xj + reg * self.W[:, yj])
                self.W[:, yp] -= (xj + reg * self.W[:, yp])
                # self.b -= learning_rate *((pY-Y).sum() 	+ reg*self.b)

                if i % 20 == 0:
                    pYvalid = self.forward(Xvalid)
                    # c = sigmoid_cost(Yvalid, pYvalid)
                    c = cross_entropy(Yind_valid, pYvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                    sys.stdout.write("i:%s\tcost:%.4f\terror:%.4f\t\r" %
                                     (format(i, '04d'), c, e))
                    sys.stdout.flush()
                    # print "i", i, "cost:", c, "error", e
                    if e < best_validation_error:
                        best_validation_error = e
        print "best_validation_error:", best_validation_error

        if show_figure:
            plt.plot(costs)
            plt.show()
Example #6
    def _train_store_prediction(self, sess, batch_x, batch_y, name, prediction_path):
        loss, prediction = sess.run([self.loss, self.predicter], feed_dict={self.x: batch_x,
                                                                            self.y: batch_y})

        logging.info("Verification error= {:.1f}%, loss= {:.4f}".format(utils.error_rate(prediction, batch_y),
                                                                        loss))

        img = utils.combine_img_prediction(batch_x, batch_y, prediction)
        utils.save_image(img, "%s/%s.jpg" % (prediction_path, name))
        return
Example #7
    def fit(self,
            X,
            Y,
            learning_rate=1e-8,
            reg=1e-12,
            epochs=10000,
            show_fig=False):

        D = X.shape[1]  # number of features
        K = len(set(Y))  # number of classes

        X, Y = shuffle(X, Y)
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        T_valid = one_hot_encoder(Y_valid)
        X, Y = X[:-1000], Y[:-1000]

        T = one_hot_encoder(Y)

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for epoch in range(epochs):
            Y_hat, Z = self.forward(X)

            # Weight updates ----------------------
            Y_hat_T = Y_hat - T
            self.W2 -= learning_rate * (Z.T.dot(Y_hat_T) + reg * self.W2)
            self.b2 -= learning_rate * (Y_hat_T.sum(axis=0) + reg * self.b2)

            val = Y_hat_T.dot(self.W2.T) * (1 - Z * Z)  #tanh
            self.W1 -= learning_rate * (X.T.dot(val) + reg * self.W1)
            self.b1 -= learning_rate * (val.sum(axis=0) + reg * self.b1)
            # -------------------------------------

            if epoch % 10 == 0:
                Y_hat_valid, _ = self.forward(X_valid)
                c = cross_entropy(T_valid, Y_hat_valid)
                costs.append(c)
                e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
                print("epoch:", epoch, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.title('Validation cost')
            plt.show()

        print("Final train classification_rate:", self.score(X, Y))
Example #8
def weather_predictor(df, do_print=False):
    """
    This function creates a DecisionTreeClassifier that predicts the weather
    tag using the data from the trail dataset, if do_print is True, will
    summary results
    """
    df = df.dropna()
    X = df.loc[:, 'Total':'DAY_OF_WEEK']
    y = df['weather']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    y_train = y_train.to_numpy(dtype=str)
    y_test = y_test.to_numpy(dtype=str)
    if do_print:
        print('weather_predictor')
        print('training set error rate: ' +
              str(100 * utils.error_rate(y_train, y_pred_train)) + '%')
        print('test set error rate: ' +
              str(100 * utils.error_rate(y_test, y_pred_test)) + '%')
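# A minimal usage sketch for weather_predictor; the pandas import and the file
# name are assumptions, not part of the original snippet:
import pandas as pd

trail_df = pd.read_csv('trail_data.csv')  # hypothetical path to the trail dataset
weather_predictor(trail_df, do_print=True)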
    def fit(self,
            X,
            Y,
            learning_rate=10e-8,
            reg=10e-8,
            epochs=10000,
            show_figure=False):

        X, Y = shuffle(X, Y)
        K = len(set(Y))
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        Tvalid = y2indicator(Yvalid, K)
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape

        T = y2indicator(Y, K)
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
        self.b1 = np.zeros(self.M)

        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M + K)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for i in xrange(epochs):
            pY, Z = self.forward(X)
            # gradient descent step
            self.W2 -= learning_rate * (Z.T.dot(pY - T) + reg * self.W2)
            self.b2 -= learning_rate * ((pY - T).sum(axis=0) + reg * self.b2)

            self.W1 -= learning_rate * (X.T.dot(
                (pY - T).dot(self.W2.T) * Z * (1 - Z)) + reg * self.W1)
            self.b1 -= learning_rate * (((pY - T).dot(self.W2.T) * Z *
                                         (1 - Z)).sum(axis=0) + reg * self.b1)

            if i % 10 == 0:
                pYvalid, Zvalid = self.forward(Xvalid)

                c = cost(Tvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))

                print "i", i, "cost:", c, "error", e
                if e < best_validation_error:
                    best_validation_error = e
        print "best_validation_error:", best_validation_error

        if show_figure:
            plt.plot(costs)
            plt.show()
Example #10
    def fit(self,
            X,
            Y,
            learning_rate=5 * 10e-7,
            reg=1.0,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
        self.b2 = 0

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # gradient descent step

            pY_Y = pY - Y
            self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
            self.b2 -= learning_rate * ((pY_Y).sum() + reg * self.b2)

            # relu
            #dZ =  np.outer(pY_Y, self.W2) * (Z>0)

            # tanh
            dZ = np.outer(pY_Y, self.W2) * (1 - Z * Z)
            self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            if i % 20 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = sigmoid_cost(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.round(pYvalid))
                print(f'i: {i}    cost: {c}      error: {e}')
                if e < best_validation_error:
                    best_validation_error = e
        print(f'Best validation error : {best_validation_error}')
        print(f'Score is : {self.score(Xvalid,Yvalid)}')
        if show_fig:
            plt.plot(costs)
            plt.show()
Example #11
    def fit(self,
            X,
            Y,
            learning_rate=10e-6,
            reg=10e-7,
            epochs=1000,
            show_figure=False):
        X, Y = shuffle(X, Y)
        x_valid = X[-10:]
        y_valid = Y[-10:]
        t_valid = utils.y2indicator(y_valid)

        x = X[:-10]
        y = Y[:-10]
        t = utils.y2indicator(y)

        N, D = x.shape
        K = len(set(y))

        self.W1 = np.random.randn(D, self.M)
        self.b1 = np.random.randn(self.M)

        self.W2 = np.random.randn(self.M, K)
        self.b2 = np.random.randn(K)

        costs = []

        for i in range(epochs):
            pY, Z = self.forward(x)

            # Updating weights (use delta for the output error; D already
            # holds the number of features above)
            delta = pY - t
            self.W2 -= learning_rate * (Z.T.dot(delta) + reg * self.W2)
            self.b2 -= learning_rate * (delta.sum(axis=0) + reg * self.b2)

            dZ = delta.dot(self.W2.T) * Z * (1 - Z)
            self.W1 -= learning_rate * (x.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            if i % 10 == 0:
                pY_valid, _ = self.forward(x_valid)
                c = utils.cost(t_valid, pY_valid)
                costs.append(c)
                e = utils.error_rate(y_valid, np.argmax(pY_valid, axis=1))
                print("i:", i, " cost: ", c, " error: ", e)

        if show_figure:
            plt.plot(costs)
            plt.show()
def kaggle():
    data, test_data, feature_names, class_names = load_titanic_data()
    data = preprocess_titanic(data, True)
    test = preprocess_titanic(test_data, False)

    type_map, categories_map = gen_maps(data)
    classifier = DecisionTree(type_map, categories_map)

    classifier.fit(data, 4, 10)
    predictions = classifier.predict(test)
    pred_train = classifier.predict(data)
    actual = extract_column(data, 9)
    print(error_rate(pred_train, actual))
    results_to_csv(predictions.flatten())
    """
Example #13
    def fit(self, X, y, plot_cost=False):
        X_train, Y_train, X_test, Y_test = get_train_test(X,
                                                          y,
                                                          percent_train=0.7)
        n, d = X_train.shape
        k = Y_train.shape[1]

        self.W1, self.b1 = init_weight_bias(d, self.hidden_layer_sizes[0])
        self.W2, self.b2 = init_weight_bias(self.hidden_layer_sizes[0], k)
        costs = []
        best_validation_error = 1

        if (self.batch_size == 'auto'):
            self.batch_size = min(200, n)

        num_batches = int(n / self.batch_size)

        for i in range(self.max_iter):
            X_shuffled, Y_shuffled = shuffle(X_train, Y_train)
            for j in range(num_batches):
                # take the j-th mini-batch from the shuffled data
                X_temp = X_shuffled[j * self.batch_size:(j + 1) * self.batch_size, :]
                Y_temp = Y_shuffled[j * self.batch_size:(j + 1) * self.batch_size, :]
                Ypred, Z1 = self.forward(X_temp)

                pY_t = Ypred - Y_temp
                self.W2 -= self.learning_rate_init * (Z1.T.dot(pY_t))
                self.b2 -= self.learning_rate_init * (pY_t.sum(axis=0))
                dZ = pY_t.dot(self.W2.T) * (Z1 > 0)
                self.W1 -= self.learning_rate_init * X_temp.T.dot(dZ)
                self.b1 -= self.learning_rate_init * dZ.sum(axis=0)

            if (i % 2) == 0:
                pY_test, _ = self.forward(X_test)
                c = cost(Y_test, pY_test)
                costs.append(c)
                e = error_rate(Y_test.argmax(axis=1), pY_test.argmax(axis=1))
                print('Iteration', i, 'Cost:', c, 'Error Rate:', e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if plot_cost:
            plt.plot(costs)
            plt.show()
Example #14
    def fit(self,
            X,
            Y,
            learning_rate=1e-8,
            reg=1e-12,
            epochs=10000,
            show_fig=False):

        D = X.shape[1]  # number of features
        K = len(set(Y))  # number of classes

        X, Y = shuffle(X, Y)
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        T_valid = one_hot_encoder(Y_valid)
        X, Y = X[:-1000], Y[:-1000]

        T = one_hot_encoder(Y)

        self.W = np.random.randn(D, K) / np.sqrt(D)
        self.b = np.zeros(K)

        costs = []
        best_validation_error = 1
        for epoch in range(epochs):
            Y_hat = self.forward(X)

            self.W -= learning_rate * (self.dJ_dw(T, Y_hat, X) + reg * self.W)
            self.b -= learning_rate * (self.dJ_db(T, Y_hat) + reg * self.b)

            if epoch % 100 == 0:
                Y_hat_valid = self.forward(X_valid)
                c = cross_entropy(T_valid, Y_hat_valid)
                costs.append(c)
                e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
                print("epoch:", epoch, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.title('Validation cost')
            plt.show()
        print("Final train classification_rate:", self.score(X, Y))
def main():
	user_action=3
	X, T  = get_ecommerce(user_action=user_action)
	# X, T  = np.shuffle(X,T)

	N, D  = X.shape 
	X 		= np.concatenate((np.ones((N,1)), X), axis=1, ) 
	T = T.astype(np.int32)
	X = X.astype(np.float32)
	D+=1

	# params
	lr = 5e-4
	max_iteration=1000
	W  		= np.random.randn(D) / np.sqrt(D)
	cost 	= []
	error = [] 
	for i in xrange(max_iteration):
		Y = forward(W, X)
		cost.append(cross_entropy(T,Y))
		error.append(error_rate(T,Y))

		W += lr*X.T.dot(T-Y)

		if i % 5 == 0:
			print "i=%d\tcost=%.3f\terror=%.3f" % (i,cost[-1],error[-1])

	# print the final iteration's stats once the loop has finished
	print "i=%d\tcost=%.3f\terror=%.3f" % (i, cost[-1], error[-1])

	print "Final weight:", W 	

	plt.title('logistic regression user_action=%d' % (user_action))
	plt.xlabel('iterations')
	plt.ylabel('cross entropy')
	plt.plot(cost)
	plt.show()

	plt.title('logistic regression user_action=%d' % (user_action))
	plt.xlabel('iterations')
	plt.ylabel('error rate')
	plt.plot(error)
	plt.show()
    def fit(self,
            X,
            Y,
            learning_rate=1e-6,
            reg=0,
            epochs=12000,
            show_figure=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
        X, Y = X[:-1000, :], Y[:-1000]

        N, D = X.shape
        self.W = np.random.randn(D) / np.sqrt(D)
        self.b = 0

        costs = []
        best_validation_error = 1
        for i in xrange(epochs):
            pY = self.forward(X)
            # gradient descent step
            self.W -= learning_rate * (X.T.dot(pY - Y) + reg * self.W)
            self.b -= learning_rate * ((pY - Y).sum() + reg * self.b)

            if i % 20 == 0:
                pYvalid = self.forward(Xvalid)
                # c = sigmoid_cost(Yvalid, pYvalid)
                c = cross_entropy(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.round(pYvalid))
                sys.stdout.write("i:%s\tcost:%.4f\terror:%.4f\t\r" %
                                 (format(i, '04d'), c, e))
                sys.stdout.flush()
                # print "i", i, "cost:", c, "error", e
                if e < best_validation_error:
                    best_validation_error = e
        print "best_validation_error:", best_validation_error

        if show_figure:
            plt.plot(costs)
            plt.show()
def main():
    #file_loc = '/media/avemuri/DEV/Data/deeplearning/mnist/train.csv'
    file_loc = 'D:/dev/data/face_emotion_recognizer/fer2013.csv'
    X_train, Y_train, X_test, Y_test = get_data(file_name=file_loc)
    
    pca = PCA(n_components=400)
    pca.fit(X_train)
    X_train = pca.transform(X_train)
    X_test = pca.transform(X_test)
    T_train = one_hot_encoder(Y_train)
    T_test = one_hot_encoder(Y_test)

    D = X_train.shape[1] # number of features
    K = len(set(Y_train)) # number of classes
    decay_rate = 0.999
    eps = 1e-10
    epochs = 100
    n_batches = 10
    batch_size = X_train.shape[0]//n_batches
    print_time = n_batches
    M = 300
    learning_rate=1e-6
    reg=1e-8
    

    W1_init = np.random.randn(D, M) / np.sqrt(D)
    b1_init = np.zeros(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M)
    b2_init = np.zeros(K)
    

    thX = th.matrix('X')
    thT = th.matrix('Y')
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')
    cache_W1 = theano.shared(np.ones_like(W1_init), 'cache_W1')
    cache_b1 = theano.shared(np.ones_like(b1_init), 'cache_b1')
    cache_W2 = theano.shared(np.ones_like(W2_init), 'cache_W2')
    cache_b2 = theano.shared(np.ones_like(b2_init), 'cache_b2')


    # forward model
    thZ = th.nnet.relu(thX.dot(W1) + b1)
    #thZ[thZ < 0] = 0
    # Z = np.tanh(X.dot(self.W1) + self.b1)
    thY = th.nnet.softmax(thZ.dot(W2) + b2)

    # Cost
    cost = -(thT*th.log(thY)).sum() + reg*((W1*W1).sum() + (b1*b1).sum() + (W2*W2).sum() + (b2*b2).sum())

    # Prediction
    prediction = th.argmax(thY, axis=1)

    # Updates
    dJ_dW1 = th.grad(cost, W1)
    dJ_db1 = th.grad(cost, b1)
    dJ_dW2 = th.grad(cost, W2)
    dJ_db2 = th.grad(cost, b2)

    # RMSprop: the caches are shared variables, so they must be updated through
    # the function's updates list rather than by reassigning the names here
    new_cache_W1 = decay_rate*cache_W1 + (1-decay_rate)*dJ_dW1*dJ_dW1
    new_cache_b1 = decay_rate*cache_b1 + (1-decay_rate)*dJ_db1*dJ_db1
    new_cache_W2 = decay_rate*cache_W2 + (1-decay_rate)*dJ_dW2*dJ_dW2
    new_cache_b2 = decay_rate*cache_b2 + (1-decay_rate)*dJ_db2*dJ_db2

    update_W1 = W1 - learning_rate*dJ_dW1/(th.sqrt(new_cache_W1)+eps)
    update_b1 = b1 - learning_rate*dJ_db1/(th.sqrt(new_cache_b1)+eps)
    update_W2 = W2 - learning_rate*dJ_dW2/(th.sqrt(new_cache_W2)+eps)
    update_b2 = b2 - learning_rate*dJ_db2/(th.sqrt(new_cache_b2)+eps)

    train = theano.function(inputs=[thX, thT],
                            updates=[(cache_W1, new_cache_W1), (cache_b1, new_cache_b1),
                                     (cache_W2, new_cache_W2), (cache_b2, new_cache_b2),
                                     (W1, update_W1), (b1, update_b1),
                                     (W2, update_W2), (b2, update_b2)])

    get_prediction = theano.function(inputs=[thX, thT], outputs=[cost, prediction])
    
    costs = []
    for epoch in range(epochs):
        X_shuffled, T_shuffled = shuffle(X_train, T_train)
        for batch in range(n_batches):
            # Get the batch
            X_batch = X_shuffled[batch*batch_size:(batch+1)*batch_size,:]
            Y_batch = T_shuffled[batch*batch_size:(batch+1)*batch_size,:]

            train(X_batch, Y_batch)
            
            if batch % print_time == 0:
                c, pred = get_prediction(X_test, T_test)
                err = error_rate(Y_test, pred)
                print("epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]" %(epoch, batch, c, err))
                costs.append(c)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()
Example #18
    def fit(self,
            Xin,
            Yin,
            learning_rate=10e-7,
            reg=10e-8,
            epochs=10000,
            show_figure=False):
        Nvalid = 500
        N, D = Xin.shape
        K = len(np.unique(Yin))
        Xin, Yin = shuffle(Xin, Yin)

        Xtrain, Ytrain = Xin[:-Nvalid, :], Yin[:-Nvalid]
        Xvalid, Yvalid = Xin[-Nvalid:, :], Yin[-Nvalid:]
        Ttrain, Tvalid = y2indicator(Ytrain, K), y2indicator(Yvalid, K)

        #Initialize Wi,bi
        W1_init = np.random.randn(D, self.M) / np.sqrt(D + self.M)
        b1_init = np.random.randn(self.M) / np.sqrt(self.M)
        W2_init = np.random.randn(self.M, K) / np.sqrt(K + self.M)
        b2_init = np.random.randn(K) / np.sqrt(K)

        #Theano shared
        W1 = theano.shared(W1_init, 'W1')
        b1 = theano.shared(b1_init, 'b1')
        W2 = theano.shared(W2_init, 'W2')
        b2 = theano.shared(b2_init, 'b2')

        #Theano variables
        thX = T.matrix('X')
        thT = T.matrix('T')
        thZ = sigmoid(thX.dot(W1) + b1)
        thY = T.nnet.softmax(thZ.dot(W2) + b2)

        #Theano updatebles
        costs = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
        prediction = T.argmax(thY, axis=1)

        W1_update = W1 - learning_rate * (T.grad(costs, W1) + reg * W1)
        b1_update = b1 - learning_rate * (T.grad(costs, b1) + reg * b1)

        W2_update = W2 - learning_rate * (T.grad(costs, W2) + reg * W2)
        b2_update = b2 - learning_rate * (T.grad(costs, b2) + reg * b2)

        self._train = theano.function(
            inputs=[thX, thT],
            updates=[(W1, W1_update), (b1, b1_update), (W2, W2_update),
                     (b2, b2_update)],
        )

        self._predict = theano.function(
            inputs=[thX, thT],
            outputs=[costs, prediction],
        )

        train_costs = []
        train_errors = []
        valid_costs = []
        valid_errors = []

        for i in xrange(epochs):
            self._train(Xtrain, Ttrain)
            if i % 10 == 0:
                ctrain, pYtrain = self._predict(Xtrain, Ttrain)
                err = error_rate(Ytrain, pYtrain)
                train_costs.append(ctrain)
                train_errors.append(err)

                cvalid, pYvalid = self._predict(Xvalid, Tvalid)
                err = error_rate(Yvalid, pYvalid)
                valid_costs.append(cvalid)
                valid_errors.append(err)
                print "i=%d\tc=%.3f\terr==%.3f\t" % (i, cvalid, err)

        cvalid, pYvalid = self._predict(Xvalid, Tvalid)
        err = error_rate(Yvalid, pYvalid)
        valid_costs.append(cvalid)
        valid_errors.append(err)

        print "i=%d\tc=%.3f\terr==%.3f\t" % (epochs, cvalid, err)

        print "Final train classification rate", classification_rate(
            Ytrain, pYtrain)
        print "Final valid classification rate", classification_rate(
            Yalid, pYalid)

        plt.title('Multi layer perceptron: Costs')
        plt.xlabel('iterations')
        plt.ylabel('costs')
        legend1, = plt.plot(train_costs, label='train cost')
        legend2, = plt.plot(valid_costs, label='valid cost')
        plt.legend([
            legend1,
            legend2,
        ])
        plt.show()

        plt.title('Multi layer perceptron: Error rates')
        plt.xlabel('iterations')
        plt.ylabel('error rates')
        legend1, = plt.plot(train_errors, label='train error')
        legend2, = plt.plot(valid_errors, label='valid error')
        plt.legend([
            legend1,
            legend2,
        ])
        plt.show()
Example #19
    def fit(self,
            X,
            Y,
            learning_rate=1e-8,
            reg=1e-12,
            epochs=10000,
            n_batches=10,
            show_fig=False):

        D = X.shape[1]  # number of features
        K = len(set(Y))  # number of classes

        X, Y = shuffle(X, Y)
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        T_valid = one_hot_encoder(Y_valid)
        X, Y = X[:-1000], Y[:-1000]

        batch_size = X.shape[0] // n_batches

        T = one_hot_encoder(Y)

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        # 1st moment
        mW1 = 0
        mb1 = 0
        mW2 = 0
        mb2 = 0

        # 2nd moment
        vW1 = 0
        vb1 = 0
        vW2 = 0
        vb2 = 0

        # hyperparams
        beta1 = 0.9
        beta2 = 0.999
        eps = 1e-8

        costs = []
        t = 1
        for epoch in range(epochs):
            X_shuffled, T_shuffled = shuffle(X, T)
            for ibatch in range(n_batches):
                # Get the batch
                X_batch = X_shuffled[ibatch * batch_size:(ibatch + 1) *
                                     batch_size, :]
                Y_batch = T_shuffled[ibatch * batch_size:(ibatch + 1) *
                                     batch_size, :]

                Y_hat, Z = self.forward(X_batch)

                # Weight updates ----------------------
                Y_hat_T = Y_hat - Y_batch
                dJ_dW2 = Z.T.dot(Y_hat_T) + reg * self.W2
                dJ_db2 = Y_hat_T.sum(axis=0) + reg * self.b2

                val = (Y_hat - Y_batch).dot(self.W2.T) * (Z > 0)  # Relu
                #val = Y_hat_T.dot(self.W2.T) * (1-Z*Z) # tanh
                dJ_dW1 = X_batch.T.dot(val) + reg * self.W1
                dJ_db1 = val.sum(axis=0) + reg * self.b1

                # Mean
                mW2 = beta1 * mW2 + (1 - beta1) * dJ_dW2
                mb2 = beta1 * mb2 + (1 - beta1) * dJ_db2
                mW1 = beta1 * mW1 + (1 - beta1) * dJ_dW1
                mb1 = beta1 * mb1 + (1 - beta1) * dJ_db1

                # Velocity terms
                vW2 = beta2 * vW2 + (1 - beta2) * dJ_dW2 * dJ_dW2
                vb2 = beta2 * vb2 + (1 - beta2) * dJ_db2 * dJ_db2
                vW1 = beta2 * vW1 + (1 - beta2) * dJ_dW1 * dJ_dW1
                vb1 = beta2 * vb1 + (1 - beta2) * dJ_db1 * dJ_db1

                correction1 = 1 - beta1**t
                hat_mW2 = mW2 / correction1
                hat_mb2 = mb2 / correction1
                hat_mW1 = mW1 / correction1
                hat_mb1 = mb1 / correction1

                correction2 = 1 - beta2**t
                hat_vW2 = vW2 / correction2
                hat_vb2 = vb2 / correction2
                hat_vW1 = vW1 / correction2
                hat_vb1 = vb1 / correction2

                self.W2 -= learning_rate * hat_mW2 / (np.sqrt(hat_vW2) + eps)
                self.b2 -= learning_rate * hat_mb2 / (np.sqrt(hat_vb2) + eps)
                self.W1 -= learning_rate * hat_mW1 / (np.sqrt(hat_vW1) + eps)
                self.b1 -= learning_rate * hat_mb1 / (np.sqrt(hat_vb1) + eps)
                # -------------------------------------

                Y_hat_valid, _ = self.forward(X_valid)
                c = cross_entropy(T_valid, Y_hat_valid)
                costs.append(c)

                if ibatch % (n_batches) == 0:
                    e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
                    print("epoch:", epoch, " cost:", c, " error:", e)

                t += 1

        if show_fig:
            plt.plot(costs)
            plt.title('Validation cost')
            plt.show()

        print("Final train classification_rate:", self.score(X, Y))
Example #20
    def fit(self,
            X,
            Y,
            activation=tf.nn.relu,
            learning_rate=1e-8,
            reg=1e-12,
            epochs=10000,
            n_batches=10,
            decay_rate=0.9,
            show_fig=False):
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)

        X, Y = shuffle(X, Y)
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        T_valid = one_hot_encoder(Y_valid)
        X, Y = X[:-1000], Y[:-1000]
        T = one_hot_encoder(Y)

        eps = 1e-10
        D = X.shape[1]  # number of features
        K = len(set(Y))  # number of classes
        batch_size = X.shape[0] // n_batches
        print_time = n_batches // 1

        M1 = D
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, activation_fn=activation)
            self.layers.append(h)
            M1 = M2

        # the final layer produces logits; softmax is applied inside the
        # softmax_cross_entropy_with_logits_v2 cost below
        h = HiddenLayer(M1, K, activation_fn=tf.identity)
        self.layers.append(h)

        for layer in self.layers:
            self.params += layer.params

        tfX = tf.placeholder(tf.float32, shape=(None, D), name='tfX')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='tfT')
        tfY = self.forward(tfX)

        predict_op = tf.argmax(tfY, axis=1)

        cost = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=tfY, labels=tfT))
        train_op = tf.train.RMSPropOptimizer(learning_rate,
                                             decay=0.99,
                                             momentum=0.9).minimize(cost)

        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for epoch in range(epochs):
                X_shuffled, T_shuffled = shuffle(X, T)
                for batch in range(n_batches):
                    # Get the batch
                    X_batch = X_shuffled[batch * batch_size:(batch + 1) *
                                         batch_size, :]
                    Y_batch = T_shuffled[batch * batch_size:(batch + 1) *
                                         batch_size, :]

                    session.run(train_op,
                                feed_dict={
                                    tfX: X_batch,
                                    tfT: Y_batch
                                })

                    if batch % print_time == 0:
                        test_cost = session.run(cost,
                                                feed_dict={
                                                    tfX: X_valid,
                                                    tfT: T_valid
                                                })
                        prediction = session.run(predict_op,
                                                 feed_dict={tfX: X_valid})
                        err = error_rate(Y_valid, prediction)
                        # print(prediction.shape)
                        print(
                            "epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]"
                            % (epoch, batch, test_cost, err))
                        costs.append(test_cost)

        plt.plot(costs)
        plt.title('Validation cost')
        plt.show()
	def fit(self, X, Y, learning_rate=10e-4, reg=10e-8, epochs=10000, show_figure=False):
		Nvalid = 1000
		N, D  = X.shape 
		K =  len(np.unique(Y))
		X, Y  = shuffle(X, Y)

		Xvalid, Yvalid = X[-Nvalid:,:],  Y[-Nvalid:,]
		X, Y = X[:-Nvalid,:], Y[:-Nvalid,]


		#Initialize Hidden layers 
		self.hidden_layers = [] 
		M1 = D 		
		for count, M2 in enumerate(self.hidden_layer_sizes):
			hidden_layer =  HiddenLayer(M1, M2, count)
			self.hidden_layers.append(hidden_layer)
			M1=M2

		#final layer
		W, b = init_weight_and_bias(M1, K)  
		self.W = theano.shared(W, 'W_logreg')
		self.b = theano.shared(b, 'b_logreg')

		#collect parameters for later use
		self.params = []
		for h in self.hidden_layers: 
			self.params += h.params
		self.params += [self.W, self.b]
		
		
		#Theano variables 
		thX = T.fmatrix('X')
		thY = T.ivector('Y')		
		pY =self.th_forward(thX)

		costs = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY]))
		prediction = self.th_predict(thX)


		#actual prediction functions and variabels
		self.predict_op=theano.function(inputs=[thX], outputs=prediction)
		cost_predict_op=theano.function(inputs=[thX, thY], outputs=[costs, prediction])

		#Streamline initializations
		updates = [
			(p, p - learning_rate*(T.grad(costs,p) + reg*p)) for p  in self.params
		]
		
		train_op = theano.function(
			inputs=[thX, thY],
			updates=updates,
			allow_input_downcast=True,
		)

		batch_sz=200
		n_batches = N / batch_sz
		costs = [] 
		for i in xrange(epochs):
			X,Y = shuffle(X,Y)
			for j in range(n_batches):
				Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz),:]
				Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

				train_op(Xbatch.astype(np.float32), Ybatch.astype(np.int32))
				
				if j % 100 == 0:
					c, p = cost_predict_op(Xvalid.astype(np.float32), Yvalid.astype(np.int32))
					costs.append(c)
					err = error_rate(Yvalid, p)

					print "i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f\t" % (i,j,n_batches,c,err)
			print "i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f\t" % (i,batch_sz,n_batches,c,err)		
		
		print "Final error rate", err 
			
		if show_figure:
			plt.plot(costs)
			plt.show() 
Example #22
def main():
    #file_loc = '/media/avemuri/DEV/Data/deeplearning/mnist/train.csv'
    file_loc = 'D:/dev/data/mnist/train.csv'
    X_train, Y_train, X_test, Y_test = get_data(file_name=file_loc,
                                                split_train_test=True)

    pca = PCA(n_components=400)
    pca.fit(X_train)
    X_train = pca.transform(X_train)
    #Y = Y_train
    T_train = one_hot_encoder(Y_train)
    X_test = pca.transform(X_test)
    T_test = one_hot_encoder(Y_test)

    #######################################################

    D = X_train.shape[1]  # number of features
    K = len(set(Y_train))  # number of classes
    M = 300
    reg = 0.00001
    batch_size = 500
    n_batches = X_train.shape[0] // batch_size
    learning_rate = 0.00004
    epochs = 10

    W1_init = np.random.randn(D, M) / np.sqrt(D)
    b1_init = np.zeros(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M)
    b2_init = np.zeros(K)

    # Define all variables
    X = tf.placeholder(tf.float32, shape=(None, D), name='X')
    T = tf.placeholder(tf.float32, shape=(None, K), name='Y')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))

    # Model definition
    Z = tf.nn.relu(tf.matmul(X, W1) + b1)
    Y_hat = tf.matmul(Z, W2) + b2

    # Cost
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=Y_hat, labels=T))

    # Optimization
    train = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                      decay=0.99,
                                      momentum=0.9).minimize(cost)

    # Predictions
    predic_op = tf.argmax(Y_hat, axis=1)

    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for epoch in range(epochs):
            X_shuffled, T_shuffled = shuffle(X_train, T_train)
            for batch in range(n_batches):
                # Get the batch
                X_batch = X_shuffled[batch * batch_size:(batch + 1) *
                                     batch_size, :]
                Y_batch = T_shuffled[batch * batch_size:(batch + 1) *
                                     batch_size, :]

                session.run(train, feed_dict={X: X_batch, T: Y_batch})

                if batch % 10 == 0:
                    c = session.run(cost, feed_dict={X: X_test, T: T_test})
                    Y_test_predictions = session.run(predic_op,
                                                     feed_dict={X: X_test})
                    err = error_rate(Y_test, Y_test_predictions)
                    print(
                        "epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]" %
                        (epoch, batch, c, err))
                    costs.append(c)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()
Example #23
 def score(self, X, Y):
     prediction = self.predict(X)
     return np.round(1 - error_rate(Y, prediction), 4)
Example #24
# =======================================================

MAX_DEPTH = 5

rf_source = skl_ens.RandomForestClassifier(n_estimators=NB_TREE,
                                           max_depth=MAX_DEPTH,
                                           oob_score=True)
rf_target = skl_ens.RandomForestClassifier(n_estimators=NB_TREE,
                                           max_depth=MAX_DEPTH,
                                           oob_score=True,
                                           class_weight=None)

rf_source.fit(X_source, y_source)
rf_source_score_target = rf_source.score(X_target_095, y_target_095)
print("Error rate de rf_source sur data target : ",
      error_rate(rf_source_score_target))

rf_target.fit(X_target_005, y_target_005)
rf_target_score_target = rf_target.score(X_target_095, y_target_095)
print("Error rate de rf_target(5%) sur data target(95%) : ",
      error_rate(rf_target_score_target))

#for i in range(SIZE_TEST):
#    print('Test no.', i)
#
#    rf_source.fit(X_source, y_source)
#    rf_source_score_target = rf_source[i].score(X_target_095, y_target_095)
#    print("Error rate of rf_source on target data: ",
#          error_rate(rf_source_score_target))
#
#
 def score(self, X, Y):
     prediction = self.predict(X)
     return 1 - error_rate(Y, prediction)
Example #26
 def _output_minibatch_stats(self, sess, summary_writer, step, batch_x, batch_y):
     # Calculate batch loss and accuracy
     summary_str, loss, acc, predictions = sess.run([self.summary_op,
                                                     self.loss,
                                                     self.accuracy,
                                                     self.predicter],
                                                    feed_dict={self.x: batch_x,
                                                               self.y: batch_y})
     summary_writer.add_summary(summary_str, step)
     summary_writer.flush()
     logging.info(
         "Iter {:}, Minibatch Loss= {:.4f}, Training Accuracy= {:.4f}, Minibatch error= {:.1f}%".format(step, loss,
                                                                                                        acc,
                                                                                                        utils.error_rate(
                                                                                                            predictions,
                                                                                                            batch_y)))
Example #27
def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)

    # Running variables
    learning_rate = 5e-4
    max_iterations = 10000

    # Define dimensions
    N, D = X.shape
    M = 5
    K = len(np.unique(Y))

    Ntrain = N - 100
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)

    Ntest = 100
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    W1_init = np.random.randn(D, M) / np.sqrt(M + D)
    b1_init = np.random.randn(M) / np.sqrt(M)

    W2_init = np.random.randn(M, K) / np.sqrt(M + K)
    b2_init = np.random.randn(K) / np.sqrt(K)

    #Define theano shared
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    #Define constant tensor matrices
    thX = T.matrix('X')
    thT = T.matrix('T')

    #Define cost
    thZ = sigmoid(thX.dot(W1) + b1)
    thY = softmax(thZ.dot(W2) + b2)

    cost = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
    prediction = T.argmax(thY, axis=1)

    #Define updates
    W1_update = W1 - learning_rate * T.grad(cost, W1)
    b1_update = b1 - learning_rate * T.grad(cost, b1)
    W2_update = W2 - learning_rate * T.grad(cost, W2)
    b2_update = b2 - learning_rate * T.grad(cost, b2)

    train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, W1_update), (b1, b1_update), (W2, W2_update),
                 (b2, b2_update)],
    )
    predict = theano.function(
        inputs=[thX, thT],
        outputs=[cost, prediction],
    )

    LL = []
    train_errors = []
    test_errors = []
    train_costs = []
    test_costs = []
    for i in xrange(max_iterations):
        train(Xtrain, Ytrain_ind)
        if i % 10 == 0:
            c, pYtrain = predict(Xtrain, Ytrain_ind)
            err = error_rate(Ytrain, pYtrain)
            train_costs.append(c)
            train_errors.append(err)

            c, pYtest = predict(Xtest, Ytest_ind)
            err = error_rate(Ytest, pYtest)
            test_costs.append(c)
            test_errors.append(err)
            print "i=%d\tc=%.3f\terr==%.3f\t" % (i, c, err)

    print "i=%d\tc=%.3f\terr==%.3f\t" % (max_iterations, c, err)

    print "Final train classification rate", classification_rate(
        Ytrain, pYtrain)
    print "Final test  classification rate", classification_rate(Ytest, pYtest)

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([
        legend1,
        legend2,
    ])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(test_errors, label='test error')
    plt.legend([
        legend1,
        legend2,
    ])
    plt.show()
Example #28
def main():
    #file_loc = '/media/avemuri/DEV/Data/deeplearning/mnist/train.csv'
    file_loc = 'D:/dev/data/mnist/train.csv'
    X_train, Y_train, X_test, Y_test = get_data(file_name=file_loc,
                                                split_train_test=True)

    pca = PCA(n_components=400)
    pca.fit(X_train)
    X = pca.transform(X_train)
    Y = Y_train
    T = one_hot_encoder(Y)

    X_test = pca.transform(X_test)
    T_test = one_hot_encoder(Y_test)

    #######################################################

    D = X.shape[1]  # number of features
    K = len(set(Y))  # number of classes
    M = 300
    reg = 0.00001
    batch_size = 500
    n_batches = X.shape[0] // batch_size
    learning_rate = 0.0004
    epochs = 1000
    print_time = epochs // 10

    W_init = np.random.randn(D, K) / np.sqrt(D)
    b_init = np.zeros(K)

    thX = Th.matrix('X')
    thT = Th.matrix('T')
    W = theano.shared(W_init, 'W')
    b = theano.shared(b_init, 'b')

    # Forward model
    thY = Th.nnet.softmax(thX.dot(W) + b)

    # Cost
    cost = -(thT * Th.log(thY)).sum() + reg * ((W * W).sum() + (b * b).sum())

    # Predictions
    prediction = Th.argmax(thY, axis=1)

    update_W = W - learning_rate * Th.grad(cost, W)
    update_b = b - learning_rate * Th.grad(cost, b)

    train = theano.function(inputs=[thX, thT],
                            updates=[(W, update_W), (b, update_b)])

    get_prediction = theano.function(inputs=[thX, thT],
                                     outputs=[cost, prediction])

    costs = []
    for epoch in range(epochs):
        X_shuffled, T_shuffled = shuffle(X, T)
        for batch in range(n_batches):
            # Get the batch
            X_batch = X_shuffled[batch * batch_size:(batch + 1) *
                                 batch_size, :]
            Y_batch = T_shuffled[batch * batch_size:(batch + 1) *
                                 batch_size, :]

            train(X_batch, Y_batch)

            if batch % print_time == 0:
                test_cost, prediction = get_prediction(X_test, T_test)
                err = error_rate(Y_test, prediction)
                print("epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]" %
                      (epoch, batch, test_cost, err))
                costs.append(test_cost)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()
Example #29
    def fit(self,
            X,
            Y,
            activation=th.nnet.relu,
            learning_rate=1e-8,
            reg=1e-12,
            epochs=10000,
            n_batches=10,
            decay_rate=0.9,
            show_fig=False):
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)

        X, Y = shuffle(X, Y)
        X_valid, Y_valid = X[-1000:], Y[-1000:]
        T_valid = one_hot_encoder(Y_valid)
        X, Y = X[:-1000], Y[:-1000]
        T = one_hot_encoder(Y)

        self.rng = theano.tensor.shared_randomstreams.RandomStreams()

        eps = 1e-10
        D = X.shape[1]  # number of features
        K = len(set(Y))  # number of classes
        batch_size = X.shape[0] // n_batches
        print_time = n_batches // 1

        M1 = D
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, activation_fn=activation)
            self.layers.append(h)
            M1 = M2

        # the final layer
        h = HiddenLayer(M1, K, activation_fn=th.nnet.softmax)
        self.layers.append(h)

        for layer in self.layers:
            self.params += layer.params

        dparams = [
            theano.shared(np.zeros_like(p.get_value())) for p in self.params
        ]
        cache = [
            theano.shared(np.zeros_like(p.get_value())) for p in self.params
        ]

        thX = th.matrix('X')
        thT = th.matrix('T')
        thY_train = self.forward_train(thX)

        # Cost
        regularization_cost = reg * th.mean([(p * p).sum()
                                             for p in self.params])
        #cost = -th.mean(th.log(thY[th.arange(thT.shape[0]), thT])) #+ regularization_cost
        cost_train = -th.mean(thT * th.log(thY_train)) + regularization_cost

        # Gradient
        grads = th.grad(cost_train, self.params)

        update_params = [(p, p - learning_rate *
                          (decay_rate * v + (1 - decay_rate) * g + reg * p))
                         for g, v, p in zip(grads, dparams, self.params)]
        update_velocity = [(v, decay_rate * v + (1 - decay_rate) * g)
                           for g, v in zip(grads, dparams)]
        # updates = [(p, p - learning_rate*g) for g, p in zip(grads, self.params)]
        updates = update_params + update_velocity

        train_op = theano.function(inputs=[thX, thT], updates=updates)

        thY_predict = self.forward_predict(thX)
        cost_predict = -th.mean(
            thT * th.log(thY_predict)) + regularization_cost

        # Predictions
        prediction = th.argmax(thY_predict, axis=1)

        cost_predict_op = theano.function(inputs=[thX, thT],
                                          outputs=[cost_predict, prediction])

        costs = []
        for epoch in range(epochs):
            X_shuffled, T_shuffled = shuffle(X, T)
            for batch in range(n_batches):
                # Get the batch
                X_batch = X_shuffled[batch * batch_size:(batch + 1) *
                                     batch_size, :]
                Y_batch = T_shuffled[batch * batch_size:(batch + 1) *
                                     batch_size, :]

                train_op(X_batch, Y_batch)

                if batch % print_time == 0:
                    test_cost, prediction = cost_predict_op(X_valid, T_valid)
                    err = error_rate(Y_valid, prediction)
                    # print(prediction.shape)
                    print(
                        "epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]" %
                        (epoch, batch, test_cost, err))
                    costs.append(test_cost)

        plt.plot(costs)
        plt.title('Validation cost')
        plt.show()
	def fit(self, X, Y, learning_rate=10e-5, epochs=200, reg=10e-8, batch_sz=200, show_fig=False, activation=tf.tanh):
		X, Y = shuffle(X, Y)
		K = len(np.unique(Y))  

		T = y2indicator(Y, K).astype(np.float32)
		Xvalid, Yvalid, Tvalid = X[-1000:,], Y[-1000:], T[-1000:,:] 
		Xtrain, Ytrain, Ttrain = X[:-1000,:], Y[:-1000],T[:-1000,:] 

		N, D = Xtrain.shape
		

		#Varianel initialization
		W1, b1 = init_weight_and_bias(D,self.M)
		W2, b2 = init_weight_and_bias(self.M,K)



		self.W1 = tf.Variable(W1.astype(np.float32), 'W1')
		self.b1 = tf.Variable(b1.astype(np.float32), 'b1')
		self.W2 = tf.Variable(W2.astype(np.float32), 'W2')
		self.b2 = tf.Variable(b2.astype(np.float32), 'b2')
		self.params = [self.W1, self.b1, self.W2, self.b2] 
		# Define placeholders
		X = tf.placeholder(tf.float32,shape=(None,D),name='X')
		T = tf.placeholder(tf.float32,shape=(None,K),name='Y')

		
		

		Z = activation(tf.matmul(X, self.W1) + self.b1) 		
		Yish = tf.matmul(Z, self.W2) + self.b2 

		rcost  = reg*tf.reduce_sum([tf.nn.l2_loss(p) for p in self.params])
		cost   = tf.reduce_sum( tf.nn.softmax_cross_entropy_with_logits(labels=T, logits=Yish) ) + rcost 
		
		
		train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
		self.predict_op = tf.argmax(Yish, 1)

		n_batches = N // batch_sz 
		costs=[] 
		errors=[] 
		init = tf.global_variables_initializer()
		with tf.Session() as session:
			session.run(init)

			for i in xrange(epochs):
				Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
				for j in xrange(n_batches): 
					Xbatch = Xtrain[j*batch_sz:(j+1)*batch_sz,:]
					Ybatch = Ytrain[j*batch_sz:(j+1)*batch_sz]
					Tbatch = Ttrain[j*batch_sz:(j+1)*batch_sz,:]

					session.run(train_op,
						feed_dict={
							X: Xbatch,
							T: Tbatch 
					})

					if j % 10 == 0: 
						c = session.run(cost, feed_dict={X:Xvalid, T:Tvalid} )
						pYvalid  = session.run( self.predict_op, feed_dict={X: Xvalid} )
						err = error_rate(Yvalid, pYvalid)
						print "i:%d\tj:%d\tc:%.3f\terr:%.3f\t" % (i,j,c,err)	
						costs.append(c)
						errors.append(err)

		if show_fig:
			plt.title('costs')
			plt.plot(costs)
			plt.show()

			plt.title('error rate')
			plt.plot(errors)
			plt.show()