def fit(self,
            Xtrain,
            ytrain,
            epoch=1000,
            learning_rate=0.01,
            L2_regulation=0.0):
        '''
        Build the model with full-batch gradient descent
        :param Xtrain: a set of observations
        :param ytrain: labels
        :param epoch: number of passes over the training data
        :param learning_rate: step size for each weight update
        :param L2_regulation: L2 regularization strength
        :return: lists of costs, iteration indices, and accuracy scores
        '''
        K = np.amax(ytrain) + 1  # get the number of classes
        Y = utils.convert2indicator(ytrain)
        N, D = Xtrain.shape
        self.W1, self.b1, self.W2, self.b2 = self.initializeWeights(
            K, D, self.M)

        l_cost = list()
        l_iterations = list()
        l_score = list()

        for i in range(0, epoch):
            print('Epoch ' + str(i))

            # compute score
            Yhat, Z = self.predict(Xtrain)
            yhat = np.argmax(Yhat, axis=1)
            yindex = np.argmax(Y, axis=1)
            score = np.mean(yhat == yindex)
            l_score.append(score)
            print('Score: ' + str(score))

            cost = self.cost(Yhat, Y)
            l_cost.append(cost)
            l_iterations.append(i)
            print('Cost: ' + str(cost))

            # full gradient descent
            # compute the gradient over the whole data
            startRange = 0
            endRange = N

            gradient_W2, gradient_b2 = self.updateW2andb2(
                self.W2, self.b2, Y, Yhat, Z, N, K, self.M, startRange,
                endRange)
            gradient_W1, gradient_b1 = self.updateW1andb1(
                self.W1, self.W2, self.b1, Xtrain, D, Y, Yhat, Z, N, K, self.M,
                startRange, endRange)

            self.W1 += learning_rate * (gradient_W1 + L2_regulation * self.W1)
            self.b1 += learning_rate * (gradient_b1 + L2_regulation * self.b1)
            self.W2 += learning_rate * (gradient_W2 + L2_regulation * self.W2)
            self.b2 += learning_rate * (gradient_b2 + L2_regulation * self.b2)
            print('\n')

        return l_cost, l_iterations, l_score
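Every example here leans on utils.convert2indicator, which is not shown. As a point of reference, a minimal sketch of what it presumably does, assuming integer class labels 0..K-1 (the real helper may differ):

import numpy as np

def convert2indicator(y):
    # turn integer labels into an N x K one-hot indicator matrix
    N, K = len(y), np.amax(y) + 1
    Y = np.zeros((N, K))
    Y[np.arange(N), y] = 1  # row n has a 1 in column y[n]
    return Y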
Example #2
    def fit(self, Xtrain, ytrain, epoch=1000):
        K = np.amax(ytrain) + 1  # get the number of classes
        Y = utils.convert2indicator(ytrain)
        N, D = Xtrain.shape

        layers = self.initializeLayers(nFeatures=D,
                                       nClasses=K,
                                       hiddenLayersSize=self.hiddenLayersSize)

        # initialize placeholders
        tf_X = tf.placeholder(dtype=tf.float32, name='X', shape=(N, D))
        tf_Y = tf.placeholder(dtype=tf.float32, name='Y', shape=(N, K))

        # define symbolic formula
        tf_Yhat = self.forward(tf_X, layers)
        tf_cost = tf.math.reduce_sum(-1 * tf.multiply(
            tf_Y, tf.math.log(tf_Yhat + 1e-4)))  # cross-entropy; 1e-4 guards log(0)
        tf_train = tf.train.GradientDescentOptimizer(
            learning_rate=0.001).minimize(tf_cost)
        tf_yhat = tf.math.argmax(tf_Yhat, axis=1)

        scores = []
        iterations = []

        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            for i in range(epoch):
                print('iteration ' + str(i))
                session.run(tf_train, feed_dict={tf_X: Xtrain, tf_Y: Y})

                yhat = session.run(tf_yhat, feed_dict={tf_X: Xtrain, tf_Y: Y})
                score = np.mean(yhat == ytrain)
                print('score: ' + str(score))

                iterations.append(i)
                scores.append(score)

                print()

        self.plot(scores, iterations)
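self.forward and initializeLayers are not included in the snippet. A plausible sketch of the forward pass these TF1 graphs assume, using illustrative layer.W / layer.b variable names (the actual layer class may differ):

import tensorflow as tf

def forward(tf_X, layers):
    # hidden layers: affine transform followed by a sigmoid nonlinearity
    Z = tf_X
    for layer in layers[:-1]:
        Z = tf.nn.sigmoid(tf.matmul(Z, layer.W) + layer.b)
    # output layer: softmax over the K classes
    last = layers[-1]
    return tf.nn.softmax(tf.matmul(Z, last.W) + last.b)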
Example #3
    def fit(self, X, y, epoch=1000):
        K = np.amax(y) + 1
        Y = utils.convert2indicator(y)
        N, D = X.shape

        tf_X, tf_Y = self.initializePlaceholder()
        tf_W1, tf_b1, tf_W2, tf_b2 = self.initializeWeights(D, self.M, K)

        # define symbolic operations
        tf_Yhat = self.forward(tf_X, tf_W1, tf_b1, tf_W2, tf_b2)
        tf_cost = tf.math.reduce_sum(-1 * tf.multiply(
            tf_Y, tf.math.log(tf_Yhat + 1e-4)))  # cross-entropy; 1e-4 guards log(0)
        tf_train = tf.train.GradientDescentOptimizer(
            learning_rate=0.001).minimize(tf_cost)
        tf_yhat = tf.math.argmax(tf_Yhat, axis=1)

        scores = []
        iterations = []

        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            for i in range(epoch):
                print('iteration ' + str(i))
                session.run(tf_train, feed_dict={tf_X: X, tf_Y: Y})

                cost = session.run(tf_cost, feed_dict={tf_X: X, tf_Y: Y})
                print("Cost = " + str(cost))

                yhat = session.run(tf_yhat, feed_dict={tf_X: X, tf_Y: Y})
                score = np.mean(yhat == y)
                print('Score = ' + str(score))

                iterations.append(i)
                scores.append(score)

                print()

        self.plot(scores, iterations)
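The tf_cost above is the indicator cross-entropy: the sum over samples and classes of -Y * log(Yhat). For reference, the same quantity in NumPy, a plausible body for the self.cost helper used in Example #1 (the real implementation is not shown):

import numpy as np

def cost(Yhat, Y, eps=1e-12):
    # total cross-entropy between one-hot targets Y and predicted probabilities Yhat
    return -np.sum(Y * np.log(Yhat + eps))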
Example #4
    def fit(self,
            Xtrain,
            ytrain,
            learning_rate=0.001,
            epoch=20,
            batch_size=100):
        N = Xtrain.shape[0]
        self.layers = self.initializeLayers(self.D, self.K,
                                            self.hiddenLayersSize)

        # STEP 1: greedy layer-wise training of autoencoders
        input_autoencoder = Xtrain

        for layer in self.layers[:-1]:
            print('Pretraining layer = (' + str(layer.M1) + ', ' +
                  str(layer.M2) + ')')
            layer.fit(input_autoencoder)
            input_autoencoder = layer.Z

        # STEP 2
        print('Fit model')
        self.tf_X = tf.placeholder(dtype=tf.float64)
        tf_Y = tf.placeholder(dtype=tf.float64)

        Ytrain = utils.convert2indicator(ytrain)

        self.tf_Yhat = self.forward(self.tf_X)
        tf_cost = tf.math.reduce_sum(-tf_Y * tf.math.log(self.tf_Yhat))
        train_op = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(tf_cost)

        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())

        iteration = 0
        iterations = []
        costs = []

        nBatches = N // batch_size  # number of full mini-batches

        for i in range(epoch):
            for j in range(nBatches + 1):
                iterations.append(iteration)

                # mini-batch gradient descent
                trainingCost = 0
                if j == nBatches:
                    # remainder batch: the rows left over after the full batches
                    _, trainingCost = self.session.run(
                        (train_op, tf_cost),
                        feed_dict={
                            self.tf_X: Xtrain[j * batch_size:N],
                            tf_Y: Ytrain[j * batch_size:N]
                        })
                else:
                    _, trainingCost = self.session.run(
                        (train_op, tf_cost),
                        feed_dict={
                            self.tf_X:
                            Xtrain[j * batch_size:(j + 1) * batch_size],
                            tf_Y:
                            Ytrain[j * batch_size:(j + 1) * batch_size]
                        })

                # just for testing
                costs.append(trainingCost)

                print("Training. Epoch " + str(i) + "/ Iteration " +
                      str(iteration) + "/ Training error = " +
                      str(trainingCost / len(Xtrain)))

                iteration += 1
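The mini-batch slicing above is easy to get wrong. As a standalone sketch, the index bookkeeping the loop relies on looks as follows (N and batch_size are illustrative values):

# N samples tile into nBatches full mini-batches plus one remainder batch
N, batch_size = 1050, 100
nBatches = N // batch_size  # 10 full batches here
for j in range(nBatches + 1):
    start = j * batch_size
    end = N if j == nBatches else (j + 1) * batch_size
    # batch j covers rows [start, end); the final batch holds the
    # N % batch_size leftover rows (and is empty when batch_size divides N)
    print('batch', j, 'rows', start, 'to', end)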
    def fit(self, Xtrain, ytrain, strategy):
        '''
        Build the model with stochastic gradient descent
        :param Xtrain: a set of observations
        :param ytrain: labels
        :param strategy: dict with 'name', 'epoch', 'learning_rate' and
            'L2_regulation', plus schedule-specific keys such as 'step',
            'factor', 'epsilon', 'decay_rate' and 'mu'
        :return: lists of costs, iteration indices, and accuracy scores
        '''
        epoch = strategy['epoch']
        learning_rate = strategy['learning_rate']
        L2_regulation = strategy['L2_regulation']

        Y = utils.convert2indicator(ytrain)
        K = np.amax(ytrain) + 1  # get number of classes
        N, D = Xtrain.shape
        self.W1, self.b1, self.W2, self.b2 = self.initializeWeights(
            K, D, self.M)

        # for logging
        l_cost = list()
        l_iterations = list()
        l_score = list()

        iteration = -1

        self.cache_W2 = 1  # adagrad, rmsprop
        self.cache_b2 = 1  # adagrad, rmsprop
        self.cache_W1 = 1  # adagrad, rmsprop
        self.cache_b1 = 1  # adagrad, rmsprop

        self.v_W1 = 0  # momentum
        self.v_b1 = 0  # momentum
        self.v_W2 = 0  # momentum
        self.v_b2 = 0  # momentum

        for i in range(0, epoch):

            for j in range(0, N):
                # stochastic gradient descent: within each epoch, update the
                # weights once per training sample
                print('Epoch ' + str(i))
                iteration += 1
                print('Iteration ' + str(iteration))
                print('Learning rate: ' + str(learning_rate))

                # compute score
                Yhat, Z = self.predict(Xtrain)
                yhat = np.argmax(Yhat, axis=1)
                yindex = np.argmax(Y, axis=1)
                score = np.mean(yhat == yindex)
                l_score.append(score)
                print('Score: ' + str(score))

                cost = self.cost(Yhat, Y)
                l_cost.append(cost)
                l_iterations.append(iteration)
                print('Cost: ' + str(cost))

                # compute the gradient at a single observation
                startRange = j
                endRange = j + 1
                print('Choose observation ' + str(startRange))
                gradient_W2, gradient_b2 = self.updateW2andb2(
                    self.W2, self.b2, Y, Yhat, Z, N, K, self.M, startRange,
                    endRange)
                gradient_W1, gradient_b1 = self.updateW1andb1(
                    self.W1, self.W2, self.b1, Xtrain, D, Y, Yhat, Z, N, K,
                    self.M, startRange, endRange)
                print('Update weights')

                # apply the chosen learning-rate strategy
                if strategy['name'] == 'STEP_DECAY':
                    if iteration >= 1 and iteration % strategy['step'] == 0:
                        learning_rate = learning_rate / strategy['factor']

                    self.W1 += learning_rate * (gradient_W1 +
                                                L2_regulation * self.W1)
                    self.b1 += learning_rate * (gradient_b1 +
                                                L2_regulation * self.b1)
                    self.W2 += learning_rate * (gradient_W2 +
                                                L2_regulation * self.W2)
                    self.b2 += learning_rate * (gradient_b2 +
                                                L2_regulation * self.b2)

                elif strategy['name'] == 'ADAGRAD':
                    self.cache_b1 += gradient_b1 * gradient_b1
                    self.b1 += learning_rate * (
                        gradient_b1 + L2_regulation * self.b1) / (
                            np.sqrt(self.cache_b1) + strategy['epsilon'])

                    self.cache_W1 += gradient_W1 * gradient_W1
                    self.W1 += learning_rate * (
                        gradient_W1 + L2_regulation * self.W1) / (
                            np.sqrt(self.cache_W1) + strategy['epsilon'])

                    self.cache_b2 += gradient_b2 * gradient_b2
                    self.b2 += learning_rate * (
                        gradient_b2 + L2_regulation * self.b2) / (
                            np.sqrt(self.cache_b2) + strategy['epsilon'])

                    self.cache_W2 += gradient_W2 * gradient_W2
                    self.W2 += learning_rate * (
                        gradient_W2 + L2_regulation * self.W2) / (
                            np.sqrt(self.cache_W2) + strategy['epsilon'])

                elif strategy['name'] == 'CONSTANT':
                    self.W1 += learning_rate * (gradient_W1 +
                                                L2_regulation * self.W1)
                    self.b1 += learning_rate * (gradient_b1 +
                                                L2_regulation * self.b1)
                    self.W2 += learning_rate * (gradient_W2 +
                                                L2_regulation * self.W2)
                    self.b2 += learning_rate * (gradient_b2 +
                                                L2_regulation * self.b2)

                elif strategy['name'] == 'RMSPROP':
                    self.cache_b1 = strategy['decay_rate'] * self.cache_b1 + (
                        1 - strategy['decay_rate']) * gradient_b1 * gradient_b1
                    self.b1 += learning_rate * (
                        gradient_b1 + L2_regulation * self.b1) / (
                            np.sqrt(self.cache_b1) + strategy['epsilon'])

                    self.cache_W1 = strategy['decay_rate'] * self.cache_W1 + (
                        1 - strategy['decay_rate']) * gradient_W1 * gradient_W1
                    self.W1 += learning_rate * (
                        gradient_W1 + L2_regulation * self.W1) / (
                            np.sqrt(self.cache_W1) + strategy['epsilon'])

                    self.cache_b2 = strategy['decay_rate'] * self.cache_b2 + (
                        1 - strategy['decay_rate']) * gradient_b2 * gradient_b2
                    self.b2 += learning_rate * (
                        gradient_b2 + L2_regulation * self.b2) / (
                            np.sqrt(self.cache_b2) + strategy['epsilon'])

                    self.cache_W2 = strategy['decay_rate'] * self.cache_W2 + (
                        1 - strategy['decay_rate']) * gradient_W2 * gradient_W2
                    self.W2 += learning_rate * (
                        gradient_W2 + L2_regulation * self.W2) / (
                            np.sqrt(self.cache_W2) + strategy['epsilon'])

                elif strategy['name'] == 'MOMENTUM':
                    self.v_W1 = strategy['mu'] * self.v_W1 + learning_rate * (
                        gradient_W1 + L2_regulation * self.W1)
                    self.W1 += self.v_W1

                    self.v_b1 = strategy['mu'] * self.v_b1 + learning_rate * (
                        gradient_b1 + L2_regulation * self.b1)
                    self.b1 += self.v_b1

                    self.v_W2 = strategy['mu'] * self.v_W2 + learning_rate * (
                        gradient_W2 + L2_regulation * self.W2)
                    self.W2 += self.v_W2

                    self.v_b2 = strategy['mu'] * self.v_b2 + learning_rate * (
                        gradient_b2 + L2_regulation * self.b2)
                    self.b2 += self.v_b2

                print('\n')

        return l_cost, l_iterations, l_score
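The strategy branches above implement standard per-parameter update rules. A compact sketch of each rule acting on a single parameter w with gradient-direction g, following the same sign convention as the code above (names are illustrative, and the L2 term is dropped for brevity):

import numpy as np

def step(name, w, g, lr, state, cfg):
    if name == 'ADAGRAD':
        state['cache'] += g * g  # accumulate squared gradients
        return w + lr * g / (np.sqrt(state['cache']) + cfg['epsilon'])
    if name == 'RMSPROP':
        d = cfg['decay_rate']
        state['cache'] = d * state['cache'] + (1 - d) * g * g  # leaky average
        return w + lr * g / (np.sqrt(state['cache']) + cfg['epsilon'])
    if name == 'MOMENTUM':
        state['v'] = cfg['mu'] * state['v'] + lr * g  # velocity update
        return w + state['v']
    return w + lr * g  # CONSTANT (and STEP_DECAY, with lr rescaled outside)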
def main():
    Xtrain, ytrain = utils.readTrainingDigitRecognizer(
        '../data/digit-recognizer/train.csv')
    Ytrain = utils.convert2indicator(ytrain)
    build_model(Xtrain, Ytrain)
    def fit(self,
            Xtrain,
            ytrain,
            Xval,
            yval,
            epoch=20,
            learning_rate=0.001,
            batch_size=30):
        """
        train model
        :param Xtrain: observations' input
        :param ytrain: observations' label
        :param epoch: the number of epoch for training
        :return:
        """
        K = np.amax(ytrain) + 1  # get the number of classes
        Ytrain = utils.convert2indicator(ytrain)
        Yval = utils.convert2indicator(yval)
        N, D = Xtrain.shape

        layers = self.initializeLayers(nFeatures=D,
                                       nClasses=K,
                                       hiddenLayersSize=self.hiddenLayersSize)

        # initialize placeholders
        tf_X = tf.placeholder(dtype=tf.float32, name='X')
        tf_Y = tf.placeholder(dtype=tf.float32, name='Y')

        # define symbolic formula
        tf_Yhat_training = self.forward_train(
            tf_X, layers, self.pkeep)  # forward pass with dropout (training)
        tf_cost_training = tf.math.reduce_sum(-1 * tf.multiply(
            tf_Y, tf.math.log(tf_Yhat_training + 1e-4)))  # cross-entropy

        tf_train = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate).minimize(tf_cost_training)

        # dropout is disabled when evaluating
        tf_Yhat_testing = self.forward_test(
            tf_X, layers)  # forward pass without dropout (evaluation)
        tf_cost_testing = tf.math.reduce_sum(
            -1 * tf.multiply(
                tf_Y, tf.math.log(tf_Yhat_testing + 1e-4)))  # cross-entropy

        tf_yhat = tf.math.argmax(tf_Yhat_testing, axis=1)  # predict without dropout

        # just for plotting
        trainingErrors = []
        validationErrors = []
        trainingAccuracies = []
        validationAccuracies = []
        iterations = []

        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            iteration = 0
            nBatches = N // batch_size  # number of full mini-batches

            for i in range(epoch):

                for j in range(nBatches + 1):
                    print('iteration ' + str(iteration))
                    iterations.append(iteration)
                    iteration += 1

                    # mini-batch gradient descent
                    if j == nBatches:
                        # remainder batch: rows left over after the full batches
                        session.run(tf_train,
                                    feed_dict={
                                        tf_X: Xtrain[j * batch_size:N],
                                        tf_Y: Ytrain[j * batch_size:N]
                                    })
                    else:
                        session.run(tf_train,
                                    feed_dict={
                                        tf_X:
                                        Xtrain[j * batch_size:(j + 1) *
                                               batch_size],
                                        tf_Y:
                                        Ytrain[j * batch_size:(j + 1) *
                                               batch_size]
                                    })

                    yhat = session.run(tf_yhat,
                                       feed_dict={
                                           tf_X: Xtrain,
                                           tf_Y: Ytrain
                                       })
                    accuracy = np.mean(yhat == ytrain)
                    print("training accuracy: " + str(accuracy))
                    trainingAccuracies.append(accuracy)

                    yhat = session.run(tf_yhat,
                                       feed_dict={
                                           tf_X: Xval,
                                           tf_Y: Yval
                                       })
                    accuracy = np.mean(yhat == yval)
                    print("validation accuracy: " + str(accuracy))
                    validationAccuracies.append(accuracy)

                    trainingError = session.run(tf_cost_testing,
                                                feed_dict={
                                                    tf_X: Xtrain,
                                                    tf_Y: Ytrain
                                                }) / len(Xtrain)
                    print('training error: ' + str(trainingError))
                    trainingErrors.append(trainingError)

                    validationError = session.run(tf_cost_testing,
                                                  feed_dict={
                                                      tf_X: Xval,
                                                      tf_Y: Yval
                                                  }) / len(Xval)
                    print('validation error: ' + str(validationError))
                    validationErrors.append(validationError)

                    print()

        self.plotError(trainingErrors, validationErrors, iterations)
        self.plotAccuracy(trainingAccuracies, validationAccuracies, iterations)
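forward_train and forward_test are not included in the snippet. A plausible sketch of the pair, with illustrative layer.W / layer.b names: training applies dropout with keep probability pkeep, evaluation runs the full network (TF1 API):

import tensorflow as tf

def forward_train(tf_X, layers, pkeep):
    Z = tf_X
    for layer in layers[:-1]:
        Z = tf.nn.sigmoid(tf.matmul(Z, layer.W) + layer.b)
        Z = tf.nn.dropout(Z, keep_prob=pkeep)  # randomly zero hidden units
    last = layers[-1]
    return tf.nn.softmax(tf.matmul(Z, last.W) + last.b)

def forward_test(tf_X, layers):
    Z = tf_X
    for layer in layers[:-1]:
        Z = tf.nn.sigmoid(tf.matmul(Z, layer.W) + layer.b)  # no dropout
    last = layers[-1]
    return tf.nn.softmax(tf.matmul(Z, last.W) + last.b)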