# Example #1 (scraped fragment)
    def fit(self, epochs, batch_size, params):
        """Semi-supervised training pass.

        Trains on the labeled training data, restores the checkpoint of the
        best validation epoch, predicts labels for the unlabeled data, and
        stores the combined (labeled + pseudo-labeled) dataset under
        All_labeled_data/.

        Args:
            epochs: number of training epochs.
            batch_size: minibatch size handed to ``train``.
            params: network size parameter handed to ``NN.build_NN``.
        """
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  #Do not assign whole gpu memory, just use it on the go
        config.allow_soft_placement = True  #If an operation is not defined in the default device, let it execute in another.

        # Timestamp used to tag output files of this run.
        timestr = time.strftime("%Y%m%d-%H%M%S")

        # Fixed seeds so shuffling and weight init are reproducible.
        random_seed = 123
        np.random.seed(random_seed)
        tf.set_random_seed(random_seed)

        # Data Path
        CallFolder = '../../Raw_Data/'

        StoreFolder = 'Final_Results/'
        if not os.path.isdir(StoreFolder):
            os.makedirs(StoreFolder)

        StoreFolder_selfeval = 'Selfeval_Results/'
        if not os.path.isdir(StoreFolder_selfeval):
            os.makedirs(StoreFolder_selfeval)

        StoreFolder_all_labeled = 'All_labeled_data/'
        if not os.path.isdir(StoreFolder_all_labeled):
            os.makedirs(StoreFolder_all_labeled)

        # Remove any stale model checkpoints from a previous run.
        StoreFolder_Model = 'Models/'
        if os.path.exists(StoreFolder_Model) and os.path.isdir(
                StoreFolder_Model):
            shutil.rmtree(StoreFolder_Model)
        # NOTE(review): this re-tests StoreFolder_selfeval (already created
        # above), so Models/ is deleted but never recreated — looks like a
        # copy-paste bug; it should probably check/create StoreFolder_Model.
        if not os.path.isdir(StoreFolder_selfeval):
            os.makedirs(StoreFolder_selfeval)

        #########################################################
        # Decide whether self-evaluation or final submission
        Val_split = 9.5 / 10

        # You want to preprocess the data?
        preprocessing = True

        # Hyperparameters
        # epochs = 400
        # batch_size = 128
        learning_rate = 0.0002
        # params = 800
        activation = tf.nn.tanh

        # At which sample starts the prediction for the test data?
        sample_number = 30000

        #########################################################
        # LOAD AND SHUFFLE DATA!
        DataTrain = np.array(
            pd.read_hdf(CallFolder + "train_labeled.h5", "train"))
        # First column holds the label, the rest are features.
        X_train_labeled = DataTrain[:, 1:]
        features = X_train_labeled.shape[1]
        y_train_labeled = DataTrain[:, 0]
        # Labels are assumed to be 0..C-1, so class count is max+1 —
        # TODO confirm against the dataset.
        classes = np.max(y_train_labeled) + 1

        X_test = np.array(
            pd.read_hdf(CallFolder + "train_unlabeled.h5",
                        "train"))  # X_test = unlabeled data
        print('Unpreprocessed Data')
        print('X_train_labeled:   ', X_train_labeled.shape, end=' ||  ')
        print('y_train:   ', y_train_labeled.shape)
        print('X_test:    ', X_test.shape, '\n')

        (X_train_labeled, y_train_labeled) = shuffle(X_train_labeled,
                                                     y_train_labeled)

        X_train = np.concatenate((X_train_labeled, X_test), axis=0)
        np.save(os.path.join(StoreFolder_all_labeled, 'X_train.npy'),
                X_train)  # STORE BEFORE PREPROCESSING, BUT AFTER SHUFFLING!

        #########################################################
        if preprocessing == True:
            X_train_labeled, X_test = centering(X_train_labeled, X_test)
            X_train_labeled = normalize(X_train_labeled)
            X_test = normalize(X_test)

        # Hold out the tail of the shuffled labeled data for validation.
        samples = len(X_train_labeled)
        X_valid = X_train_labeled[int(Val_split * samples):samples, :]
        y_valid = y_train_labeled[int(Val_split * samples):samples]
        X_train = X_train_labeled[0:int(Val_split * samples), :]
        y_train = y_train_labeled[0:int(Val_split * samples)]
        print('Final Data')
        print('Shape of X_train:', X_train.shape)
        print('Shape of y_train:', y_train.shape)
        print('Shape of X_valid:', X_valid.shape)
        print('Shape of y_valid:', y_valid.shape, '\n')

        ##################
        # CREATE GRAPH
        g = tf.Graph()
        with g.as_default():
            # build the graph
            NN.build_NN(features, classes, learning_rate, params, activation)

        ##################
        # TRAINING
        print()
        print('Training... ')
        with tf.Session(graph=g, config=config) as sess:
            [avg_loss_plot, valid_accuracy_plot,
             test_accuracy_plot] = train(path=StoreFolder_Model,
                                         sess=sess,
                                         epochs=epochs,
                                         random_seed=random_seed,
                                         batch_size=batch_size,
                                         training_set=(X_train, y_train),
                                         validation_set=(X_valid, y_valid),
                                         test_set=None)

        # Release the training graph before building the inference graph.
        del g

        ##################
        # CREATE GRAPH
        g2 = tf.Graph()
        with g2.as_default():
            # build the graph
            NN.build_NN(features, classes, learning_rate, params, activation)

            # Saver
            saver = tf.train.Saver()

        ##################
        # PREDICTION
        with tf.Session(graph=g2, config=config) as sess:
            # Restore the checkpoint of the best validation epoch
            # (checkpoints on disk appear to be 1-based, hence the +1).
            epoch = np.argmax(valid_accuracy_plot) + 1
            load(saver=saver, sess=sess, epoch=epoch, path=StoreFolder_Model)
            y_test_pred = predict(sess, X_test)

        ##################
        #  CREATE NEW DATASET
        # Pseudo-labels for the unlabeled data appended after the true labels.
        y_train = np.concatenate((y_train_labeled, y_test_pred), axis=0)
        np.save(os.path.join(StoreFolder_all_labeled, 'y_train.npy'), y_train)
    ##################
    # CREATE GRAPH
    # NOTE(review): from this line on the indentation drops one level below
    # the method body above — the fragment appears truncated/mis-pasted.
    # Names like features, classes, params, config, X_test and
    # valid_accuracy_plot are expected to come from the surrounding scope.
    g2 = tf.Graph()
    with g2.as_default():
        # build the graph
        NN.build_NN(features, classes, learning_rate, params, activation)

        # Saver
        saver = tf.train.Saver()

    ##################
    # PREDICTION
    with tf.Session(graph=g2, config=config) as sess:
        # Restore the checkpoint of the best validation epoch.
        epoch = np.argmax(valid_accuracy_plot) + 1
        load(saver=saver, sess=sess, epoch=epoch, path=StoreFolder_Model)
        y_test_pred = predict(sess, X_test)

    # Write the predictions as a submission CSV tagged with the run settings.
    PrintOutput(
        y_test_pred, sample_number,
        os.path.join(
            StoreFolder, timestr + '_' + str(epochs) + '_' + str(batch_size) +
            '_' + str(params) + '_y_test.csv'))

#################################################################################################
#################################################################################################
# SELFEVALUATION
# NOTE(review): orphaned else — its matching if is not visible here.
else:
    samples = len(X_train)
    X_train_selfeval = X_train[0:int(Test_split * samples), :]
    y_train_selfeval = y_train[0:int(Test_split * samples)]
    X_test_selfeval = X_train[int(Test_split * samples):samples, :]
# Example #3 (scraped fragment)
    def fit(self, epochs, batch_size, params):
        """Train the MLP and either write a submission CSV or self-evaluate.

        In final-submission mode the network trains on a train/validation
        split of the labeled data, restores the best-validation checkpoint
        and writes predictions for the test set.  In self-evaluation mode
        (the default, ``final_submission = False``) the labeled data is
        split into train/test and accuracy curves are stored instead.

        Args:
            epochs: number of training epochs.
            batch_size: minibatch size handed to ``train``.
            params: network size parameter handed to ``NN.build_NN``.

        Returns:
            float: mean of the last 10 entries of ``test_accuracy_plot``
            (only meaningful in self-evaluation mode, where a test set is
            actually evaluated each epoch).
        """
        config = tf.ConfigProto()
        # Do not grab the whole GPU memory up front; grow it on demand.
        config.gpu_options.allow_growth = True
        # If an op has no kernel on the chosen device, place it elsewhere.
        config.allow_soft_placement = True

        # Timestamp used to tag every artifact written by this run.
        timestr = time.strftime("%Y%m%d-%H%M%S")

        # Fixed seeds so shuffling and weight init are reproducible.
        random_seed = 123
        np.random.seed(random_seed)
        tf.set_random_seed(random_seed)

        # Data Path
        CallFolder = '../../Raw_Data/'

        StoreFolder = 'Final_Results/'
        if not os.path.isdir(StoreFolder):
            os.makedirs(StoreFolder)

        StoreFolder_selfeval = 'Selfeval_Results/'
        if not os.path.isdir(StoreFolder_selfeval):
            os.makedirs(StoreFolder_selfeval)

        # Start every run from a clean checkpoint directory.
        StoreFolder_Model = 'Models/'
        if os.path.isdir(StoreFolder_Model):
            shutil.rmtree(StoreFolder_Model)
        # BUGFIX: the original re-tested StoreFolder_selfeval here, so
        # Models/ was deleted above but never recreated before training
        # tried to write checkpoints into it.
        os.makedirs(StoreFolder_Model)

        #########################################################
        # Decide whether self-evaluation or final submission
        final_submission = False
        Test_split = 9.5 / 10
        Val_split = 9.5 / 10

        # You want to preprocess the data?
        preprocessing = True

        # Hyperparameters
        learning_rate = 0.0002
        activation = tf.nn.relu

        # At which sample starts the prediction for the test data?
        sample_number = 30000

        #########################################################
        # LOAD AND SHUFFLE DATA!
        DataTrain = np.array(
            pd.read_hdf(CallFolder + "train_labeled.h5", "train"))
        # First column holds the label, the rest are features.
        X_train = DataTrain[:, 1:]
        features = X_train.shape[1]
        y_train = DataTrain[:, 0]
        # Labels are assumed to be 0..C-1, so the class count is max+1.
        classes = np.max(y_train) + 1

        X_test = np.array(pd.read_hdf(CallFolder + "test.h5", "test"))
        print('Unpreprocessed Data')
        print('X_train_labeled:   ', X_train.shape, end=' ||  ')
        print('y_train:   ', y_train.shape)
        print('X_test:    ', X_test.shape, '\n')

        (X_train, y_train) = shuffle(X_train, y_train)

        #########################################################
        # FINAL DATA
        if final_submission:
            if preprocessing:
                X_train, X_test = centering(X_train, X_test)
                X_train = normalize(X_train)
                X_test = normalize(X_test)

            # Hold out the tail of the shuffled data for validation.
            samples = len(X_train)
            X_valid = X_train[int(Val_split * samples):samples, :]
            y_valid = y_train[int(Val_split * samples):samples]
            X_train = X_train[0:int(Val_split * samples), :]
            y_train = y_train[0:int(Val_split * samples)]
            print('Final Data')
            print('Shape of X_train:', X_train.shape)
            print('Shape of y_train:', y_train.shape)
            print('Shape of X_valid:', X_valid.shape)
            print('Shape of y_valid:', y_valid.shape, '\n')

            ##################
            # CREATE GRAPH
            g = tf.Graph()
            with g.as_default():
                # build the graph
                NN.build_NN(features, classes, learning_rate, params,
                            activation)

            ##################
            # TRAINING
            print()
            print('Training... ')
            with tf.Session(graph=g, config=config) as sess:
                [avg_loss_plot, valid_accuracy_plot,
                 test_accuracy_plot] = train(path=StoreFolder_Model,
                                             sess=sess,
                                             epochs=epochs,
                                             random_seed=random_seed,
                                             batch_size=batch_size,
                                             training_set=(X_train, y_train),
                                             validation_set=(X_valid, y_valid),
                                             test_set=None)

                np.save(
                    os.path.join(StoreFolder, timestr + '_avg_loss_plot.npy'),
                    avg_loss_plot)
            # Release the training graph before building the inference graph.
            del g

            ##################
            # CREATE GRAPH
            g2 = tf.Graph()
            with g2.as_default():
                # build the graph
                NN.build_NN(features, classes, learning_rate, params,
                            activation)

                # Saver
                saver = tf.train.Saver()

            ##################
            # PREDICTION
            with tf.Session(graph=g2, config=config) as sess:
                # Restore the checkpoint of the best validation epoch
                # (checkpoints on disk are 1-based, hence the +1).
                epoch = np.argmax(valid_accuracy_plot) + 1
                load(saver=saver,
                     sess=sess,
                     epoch=epoch,
                     path=StoreFolder_Model)
                y_test_pred = predict(sess, X_test)

            # Submission CSV tagged with the run's hyperparameters.
            PrintOutput(
                y_test_pred, sample_number,
                os.path.join(
                    StoreFolder, timestr + '_' + str(epochs) + '_' +
                    str(batch_size) + '_' + str(params) + '_y_test.csv'))

        #################################################################################################
        #################################################################################################
        # SELFEVALUATION
        else:
            # Split the labeled data itself into train/test for evaluation.
            samples = len(X_train)
            X_train_selfeval = X_train[0:int(Test_split * samples), :]
            y_train_selfeval = y_train[0:int(Test_split * samples)]
            X_test_selfeval = X_train[int(Test_split * samples):samples, :]
            y_test_selfeval = y_train[int(Test_split * samples):samples]
            print('Self-evaluation data')
            print('Shape of X_train:', X_train_selfeval.shape)
            print('Shape of y_train:', y_train_selfeval.shape)
            print('Shape of X_test:', X_test_selfeval.shape)
            print('Shape of y_test:', y_test_selfeval.shape)

            if preprocessing:
                X_train_selfeval, X_test_selfeval = centering(
                    X_train_selfeval, X_test_selfeval)
                X_train_selfeval = normalize(X_train_selfeval)
                X_test_selfeval = normalize(X_test_selfeval)

            ##################
            # CREATE GRAPH TRAINING
            g = tf.Graph()
            with g.as_default():
                # build the graph
                NN.build_NN(features, classes, learning_rate, params,
                            activation)

            ##################
            # TRAINING
            print()
            print('Training... ')
            with tf.Session(graph=g, config=config) as sess:
                [avg_loss_plot, valid_accuracy_plot, test_accuracy_plot
                 ] = train(path=StoreFolder_Model,
                           sess=sess,
                           epochs=epochs,
                           random_seed=random_seed,
                           batch_size=batch_size,
                           training_set=(X_train_selfeval, y_train_selfeval),
                           validation_set=None,
                           test_set=(X_test_selfeval, y_test_selfeval))

                # Persist the learning curves for later inspection.
                np.save(
                    os.path.join(StoreFolder_selfeval,
                                 timestr + '_avg_loss_plot.npy'),
                    avg_loss_plot)
                np.save(
                    os.path.join(StoreFolder_selfeval,
                                 timestr + '_test_accuracy_plot.npy'),
                    test_accuracy_plot)

        print('\nJob Done!')
        # Mean accuracy over the last 10 epochs smooths per-epoch noise.
        return np.average(test_accuracy_plot[-10:])
# Example #4 (scraped fragment)
    ##################
    # CREATE GRAPH
    # NOTE(review): truncated fragment — names such as config, X_test,
    # nr_pred, X_train, y_train, y_train_labeled, valid_accuracy_plot and
    # StoreFolder_Model come from surrounding code that is not visible here.
    g2 = tf.Graph()
    with g2.as_default():
        # build the graph
        NN.build_NN(features, classes, learning_rate, params, activation)

        # Saver
        saver = tf.train.Saver()

    ##################
    # PREDICTION
    with tf.Session(graph=g2, config=config) as sess:
        # Restore the checkpoint of the best validation epoch.
        epoch = np.argmax(valid_accuracy_plot) + 1
        load(saver=saver, sess=sess, epoch=epoch, path=StoreFolder_Model)
        # Pseudo-label only the first nr_pred unlabeled samples.
        y_test_pred = predict(sess, X_test[0:nr_pred,:])
    # Add newest predicted point to the NN
    # Move the nr_pred freshly-labeled samples from the unlabeled pool
    # (X_test) into the training set, together with their pseudo-labels.
    print('Before concatenating X_train: ', X_train.shape)
    X_train = np.concatenate((X_train, X_test[0:nr_pred,:]), axis=0)
    print('After concatenating X_train: ', X_train.shape)
    print('Before concatenating X_test: ',X_test.shape)
    X_test = X_test[nr_pred:, :]
    print('After concatenating X_test: ',X_test.shape)
    print('Before concatenating y_train_labeled: ',y_train_labeled.shape)
    y_train_labeled = np.concatenate((y_train_labeled, y_test_pred), axis=0)
    print('After concatenating y_train_labeled: ', y_train_labeled.shape)
    print('Before concatenating y_train: ',y_train.shape)
    y_train = np.concatenate((y_train, y_test_pred), axis=0)
    print('After concatenating y_train: ',y_train.shape)

    # Free the inference graph before the next iteration.
    del g2
##############################################################################
# PREDICTION
# NOTE(review): truncated fragment — g2, saver, val_accuracy_plot,
# store_folder, X_test, y_test, x_row, y_col, k, channel, parameter and
# classes are defined outside the visible code; X_test appears to hold
# image file paths here, not feature rows.
print()
print('Prediction... ')
with tf.Session(graph=g2, config=config) as sess:
    # Restore the checkpoint of the best validation epoch.
    epoch = np.argmax(val_accuracy_plot) + 1
    print(epoch)
    load(saver=saver, sess=sess, epoch=epoch, path=store_folder)

    # LABELS
    y_pred = np.full((len(X_test)), 0)
    X = np.full((1, x_row, y_col, 1), 0.)

    # Predict one image at a time: open image i and keep only channel k.
    for i in range(len(X_test)):
        X[0, :, :, :] = np.array(Image.open(str(X_test[i])))[:, :, k:(k + 1)]
        y_pred[i] = predict(sess, X, return_proba=False)
    # sum((y_pred == y_test)/N) equals the match fraction; *100 -> percent.
    test_acc = 100 * np.sum((y_pred == y_test) / len(y_test))
    print('Test Acc: %7.3f%%' % test_acc)
    with open(
            os.path.join(store_folder,
                         channel + '_' + parameter + '_AccuracyTest.txt'),
            'w') as fp:
        fp.write('%.3f%%' % (test_acc))

    # PROBABILITIES
    np.set_printoptions(precision=3, suppress=True)

    y_pred_proba = np.full((len(X_test), classes), 0.)
    X = np.full((1, x_row, y_col, 1), 0.)

    # NOTE(review): fragment ends mid-loop; the loop body is not visible.
    for i in range(len(X_test)):
# Example #6 (scraped fragment)
    ##################
    # TRAINING & PREDICTION
    # NOTE(review): truncated fragment — g, config, X_train, y_train,
    # X_test, y_test, the B* flags and the *_list hyperparameter lists come
    # from code that is not visible here.
    print()
    print('Training... ')
    with tf.Session(graph=g, config=config) as sess:
        [avg_loss_plot,
         test_accuracy_plot] = train(sess=sess,
                                     epochs=epochs,
                                     random_seed=random_seed,
                                     batch_size=batch_size,
                                     training_set=(X_train, y_train),
                                     test_set=None)

        # np.save(os.path.join(StoreFolder, timestr + '_avg_loss_plot.npy'), avg_loss_plot)

        y_pred = predict(sess, X_test)

    ## Score
    # Report accuracy only when scoring is enabled and this is not the
    # final-prediction run.
    if BAccuracy == 1 and BFinalPrediction == 0:
        scorer = scoring.score()
        score = scorer.Accuracy(y_test, y_pred)
        print('Accuracy score is = ', repr(score))

# Exhaustive sweep over every combination of the hyperparameter lists.
if BGridSearch == 1 and BFinalPrediction == 0:
    iters = len(epoch_list) * len(params_list) * len(learning_rate_list) * len(
        batch_size_list)
    i = 0
    score_best = 0
    while i < iters:
        epochs = epoch_list[i % len(epoch_list)]
        # NOTE(review): fragment is cut off mid-expression below.
        params = params_list[math.floor(