Exemplo n.º 1
0
def evaluate(all_data, all_labels, fold_number, view_number):
    '''
    Runs baseline classification for a single fold and a single view.

    The blocks that make up the fold are obtained from
    util.configure_blocks. For each split the selected view of every block
    is row-subsampled by the module-level subsampling_factor and stacked.
    The enabled classifiers (module-level flags try_knn / try_svm) are then
    tuned and tested twice: first on the raw features, then on a PCA
    projection fitted on the training split only. Results are printed;
    nothing is returned.

    Args:
        all_data (list): Per-block data, indexed as
            all_data[block - 1][view_number]. Each entry is sliced as a 2-D
            array whose rows are samples.
        all_labels (list): Per-block labels, indexed the same way as
            all_data. Each entry is sliced as a 1-D sequence.
        fold_number (int): The fold to test on.
        view_number (int): Index of the view to evaluate inside each block.
    '''

    print('---- Fold #{}, View #{} ----'.format(fold_number, view_number))

    def report_time(t):
        # Print the wall-clock time elapsed since timestamp t.
        print('\tTook {}\n'.format(format_time(time.time() - t)))

    # Find the appropriate blocks for this fold and stack data
    trn_blocks, tun_blocks, tst_blocks = util.configure_blocks(fold_number)

    def data_for_blocks(blocks):
        # Block numbers are offset by one relative to the list indices.
        data = [
            all_data[block - 1][view_number][0::subsampling_factor, :]
            for block in blocks
        ]
        labels = [
            all_labels[block - 1][view_number][0::subsampling_factor]
            for block in blocks
        ]
        return np.vstack(data), np.hstack(labels)

    trn_data, trn_labels = data_for_blocks(trn_blocks)
    tun_data, tun_labels = data_for_blocks(tun_blocks)
    tst_data, tst_labels = data_for_blocks(tst_blocks)

    # Mean-centering with the training mean is currently disabled:
    #     mean = np.mean(trn_data, axis=0)
    #     trn_data = trn_data-mean
    #     tun_data = tun_data-mean
    #     tst_data = tst_data-mean

    print((
        'Num. samples in training: {}\n'
        'Num. samples in tuning:   {}\n'
        'Num. samples in testing:  {}\n'
    ).format(trn_data.shape[0], tun_data.shape[0], tst_data.shape[0]))

    def run(trn, tun, tst):
        # Tune each enabled classifier on (trn, tun) and score it on tst.
        # The labels are the same for the raw and the PCA features, so they
        # are taken from the enclosing scope.

        if try_knn:
            print('kNN:')

            runtime = time.time()

            k_neighbors, tun_acc = tune_knn(
                trn, trn_labels,
                tun, tun_labels
            )
            tst_acc = knn_accuracy(
                trn, trn_labels,
                tst, tst_labels,
                k_neighbors
            )
            print((
                '\tk-Neighbors:      {}\n'
                '\tTuning Accuracy:  {:.2f}%\n'
                '\tTesting Accuracy: {:.2f}%\n'
            ).format(k_neighbors, 100*tun_acc, 100*tst_acc))

            report_time(runtime)

        if try_svm:
            print('SVM:')

            runtime = time.time()

            # Use the features handed to run(), not the raw arrays from the
            # enclosing scope; otherwise the PCA pass would silently
            # re-evaluate the SVM on the untransformed data.
            gamma, error, tun_acc = tune_svm(
                trn, trn_labels,
                tun, tun_labels
            )
            tst_acc = svm_accuracy(
                trn, trn_labels,
                tst, tst_labels,
                gamma, error
            )
            print((
                '\tGamma:            {}\n'
                '\tPenalty:          {}\n'
                '\tTuning Accuracy:  {:.2f}%\n'
                '\tTesting Accuracy: {:.2f}%\n'
            ).format(gamma, error, 100*tun_acc, 100*tst_acc))

            report_time(runtime)

    print('Raw data...\n')

    run(trn_data, tun_data, tst_data)

    print('PCA...\n')

    # Fit the projection on the training split only, then apply it to all
    # three splits.
    pca = PCA()
    pca.fit(trn_data)

    trn_data_pca = pca.transform(trn_data)
    tun_data_pca = pca.transform(tun_data)
    tst_data_pca = pca.transform(tst_data)

    run(trn_data_pca, tun_data_pca, tst_data_pca)
Exemplo n.º 2
0
def runSingleFold(data_list, file_idx, fold_number):
    '''
    Run the GCCA-based classification experiment for one fold.

    The training blocks of the fold are pre-processed, a shared
    representation G is obtained from GeneralizedCCA, and for every view a
    projection matrix U_j = pinv(X_j^T) * G is computed. The projected
    training data of all views is stacked (restricted to vowel_labels
    unless use_full_phones is set), a classifier selected by the
    module-level classification_model is tuned on the tuning blocks, and
    the accuracies on the testing blocks are printed. Nothing is returned.

    Args:
        data_list: Forwarded unchanged to DataPreProcessor and
            getAccuracies.
        file_idx: Forwarded unchanged to DataPreProcessor and
            getAccuracies.
        fold_number: Fold identifier passed to util.configure_blocks.
    '''
    print('| ---- ---- Fold #{} ---- ----'.format(fold_number))

    # NOTE(review): data_locations is a module-level name that is not
    # visible here; presumably it has one entry per view -- confirm.
    number_of_views = len(data_locations)

    (training_blocks,
     tuning_blocks,
     testing_blocks) = util.configure_blocks(fold_number)

    # Pre-process training data to have equal number of observations (n)
    data_pre_processor = DataPreProcessor(data_list, file_idx,
                                          training_blocks, True)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    # Perform GCCA on processed data
    gcca_model = GeneralizedCCA(training_data_per_view, num_of_dimensions)
    G = gcca_model.solve()

    # List for holding U_j for each view
    proj_matrix_per_view = list()

    # Pieces are collected and stacked once after the loop instead of
    # re-copying the whole array for every appended row. The empty seed
    # keeps the (0, d) float result when nothing gets selected. The builtin
    # float/int replace the np.float/np.int aliases removed in NumPy 1.24.
    data_parts = [np.ndarray(shape=(0, np.shape(G)[1]), dtype=float)]
    label_parts = []

    if use_full_phones:
        cmap = util.getColorMap(38)
    else:
        cmap = util.getColorMap(len(vowel_labels))

    # Only consumed by the (disabled) debugging scatter plot below.
    colors = []

    # Compute U_j (matrix for projecting data into lower dimensional subspace)
    for i in range(number_of_views):
        view_data = training_data_per_view[i].transpose()

        U = np.linalg.pinv(view_data) * np.mat(G)
        projected_data = np.mat(view_data) * np.mat(U)

        proj_matrix_per_view.append(U)

        labels = training_labels_per_view[i]

        if use_full_phones:
            data_parts.append(projected_data)

            for j in range(len(labels)):
                colors.append(cmap(int(labels[j])))
                label_parts.append(int(labels[j]))
        else:
            # Keep only the observations whose label is a vowel label.
            selected_rows = []
            for j in range(len(labels)):
                if labels[j] in vowel_labels:
                    selected_rows.append(j)
                    label_parts.append(int(labels[j]))
                    colors.append(cmap(vowel_labels.index(int(labels[j]))))

            if selected_rows:
                data_parts.append(projected_data[selected_rows, :])

    training_data = np.vstack(data_parts)
    training_labels = np.array(label_parts, dtype=int)

    #plot = plt.scatter(training_data[:,2], training_data[:,1], color=colors)
    #plt.show()

    # Start tuning/testing
    if classification_model == ClassificationModel.Kernel_SVM_RBF:
        candidates = [500, 600, 700, 800, 900]
        param_line = '| Optimal gamma value: {}'

        def build_model(param):
            return svm.SVC(decision_function_shape='ovo', kernel='rbf',
                           gamma=param, C=1000)
    else:
        candidates = [4, 8, 12, 16]
        param_line = '| Optimal number of neighbors: {}'

        def build_model(param):
            return neighbors.KNeighborsClassifier(param, weights='distance')

    # The first candidate is always accepted, so the final model never ends
    # up with an invalid parameter (gamma=0 / 0 neighbors) when every
    # candidate scores 0. Ties still resolve to the earliest candidate.
    optimal_param = None
    max_accuracy = None

    for candidate in candidates:
        model = build_model(candidate)
        model.fit(training_data, training_labels)
        accuracies = getAccuracies(model, data_list, file_idx, tuning_blocks,
                                   proj_matrix_per_view)
        # The last entry is the accuracy over the whole data.
        if optimal_param is None or accuracies[-1] > max_accuracy:
            max_accuracy = accuracies[-1]
            optimal_param = candidate

    print(param_line.format(optimal_param))

    model = build_model(optimal_param)
    model.fit(training_data, training_labels)
    accuracies = getAccuracies(model, data_list, file_idx, testing_blocks,
                               proj_matrix_per_view)

    last_index = len(accuracies) - 1
    for i, accuracy in enumerate(accuracies):
        if i < last_index:
            print('| Accuracy for view {}: {:.3f}'.format(i + 1, accuracy))
        else:
            print('| Accuracy for whole data: {:.3f}'.format(accuracy))

    print('|')
Exemplo n.º 3
0
def runSingleFold(data_list, file_idx, fold_number):
    '''
    Run the per-view baseline classification experiment for one fold.

    The training, tuning and testing blocks of the fold are pre-processed
    separately. For every view a classifier selected by the module-level
    classification_model is tuned on the tuning data and its accuracy on
    the testing data is printed together with the chosen hyper-parameter.
    Unless use_full_phones is set, training uses only the observations
    whose label is in vowel_labels. Nothing is returned.

    Args:
        data_list: Per-view data; its length is the number of views. It is
            forwarded unchanged to DataPreProcessor.
        file_idx: Forwarded unchanged to DataPreProcessor.
        fold_number: Fold identifier passed to util.configure_blocks.
    '''
    print('| ---- ---- Fold #{} ---- ----'.format(fold_number))

    number_of_views = len(data_list)

    (training_blocks, tuning_blocks,
     testing_blocks) = util.configure_blocks(fold_number)

    data_pre_processor = DataPreProcessor(data_list, file_idx, training_blocks,
                                          False)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, tuning_blocks,
                                          False)
    tuning_data_per_view, tuning_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, testing_blocks,
                                          False)
    testing_data_per_view, testing_labels_per_view = data_pre_processor.process()

    # Candidate hyper-parameters and the model factory depend only on the
    # configured classifier, so they are set up once for all views.
    if classification_model == ClassificationModel.Kernel_SVM_RBF:
        candidates = [3e-08, 3.5e-08, 4e-08, 4.5e-08]
        param_template = 'Optimal gamma value: {}'

        def build_model(param):
            return svm.SVC(decision_function_shape='ovo',
                           kernel='rbf',
                           gamma=param,
                           C=2)
    else:
        candidates = [28, 32, 36, 40]
        param_template = 'Optimal number of neighbors: {}'

        def build_model(param):
            return neighbors.KNeighborsClassifier(param, weights='distance')

    for i in range(number_of_views):
        # Observations are the rows of the transposed per-view data.
        view_data = training_data_per_view[i].transpose()
        view_labels = training_labels_per_view[i]

        if use_full_phones:
            training_data = view_data
            training_labels = view_labels
        else:
            # Select all vowel rows in one indexing step instead of growing
            # the arrays one row at a time (quadratic copying). The builtin
            # float/int replace the np.float/np.int aliases removed in
            # NumPy 1.24.
            vowel_rows = [j for j in range(len(view_labels))
                          if view_labels[j] in vowel_labels]

            training_data = np.ndarray(
                shape=(0, np.shape(training_data_per_view[i])[0]),
                dtype=float)
            if vowel_rows:
                training_data = np.vstack(
                    (training_data, view_data[vowel_rows, :]))
            training_labels = np.array(
                [int(view_labels[j]) for j in vowel_rows], dtype=int)

        # Start tuning/testing
        # The first candidate is always accepted, so the final model never
        # ends up with an invalid parameter (gamma=0 / 0 neighbors) when
        # every candidate scores 0. Ties resolve to the earliest candidate.
        optimal_param = None
        max_accuracy = None

        for candidate in candidates:
            model = build_model(candidate)
            model.fit(training_data, training_labels)
            accuracy = getAccuracy(model, tuning_data_per_view[i],
                                   tuning_labels_per_view[i])
            if optimal_param is None or accuracy > max_accuracy:
                max_accuracy = accuracy
                optimal_param = candidate

        param_msg = param_template.format(optimal_param)

        model = build_model(optimal_param)
        model.fit(training_data, training_labels)
        accuracy = getAccuracy(model, testing_data_per_view[i],
                               testing_labels_per_view[i])

        print('| Accuracy for view {}: {:.3f}'.format(
            i + 1, accuracy) + ', ' + param_msg)

    print('|')
Exemplo n.º 4
0
def runSingleFold(data_list, file_idx, fold_number):
    """
    Run the per-view baseline classification experiment for one fold.

    The training, tuning and testing blocks of the fold are pre-processed
    separately. For every view a classifier selected by the module-level
    classification_model is tuned on the tuning data and its accuracy on
    the testing data is printed together with the chosen hyper-parameter.
    Unless use_full_phones is set, training uses only the observations
    whose label is in vowel_labels. Nothing is returned.

    Args:
        data_list: Per-view data; its length is the number of views. It is
            forwarded unchanged to DataPreProcessor.
        file_idx: Forwarded unchanged to DataPreProcessor.
        fold_number: Fold identifier passed to util.configure_blocks.
    """
    print("| ---- ---- Fold #{} ---- ----".format(fold_number))

    number_of_views = len(data_list)

    (training_blocks, tuning_blocks, testing_blocks) = util.configure_blocks(fold_number)

    data_pre_processor = DataPreProcessor(data_list, file_idx, training_blocks, False)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, tuning_blocks, False)
    tuning_data_per_view, tuning_labels_per_view = data_pre_processor.process()

    data_pre_processor = DataPreProcessor(data_list, file_idx, testing_blocks, False)
    testing_data_per_view, testing_labels_per_view = data_pre_processor.process()

    # Candidate hyper-parameters and the model factory depend only on the
    # configured classifier, so they are set up once for all views.
    if classification_model == ClassificationModel.Kernel_SVM_RBF:
        candidates = [3e-08, 3.5e-08, 4e-08, 4.5e-08]
        param_template = "Optimal gamma value: {}"

        def build_model(param):
            return svm.SVC(decision_function_shape="ovo", kernel="rbf", gamma=param, C=2)
    else:
        candidates = [28, 32, 36, 40]
        param_template = "Optimal number of neighbors: {}"

        def build_model(param):
            return neighbors.KNeighborsClassifier(param, weights="distance")

    for i in range(number_of_views):
        # Observations are the rows of the transposed per-view data.
        view_data = training_data_per_view[i].transpose()
        view_labels = training_labels_per_view[i]

        if use_full_phones:
            training_data = view_data
            training_labels = view_labels
        else:
            # Select all vowel rows in one indexing step instead of growing
            # the arrays one row at a time (quadratic copying). The builtin
            # float/int replace the np.float/np.int aliases removed in
            # NumPy 1.24.
            vowel_rows = [j for j in range(len(view_labels)) if view_labels[j] in vowel_labels]

            training_data = np.ndarray(shape=(0, np.shape(training_data_per_view[i])[0]), dtype=float)
            if vowel_rows:
                training_data = np.vstack((training_data, view_data[vowel_rows, :]))
            training_labels = np.array([int(view_labels[j]) for j in vowel_rows], dtype=int)

        # Start tuning/testing
        # The first candidate is always accepted, so the final model never
        # ends up with an invalid parameter (gamma=0 / 0 neighbors) when
        # every candidate scores 0. Ties resolve to the earliest candidate.
        optimal_param = None
        max_accuracy = None

        for candidate in candidates:
            model = build_model(candidate)
            model.fit(training_data, training_labels)
            accuracy = getAccuracy(model, tuning_data_per_view[i], tuning_labels_per_view[i])
            if optimal_param is None or accuracy > max_accuracy:
                max_accuracy = accuracy
                optimal_param = candidate

        param_msg = param_template.format(optimal_param)

        model = build_model(optimal_param)
        model.fit(training_data, training_labels)
        accuracy = getAccuracy(model, testing_data_per_view[i], testing_labels_per_view[i])

        print("| Accuracy for view {}: {:.3f}".format(i + 1, accuracy) + ", " + param_msg)

    print("|")
Exemplo n.º 5
0
def runSingleFold(data_list, file_idx, fold_number):
    """
    Run the GCCA-based classification experiment for one fold.

    The training blocks of the fold are pre-processed, a shared
    representation G is obtained from GeneralizedCCA, and for every view a
    projection matrix U_j = pinv(X_j^T) * G is computed. The projected
    training data of all views is stacked (restricted to vowel_labels
    unless use_full_phones is set), a classifier selected by the
    module-level classification_model is tuned on the tuning blocks, and
    the accuracies on the testing blocks are printed. Nothing is returned.

    Args:
        data_list: Forwarded unchanged to DataPreProcessor and
            getAccuracies.
        file_idx: Forwarded unchanged to DataPreProcessor and
            getAccuracies.
        fold_number: Fold identifier passed to util.configure_blocks.
    """
    print("| ---- ---- Fold #{} ---- ----".format(fold_number))

    # NOTE(review): data_locations is a module-level name that is not
    # visible here; presumably it has one entry per view -- confirm.
    number_of_views = len(data_locations)

    (training_blocks, tuning_blocks, testing_blocks) = util.configure_blocks(fold_number)

    # Pre-process training data to have equal number of observations (n)
    data_pre_processor = DataPreProcessor(data_list, file_idx, training_blocks, True)
    training_data_per_view, training_labels_per_view = data_pre_processor.process()

    # Perform GCCA on processed data
    gcca_model = GeneralizedCCA(training_data_per_view, num_of_dimensions)
    G = gcca_model.solve()

    # List for holding U_j for each view
    proj_matrix_per_view = list()

    # Pieces are collected and stacked once after the loop instead of
    # re-copying the whole array for every appended row. The empty seed
    # keeps the (0, d) float result when nothing gets selected. The builtin
    # float/int replace the np.float/np.int aliases removed in NumPy 1.24.
    data_parts = [np.ndarray(shape=(0, np.shape(G)[1]), dtype=float)]
    label_parts = []

    if use_full_phones:
        cmap = util.getColorMap(38)
    else:
        cmap = util.getColorMap(len(vowel_labels))

    # Only consumed by the (disabled) debugging scatter plot below.
    colors = []

    # Compute U_j (matrix for projecting data into lower dimensional subspace)
    for i in range(number_of_views):
        view_data = training_data_per_view[i].transpose()

        U = np.linalg.pinv(view_data) * np.mat(G)
        projected_data = np.mat(view_data) * np.mat(U)

        proj_matrix_per_view.append(U)

        labels = training_labels_per_view[i]

        if use_full_phones:
            data_parts.append(projected_data)

            for j in range(len(labels)):
                colors.append(cmap(int(labels[j])))
                label_parts.append(int(labels[j]))
        else:
            # Keep only the observations whose label is a vowel label.
            selected_rows = []
            for j in range(len(labels)):
                if labels[j] in vowel_labels:
                    selected_rows.append(j)
                    label_parts.append(int(labels[j]))
                    colors.append(cmap(vowel_labels.index(int(labels[j]))))

            if selected_rows:
                data_parts.append(projected_data[selected_rows, :])

    training_data = np.vstack(data_parts)
    training_labels = np.array(label_parts, dtype=int)

    # plot = plt.scatter(training_data[:,2], training_data[:,1], color=colors)
    # plt.show()

    # Start tuning/testing
    if classification_model == ClassificationModel.Kernel_SVM_RBF:
        candidates = [500, 600, 700, 800, 900]
        param_line = "| Optimal gamma value: {}"

        def build_model(param):
            return svm.SVC(decision_function_shape="ovo", kernel="rbf", gamma=param, C=1000)
    else:
        candidates = [4, 8, 12, 16]
        param_line = "| Optimal number of neighbors: {}"

        def build_model(param):
            return neighbors.KNeighborsClassifier(param, weights="distance")

    # The first candidate is always accepted, so the final model never ends
    # up with an invalid parameter (gamma=0 / 0 neighbors) when every
    # candidate scores 0. Ties still resolve to the earliest candidate.
    optimal_param = None
    max_accuracy = None

    for candidate in candidates:
        model = build_model(candidate)
        model.fit(training_data, training_labels)
        accuracies = getAccuracies(model, data_list, file_idx, tuning_blocks, proj_matrix_per_view)
        # The last entry is the accuracy over the whole data.
        if optimal_param is None or accuracies[-1] > max_accuracy:
            max_accuracy = accuracies[-1]
            optimal_param = candidate

    print(param_line.format(optimal_param))

    model = build_model(optimal_param)
    model.fit(training_data, training_labels)
    accuracies = getAccuracies(model, data_list, file_idx, testing_blocks, proj_matrix_per_view)

    last_index = len(accuracies) - 1
    for i, accuracy in enumerate(accuracies):
        if i < last_index:
            print("| Accuracy for view {}: {:.3f}".format(i + 1, accuracy))
        else:
            print("| Accuracy for whole data: {:.3f}".format(accuracy))

    print("|")