示例#1
0
def classify_separated_test_directory():
    """Classify with training data and test data taken from two directories.

    The configured record directory and ``SEPARATED_TEST_RECORD_DIR`` are both
    parsed with ``combine_sc_vectors=True``. The records of every test file are
    appended to the training file at the same position, features are extracted
    from the merged files, and the classifier receives two index lists: the
    first covers the original training labels, the second covers the appended
    test labels. A classification report for the test part is printed.

    Raises:
        ValueError: If the two directories yield a different number of parsed
            input files.
    """
    print(
        "\nDo classification with different training directory and test directory"
    )
    print("\nTraining directory: " + config.get_record_dir())
    print("\nTest directory: " + SEPARATED_TEST_RECORD_DIR)

    file_contents, labels = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=True)
    test_data, test_labels = input_parser.parse_input_files(
        SEPARATED_TEST_RECORD_DIR, combine_sc_vectors=True)

    if len(file_contents) != len(test_data):
        raise ValueError(
            "Different number of input files in training directory and test directory - must be equal"
        )

    # The index lists are computed before the labels are merged, so the
    # training range ends exactly where the test range begins.
    train_count = len(labels)
    test_count = len(test_labels)
    train_index = list(range(train_count))
    test_index = list(range(train_count, train_count + test_count))

    # Append test data to training data, pairing files by position.
    for training_file, test_file in zip(file_contents, test_data):
        training_file.records.extend(test_file.records)
    labels.extend(test_labels)

    features = ft.extract_preconfigured_features(file_contents)
    targets = pd.Series(labels)

    Y_test, predictions, _accuracy = app_classifier.do_classification(
        features, targets, train_index, test_index)
    print(classification_report(Y_test, predictions))
示例#2
0
def main():
    """Run k-fold cross validation over all files of the configured record directory.

    The input files are parsed with ``combine_sc_vectors=True``, features are
    extracted from them, and the whole run is bracketed by the timing module's
    start and stop calls.
    """
    timing.start_measurement()

    print("Do combined classification using all input files")
    parsed_files, parsed_labels = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=True)
    app_classifier.do_kfold_cross_validation(
        ft.extract_preconfigured_features(parsed_files),
        pd.Series(parsed_labels))

    timing.stop_measurement()
示例#3
0
def main():
    """Run k-fold cross validation over all input files and plot the accuracies.

    The cross validation is expected to return five values; the first one is
    ignored. The next three are plotted together in one figure, and the last
    one (the single accuracies) is plotted in a second figure. The whole run is
    bracketed by the timing module's start and stop calls.
    """
    timing.start_measurement()

    print("Do combined classification using all input files")
    parsed_files, parsed_labels = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=True)
    features = ft.extract_preconfigured_features(parsed_files)
    targets = pd.Series(parsed_labels)
    (_, first_acc, second_acc, third_acc,
     single_accuracies) = app_classifier.do_kfold_cross_validation(
         features, targets)

    # One figure per series; each is shown before the next is drawn.
    for series in ([first_acc, second_acc, third_acc], single_accuracies):
        plt.plot(series)
        plt.show()

    timing.stop_measurement()
def explorative_classification():
    """Cross-validate every input file on its own and print a ranking.

    The configured record directory is parsed with ``combine_sc_vectors=False``.
    Each parsed file is evaluated separately with its own labels, and the
    resulting accuracies are printed in descending order together with the
    file names.
    """

    def accuracy_of(result):
        return result.accuracy

    file_contents, label_list = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=False)
    ranking = []

    for position, content in enumerate(file_contents):
        # Labels are looked up by position, matching the parser's output order.
        file_labels = label_list[position]

        print("\nEvaluate ", content.file_name)

        accuracy = app_classifier.do_kfold_cross_validation(
            [content], pd.Series(file_labels), verbose=False)
        ranking.append(ClassificationResult(accuracy, content.file_name))

    # Highest accuracy first; entries with equal accuracy keep their order.
    ranking = sorted(ranking, key=accuracy_of, reverse=True)

    print("\nSummary for files in " + config.get_record_dir() + ":\n")
    for entry in ranking:
        print(entry)
def explorative_classification():
    """Cross-validate every input file on its own, print rankings and save results.

    The configured record directory is parsed with ``combine_sc_vectors=False``
    and each parsed file is evaluated separately with its own labels. The
    cross validation is expected to return five values: a total accuracy, a
    first, second and third accuracy, and a sequence of single accuracies. All
    values are passed through ``round_float`` before they are stored.

    Four rankings (total, first, second, third) are printed in descending
    order of accuracy. The single accuracies are not sorted; for every input
    file they are written to a file of the same name inside
    ``config.RECORD_BASE_DIR + <record dir> + config.RESULTS_DIR``, one line
    per value in the form ``<1-based position>, <accuracy>``. Input files
    without any single accuracies are skipped instead of failing.
    """
    # Read once so that the parsed directory and the output directory agree.
    record_dir = config.get_record_dir()

    file_contents, label_list = input_parser.parse_input_files(
        record_dir, combine_sc_vectors=False)
    results = []
    results_first = []
    results_second = []
    results_third = []
    single_results = []

    for idx, fc in enumerate(file_contents):
        labels = label_list[idx]

        print("\nEvaluate ", fc.file_name)

        X = [fc]
        Y = pd.Series(labels)

        # file_name is passed without its last four characters
        # (presumably the file extension - verify against the parser).
        (total_accuracy, total_first_acc, total_second_acc, total_third_acc,
         total_single_accuracies) = app_classifier.do_kfold_cross_validation(
             X, Y, verbose=True, file_name=fc.file_name[:-4])
        results.append(
            ClassificationResult(round_float(total_accuracy), fc.file_name))
        results_first.append(
            ClassificationResult(round_float(total_first_acc), fc.file_name))
        results_second.append(
            ClassificationResult(round_float(total_second_acc), fc.file_name))
        results_third.append(
            ClassificationResult(round_float(total_third_acc), fc.file_name))
        single_results.append([
            ClassificationResult(round_float(single_accuracy), fc.file_name)
            for single_accuracy in total_single_accuracies
        ])

    summaries = (
        ("\nSummary for files in ", results),
        ("\nSummary of first for files in ", results_first),
        ("\nSummary of second for files in ", results_second),
        ("\nSummary of third for files in ", results_third),
    )

    # Sort every ranking before anything is printed, best accuracy first.
    for _, ranking in summaries:
        ranking.sort(key=lambda result: result.accuracy, reverse=True)

    for heading, ranking in summaries:
        print(heading + record_dir + ":\n")
        for r in ranking:
            print(r)

    results_dir = config.RECORD_BASE_DIR + record_dir + config.RESULTS_DIR
    if single_results:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(results_dir, exist_ok=True)

    for file_results in single_results:
        if not file_results:
            # Nothing was reported for this input file, so there is neither
            # a file name to take from the first entry nor content to write.
            continue
        # The context manager closes the file even if a write fails.
        with open(results_dir + file_results[0].file_name, "w") as out_file:
            out_file.writelines(
                str(position + 1) + ", " + str(r.accuracy) + "\n"
                for position, r in enumerate(file_results))