def classify_separated_test_directory():
    """Classify with a dedicated test directory, separate from training.

    Parses both directories, appends each test file's records to the
    corresponding training file, then runs a single train/test split in
    which the train indices cover the original training labels and the
    test indices cover the appended test labels.

    Raises:
        ValueError: if the two directories do not contain the same
            number of input files (they must correspond one-to-one).
    """
    print("\nDo classification with different training directory and test directory")
    print("\nTraining directory: " + config.get_record_dir())
    print("\nTest directory: " + SEPARATED_TEST_RECORD_DIR)
    file_contents, labels = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=True)
    test_data, test_labels = input_parser.parse_input_files(
        SEPARATED_TEST_RECORD_DIR, combine_sc_vectors=True)
    if len(file_contents) != len(test_data):
        raise ValueError(
            "Different number of input files in training directory and test directory - must be equal"
        )
    # Train on the original labels, test on the labels appended below.
    train_index = list(range(0, len(labels)))
    test_index = list(range(len(labels), len(labels) + len(test_labels)))
    # Append each test file's records to its corresponding training file
    # (pairwise zip instead of the original index loop).
    for training_file_content, test_file_content in zip(file_contents, test_data):
        training_file_content.records.extend(test_file_content.records)
    labels.extend(test_labels)
    X = ft.extract_preconfigured_features(file_contents)
    Y = pd.Series(labels)
    Y_test, predictions, accuracy = app_classifier.do_classification(
        X, Y, train_index, test_index)
    print(classification_report(Y_test, predictions))
def main():
    """Run k-fold cross-validation over all input files combined."""
    timing.start_measurement()
    print("Do combined classification using all input files")
    parsed_contents, parsed_labels = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=True)
    feature_matrix = ft.extract_preconfigured_features(parsed_contents)
    targets = pd.Series(parsed_labels)
    app_classifier.do_kfold_cross_validation(feature_matrix, targets)
    timing.stop_measurement()
def main():
    """Run k-fold cross-validation over all input files and plot accuracies."""
    # NOTE(review): a `main` is also defined earlier in this file; Python
    # keeps only the last definition, so this one wins at import time —
    # confirm which entry point is intended.
    timing.start_measurement()
    print("Do combined classification using all input files")
    parsed_contents, parsed_labels = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=True)
    features = ft.extract_preconfigured_features(parsed_contents)
    targets = pd.Series(parsed_labels)
    (_, total_first_acc, total_second_acc, total_third_acc,
     total_single_accuracies) = app_classifier.do_kfold_cross_validation(
         features, targets)
    # First figure: the three aggregate accuracies; second: per-fold values.
    plt.plot([total_first_acc, total_second_acc, total_third_acc])
    plt.show()
    plt.plot(total_single_accuracies)
    plt.show()
    timing.stop_measurement()
def explorative_classification():
    """Evaluate every input file on its own and print a ranked summary.

    Each file is cross-validated separately; results are printed sorted
    by accuracy, best first.
    """
    contents, label_list = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=False)
    outcomes = []
    for file_index, content in enumerate(contents):
        print("\nEvaluate ", content.file_name)
        feature_input = [content]
        targets = pd.Series(label_list[file_index])
        total_accuracy = app_classifier.do_kfold_cross_validation(
            feature_input, targets, verbose=False)
        outcomes.append(ClassificationResult(total_accuracy, content.file_name))
    # Best-performing files first.
    outcomes.sort(key=lambda result: result.accuracy, reverse=True)
    print("\nSummary for files in " + config.get_record_dir() + ":\n")
    for outcome in outcomes:
        print(outcome)
def explorative_classification():
    """Evaluate each input file on its own via k-fold cross-validation.

    Prints four ranked summaries (overall, first, second and third
    accuracies) and writes every file's per-fold accuracies to the
    results directory under the current record directory, one output
    file per input file, as "<fold>, <accuracy>" lines.
    """
    # NOTE(review): an `explorative_classification` is also defined earlier
    # in this file; this later definition shadows it — confirm intent.
    file_contents, label_list = input_parser.parse_input_files(
        config.get_record_dir(), combine_sc_vectors=False)
    results = []
    results_first = []
    results_second = []
    results_third = []
    single_results = []
    for idx, fc in enumerate(file_contents):
        labels = label_list[idx]
        print("\nEvaluate ", fc.file_name)
        X = [fc]
        Y = pd.Series(labels)
        # file_name[:-4] strips the (assumed 3-char) extension — TODO confirm.
        (total_accuracy, total_first_acc, total_second_acc, total_third_acc,
         total_single_accuracies) = app_classifier.do_kfold_cross_validation(
             X, Y, verbose=True, file_name=fc.file_name[:-4])
        results.append(
            ClassificationResult(round_float(total_accuracy), fc.file_name))
        results_first.append(
            ClassificationResult(round_float(total_first_acc), fc.file_name))
        results_second.append(
            ClassificationResult(round_float(total_second_acc), fc.file_name))
        results_third.append(
            ClassificationResult(round_float(total_third_acc), fc.file_name))
        # One ClassificationResult per fold for this file.
        single_results.append([
            ClassificationResult(round_float(acc), fc.file_name)
            for acc in total_single_accuracies
        ])
    # Rank every summary best-first.
    for summary in (results, results_first, results_second, results_third):
        summary.sort(key=lambda result: result.accuracy, reverse=True)
    print("\nSummary for files in " + config.get_record_dir() + ":\n")
    for r in results:
        print(r)
    print("\nSummary of first for files in " + config.get_record_dir() + ":\n")
    for r in results_first:
        print(r)
    print("\nSummary of second for files in " + config.get_record_dir() + ":\n")
    for r in results_second:
        print(r)
    print("\nSummary of third for files in " + config.get_record_dir() + ":\n")
    for r in results_third:
        print(r)
    # Persist per-fold accuracies. The original looped over
    # zip(single_results), which merely wraps each per-file list in a
    # 1-tuple; iterate the lists directly instead. Directory creation is
    # hoisted out of the loop (only when there is something to write).
    results_dir = (config.RECORD_BASE_DIR + config.get_record_dir()
                   + config.RESULTS_DIR)
    if single_results and not os.path.exists(results_dir):
        os.makedirs(results_dir)
    for per_file_results in single_results:
        if not per_file_results:
            continue  # nothing to write; original would IndexError here
        # All entries in the list share one file_name, so index 0 names it.
        with open(results_dir + per_file_results[0].file_name, "w") as out:
            for fold_idx, r in enumerate(per_file_results):
                out.write(str(fold_idx + 1) + ", " + str(r.accuracy) + "\n")