def measure_random_performance():
    """Write baseline metrics for a random 0/1 predictor, one row per smell.

    For every entry in ``smell_list`` the data found under
    ``TOKENIZER_OUT_PATH/<smell>/DIM`` is loaded with ``get_all_data``, a
    random binary prediction is drawn for each evaluation label, and the
    metrics returned by ``metrics_util.get_all_metrics_`` are appended as a
    CSV row to the output obtained from ``get_out_file("random_classifier")``.

    The predictions come from ``np.random.randint`` without any seeding in
    this function, so results differ between runs unless the global NumPy
    seed is fixed elsewhere.
    """
    outfile = get_out_file("random_classifier")
    write_result(outfile, "smell,auc,precision,recall,f1,average_precision\n")
    for smell in smell_list:
        data_path = os.path.join(TOKENIZER_OUT_PATH, smell, DIM)
        input_data = get_all_data(data_path, smell)
        # One random label in {0, 1} per evaluation sample.
        y_pred = np.random.randint(2, size=len(input_data.eval_labels))
        # The last two returned values (fpr/tpr) are not part of the report.
        auc, precision, recall, f1, average_precision, _fpr, _tpr = \
            metrics_util.get_all_metrics_(input_data.eval_labels, y_pred)
        metrics = (auc, precision, recall, f1, average_precision)
        write_result(
            outfile,
            ",".join([smell] + [str(value) for value in metrics]) + "\n")
def measure_performance_dummy_classifier():
    """Write baseline metrics for a most-frequent-class dummy classifier.

    For every entry in ``smell_list`` the data found under
    ``TOKENIZER_OUT_PATH/<smell>/DIM`` is loaded with ``get_all_data``. A
    ``DummyClassifier(strategy='most_frequent')`` is fitted on the training
    data with the labels passed through ``inputs.invert_labels``, then used
    to predict the evaluation data. The metrics returned by
    ``metrics_util.get_all_metrics_`` are appended as a CSV row to the output
    obtained from ``get_out_file("dummy_classifier")``.
    """
    outfile = get_out_file("dummy_classifier")
    write_result(outfile, "smell,auc,precision,recall,f1,average_precision\n")
    for smell in smell_list:
        data_path = os.path.join(TOKENIZER_OUT_PATH, smell, DIM)
        input_data = get_all_data(data_path, smell)
        clf = DummyClassifier(strategy='most_frequent', random_state=0)
        # NOTE(review): the classifier is fitted on inverted train labels but
        # scored against the un-inverted eval labels below -- confirm that
        # this asymmetry is intended.
        inverted_train_labels = inputs.invert_labels(input_data.train_labels)
        clf.fit(input_data.train_data, inverted_train_labels)
        y_pred = clf.predict(input_data.eval_data)
        # The last two returned values (fpr/tpr) are not part of the report.
        auc, precision, recall, f1, average_precision, _fpr, _tpr = \
            metrics_util.get_all_metrics_(input_data.eval_labels, y_pred)
        metrics = (auc, precision, recall, f1, average_precision)
        write_result(
            outfile,
            ",".join([smell] + [str(value) for value in metrics]) + "\n")