def display_min_max_avg_warnings_per_bug_total():
    """Print per-tool warning statistics for the buggy and the fixed versions.

    For each of the two directories (``./b/`` and ``./f/``) the parsed results
    of the three tools are loaded, the value returned by
    ``get_min_max_avg_warnings_per_bug_total`` is printed next to the tool
    name, and element 3 of each result is summed and printed as the total
    number of warnings over all tools.
    """
    print("\nMin, Max, Avg (warnings per bug) and Total number of warnings")

    sections = (
        ("\nBuggy versions:\n", './b/'),
        ("\nFixed versions:\n", './f/'),
    )
    for title, rel_path in sections:
        print(title)
        ep_all = load_parsed_ep(rel_path + 'ep_parsed.json')
        inf_all = load_parsed_inf(rel_path + 'inf_parsed.json')
        sb_all = load_parsed_sb(rel_path + 'sb_parsed.json')

        # Both sections print the same statistics, and index 3 of them is
        # summed below as the total, so "Total" is the last column in both
        # headers.
        # NOTE(review): order presumably (min, max, avg, total) - confirm
        # against get_min_max_avg_warnings_per_bug_total.
        print("Tool Min. Max. Avg. Total")

        total_warnings = 0
        per_tool = (
            ("Errorprone", ep_all),
            ("Infer", inf_all),
            ("Spotbugs", sb_all),
        )
        for tool_name, parsed in per_tool:
            # Compute the statistics once and reuse them for the total.
            stats = get_min_max_avg_warnings_per_bug_total(parsed)
            print(tool_name, stats)
            total_warnings += stats[3]

        print("\nTotal number of warnings by all tools:", total_warnings)
def get_true_detected_bugs_by_each_tool():
    """Print and save the bugs each tool detected under either approach.

    For every tool the ``*_succ.json`` and ``*_part.json`` files of both
    ``./diffs_warnings/`` and ``./removed_warnings/`` are loaded and the bug
    sets returned by ``get_bugs_from_warnings`` are united. The size of each
    union is printed and its members are written, one per line, to
    ``ep_detected``, ``inf_detected`` and ``sb_detected`` in the current
    working directory.

    The ``*_warnings.json`` files are not loaded: nothing in this function
    uses them.
    """
    approach_dirs = ('./diffs_warnings/', './removed_warnings/')
    tools = (
        ("Ep:", "ep", load_parsed_ep),
        ("Inf:", "inf", load_parsed_inf),
        ("Sb:", "sb", load_parsed_sb),
    )

    detected = []
    for label, prefix, loader in tools:
        bugs = set()
        for rel_path in approach_dirs:
            for kind in ("succ", "part"):
                warnings = loader(rel_path + prefix + "_" + kind + ".json")
                bugs |= get_bugs_from_warnings(warnings)
        detected.append((label, prefix, bugs))

    print("\nTrue bugs found by each tool\n")
    for label, prefix, bugs in detected:
        print(label, len(bugs))
        out_path = os.path.join(os.getcwd(), prefix + "_detected")
        with open(out_path, 'w') as f:
            # Sort so the file content is stable across runs; raw set
            # iteration order is not guaranteed.
            f.write("\n".join(sorted(bugs)))
    print()
def get_warnings_bugs_from_each_approach():
    """Print warning and bug counts for each automatic matching approach.

    First table: per tool, the number of warnings (W) and bugs (B) loaded
    from ``./diffs_warnings/`` and ``./removed_warnings/``, followed by the
    summed warning count and the size of the union of both bug sets.

    Second table: per tool, the number of entries returned by
    ``load_json_list`` for each directory and the number of entries left
    after ``get_list_of_uniq_jsons`` is applied to their concatenation.

    Finally the number of distinct bugs over all tools and both approaches
    is printed.
    """
    print("\nWarnings and bugs from each automatic matching approach")
    print(
        "** warnings for combined approach are not unique (duplicates exist) **\n"
    )

    diff_dir = './diffs_warnings/'
    fixed_dir = './removed_warnings/'
    tools = (
        ("Error Prone ", "ep", load_parsed_ep),
        ("Infer ", "inf", load_parsed_inf),
        ("SpotBugs ", "sb", load_parsed_sb),
    )

    diff_warnings = [
        loader(diff_dir + prefix + "_warnings.json")
        for _, prefix, loader in tools
    ]
    fixed_warnings = [
        loader(fixed_dir + prefix + "_warnings.json")
        for _, prefix, loader in tools
    ]

    print("Tool Diff-based Fixed-based Combined")
    print(" W B W B W B")

    all_bugs = set()
    for (label, _, _), w_diff, w_fixed in zip(tools, diff_warnings,
                                              fixed_warnings):
        b_diff = get_bugs_from_warnings(w_diff)
        b_fixed = get_bugs_from_warnings(w_fixed)
        b_combined = b_diff | b_fixed
        all_bugs |= b_combined
        print(label, len(w_diff), len(b_diff), len(w_fixed), len(b_fixed),
              len(w_diff) + len(w_fixed), len(b_combined))

    # The heading used to read "from each approachcombined approach".
    print("\nUnique warnings from each approach and combined approach:\n")

    raw_tools = (("Ep ", "ep"), ("Inf", "inf"), ("Sb ", "sb"))
    raw_diff = [
        load_json_list(diff_dir + prefix + "_warnings.json")
        for _, prefix in raw_tools
    ]
    raw_fixed = [
        load_json_list(fixed_dir + prefix + "_warnings.json")
        for _, prefix in raw_tools
    ]
    for (label, _), j_diff, j_fixed in zip(raw_tools, raw_diff, raw_fixed):
        print(label, len(j_diff), len(j_fixed),
              len(get_list_of_uniq_jsons(j_diff + j_fixed)))

    print("\nUnique bugs from combined approach: ", len(all_bugs))
def _match_cell(bug, full, partial, mismatch):
    """Return the table cell for ``bug`` given three bug collections.

    The first collection that contains ``bug`` decides the cell: ``"& F "``
    for ``full``, ``"& P "`` for ``partial``, ``"& M "`` for ``mismatch``.
    ``"& - "`` is returned when none of them contains it.
    """
    if bug in full:
        return "& F "
    if bug in partial:
        return "& P "
    if bug in mismatch:
        return "& M "
    return "& - "


def get_cand_detected_bugs_tools_table():
    """Print one ``&``-separated row per candidate bug with its match status.

    Candidate bugs are the distinct ``proj`` values of all warnings in the
    ``*_warnings.json`` files of ``./diffs_warnings/`` and
    ``./removed_warnings/``. Each row holds nine cells: three tools for the
    diffs-based directory, three for the removed-warnings directory, and
    three for both combined. A cell is F, P or M when the bug appears in the
    tool's ``succ``, ``part`` or ``fail`` file (checked in that order) and
    ``-`` otherwise. Every row ends with ``\\\\``.
    """
    print("\nAll candidate and detected bugs by each tool and each approach\n")

    tool_loaders = (
        ("ep", load_parsed_ep),
        ("inf", load_parsed_inf),
        ("sb", load_parsed_sb),
    )

    bugs = set()
    # outcome_sets[approach][tool] == (succ bugs, part bugs, fail bugs).
    # Computed once here; these sets do not depend on the row being printed.
    outcome_sets = []
    for rel_path in ('./diffs_warnings/', './removed_warnings/'):
        per_tool = []
        for prefix, loader in tool_loaders:
            candidates = loader(rel_path + prefix + "_warnings.json")
            bugs.update(w.proj for w in candidates)
            per_tool.append(tuple(
                get_bugs_from_warnings(
                    loader(rel_path + prefix + "_" + kind + ".json"))
                for kind in ("succ", "part", "fail")))
        outcome_sets.append(per_tool)

    diff_sets, removed_sets = outcome_sets
    # Combined status: a bug counts for an outcome if either approach has it.
    combined_sets = [
        tuple(d | r for d, r in zip(diff_tool, removed_tool))
        for diff_tool, removed_tool in zip(diff_sets, removed_sets)
    ]

    # Column groups are emitted as diffs-based, removed-warnings, combined,
    # so the header lists them in that order.
    print(" Diffs-based Removed Warnings Combined")
    print("Tool Ep Inf SB Ep Inf SB Ep Inf SB")

    for b in sorted(bugs):
        cells = []
        for group in (diff_sets, removed_sets, combined_sets):
            for full, partial, mismatch in group:
                cells.append(_match_cell(b, full, partial, mismatch))
        print(b + " " + "".join(cells) + "\\\\")
    print()
def get_cand_detected_bugs_tools_sets():
    """Print candidate and detected bug counts per tool and per approach.

    Three tables are printed:

    * candidate bugs: bug counts of the ``*_warnings.json`` files for the
      diffs-based and the removed-warnings directory, and of their
      intersection;
    * true bugs: the same three numbers for the union of the ``succ`` and
      ``part`` files of each directory;
    * true bugs per tool over both directories, followed by the sizes of the
      pairwise and three-way intersections between tools.
    """
    print("\nCandidate and detected bugs by each tool and each approach")

    loaders = (
        ("ep", load_parsed_ep),
        ("inf", load_parsed_inf),
        ("sb", load_parsed_sb),
    )
    folders = (("diff", './diffs_warnings/'), ("fixed", './removed_warnings/'))

    # parsed[tool, approach, kind] -> warnings loaded from the matching file.
    parsed = {}
    for approach, folder in folders:
        for tool, loader in loaders:
            for kind in ("warnings", "succ", "part"):
                parsed[tool, approach, kind] = loader(
                    folder + tool + "_" + kind + ".json")

    table_rows = (('"Error Prone"', "ep"), ("Infer", "inf"),
                  ("Spotbugs", "sb"))

    print("\nCandidate bugs:\n")
    print("Tool Diff-based Fixed-based Both")
    for label, tool in table_rows:
        cand_diff = get_bugs_from_warnings(parsed[tool, "diff", "warnings"])
        cand_fixed = get_bugs_from_warnings(parsed[tool, "fixed", "warnings"])
        print(label, len(cand_diff), len(cand_fixed),
              len(cand_diff & cand_fixed))

    print("\nTrue bugs (fully or partially flagged)\n")
    print("Tool Diff-based Fixed-based Both")
    for label, tool in table_rows:
        true_diff = (get_bugs_from_warnings(parsed[tool, "diff", "succ"])
                     | get_bugs_from_warnings(parsed[tool, "diff", "part"]))
        true_fixed = (get_bugs_from_warnings(parsed[tool, "fixed", "succ"])
                      | get_bugs_from_warnings(parsed[tool, "fixed", "part"]))
        print(label, len(true_diff), len(true_fixed),
              len(true_diff & true_fixed))

    print("\nTrue bugs found by all tools\n")
    found = {}
    for label, tool in (("Ep:", "ep"), ("Inf:", "inf"), ("Sb:", "sb")):
        found[tool] = (
            get_bugs_from_warnings(parsed[tool, "diff", "succ"])
            | get_bugs_from_warnings(parsed[tool, "fixed", "succ"])
            | get_bugs_from_warnings(parsed[tool, "diff", "part"])
            | get_bugs_from_warnings(parsed[tool, "fixed", "part"]))
        print(label, len(found[tool]))

    ep_found, inf_found, sb_found = found["ep"], found["inf"], found["sb"]
    print("Ep & Inf:", len(ep_found & inf_found))
    print("Ep & Sb:", len(ep_found & sb_found))
    print("Inf & Sb:", len(inf_found & sb_found))
    print("Ep & Inf & Sb:", len(ep_found & inf_found & sb_found))
def get_manually_inspected_warnings_bugs():
    """Print manual-inspection results on warning level and on bug level.

    For ``./diffs_warnings/`` and then ``./removed_warnings/`` the
    ``warnings``, ``succ``, ``part`` and ``fail`` files of each tool are
    loaded. Two tables are printed per directory: the number of loaded
    warnings per file, and the value of ``count_bugs_from_warnings`` per
    file, in the column order succ, part, fail, warnings. Afterwards
    ``get_manually_inspected_warnings_bugs_combined_approach`` is called.
    """
    print(
        "\nManual inspection of warnings aggregated on warnings and bugs levels"
    )

    column_header = 'Tool "Full match" "Partial match" Mismatch Total'
    tools = (
        ('"Error Prone"', "ep", load_parsed_ep),
        ("Infer", "inf", load_parsed_inf),
        ("Spotbugs", "sb", load_parsed_sb),
    )
    sections = (
        ("\nDiffs-based approach:\n", './diffs_warnings/'),
        ("\nFixed warnings approach\n", './removed_warnings/'),
    )

    for heading, folder in sections:
        print(heading)

        rows = []
        for label, prefix, loader in tools:
            base = folder + prefix
            total = loader(base + "_warnings.json")
            full = loader(base + "_succ.json")
            partial = loader(base + "_part.json")
            mismatch = loader(base + "_fail.json")
            # Stored in the order the columns are printed.
            rows.append((label, (full, partial, mismatch, total)))

        print("Warnings:\n")
        print(column_header)
        for label, groups in rows:
            print(label, *[len(g) for g in groups])

        print("\nBugs:\n")
        print(column_header)
        for label, groups in rows:
            print(label, *[count_bugs_from_warnings(g) for g in groups])

    get_manually_inspected_warnings_bugs_combined_approach()
cls = d.cls ep_list = find_msg_by_proj_and_cls(proj, cls, ep_res_set) diff_ep, lines = match_diff_ep(d, ep_list) if diff_ep: ep_count += len(diff_ep) ep_all_matches.append(LineMatchesToMessages(lines, diff_ep)) diffs_match_ep.extend(diff_ep) # print(ep_count) # return ep_all_matches return diffs_match_ep if __name__ == '__main__': """Get lines matches between each tool and bug fixes diffs""" diffs_file = os.path.join(os.getcwd(), sys.argv[1]) diffs = load_parsed_diffs(diffs_file) ep_file = os.path.join(os.getcwd(), sys.argv[2]) ep_res_set = load_parsed_ep(ep_file) diffs_ep = get_hits_diffs_ep(diffs, ep_res_set) output_file_name = "ep_diffs_warnings.json" with open(output_file_name, "w") as file: json.dump(diffs_ep, file, cls=CustomEncoder, indent=4)
print("\nMin and Max of LoC per bug", min(locs), max(locs)) print("\nSum of all LoC of all bugs", sum(locs)) print("\nBins of Diffs\n") hist, edges = numpy.histogram(diffs, [1, 5, 10, 15, 20, 25, 50, 75, 100, 200, 2000]) for i in range(len(hist)): print(edges[i], edges[i+1]-1, hist[i]) print("\nMin and Max of diff per bug", min(diffs), max(diffs)) ''' D4J stats per bug per tool ''' print("\nStats per bug\n") ep_all_b = load_parsed_ep('./b/ep_parsed.json') ep_b = how_many_warnings_per_bug(ep_all_b) ep_all_f = load_parsed_ep('./f/ep_parsed.json') ep_f = how_many_warnings_per_bug(ep_all_f) inf_all_b = load_parsed_inf('./b/inf_parsed.json') inf_b = how_many_warnings_per_bug(inf_all_b) inf_all_f = load_parsed_inf('./f/inf_parsed.json') inf_f = how_many_warnings_per_bug(inf_all_f) sb_all_b = load_parsed_sb('./b/sb_parsed.json') sb_b = how_many_warnings_per_bug(sb_all_b) sb_all_f = load_parsed_sb('./f/sb_parsed.json') sb_f = how_many_warnings_per_bug(sb_all_f) print("Bug Files LoC Diff Ep Inf Sb (from buggy versions)")
and msg.msg == msg2.msg and msg.code == msg2.code): return True return False def get_removed_warnings_ep(ep_b, ep_f): removed_warnings = [] for b_msg in ep_b: if not match_ep_msg_no_lines(b_msg, ep_f): removed_warnings.append(b_msg) return removed_warnings if __name__ == '__main__': """Get errors/warnings that disappeared in fixed versions""" ep_file = os.path.join(os.getcwd(), sys.argv[1]) ep_res_b = load_parsed_ep(ep_file) ep_file = os.path.join(os.getcwd(), sys.argv[2]) ep_res_f = load_parsed_ep(ep_file) warnings = get_removed_warnings_ep(ep_res_b, ep_res_f) output_file_name = "ep_removed_warnings.json" with open(output_file_name, "w") as file: json.dump(warnings, file, cls=CustomEncoder, indent=4)