def plot_pr_curve_mmr(Metric, fin_list_graphs, out_comp_nm):
    """Plot a precision-recall curve for the MMR measure over score thresholds.

    The score range of the predictions is divided into ``n_divs`` equally
    spaced thresholds starting at the minimum score. For every threshold the
    columns of ``Metric`` belonging to predictions scoring at least that
    threshold are passed to ``f1_mmr`` and the resulting precision/recall
    pair becomes one point of the curve.

    Args:
        Metric: Object supporting ``Metric[:, column_indices]`` indexing;
            presumably one column per entry of ``fin_list_graphs``
            (TODO confirm against caller).
        fin_list_graphs: Sequence of predictions; element ``[1]`` of each
            entry is read as the prediction score and converted to float.
        out_comp_nm: Path prefix; the figure is saved to
            ``out_comp_nm + '_pr_mmr.png'``.

    Returns:
        None. When ``fin_list_graphs`` is empty nothing is plotted or saved.
    """
    n_divs = 10
    scores_list = [float(pred_complex[1]) for pred_complex in fin_list_graphs]
    if not scores_list:
        # Without predictions min()/max() would raise ValueError and there is
        # no curve to draw, so skip the plot entirely.
        return

    min_score = min(scores_list)
    interval_len = (max(scores_list) - min_score) / float(n_divs)
    thresholds = [min_score + i * interval_len for i in range(n_divs)]

    precs = []
    recalls = []
    for thres in thresholds:
        # Indices of predictions whose score reaches the threshold.
        col_inds = [j for j, score in enumerate(scores_list) if score >= thres]
        # f1_mmr also returns the F1 value and the matching edges; only
        # precision and recall are needed for the curve.
        prec_MMR, recall_MMR, _f1_MMR, _max_matching_edges = f1_mmr(
            Metric[:, col_inds])
        precs.append(prec_MMR)
        recalls.append(recall_MMR)

    fig = plt_figure()
    try:
        plt_plot(recalls, precs, '.-')
        plt_ylabel("Precision")
        plt_xlabel("Recall")
        plt_title("PR curve for MMR measure")
        plt_savefig(out_comp_nm + '_pr_mmr.png')
    finally:
        # Close the figure even if plotting or saving fails.
        plt_close(fig)
def plot_f1_scores(best_matches, out_comp_nm, suffix, prefix):
    """Summarize best-match F1 scores: histogram plus appended text metrics.

    Args:
        best_matches: Sequence of match records; element ``[2]`` of each
            record is read as that match's F1 score.
        out_comp_nm: Path prefix for all output files.
        suffix: Inserted between ``out_comp_nm`` and the histogram file names.
        prefix: Written as a heading line in the metrics file and prepended
            to the histogram title.

    Returns:
        The mean of the F1 scores, or ``0.0`` when ``best_matches`` is empty.

    Side effects:
        * If the scores take more than one distinct value, a histogram is
          saved as ``.eps``, ``.tiff`` and ``.jpg`` (this also sets the
          global matplotlib font family and size).
        * Appends the averaged score and the counts of scores equal to 1
          and to 0 to ``out_comp_nm + '_metrics.out'``.
    """
    max_f1_scores = [match[2] for match in best_matches]

    # An empty match list previously raised ZeroDivisionError; report 0.0.
    if max_f1_scores:
        avged_f1_score = sum(max_f1_scores) / float(len(max_f1_scores))
    else:
        avged_f1_score = 0.0

    # Counter returns 0 for absent keys, so no membership checks are needed.
    f1_score_counts = Counter(max_f1_scores)
    n_perfect_matches = f1_score_counts[1]
    n_no_matches = f1_score_counts[0]

    # A histogram is only drawn when the scores are not all identical.
    if len(set(max_f1_scores)) > 1:
        fig = plt_figure(figsize=(12, 10))
        try:
            plt_rcParams["font.family"] = "Times New Roman"
            plt_rcParams["font.size"] = 16
            sns_distplot(max_f1_scores, hist=True, kde=False)
            plt_xlabel("F1 score")
            plt_ylabel('Frequency')
            plt_title(prefix + "F1 score distribution")
            hist_base = out_comp_nm + suffix + '_f1_scores_histogram.'
            for fmt in ('eps', 'tiff', 'jpg'):
                plt_savefig(hist_base + fmt, dpi=350, format=fmt)
        finally:
            # Close the figure even if one of the saves fails.
            plt_close(fig)

    with open(out_comp_nm + '_metrics.out', "a") as fid:
        print(prefix, file=fid)
        print("Averaged F1 score = %.3f" % avged_f1_score, file=fid)
        print("No. of perfectly recalled matches = %d" % n_perfect_matches,
              file=fid)
        print("No. of matches not recalled at all = %d" % n_no_matches,
              file=fid)
    return avged_f1_score
def plot_size_dists(known_complex_nodes_list, fin_list_graphs, sizes_orig,
                    out_comp_nm):
    """Plot the size distributions of known and predicted complexes.

    Three density curves are drawn on one figure: sizes of the known
    complexes, ``sizes_orig`` (labelled "predicted") and the sizes of the
    first element of each entry of ``fin_list_graphs`` (labelled
    "predicted_known_prots").

    Args:
        known_complex_nodes_list: Sequence of sized collections; ``len`` of
            each entry is used as a known-complex size.
        fin_list_graphs: Sequence of predictions; ``len(entry[0])`` is used
            as the size of each prediction.
        sizes_orig: Iterable of sizes plotted under the "predicted" label.
        out_comp_nm: Path prefix; the figure is saved to
            ``out_comp_nm + '_size_dists_known_pred.png'``.

    Returns:
        None. If any of the three size series has at most one distinct
        value, no figure is created or saved.
    """
    sizes_known = [len(comp) for comp in known_complex_nodes_list]
    sizes_new = [len(comp[0]) for comp in fin_list_graphs]

    # Kept ahead of the guards so the global font setting is applied exactly
    # as before, even when no plot ends up being produced.
    plt_rc('font', size=14)

    series = (
        (sizes_known, "known"),
        (sizes_orig, "predicted"),
        (sizes_new, "predicted_known_prots"),
    )
    # Check every series before opening a figure: the previous code created
    # the figure first and returned early without closing it.
    if any(len(set(sizes)) <= 1 for sizes, _ in series):
        return

    fig = plt_figure(figsize=(8, 6), dpi=96)
    try:
        for sizes, label in series:
            sns_distplot(sizes, hist=False, label=label)
        plt_ylabel("Probability density")
        plt_xlabel("Complex Size (number of proteins)")
        plt_title("Complex size distributions")
        plt_savefig(out_comp_nm + '_size_dists_known_pred.png')
    finally:
        # Close the figure even if drawing or saving fails.
        plt_close(fig)
# Tabulate the collected metric records, indexed and sorted by the swept
# parameter `par`, and write them to <direct>/<par><o_f>_metrics.csv.
df = pd.DataFrame(all_sets)
df = df.set_index(par)
df = df.sort_index()
df.to_csv('./' + args.direct + '/' + par + o_f + '_metrics.csv')
# For the 'humap' directory the row labelled '2stage clustering' is removed
# before the numeric conversion below.
if args.direct == 'humap':
    df = df.drop('2stage clustering')
# Cast every column to float so idxmax() and plotting work on numbers.
df[list(df.columns)] = df[list(df.columns)].astype(float)
# Print, for each metric column, the index label at which it is largest.
print(df.idxmax())
#print(df)
# One subplot per metric column, saved to <direct>/<par><o_f>_metrics.png.
# NOTE(review): df.plot is not given an axis and appears to draw on its own
# figure, so `fig` may be an empty figure and the plotted one may stay open
# after plt_close(fig) - confirm.
# NOTE(review): layout=(7, 2) provides 14 subplot slots; presumably there
# are never more than 14 metric columns - verify.
fig = plt_figure()
df.plot(subplots=True, style='-.', layout=(7, 2), figsize=(15, 15))
plt_savefig('./' + args.direct + '/' + par + o_f + '_metrics.png')
plt_close(fig)
# Plot PR curves for MMR and Qi et al based metrics
# Sort the (recall, precision) pairs by recall so each curve is drawn left
# to right, then unzip them back into two parallel tuples.
lists = sorted(zip(*[recalls_MMR, precisions_MMR]))
recalls_MMR, precisions_MMR = list(zip(*lists))
lists = sorted(zip(*[recalls_qi, precisions_qi]))
recalls_qi, precisions_qi = list(zip(*lists))
# Both curves share one figure: MMR in black, Qi in blue; the legend order
# matches the plotting order.
fig = plt_figure()
plt_plot(recalls_MMR, precisions_MMR, 'k.-')
plt_plot(recalls_qi, precisions_qi, 'b.-')
plt_xlabel('Recall')
plt_ylabel('Precision')
plt_title('PR curve over ' + par)
plt_legend(['MMR', 'Qi'])
plt_savefig('./' + args.direct + '/pr_' + par + o_f + '_MMR_qi.png')
plt_close(fig)