Пример #1
0
def plot_pr_curve_mmr(Metric, fin_list_graphs, out_comp_nm):
    """Save a precision-recall curve of the MMR measure over score cutoffs.

    The score range of the predictions is divided into ten equal steps that
    start at the minimum score. For every cutoff, the columns of ``Metric``
    whose prediction score is at or above the cutoff are handed to ``f1_mmr``
    and the precision and recall it returns are recorded as one curve point.

    Args:
        Metric: Object indexable as ``Metric[:, column_indices]``; column
            ``j`` is paired with ``fin_list_graphs[j]``.
        fin_list_graphs: Sequence of entries whose item ``[1]`` is a score
            convertible to ``float``.
        out_comp_nm: Output path prefix; the figure is written to
            ``out_comp_nm + '_pr_mmr.png'``.
    """
    n_divs = 10
    scores = [float(entry[1]) for entry in fin_list_graphs]
    lowest = min(scores)
    step = (max(scores) - lowest) / float(n_divs)

    precs = []
    recalls = []
    for k in range(n_divs):
        cutoff = lowest + k * step
        # Column indices of the predictions scoring at or above the cutoff.
        kept_cols = [idx for idx, val in enumerate(scores) if val >= cutoff]
        # Only precision and recall are plotted; the F1 value and the
        # matching edges returned alongside them are not needed here.
        prec, rec, _f1, _matching_edges = f1_mmr(Metric[:, kept_cols])
        precs.append(prec)
        recalls.append(rec)

    fig = plt_figure()
    plt_plot(recalls, precs, '.-')
    plt_ylabel("Precision")
    plt_xlabel("Recall")
    plt_title("PR curve for MMR measure")
    plt_savefig(out_comp_nm + '_pr_mmr.png')
    plt_close(fig)
Пример #2
0
def plot_f1_scores(best_matches, out_comp_nm, suffix, prefix):
    """Summarise the F1 scores of the best matches and return their mean.

    A histogram of the scores is saved in EPS, TIFF and JPG form when the
    scores take more than one distinct value. A short text report is always
    appended to ``out_comp_nm + '_metrics.out'``.

    Args:
        best_matches: Sequence of entries whose item ``[2]`` is the F1 score.
        out_comp_nm: Output path prefix shared by the plots and the report.
        suffix: Text inserted between ``out_comp_nm`` and the histogram
            file-name stem.
        prefix: Heading printed at the top of the report entry and prepended
            to the plot title.

    Returns:
        The arithmetic mean of the F1 scores.
    """
    f1_values = [entry[2] for entry in best_matches]
    mean_f1 = sum(f1_values) / float(len(f1_values))

    tally = Counter(f1_values)
    # A Counter reports 0 for keys it has never seen, so no membership test
    # is required before reading the perfect (1) and missed (0) counts.
    n_perfect = tally[1]
    n_unmatched = tally[0]

    # The tally has one key per distinct score.
    if len(tally) > 1:
        fig = plt_figure(figsize=(12, 10))
        plt_rcParams["font.family"] = "Times New Roman"
        plt_rcParams["font.size"] = 16
        sns_distplot(f1_values, hist=True, kde=False)
        plt_xlabel("F1 score")
        plt_ylabel('Frequency')
        plt_title(prefix + "F1 score distribution")
        for fmt in ('eps', 'tiff', 'jpg'):
            plt_savefig(out_comp_nm + suffix + '_f1_scores_histogram.' + fmt,
                        dpi=350,
                        format=fmt)
        plt_close(fig)

    with open(out_comp_nm + '_metrics.out', "a") as report:
        print(prefix, file=report)
        print("Averaged F1 score = %.3f" % mean_f1, file=report)
        print("No. of perfectly recalled matches = %d" % n_perfect,
              file=report)
        print("No. of matches not recalled at all = %d" % n_unmatched,
              file=report)
    return mean_f1
Пример #3
0
def plot_size_dists(known_complex_nodes_list, fin_list_graphs, sizes_orig,
                    out_comp_nm):
    """Plot overlaid size distributions of known and predicted complexes.

    Three density curves are drawn on one figure: sizes of the known
    complexes, the supplied ``sizes_orig``, and sizes of the complexes in
    ``fin_list_graphs``. The figure is saved to
    ``out_comp_nm + '_size_dists_known_pred.png'``.

    Nothing is plotted or saved when any of the three size lists has fewer
    than two distinct values (presumably because a density curve cannot be
    estimated from a single value).

    Args:
        known_complex_nodes_list: Sequence of sized collections, one per
            known complex.
        fin_list_graphs: Sequence of entries whose item ``[0]`` is a sized
            collection, one per predicted complex.
        sizes_orig: Iterable of sizes, plotted under the label "predicted".
        out_comp_nm: Output path prefix for the saved figure.
    """
    sizes_known = [len(comp) for comp in known_complex_nodes_list]
    sizes_new = [len(comp[0]) for comp in fin_list_graphs]

    # Applied unconditionally, as before, because it changes global plotting
    # defaults that callers may have come to rely on.
    plt_rc('font', size=14)

    labelled_sizes = (
        (sizes_known, "known"),
        (sizes_orig, "predicted"),
        (sizes_new, "predicted_known_prots"),
    )

    # Validate every series before opening a figure: bailing out after the
    # figure exists would leave it open and waste work on curves that are
    # never saved.
    if any(len(set(sizes)) <= 1 for sizes, _ in labelled_sizes):
        return

    fig = plt_figure(figsize=(8, 6), dpi=96)
    try:
        for sizes, label in labelled_sizes:
            sns_distplot(sizes, hist=False, label=label)
        plt_ylabel("Probability density")
        plt_xlabel("Complex Size (number of proteins)")
        plt_title("Complex size distributions")
        plt_savefig(out_comp_nm + '_size_dists_known_pred.png')
    finally:
        # Release the figure even if plotting or saving fails.
        plt_close(fig)
# Collect the per-setting metric records into a table indexed by the swept
# parameter, ordered by that parameter, and persist it before filtering.
df = pd.DataFrame(all_sets)
df = df.set_index(par)
df = df.sort_index()
df.to_csv('./' + args.direct + '/' + par + o_f + '_metrics.csv')

# For the 'humap' run the row labelled '2stage clustering' is excluded from
# the numeric summary and the plots below.
if args.direct == 'humap':
    df = df.drop('2stage clustering')
df[list(df.columns)] = df[list(df.columns)].astype(float)
# Index label at which each metric column reaches its maximum.
print(df.idxmax())

# DataFrame.plot creates its own figure when no `ax` is supplied, so the
# figure is taken from the returned axes; opening one beforehand would only
# leave the pandas-created figure unclosed.
metric_axes = df.plot(subplots=True, style='-.', layout=(7, 2),
                      figsize=(15, 15))
fig = metric_axes.flat[0].get_figure()
plt_savefig('./' + args.direct + '/' + par + o_f + '_metrics.png')
plt_close(fig)

# Plot PR curves for MMR and Qi et al based metrics.
# Points are sorted by recall so each curve is drawn left to right.
lists = sorted(zip(recalls_MMR, precisions_MMR))
recalls_MMR, precisions_MMR = list(zip(*lists))

lists = sorted(zip(recalls_qi, precisions_qi))
recalls_qi, precisions_qi = list(zip(*lists))
fig = plt_figure()
plt_plot(recalls_MMR, precisions_MMR, 'k.-')
plt_plot(recalls_qi, precisions_qi, 'b.-')
plt_xlabel('Recall')
plt_ylabel('Precision')
plt_title('PR curve over ' + par)
plt_legend(['MMR', 'Qi'])
plt_savefig('./' + args.direct + '/pr_' + par + o_f + '_MMR_qi.png')
plt_close(fig)