示例#1
0
def show_best_runs_df(best_pr, filters, save_csv=True, top_n=50):
    """
    Show a heading and the leading rows of a best-precision DataFrame,
    optionally exporting every row to CSV first.

    best_pr: DataFrame to report on. Rows are displayed in the order given;
             no sorting is done here, so the caller is presumably expected
             to pass it already ranked by precision.

    filters: object exposing .min_recall and .classifier (used in the
             heading); it is also passed to munch_abbreviation_string to
             build the CSV filename.

    save_csv: when True, write ALL rows of best_pr (not just the displayed
              ones) to ./report_best_pr__<abbrev>.csv in the current
              working directory.

    top_n: how many leading rows to display. The same value is used in the
           heading so the text and the table cannot disagree. Default 50.
    """
    hd(
        "h3",
        f"Top {top_n} precisions at min_recall={filters.min_recall} {filters.classifier}",
    )
    if save_csv:
        csv_filename = f"./report_best_pr__{munch_abbreviation_string(filters)}.csv"
        best_pr.to_csv(csv_filename, index=False, float_format="%g")
        print(f"(All {len(best_pr)} rows exported to {csv_filename})")
    # Show every column instead of letting pandas elide the middle ones.
    # NOTE: set_option is process-wide and stays in effect after this call.
    pd.set_option("display.max_columns", None)
    display(best_pr.head(top_n))
示例#2
0
def plot_flu_info(run, flu, peps_prs_df, min_recall=0.005, classifier=None):
    """
    Report on the peptides associated with one fluorosequence: print their
    indices, show their precision-at-min-recall table (when PR info was
    supplied), and plot a PR breakout for them.

    run: the run to inspect; must provide test_call_bag(classifier=...) and
         is handed on to plots.plot_pr_breakout.

    flu: string fluorosequence e.g. '..1......;0,0,0'

    peps_prs_df: the DataFrame from a notebook containing precision,recall,score
                 for the peptides you're interested in.  May be empty, in
                 which case the table is skipped.

    min_recall: recall threshold forwarded to peps_prec_at_min_recall_df.

    classifier: None to use any available preferred classifier, or one of the
                supported classifiers in RunResult::get_available_classifiers(),
                e.g. 'rf', 'nn'
    """
    hd("h2", f"flu_info for: {flu}")

    call_bag = run.test_call_bag(classifier=classifier)
    pep_flu_df = call_bag.peps__pepstrs__flustrs__p2()
    has_this_flu = pep_flu_df.flustr == flu
    pep_indices = pep_flu_df[has_this_flu].pep_i.values
    print(f"Peptides: {pep_indices}")

    if not peps_prs_df.empty:
        display(peps_prec_at_min_recall_df(peps_prs_df, min_recall=min_recall))
    else:
        # The caller usually hands us the big PRS DataFrame covering ALL
        # peptides across ALL runs.  In a PTM notebook, though, PRS info
        # is often loaded only for peptides with PTM locations, so there
        # may be nothing here for this flu.  We don't recompute the PRS
        # ourselves because the point of this table is the extra
        # information that the caller's DataFrame carries.
        print("No PR info found for peps. (No PTM peps with this flu?)")

    # Note that the call below recomputes the PR from scratch for these
    # peps.  See the fn in plots_dev_mhc, which instead calls the
    # lower-level _plot_pr_curve with the prs info already held in the df.
    breakout_title = f"PR for peptides with flu {flu} ({call_bag.classifier_name})"
    plots.plot_pr_breakout(run, pep_iz=pep_indices, _size=500, f_title=breakout_title)
示例#3
0
def print_titles(filters):
    """
    Emit the report headings, then dump the active filters followed by a
    blank line.

    filters: the filter settings to show; passed unchanged to json_print.
    """
    headings = (
        ("h1", "Best runs per peptide"),
        ("h3", "Filters"),
    )
    for tag, text in headings:
        hd(tag, text)
    json_print(filters)
    print()