def show_best_runs_df(best_pr, filters, save_csv=True):
    """
    Show the first 50 rows of best_pr under an h3 heading, optionally
    exporting every row to a CSV in the current directory.

    best_pr: DataFrame of best precision/recall rows to display.
    filters: object providing min_recall and classifier (used in the heading);
        it is also passed to munch_abbreviation_string() to build the CSV name.
    save_csv: when truthy, write all rows of best_pr to
        ./report_best_pr__<abbreviation>.csv before displaying.
    """
    heading = (
        f"Top 50 precisions at min_recall={filters.min_recall} {filters.classifier}"
    )
    hd("h3", heading)

    if save_csv:
        abbreviation = munch_abbreviation_string(filters)
        csv_filename = f"./report_best_pr__{abbreviation}.csv"
        # The export holds ALL rows, not just the 50 shown below.
        best_pr.to_csv(csv_filename, index=False, float_format="%g")
        n_rows = len(best_pr)
        print(f"(All {n_rows} rows exported to {csv_filename})")

    # Note: this changes the pandas display option globally (it is not
    # restored afterwards), so no column is elided in this or later displays.
    pd.set_option("display.max_columns", None)
    top_rows = best_pr.head(50)
    display(top_rows)
def plot_flu_info(run, flu, peps_prs_df, min_recall=0.005, classifier=None):
    """
    Report on the peptides that share a given fluorosequence: print their
    indices, show their precision at min_recall (when PR info is available),
    and plot a PR breakout for them.

    run: the run whose test call bag is queried and which is handed to
        plots.plot_pr_breakout().
    flu: string fluorosequence e.g. '..1......;0,0,0'
    peps_prs_df: the DataFrame from a notebook containing
        precision,recall,score for the peptides you're interested in.
    min_recall: forwarded to peps_prec_at_min_recall_df().
    classifier: None to use any available preferred classifier, or one of
        the supported classifiers in RunResult::get_available_classifiers(),
        e.g. 'rf', 'nn'
    """
    hd("h2", f"flu_info for: {flu}")

    call_bag = run.test_call_bag(classifier=classifier)
    pep_flu_df = call_bag.peps__pepstrs__flustrs__p2()
    # Keep only the rows whose fluorosequence string matches the request.
    matching_rows = pep_flu_df[pep_flu_df.flustr == flu]
    pep_iz = matching_rows.pep_i.values
    print(f"Peptides: {pep_iz}")

    if not peps_prs_df.empty:
        display(peps_prec_at_min_recall_df(peps_prs_df, min_recall=min_recall))
    else:
        # peps_prs_df is typically the large DataFrame holding PRS
        # information for ALL peptides across ALL runs. In a PTM notebook,
        # however, PRS info is often only loaded for peptides with PTM
        # locations, so it can be empty here. That DataFrame carries lots of
        # other information, which is what we actually want to display;
        # otherwise we would simply recompute the PRS for these peptides.
        print("No PR info found for peps. (No PTM peps with this flu?)")

    # See the fn in plots_dev_mhc which calls the lower-level _plot_pr_curve
    # using the prs info we already have in the df. The call below recomputes
    # the PR all over again for these peps!
    plot_title = f"PR for peptides with flu {flu} ({call_bag.classifier_name})"
    plots.plot_pr_breakout(run, pep_iz=pep_iz, _size=500, f_title=plot_title)
def print_titles(filters):
    """
    Emit the report's title headings, followed by a JSON dump of the
    filters in effect and a trailing blank line.

    filters: the filter settings, printed via json_print().
    """
    headings = (
        ("h1", "Best runs per peptide"),
        ("h3", "Filters"),
    )
    for tag, text in headings:
        hd(tag, text)

    json_print(filters)
    print()