def apply_classifier(self, final_classifier, experiment, all_test_target_scores,
                     all_test_decoy_scores, table):

    lambda_ = CONFIG.get("final_statistics.lambda")

    mu, nu, final_score = self.calculate_params_for_d_score(final_classifier, experiment)
    experiment["d_score"] = (final_score - mu) / nu

    all_tt_scores = experiment.get_top_target_peaks()["d_score"]

    df_raw_stat = calculate_final_statistics(all_tt_scores, all_test_target_scores,
                                             all_test_decoy_scores, lambda_)

    scored_table = self.enrich_table_with_results(table, experiment, df_raw_stat)

    final_statistics = final_err_table(df_raw_stat)
    summary_statistics = summary_err_table(df_raw_stat)

    needed_to_persist = (final_classifier, mu, nu,
                         df_raw_stat.loc[:, ["svalue", "qvalue", "cutoff"]])
    return (summary_statistics, final_statistics, scored_table), needed_to_persist
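
# The "d_score" computed above is a z-score style normalization: classifier
# scores are centered and scaled by the mean (mu) and standard deviation (nu)
# that calculate_params_for_d_score estimates, which in pyprophet are taken
# from the top decoy peak groups. A minimal standalone sketch of that
# normalization, assuming plain numpy arrays of scores (d_score_sketch is a
# hypothetical helper for illustration, not part of this codebase):

import numpy as np

def d_score_sketch(target_scores, top_decoy_scores):
    # Center and scale target scores by the decoy score distribution,
    # so a d_score of 2.0 means two decoy standard deviations above
    # the decoy mean.
    mu = np.mean(top_decoy_scores)
    nu = np.std(top_decoy_scores)
    return (np.asarray(target_scores) - mu) / nu
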
def apply_classifier(self, final_classifier, experiment, all_test_target_scores,
                     all_test_decoy_scores, table, p_score=False):

    lambda_ = CONFIG.get("final_statistics.lambda")

    # Normalize classifier scores to d_scores using the estimated mean (mu)
    # and standard deviation (nu).
    mu, nu, final_score = self.calculate_params_for_d_score(final_classifier, experiment)
    experiment["d_score"] = (final_score - mu) / nu

    # Compute the FDR either over all target peak groups or only over the
    # best-scoring peak group per chromatogram.
    if CONFIG.get("final_statistics.fdr_all_pg"):
        all_tt_scores = experiment.get_target_peaks()["d_score"]
    else:
        all_tt_scores = experiment.get_top_target_peaks()["d_score"]

    df_raw_stat, num_null, num_total = calculate_final_statistics(all_tt_scores,
                                                                  all_test_target_scores,
                                                                  all_test_decoy_scores,
                                                                  lambda_)

    scored_table = self.enrich_table_with_results(table, experiment, df_raw_stat)

    if CONFIG.get("compute.probabilities"):
        logging.info("")
        logging.info("Posterior Probability estimation:")
        logging.info("Estimated number of null %0.2f out of a total of %s." %
                     (num_null, num_total))

        # Note that num_null and num_total are the sum of the cross-validated
        # statistics computed before, therefore the total number of data
        # points selected will be
        #   len(data) * xeval.fraction * xeval.num_iter
        prior_chrom_null = num_null * 1.0 / num_total
        number_true_chromatograms = (1.0 - prior_chrom_null) * len(experiment.get_top_target_peaks().df)
        number_target_pg = len(Experiment(experiment.df[experiment.df.is_decoy == False]).df)
        prior_peakgroup_true = number_true_chromatograms / number_target_pg

        logging.info("Prior for a peakgroup: %s" % prior_peakgroup_true)
        logging.info("Prior for a chromatogram: %s" % str(1 - prior_chrom_null))
        logging.info("Estimated number of true chromatograms: %s out of %s" %
                     (number_true_chromatograms, len(experiment.get_top_target_peaks().df)))
        logging.info("Number of target data: %s" % number_target_pg)

        # pg_score = posterior probability for each peakgroup
        # h_score  = posterior probability for the hypothesis that this
        #            peakgroup is true (and all others are false)
        # h0_score = posterior probability for the hypothesis that no
        #            peakgroup is true
        pp_pg_pvalues = posterior_pg_prob(experiment, prior_peakgroup_true, lambda_=lambda_)
        experiment.df["pg_score"] = pp_pg_pvalues
        scored_table = scored_table.join(experiment[["pg_score"]])

        allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null)
        experiment.df["h_score"] = allhypothesis
        experiment.df["h0_score"] = h0
        scored_table = scored_table.join(experiment[["h_score", "h0_score"]])

    final_statistics = final_err_table(df_raw_stat)
    summary_statistics = summary_err_table(df_raw_stat)

    needed_to_persist = (final_classifier, mu, nu,
                         df_raw_stat.loc[:, ["svalue", "qvalue", "cutoff"]],
                         num_null, num_total)
    return (summary_statistics, final_statistics, scored_table), needed_to_persist
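
# The prior computation above is plain arithmetic: the fraction of null top
# peak groups (num_null / num_total) gives the chromatogram-level null prior;
# its complement, scaled by the number of top target peak groups, estimates
# how many chromatograms contain a true peak, and dividing by the total
# number of target peak groups yields the peak-group-level prior. A hedged
# standalone sketch of that arithmetic (estimate_priors is a hypothetical
# helper for illustration, not part of this codebase):

def estimate_priors(num_null, num_total, n_top_target_pg, n_target_pg):
    # Prior that a chromatogram contains no true peak group.
    prior_chrom_null = float(num_null) / num_total
    # Expected number of chromatograms that contain a true peak group.
    n_true_chromatograms = (1.0 - prior_chrom_null) * n_top_target_pg
    # Prior that any individual target peak group is true.
    prior_peakgroup_true = n_true_chromatograms / n_target_pg
    return prior_chrom_null, prior_peakgroup_true

# Example: 600 estimated nulls out of 1000 gives a chromatogram null prior of
# 0.6; with 1000 top target peak groups and 8000 target peak groups overall,
# the peak-group prior is (0.4 * 1000) / 8000 = 0.05.
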
def get_error_stats(self):
    return final_err_table(self.error_stat.df), summary_err_table(self.error_stat.df)