Example #1
    def apply_classifier(self, final_classifier, experiment,
                         all_test_target_scores, all_test_decoy_scores, table):

        lambda_ = CONFIG.get("final_statistics.lambda")
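        # Presumably the lambda used when estimating the fraction of null scores
        # for the final error statistics below (an assumption, not stated here).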

        mu, nu, final_score = self.calculate_params_for_d_score(
            final_classifier, experiment)
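        # mu and nu are presumably location and scale estimates of the decoy
        # score distribution; d_score standardizes the raw score against them.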
        experiment["d_score"] = (final_score - mu) / nu

        all_tt_scores = experiment.get_top_target_peaks()["d_score"]

        df_raw_stat = calculate_final_statistics(all_tt_scores,
                                                 all_test_target_scores,
                                                 all_test_decoy_scores,
                                                 lambda_)

        scored_table = self.enrich_table_with_results(table, experiment,
                                                      df_raw_stat)

        final_statistics = final_err_table(df_raw_stat)
        summary_statistics = summary_err_table(df_raw_stat)

        needed_to_persist = (final_classifier, mu, nu,
                             df_raw_stat.loc[:,
                                             ["svalue", "qvalue", "cutoff"]])
        return (summary_statistics, final_statistics,
                scored_table), needed_to_persist
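The persisted slice of df_raw_stat keeps only the "svalue", "qvalue" and "cutoff" columns, which is enough to later translate a desired q-value into a d_score threshold. A minimal sketch of such a lookup follows; it assumes df_raw_stat is a pandas DataFrame, and the helper name and default threshold are hypothetical.

def cutoff_for_qvalue(raw_stat, max_qvalue=0.01):
    # raw_stat: DataFrame holding the persisted "qvalue" and "cutoff" columns.
    # Return the most permissive d_score cutoff that still keeps the q-value
    # at or below max_qvalue, or None if no row reaches that error rate.
    passing = raw_stat[raw_stat["qvalue"] <= max_qvalue]
    if passing.empty:
        return None
    return passing["cutoff"].min()

For example, cutoff_for_qvalue(df_raw_stat, 0.01) would return the score threshold corresponding to a 1% q-value cutoff under these assumptions.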
Example #2
    def apply_classifier(self, final_classifier, experiment, all_test_target_scores,
                         all_test_decoy_scores, table, p_score=False):

        lambda_ = CONFIG.get("final_statistics.lambda")

        mu, nu, final_score = self.calculate_params_for_d_score(final_classifier, experiment)
        experiment["d_score"] = (final_score - mu) / nu

        if CONFIG.get("final_statistics.fdr_all_pg"):
            all_tt_scores = experiment.get_target_peaks()["d_score"]
        else:
            all_tt_scores = experiment.get_top_target_peaks()["d_score"]

        df_raw_stat, num_null, num_total = calculate_final_statistics(
            all_tt_scores, all_test_target_scores, all_test_decoy_scores, lambda_)

        scored_table = self.enrich_table_with_results(table, experiment, df_raw_stat)

        if CONFIG.get("compute.probabilities"):
            logging.info("")
            logging.info("Posterior Probability estimation:")
            logging.info("Estimated number of null %0.2f out of a total of %s." % (num_null, num_total))

            # Note that num_null and num_total are the sum of the
            # cross-validated statistics computed before, therefore the total
            # number of data points selected will be 
            #   len(data) /  xeval.fraction * xeval.num_iter
            # 
            prior_chrom_null = num_null * 1.0 / num_total
            number_true_chromatograms = (1.0 - prior_chrom_null) * len(experiment.get_top_target_peaks().df)
            number_target_pg = len(Experiment(experiment.df[experiment.df.is_decoy == False]).df)
            prior_peakgroup_true = number_true_chromatograms / number_target_pg
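            # Worked example with purely hypothetical numbers: if num_null = 800
            # out of num_total = 1000, prior_chrom_null = 0.8; with 10000 top
            # target peaks that leaves 2000 expected true chromatograms, and with
            # 50000 target peakgroups a peakgroup prior of 2000 / 50000 = 0.04.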

            logging.info("Prior for a peakgroup: %s" % prior_peakgroup_true)
            logging.info("Prior for a chromatogram: %s" % str(1 - prior_chrom_null))
            logging.info("Estimated number of true chromatograms: %s out of %s" % (number_true_chromatograms, len(experiment.get_top_target_peaks().df)))
            logging.info("Number of target data: %s" % number_target_pg)

            # pg_score = posterior probability for each peakgroup
            # h_score = posterior probability for the hypothesis that this peakgroup is true (and all others are false)
            # h0_score = posterior probability for the hypothesis that no peakgroup is true

            pp_pg_pvalues = posterior_pg_prob(experiment, prior_peakgroup_true, lambda_=lambda_)
            experiment.df["pg_score"] = pp_pg_pvalues
            scored_table = scored_table.join(experiment[["pg_score"]])

            allhypothesis, h0 = posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null)
            experiment.df["h_score"] = allhypothesis
            experiment.df["h0_score"] = h0
            scored_table = scored_table.join(experiment[["h_score", "h0_score"]])

        final_statistics = final_err_table(df_raw_stat)
        summary_statistics = summary_err_table(df_raw_stat)

        needed_to_persist = (final_classifier, mu, nu,
                             df_raw_stat.loc[:, ["svalue", "qvalue", "cutoff"]], num_null, num_total)
        return (summary_statistics, final_statistics, scored_table), needed_to_persist
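A hypothetical caller of this variant is sketched below to show how the returned tuple and the probability columns joined into scored_table might be consumed; the runner object, its inputs and the 0.95 threshold are illustrative assumptions, not part of the original code.

# Sketch only: "runner" stands for whatever object provides apply_classifier,
# and 0.95 is an arbitrary illustrative threshold on the posterior pg_score.
(summary_stats, final_stats, scored_table), to_persist = runner.apply_classifier(
    final_classifier, experiment, all_test_target_scores, all_test_decoy_scores, table)

if "pg_score" in scored_table.columns:
    confident = scored_table[scored_table["pg_score"] >= 0.95]
    print("peak groups with posterior probability >= 0.95:", len(confident))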
Example #3
    def get_error_stats(self):
        return final_err_table(self.error_stat.df), summary_err_table(self.error_stat.df)
Example #4
    def get_error_stats(self):
        return final_err_table(self.error_stat.df), summary_err_table(self.error_stat.df)
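Both variants return the same pair of tables, the full and the summary error statistics built from self.error_stat.df; they differ only in line wrapping. Assuming the returned objects are pandas DataFrames (an assumption suggested by the .df attribute used throughout these examples), a caller might persist them as below; the object name and file paths are hypothetical.

# Sketch only: "scorer" and the output paths are placeholders.
full_stats, summary_stats = scorer.get_error_stats()
full_stats.to_csv("full_error_stats.csv", index=False)
summary_stats.to_csv("summary_error_stats.csv", index=False)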