def generate_decoy_match_results(scored_matches_path, decon_data, model_file_path,
                                 prefix_len=0, suffix_len=0, ms1_tolerance=1e-5,
                                 ms2_tolerance=2e-5, num_decoys_per_real_mass=1.0,
                                 method="full_random_forest", random_only=False,
                                 method_init_args=None, method_fit_args=None,
                                 n_processes=6, outfile_path=None):
    '''
    Build a decoy search space from the database behind a scored-matches file,
    match it against the deconvoluted data, postprocess the matches and score
    them with a saved model.

    :param scored_matches_path: path handed to `classify_matches.prepare_model_file`;
        the loaded object's ``metadata["db_file_name"]`` names the search space
        from which decoys are derived.
    :param decon_data: passed through unchanged to `match_ions2.match_frags`.
    :param model_file_path: model handed to
        `classify_matches.ClassifyTargetWithModelTask`.
    :param prefix_len: forwarded to `make_decoys_from_search_space.taskmain`.
    :param suffix_len: forwarded to `make_decoys_from_search_space.taskmain`.
    :param ms1_tolerance: third positional argument of `match_ions2.match_frags`.
    :param ms2_tolerance: fourth positional argument of `match_ions2.match_frags`.
    :param num_decoys_per_real_mass: accepted but not referenced in this function.
    :param random_only: accepted but not referenced in this function.
    :param method: forwarded to the classification task.
    :param method_init_args: forwarded to the classification task.
    :param method_fit_args: forwarded to the classification task.
    :param n_processes: forwarded to both decoy generation and fragment matching.
    :param outfile_path: forwarded as ``out`` to
        `make_decoys_from_search_space.taskmain`.
    :returns: whatever the classification task's ``run()`` returns (the caller
        in this file treats it as the path of the scored decoy matches).
    '''
    logger.info("Creating Decoys")
    target_frame = classify_matches.prepare_model_file(scored_matches_path)
    search_space_db = target_frame.metadata["db_file_name"]

    decoy_space_path = make_decoys_from_search_space.taskmain(
        search_space_db,
        prefix_len=prefix_len,
        suffix_len=suffix_len,
        n_processes=n_processes,
        out=outfile_path)
    logger.info("Decoy Ion Space: %s", decoy_space_path)

    # The return value is not used here; later steps keep working from
    # `decoy_space_path`.
    match_ions2.match_frags(
        decoy_space_path, decon_data, ms1_tolerance, ms2_tolerance,
        n_processes=n_processes)
    logger.info("Decoy Matches Done")

    # Only the first element of the returned pair is needed below.
    postprocess_file, _ = postprocess2.main(decoy_space_path)
    logger.info("Decoys Postprocessed: %s", postprocess_file)

    classifier_options = {
        "method": method,
        "method_init_args": method_init_args,
        "method_fit_args": method_fit_args,
    }
    scoring_task = classify_matches.ClassifyTargetWithModelTask(
        model_file_path, postprocess_file, **classifier_options)
    return scoring_task.run()
def main(scored_matches_path, decon_data=None, model_file_path=None,
         decoy_matches_path=None, outfile_path=None, num_decoys_per_real_mass=1.0,
         random_only=False, predicate_fns=None, prefix_len=0, suffix_len=0,
         by_mod_sig=False, ms1_tolerance=None, ms2_tolerance=None,
         method="full_random_forest", method_init_args=None, method_fit_args=None,
         n_processes=6):
    '''
    Call with deconvolution results and a model to generate decoys and score
    them, or with a pre-existing decoy database.

    Three modes are selected from the arguments, checked in this order:

    1. `decon_data` and `model_file_path` both given: decoys are generated and
       scored through `generate_decoy_match_results`.
    2. `model_file_path` and `decoy_matches_path` both given: the target and the
       decoy files are each run through
       `classify_matches.ClassifyTargetWithModelTask`.
    3. otherwise: `decoy_matches_path` is loaded as-is with
       `classify_matches.prepare_model_file`.

    :param scored_matches_path str: path of the scored target matches.
    :param decon_data: deconvolution results used when generating decoys.
    :param model_file_path: model used to score decoys (and, in mode 2, targets).
    :param decoy_matches_path: pre-existing decoy matches; overwritten by the
        generated path in mode 1.
    :param outfile_path str: defaults to scored_matches_path[:-5] + "_fdr.json".
        The annotated target frame, with the FDR results stored under
        ``metadata["fdr"]``, is serialized here. In mode 1 this path is also
        passed on to `generate_decoy_match_results`.
    :param num_decoys_per_real_mass: forwarded to `generate_decoy_match_results`.
    :param random_only: forwarded to `generate_decoy_match_results`.
    :type predicate_fns: Sequence
    :param predicate_fns iterable: accepted for interface compatibility; not
        referenced in this function body.
    :param prefix_len: forwarded to `generate_decoy_match_results`.
    :param suffix_len: forwarded to `generate_decoy_match_results`.
    :param by_mod_sig: accepted for interface compatibility; not referenced in
        this function body.
    :param ms1_tolerance: defaults to the target frame's
        ``metadata["ms1_ppm_tolerance"]`` when None.
    :param ms2_tolerance: defaults to the target frame's
        ``metadata["ms2_ppm_tolerance"]`` when None.
    :param method: classifier method name forwarded to the scoring tasks.
    :param method_init_args: forwarded to `generate_decoy_match_results`.
    :param method_fit_args: forwarded to `generate_decoy_match_results`.
    :param n_processes: forwarded to `generate_decoy_match_results`.
    :returns: `outfile_path`, after the annotated frame has been serialized to it.
    '''
    scored_matches_frame = classify_matches.prepare_model_file(scored_matches_path)

    # Fall back to the tolerances recorded alongside the target matches.
    if ms1_tolerance is None:
        ms1_tolerance = scored_matches_frame.metadata["ms1_ppm_tolerance"]
    if ms2_tolerance is None:
        ms2_tolerance = scored_matches_frame.metadata["ms2_ppm_tolerance"]
    if outfile_path is None:
        # Drops the last five characters of the input path before appending
        # the suffix (presumably a ".json" extension -- verify against callers).
        outfile_path = scored_matches_path[:-5] + "_fdr.json"

    if decon_data is not None and model_file_path is not None:
        logger.info("No decoys given.")
        decoy_matches_path = generate_decoy_match_results(
            scored_matches_path, decon_data, model_file_path,
            prefix_len=prefix_len, suffix_len=suffix_len,
            ms1_tolerance=ms1_tolerance, ms2_tolerance=ms2_tolerance,
            num_decoys_per_real_mass=num_decoys_per_real_mass,
            random_only=random_only, method=method,
            method_init_args=method_init_args,
            method_fit_args=method_fit_args,
            n_processes=n_processes, outfile_path=outfile_path)
        decoy_matches_frame = classify_matches.prepare_model_file(
            decoy_matches_path)
    elif model_file_path is not None and decoy_matches_path is not None:
        # Qualified through `classify_matches`, the same way the decoy
        # generation helper refers to this task class.
        scored_matches_frame = classify_matches.ClassifyTargetWithModelTask(
            model_file_path, scored_matches_path, method=method).run(False)
        decoy_matches_frame = classify_matches.ClassifyTargetWithModelTask(
            model_file_path, decoy_matches_path, method=method).run(False)
    else:
        logger.info("Pre-matched Decoys Given")
        # The target frame loaded at the top of the function has not been
        # modified, so it is reused instead of reading the file a second time.
        decoy_matches_frame = classify_matches.prepare_model_file(
            decoy_matches_path)

    logger.info("Evaluating predicates")
    fdr_search = optimize_fdr.CountExclusion(
        scored_matches_frame, decoy_matches_frame,
        decoy_matches_frame.metadata["decoy_ratio"], ["MS2_Score"])
    fdr_search.optimize()
    scored_matches_frame.metadata["fdr"] = fdr_search.compress()

    # Flag every row whose identifier appears in the frame's `kvquery()` result.
    scored_matches_frame["call"] = scored_matches_frame.Glycopeptide_identifier.isin(
        scored_matches_frame.kvquery().Glycopeptide_identifier)

    try:
        logger.info(
            "Accepted %d predictions with FDR %f",
            scored_matches_frame.call.sum(),
            scored_matches_frame.optimize_fdr().iloc[0]["false_discovery_rate"])
    except Exception:
        # Reporting is best-effort, so any ordinary failure while building the
        # summary is treated as "nothing acceptable found". Unlike a bare
        # `except:`, this lets KeyboardInterrupt and SystemExit propagate.
        logger.info("No predictions were found at an acceptable FDR")

    scored_matches_frame.serialize(outfile_path)
    return outfile_path