Exemplo n.º 1
0
def parse_mod_data(
    res_dir,
    out_fp,
    valid_sites,
    include_strand,
    samp_lab,
    max_stats,
    ctrl_sites=None,
):
    """Collect accuracy metrics and modified base statistics for a sample.

    Accuracy metrics come from ``report_acc_metrics``; modified base
    statistics are read from the per-read modified base database located
    via ``mh.get_megalodon_fn(res_dir, mh.PR_MOD_NAME)`` when that file
    exists on disk.

    Args:
        res_dir: Results directory handed to ``report_acc_metrics`` and used
            to locate the modified base database.
        out_fp: Forwarded unchanged to ``report_acc_metrics``.
        valid_sites: Sequence of site collections used to restrict the
            extracted statistics, or ``None`` for no restriction. Must
            support ``+`` with ``ctrl_sites`` and ``len`` when control
            sites are supplied.
        include_strand: Forwarded to ``mods.extract_stats_at_valid_sites``.
        samp_lab: Sample label, forwarded to ``report_acc_metrics`` and
            stored in the returned record.
        max_stats: Forwarded as ``max_stats`` to the extraction helpers.
        ctrl_sites: Optional control site collections. When given, they are
            appended to ``valid_sites`` for a single extraction call and
            the result is split back into sample and control parts.

    Returns:
        ``VAL_MOD_DATA`` built from the accuracy metrics, aligned lengths,
        sample statistics (``None`` if the database file is missing),
        control statistics (``None`` unless ``ctrl_sites`` was given and the
        database exists) and ``samp_lab``.
    """
    accuracy, parsimony_accuracy, read_aligned_lens = report_acc_metrics(
        res_dir, out_fp, samp_lab)
    db_path = mh.get_megalodon_fn(res_dir, mh.PR_MOD_NAME)

    # Both stay None when the modified base database is absent.
    sample_stats = None
    control_stats = None
    if os.path.exists(db_path):
        if ctrl_sites is None and valid_sites is None:
            # No site filter at all: take every statistic as a single set.
            sample_stats = [
                mods.extract_all_stats(db_path, max_stats=max_stats),
            ]
        else:
            site_kwargs = {
                "include_strand": include_strand,
                "max_stats": max_stats,
            }
            if ctrl_sites is None:
                sample_stats = mods.extract_stats_at_valid_sites(
                    db_path, valid_sites, **site_kwargs)
            else:
                # Query sample and control sites together, then split the
                # result at the boundary between the two groups.
                combined_stats = mods.extract_stats_at_valid_sites(
                    db_path, valid_sites + ctrl_sites, **site_kwargs)
                num_sample_sets = len(valid_sites)
                sample_stats = combined_stats[:num_sample_sets]
                control_stats = combined_stats[num_sample_sets:]

    return VAL_MOD_DATA(
        accuracy,
        parsimony_accuracy,
        read_aligned_lens,
        sample_stats,
        control_stats,
        samp_lab,
    )
Exemplo n.º 2
0
def parse_mod_data(
        res_dir, out_fp, valid_sites, include_strand, samp_lab,
        ctrl_sites=None):
    """Collect accuracy metrics and modified base statistics for a sample.

    Accuracy metrics come from ``report_acc_metrics``; modified base
    statistics are read from the per-read modified base database located
    via ``mh.get_megalodon_fn(res_dir, mh.PR_MOD_NAME)``.

    Args:
        res_dir: Results directory handed to ``report_acc_metrics`` and used
            to locate the modified base database.
        out_fp: Forwarded unchanged to ``report_acc_metrics``.
        valid_sites: Sequence of site collections used to restrict the
            extracted statistics, or ``None`` for no restriction. Must
            support ``+`` with ``ctrl_sites`` and ``len`` when control
            sites are supplied.
        include_strand: Forwarded to ``mods.extract_stats_at_valid_sites``.
        samp_lab: Sample label, forwarded to ``report_acc_metrics`` and
            stored in the returned record.
        ctrl_sites: Optional control site collections. When given, they are
            appended to ``valid_sites`` for a single extraction call and
            the result is split back into sample and control parts.

    Returns:
        ``VAL_MOD_DATA`` built from the accuracy metrics, sample statistics
        (``None`` if extraction raised ``FileNotFoundError`` or
        ``mh.MegaError``), control statistics (``None`` unless
        ``ctrl_sites`` was given) and ``samp_lab``.
    """
    accuracy, parsimony_accuracy = report_acc_metrics(
        res_dir, out_fp, samp_lab)
    db_path = mh.get_megalodon_fn(res_dir, mh.PR_MOD_NAME)

    control_stats = None
    try:
        if ctrl_sites is None and valid_sites is None:
            # No site filter at all: take every statistic as a single set.
            sample_stats = [mods.extract_all_stats(db_path)]
        elif ctrl_sites is None:
            sample_stats = mods.extract_stats_at_valid_sites(
                db_path, valid_sites, include_strand=include_strand)
        else:
            # Query sample and control sites together, then split the
            # result at the boundary between the two groups.
            combined_stats = mods.extract_stats_at_valid_sites(
                db_path, valid_sites + ctrl_sites,
                include_strand=include_strand)
            num_sample_sets = len(valid_sites)
            sample_stats = combined_stats[:num_sample_sets]
            control_stats = combined_stats[num_sample_sets:]
    except (FileNotFoundError, mh.MegaError):
        # Statistics are optional: report the accuracy metrics alone.
        sample_stats = None

    return VAL_MOD_DATA(
        accuracy, parsimony_accuracy, sample_stats, control_stats, samp_lab)
def _main(args):
    """Extract modified and canonical base statistics and write them out.

    Statistics are taken either from a single results database restricted
    to ground truth sites (``args.ground_truth_data``) or from two separate
    databases: the main results as the modified sample and
    ``args.control_megalodon_results_dir`` as the canonical sample. When
    both options are supplied the ground truth file takes precedence.

    Args:
        args: Parsed command line namespace. Attributes read here:
            ``quiet``, ``ground_truth_data``,
            ``control_megalodon_results_dir``, ``megalodon_results_dir``,
            ``strand_specific_sites``, ``modified_bases_set``,
            ``exclude_modified_bases`` and ``out_filename``.

    Raises:
        SystemExit: With a non-zero status when neither a ground truth file
            nor a control results directory is provided.
    """
    logging.init_logger(quiet=args.quiet)

    if (args.ground_truth_data is None
            and args.control_megalodon_results_dir is None):
        LOGGER.error(
            "Must provide either --control-megalodon-results-dir or " +
            "--ground-truth-data")
        # Exit with a failure status so calling scripts and pipelines can
        # detect the invalid invocation (a bare sys.exit() reports success).
        sys.exit(1)

    db_fn = mh.get_megalodon_fn(args.megalodon_results_dir, mh.PR_MOD_NAME)
    if args.ground_truth_data is not None:
        LOGGER.info("Parsing ground truth data")
        gt_mod_pos, gt_can_pos = mh.parse_ground_truth_file(
            args.ground_truth_data, include_strand=args.strand_specific_sites)
        LOGGER.info(
            ("Loaded ground truth data with {} modified sites and {} " +
             "canonical sites.").format(len(gt_mod_pos), len(gt_can_pos)))
        LOGGER.info("Reading ground truth modified base statistics from " +
                    "database.")
        # One query returns the statistics for both site groups, in the
        # same order as the site collections passed in.
        all_mod_llrs, all_can_llrs = mods.extract_stats_at_valid_sites(
            db_fn,
            [gt_mod_pos, gt_can_pos],
            include_strand=args.strand_specific_sites,
        )
    else:
        LOGGER.info("Reading ground truth modified base statistics from " +
                    "database")
        all_mod_llrs = mods.extract_all_stats(db_fn)
        LOGGER.info("Reading ground truth modified base statistics from " +
                    "canonical sample database")
        all_can_llrs = mods.extract_all_stats(
            mh.get_megalodon_fn(args.control_megalodon_results_dir,
                                mh.PR_MOD_NAME))

    # Summarize over every modification seen in either sample; a
    # modification missing from one side is reported with a count of 0.
    mod_summary = [(
        mod,
        len(all_mod_llrs[mod]) if mod in all_mod_llrs else 0,
        len(all_can_llrs[mod]) if mod in all_can_llrs else 0,
    ) for mod in set(all_mod_llrs).union(all_can_llrs)]
    LOGGER.info("Data summary:\n\tmod\tmod_N\tcan_N\n" +
                "\n".join("\t" + "\t".join(map(str, x)) for x in mod_summary))
    output_mods_data(
        all_mod_llrs,
        all_can_llrs,
        args.modified_bases_set,
        args.exclude_modified_bases,
        args.out_filename,
    )