# Example 1
def _main(args):
    """Entry point: load the basecall model and reference aligner, then
    process all reads, emitting per-read variant output (``mh.PR_VAR_NAME``).

    ``args`` comes from this script's parser; attributes required by the
    guppy backend loader (but not exposed as options here) are filled in
    before loading.
    """
    # Create the logs directory first (the logger writes into it), but defer
    # the "already exists" warning until after the logger is initialized --
    # in the original ordering the warning was emitted before
    # logging.init_logger configured any handlers and could be lost.
    logs_dir_existed = False
    try:
        mh.mkdir(args.guppy_logs_output_directory, False)
    except mh.MegaError:
        logs_dir_existed = True
    logging.init_logger(args.guppy_logs_output_directory)
    if logs_dir_existed:
        LOGGER.warning(
            "Guppy logs output directory exists. Potentially overwriting " +
            "guppy logs.")
    # add required attributes for loading guppy, but not valid options for
    # this script.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    args.outputs = [mh.PR_VAR_NAME]

    LOGGER.info("Loading model.")
    backend_params = backends.parse_backend_params(args)
    with backends.ModelInfo(backend_params, args.processes) as model_info:
        LOGGER.info("Loading reference.")
        # minimap2 aligner; best_n=1 keeps only the single best alignment
        aligner = mappy.Aligner(str(args.reference),
                                preset=str("map-ont"),
                                best_n=1)

        process_all_reads(
            args.fast5s_dir,
            not args.not_recursive,
            args.num_reads,
            args.read_ids_filename,
            model_info,
            aligner,
            args.processes,
            args.output,
            args.suppress_progress,
            args.compute_false_reference_scores,
        )
def main():
    """Command-line entry point: parse arguments, load the model and the
    reference, then process all reads."""
    args = get_parser().parse_args()
    # Attributes the guppy backend loader expects, but which this script
    # does not expose as command-line options.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        # Best effort: keep going, but tell the user logs may be clobbered.
        sys.stderr.write(
            '***** WARNING ***** Guppy logs output directory exists. ' +
            'Potentially overwriting guppy logs.\n')

    sys.stderr.write('Loading model.\n')
    params = backends.parse_backend_params(args)
    with backends.ModelInfo(params, args.processes) as minfo:
        sys.stderr.write('Loading reference.\n')
        ref_aligner = mapping.alignerPlus(
            str(args.reference), preset=str('map-ont'), best_n=1)

        process_all_reads(
            args.fast5s_dir,
            args.num_reads,
            args.read_ids_filename,
            minfo,
            ref_aligner,
            args.processes,
            args.output,
            args.suppress_progress,
            args.compute_false_reference_scores,
        )
# Example 3
def open_pyguppy_backend(args):
    """Open a pyguppy basecalling backend, log the model's alphabet and
    structure, and guarantee the guppy server is shut down on exit."""
    args.do_not_use_guppy_server = False
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        # Directory already present; proceed but warn about log clobbering.
        LOGGER.warning(
            "Guppy logs output directory exists. Potentially overwriting guppy "
            "logs."
        )
    params = backends.parse_backend_params(args)
    minfo = None
    try:
        minfo = backends.ModelInfo(params, args.processes)
        # When spawning multiple workers, call this inside each newly
        # spawned process.
        minfo.prep_model_worker()
        LOGGER.info(minfo.get_alphabet_str())
        LOGGER.info(
            "Model structure:\n\tStride: {}\n\tState size: {}".format(
                minfo.stride, minfo.output_size))
        # minfo.iter_basecalled_reads would basecall reads here and yield
        # the relevant signal-anchored information.
        minfo.client.disconnect()
    finally:
        # Ensure the guppy server is closed even if anything above raised.
        if minfo is not None:
            minfo.close()
# Example 4
def _main(args):
    """Compute per-site modified-vs-canonical log-likelihood-ratio scores
    from a mapped-signal file and write them out.

    Loads the model, validates the mapped-signal alphabet against it,
    computes diff scores over the requested motifs, logs a per-mod count
    summary and saves the results.
    """
    logging.init_logger(log_fn=args.log_filename, quiet=args.quiet)
    # Attributes required by the guppy backend loader, but not valid
    # options for this script.
    args.do_not_use_guppy_server = False
    args.output_directory = args.guppy_logs_output_directory
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        LOGGER.warning(
            "Guppy logs output directory exists. Potentially overwriting "
            + "guppy logs."
        )
    args = add_trim_guppy_none(args)
    args.outputs = [mh.PR_MOD_NAME]
    # Enforce edge_buffer >= mod_context_bases so downstream window
    # extraction never runs off a read's end.
    if args.edge_buffer < args.mod_context_bases:
        LOGGER.warning(
            "[--edge-buffer] less than [--mod-context-bases]. Setting "
            + "[--edge-buffer] to value from [--mod-context-bases]"
        )
        args.edge_buffer = args.mod_context_bases

    LOGGER.info("Loading model.")
    backend_params = backends.parse_backend_params(args)
    with backends.ModelInfo(backend_params, args.processes) as model_info:
        check_map_sig_alphabet(model_info, args.mapped_signal_file)
        motifs = parse_motifs(args.motif, model_info, args.modified_bases_set)
        can_labs, mod_labs = extract_label_conversions(model_info)
        can_post_indices = model_info.can_indices.astype(np.uintp)
        all_mod_llrs, all_can_llrs = compute_diff_scores(
            args.mapped_signal_file,
            model_info,
            args.mod_context_bases,
            args.edge_buffer,
            args.num_reads,
            motifs,
            can_labs,
            mod_labs,
            can_post_indices,
        )

    # Per-mod counts of scored sites; a mod may appear in only one of the
    # two result dicts, so missing entries count as zero.
    mod_summary = []
    for mod in set(all_mod_llrs).union(all_can_llrs):
        num_mod = len(all_mod_llrs[mod]) if mod in all_mod_llrs else 0
        num_can = len(all_can_llrs[mod]) if mod in all_can_llrs else 0
        mod_summary.append((mod, num_mod, num_can))
    summary_rows = "\n".join(
        "\t" + "\t".join(map(str, row)) for row in mod_summary)
    LOGGER.info("Data summary:\n\tmod\tmod_N\tcan_N\n" + summary_rows)
    output_mods_data(all_mod_llrs, all_can_llrs, args.out_filename)
# Example 5
def _main(args):
    """Load a model and log its alphabet: the canonical bases plus, for
    categorical-mod models, each modified base and its canonical partner."""
    logging.init_logger()
    # Devices are irrelevant when only querying the alphabet.
    args.devices = None

    # Guppy backend settings.
    args.guppy_server_port = None
    args.guppy_timeout = mh.DEFAULT_GUPPY_TIMEOUT
    args.output_directory = args.guppy_logs_output_directory

    # Taiyaki backend settings.
    args.chunk_size = 1000
    args.chunk_overlap = 100
    args.max_concurrent_chunks = 200
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        LOGGER.warning(
            'Guppy logs output directory exists. Potentially overwriting ' +
            'guppy logs.')
    backend_params = backends.parse_backend_params(args)
    with backends.ModelInfo(backend_params, 1) as model_info:
        if not model_info.is_cat_mod:
            # Plain canonical model: nothing more to report.
            LOGGER.info('Model contains canonical alphabet {}.'.format(
                model_info.can_alphabet))
            return
        # For each modified base, find the canonical base it is an
        # alternative to (order parallels model_info.mod_long_names).
        can_bs = []
        for mod_b, _ in model_info.mod_long_names:
            for can_b, can_mod_bs in model_info.can_base_mods.items():
                if mod_b in can_mod_bs:
                    can_bs.append(can_b)
        mod_descs = '; '.join(
            '{}={} (alt to {})'.format(mod_b, mln, can_b)
            for (mod_b, mln), can_b in zip(
                model_info.mod_long_names, can_bs))
        LOGGER.info(
            ('Model contains canonical alphabet {} and modified ' +
             'bases {}.').format(model_info.can_alphabet, mod_descs))
def _main(args):
    """Load a model and log its alphabet string.

    Fixes two ordering defects in the original: (1) ``mh.mkdir`` was called
    on ``args.output_directory`` BEFORE that attribute was assigned from
    ``args.guppy_logs_output_directory``, so the wrong (or unset) directory
    was created; (2) ``LOGGER.warning`` fired before ``logging.init_logger``
    configured any handlers, so the warning could be lost. Both now follow
    the ordering used by the sibling alphabet-reporting ``_main``.
    """
    logging.init_logger(args.log_directory)
    # Devices are irrelevant when only querying the alphabet.
    args.devices = None

    # Guppy backend settings.
    args.guppy_server_port = None
    args.guppy_timeout = mh.DEFAULT_GUPPY_TIMEOUT
    args.output_directory = args.guppy_logs_output_directory

    # Taiyaki backend settings.
    args.chunk_size = 1000
    args.chunk_overlap = 100
    args.max_concurrent_chunks = 200
    try:
        mh.mkdir(args.output_directory, False)
    except mh.MegaError:
        LOGGER.warning(
            'Guppy logs output directory exists. Potentially overwriting ' +
            'guppy logs.')
    backend_params = backends.parse_backend_params(args)
    with backends.ModelInfo(backend_params, 1) as model_info:
        LOGGER.info(model_info.get_alphabet_str())