Example #1
def run(parser, args):
    # Catch exceptions and only print error line
    sys.excepthook = except_hook

    # Run load config to validate
    run_info, conditions, reference, caller_settings = get_run_info(args.toml)
    print("😻 Looking good!", file=sys.stdout)
    print("Generating experiment description - please be patient!", file=sys.stdout)
    mapper = Mapper(reference)
    for message, sev in describe_experiment(conditions, mapper):
        printer(textwrap.fill(message), sev, file=sys.stdout, end="\n\n")
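This entry point installs `except_hook` so that an uncaught exception prints a single error line instead of a full traceback. The hook itself is defined elsewhere in the package; a minimal sketch of such a hook (names assumed) could be:

import sys

def except_hook(exc_type, exc_value, exc_traceback):
    # Sketch: print only "ExceptionType: message" and suppress the traceback;
    # the interpreter still exits with a non-zero status afterwards.
    print("{}: {}".format(exc_type.__name__, exc_value), file=sys.stderr)

sys.excepthook = except_hook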
Example #2
def main():
    # Catch exceptions and only print error line
    sys.excepthook = except_hook

    # Parse single positional argument
    parser = argparse.ArgumentParser(
        "Read Until TOML Validator ({})".format(__file__))
    parser.add_argument("toml", help="TOML file to validate")
    args = parser.parse_args()

    # Run load config to validate
    run_info, conditions, reference, caller_settings = get_run_info(args.toml)
    print("😻 Looking good!", file=sys.stdout)
    print("Generating experiment description - please be patient!",
          file=sys.stdout)
    mapper = Mapper(reference)
    for message, sev in describe_experiment(conditions, mapper):
        printer(textwrap.fill(message), sev, file=sys.stdout, end="\n\n")
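The validator takes a single positional argument. A self-contained sketch of the same argparse pattern, parsing an explicit argv list for demonstration:

import argparse

parser = argparse.ArgumentParser("Read Until TOML Validator")
parser.add_argument("toml", help="TOML file to validate")
args = parser.parse_args(["experiment.toml"])  # normally taken from sys.argv
print(args.toml)  # -> experiment.toml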
Example #3
def simple_analysis(
    client,
    batch_size=512,
    throttle=0.1,
    unblock_duration=0.5,
    cl=None,
    pf=None,
    live_toml_path=None,
    flowcell_size=512,
    dry_run=False,
    run_info=None,
    conditions=None,
    mapper=None,
    caller_kwargs=None,
):
    """Analysis function

    Parameters
    ----------
    client : read_until.ReadUntilClient
        An instance of the ReadUntilClient object
    batch_size : int
        The number of reads to be retrieved from the ReadUntilClient at a time
    throttle : int or float
        Interval, in seconds, between requests to the ReadUntilClient
    unblock_duration : int or float
        Time, in seconds, to apply unblock voltage
    cl : logging.Logger
        Logger to write chunk data to
    pf : logging.Logger
        Logger to write alignment (PAF) records to
    live_toml_path : str
        Path to a `live` TOML configuration file for Read Until. If this file
        exists when the run starts, it is deleted
    flowcell_size : int
        The number of channels on the flowcell, 512 for MinION and 3000 for PromethION
    dry_run : bool
        If True unblocks are replaced with `stop_receiving` commands
    run_info : dict
        Dictionary of {channel: index} where index corresponds to an index in `conditions`
    conditions : list
        Experimental conditions as a list of namedtuples
    mapper : mappy.Aligner
        Aligner instance used to map basecalled read chunks
    caller_kwargs : dict
        Keyword arguments used to initialise the basecaller (`Caller`)

    Returns
    -------
    None
    """
    # Init logger for this function
    logger = logging.getLogger(__name__)

    # Delete live TOML file if it exists
    live_toml_path = Path(live_toml_path)
    if live_toml_path.is_file():
        live_toml_path.unlink()

    # TODO: test this
    # Write channels.toml
    d = {
        "conditions": {
            str(v): {
                "channels": [],
                "name": conditions[v].name
            }
            for v in set(run_info.values())
        }
    }
    for k, v in run_info.items():
        d["conditions"][str(v)]["channels"].append(k)

    channels_out = str(client.mk_run_dir / "channels.toml")
    with open(channels_out, "w") as fh:
        fh.write(
            "# This file is written as a record of the condition each channel is assigned.\n"
        )
        fh.write(
            "# It may be changed or overwritten if you restart Read Until.\n")
        fh.write("# In the future this file may become a CSV file.\n")
        toml.dump(d, fh)

    caller = Caller(**caller_kwargs)
    # TODO: handle the case of a missing reference or an empty MMI

    # DefaultDict[int, Deque[Tuple[str, ndarray]]]
    #  where the tuple is (read_id, previous_signal)
    # TODO: tuple should use read_number instead
    previous_signal = defaultdict(functools.partial(deque, maxlen=1))
    # count how often a read is seen
    tracker = defaultdict(Counter)
    # Most recent decided read_id, keyed by channel
    decided_reads = {}
    strand_converter = {1: "+", -1: "-"}

    read_id = ""

    # TODO: partial-ise / lambda unblock to take the unblock duration
    if dry_run:
        decision_dict = {
            "stop_receiving": client.stop_receiving_read,
            "proceed": None,
            "unblock": client.stop_receiving_read,
        }
        send_message(client.connection,
                     "This is a test run. No unblocks will occur.",
                     Severity.WARN)
    else:
        decision_dict = {
            "stop_receiving": client.stop_receiving_read,
            "proceed": None,
            "unblock": lambda c, n: client.unblock_read(c, n, unblock_duration, read_id),
        }
        send_message(client.connection,
                     "This is a live run. Unblocks will occur.", Severity.WARN)
    decision_str = ""
    below_threshold = False
    exceeded_threshold = False

    l_string = (
        "client_iteration",
        "read_in_loop",
        "read_id",
        "channel",
        "read_number",
        "seq_len",
        "counter",
        "mode",
        "decision",
        "condition",
        "min_threshold",
        "count_threshold",
        "start_analysis",
        "end_analysis",
        "timestamp",
    )
    cl.debug("\t".join(l_string))
    l_string = "\t".join(("{}" for _ in l_string))
    loop_counter = 0
    while client.is_running:
        if live_toml_path.is_file():
            # Reload the TOML config from the *_live file
            run_info, conditions, new_reference, _ = get_run_info(
                live_toml_path, flowcell_size)

            # Check the reference path if different from the loaded mapper
            if new_reference != mapper.index:
                old_reference = mapper.index
                # Log to file and MinKNOW interface
                logger.info("Reloading mapper")
                send_message(client.connection,
                             "Reloading mapper. Read Until paused.",
                             Severity.INFO)

                # Update mapper client.
                mapper = CustomMapper(new_reference)
                # Log on success
                logger.info("Reloaded mapper")

                # If we've reloaded a reference, delete the previous one
                if old_reference:
                    logger.info("Deleting old mmi {}".format(old_reference))
                    # We now delete the old mmi file.
                    Path(old_reference).unlink()
                    logger.info("Old mmi deleted.")

        # TODO: consolidate logging to just one of the two loggers in use

        if not mapper.initialised:
            time.sleep(throttle)
            continue

        loop_counter += 1
        t0 = timer()
        r = 0

        for read_info, read_id, seq_len, results in mapper.map_reads_2(
                caller.basecall_minknow(
                    reads=client.get_read_chunks(batch_size=batch_size,
                                                 last=True),
                    signal_dtype=client.signal_dtype,
                    prev_signal=previous_signal,
                    decided_reads=decided_reads,
                )):
            r += 1
            read_start_time = timer()
            channel, read_number = read_info
            if read_number not in tracker[channel]:
                tracker[channel].clear()
            tracker[channel][read_number] += 1

            mode = ""
            exceeded_threshold = False
            below_threshold = False

            def log_decision():
                cl.debug(
                    l_string.format(
                        loop_counter,
                        r,
                        read_id,
                        channel,
                        read_number,
                        seq_len,
                        tracker[channel][read_number],
                        mode,
                        getattr(conditions[run_info[channel]], mode, mode),
                        conditions[run_info[channel]].name,
                        below_threshold,
                        exceeded_threshold,
                        read_start_time,
                        timer(),
                        time.time(),
                    ))

            # Control channels
            if conditions[run_info[channel]].control:
                mode = "control"
                log_decision()
                client.stop_receiving_read(channel, read_number)
                continue

            # This is an analysis channel
            # Below minimum chunks
            if tracker[channel][read_number] <= conditions[run_info[channel]].min_chunks:
                below_threshold = True

            # Greater than or equal to maximum chunks
            if tracker[channel][read_number] >= conditions[run_info[channel]].max_chunks:
                exceeded_threshold = True

            # No mappings
            if not results:
                mode = "no_map"

            hits = set()
            for result in results:
                pf.debug("{}\t{}\t{}".format(read_id, seq_len, result))
                hits.add(result.ctg)

            if hits & conditions[run_info[channel]].targets:
                # Mappings and targets overlap
                coord_match = any(
                    between(r.r_st, c) for r in results
                    for c in conditions[run_info[channel]].coords.get(
                        strand_converter.get(r.strand), {}).get(r.ctg, []))
                if len(hits) == 1:
                    if coord_match:
                        # Single match that is within coordinate range
                        mode = "single_on"
                    else:
                        # Single match to a target outside coordinate range
                        mode = "single_off"
                elif len(hits) > 1:
                    if coord_match:
                        # Multiple matches with at least one in the correct region
                        mode = "multi_on"
                    else:
                        # Multiple matches to targets outside the coordinate range
                        mode = "multi_off"

            else:
                # No matches in mappings
                if len(hits) > 1:
                    # More than one, off-target, mapping
                    mode = "multi_off"
                elif len(hits) == 1:
                    # Single off-target mapping
                    mode = "single_off"

            # This is where we make our decision:
            # Get the associated action for this condition
            decision_str = getattr(conditions[run_info[channel]], mode)
            # decision is an alias for the functions "unblock" or "stop_receiving"
            decision = decision_dict[decision_str]

            # If max_chunks has been exceeded AND we don't want to keep sequencing we unblock
            if exceeded_threshold and decision_str != "stop_receiving":
                mode = "exceeded_max_chunks_unblocked"
                client.unblock_read(channel, read_number, unblock_duration,
                                    read_id)

            # TODO: WHAT IS GOING ON?!
            #  I think that this needs to change between enrichment and depletion
            # If under min_chunks AND any mapping mode seen we unblock
            # if below_threshold and mode in {"single_off", "multi_off"}:
            if below_threshold and mode in {
                    "single_on",
                    "single_off",
                    "multi_on",
                    "multi_off",
            }:
                mode = "below_min_chunks_unblocked"
                client.unblock_read(channel, read_number, unblock_duration,
                                    read_id)

            # proceed returns None, so we send no decision; otherwise unblock or stop_receiving
            elif decision is not None:
                decided_reads[channel] = read_id
                decision(channel, read_number)

            log_decision()

        t1 = timer()
        if r > 0:
            s1 = "{}R/{:.5f}s"
            logger.info(s1.format(r, t1 - t0))
        # limit the rate at which we make requests
        if t0 + throttle > t1:
            time.sleep(throttle + t0 - t1)
    else:
        send_message(client.connection, "Read Until Client Stopped.",
                     Severity.WARN)
        caller.disconnect()
        logger.info("Finished analysis of reads as client stopped.")
Example #4
def main():
    extra_args = (
        (
            "--toml",
            dict(
                metavar="TOML",
                required=True,
                help="TOML file specifying experimental parameters",
            ),
        ),
        ("--paf-log", dict(
            help="PAF log",
            default="paflog.log",
        )),
        ("--chunk-log", dict(
            help="Chunk log",
            default="chunk_log.log",
        )),
    )
    parser, args = get_parser(extra_args=extra_args, file=__file__)

    # set up logging to file for DEBUG messages and above
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s %(name)s %(message)s",
        filename=args.log_file,
        filemode="w",
    )

    # define a Handler that writes INFO messages or higher to the sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)

    # set a format which is simpler for console use
    formatter = logging.Formatter(args.log_format)
    console.setFormatter(formatter)

    # add the handler to the root logger
    logging.getLogger("").addHandler(console)

    # Start by logging sys.argv and the parameters used
    logger = logging.getLogger("Manager")
    logger.info(" ".join(sys.argv))
    print_args(args, logger=logger)

    # Setup chunk and paf logs
    chunk_logger = setup_logger("DEC", log_file=args.chunk_log)
    paf_logger = setup_logger("PAF", log_file=args.paf_log)

    # Parse configuration TOML
    # TODO: num_channels is not configurable here, should be inferred from client
    run_info, conditions, reference, caller_kwargs = get_run_info(
        args.toml, num_channels=512)
    live_toml = Path("{}_live".format(args.toml))

    # Load Minimap2 index
    logger.info("Initialising minimap2 mapper")
    mapper = CustomMapper(reference)
    logger.info("Mapper initialised")

    read_until_client = read_until.ReadUntilClient(
        mk_host=args.host,
        mk_port=args.port,
        device=args.device,
        # one_chunk=args.one_chunk,
        filter_strands=True,
        # TODO: test cache_type by passing a function here
        cache_type=args.read_cache,
        cache_size=args.cache_size,
    )

    send_message(
        read_until_client.connection,
        "Read Until is controlling sequencing on this device. You use it at your own risk.",
        Severity.WARN,
    )

    for message, sev in describe_experiment(conditions, mapper):
        logger.info(message)

        send_message(
            read_until_client.connection,
            message,
            sev,
        )
    """
    This experiment has N regions on the flowcell.

    using reference: /path/to/ref.mmi

    Region i:NAME (control=bool) has X targets of which Y are found in the reference.
    reads will be unblocked when [u,v], sequenced when [w,x] and polled for more data when [y,z].
    """

    # FIXME: currently flowcell size is not included, this should be pulled from
    #  the read_until_client
    analysis_worker = functools.partial(
        simple_analysis,
        read_until_client,
        unblock_duration=args.unblock_duration,
        throttle=args.throttle,
        batch_size=args.batch_size,
        cl=chunk_logger,
        pf=paf_logger,
        live_toml_path=live_toml,
        dry_run=args.dry_run,
        run_info=run_info,
        conditions=conditions,
        mapper=mapper,
        caller_kwargs=caller_kwargs,
    )

    results = run_workflow(
        read_until_client,
        analysis_worker,
        args.workers,
        args.run_time,
        runner_kwargs={
            # "min_chunk_size": args.min_chunk_size,
            "first_channel": min(args.channels),
            "last_channel": max(args.channels),
        },
    )

    # The workflow has finished (results are unused); notify MinKNOW
    send_message(
        read_until_client.connection,
        "Read Until is disconnected from this device. Sequencing will proceed normally.",
        Severity.WARN,
    )
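`run_workflow` expects a callable it can invoke without arguments, so `functools.partial` binds the client and all keyword arguments up front. A minimal illustration of that deferred-call pattern with a toy function:

import functools

def analysis(client, batch_size=512, throttle=0.1, dry_run=False):
    print(client, batch_size, throttle, dry_run)

# Bind arguments now; the workflow runner later just calls worker()
worker = functools.partial(analysis, "client-0", throttle=0.4, dry_run=True)
worker()  # -> client-0 512 0.4 True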
Example #5
def simple_analysis(
        client,
        batch_size=512,
        throttle=0.1,
        unblock_duration=0.5,
        chunk_log=None,
        toml_path=None,
        flowcell_size=512,
        dry_run=False,
):
    """Analysis function

    Parameters
    ----------
    client : read_until.ReadUntilClient
        An instance of the ReadUntilClient object
    batch_size : int
        The number of reads to be retrieved from the ReadUntilClient at a time
    throttle : int or float
        Interval, in seconds, between requests to the ReadUntilClient
    unblock_duration : int or float
        Time, in seconds, to apply unblock voltage
    chunk_log : str
        Log file to log chunk data to
    toml_path : str
        Path to a TOML configuration file for read until
    flowcell_size : int
        The number of channels on the flowcell, 512 for MinION and 3000 for PromethION
    dry_run : bool
        If True unblocks are replaced with `stop_receiving` commands

    Returns
    -------
    None
    """
    logger = logging.getLogger(__name__)
    toml_dict = toml.load(toml_path)
    live_file = Path("{}_live".format(toml_path))
    if live_file.is_file():
        live_file.unlink()

    # There may or may not be a reference
    run_info, conditions, reference = get_run_info(toml_dict, num_channels=flowcell_size)
    # TODO: test this
    # Write channels.toml
    d = {"conditions": {str(v): {"channels": [], "name": conditions[v].name} for k, v in run_info.items()}}
    for k, v in run_info.items():
        d["conditions"][str(v)]["channels"].append(k)
    channels_out = str(client.mk_run_dir / "channels.toml")
    with open(channels_out, "w") as fh:
        toml.dump(d, fh)

    guppy_kwargs = toml_dict.get(
        "guppy_connection",
        {
            "config": "dna_r9.4.1_450bps_fast",
            "host": "127.0.0.1",
            "port": 5555,
            "procs": 4,
            "inflight": 512,
        }
    )

    caller = Caller(**guppy_kwargs)
    # TODO: handle the case of a missing reference or an empty MMI
    mapper = CustomMapper(reference)

    # DefaultDict[int, Deque[Tuple[str, ndarray]]]
    #  where the tuple is (read_id, previous_signal)
    # TODO: tuple should use read_number instead
    previous_signal = defaultdict(functools.partial(deque, maxlen=1))
    # count how often a read is seen
    tracker = defaultdict(Counter)
    # Most recent decided read_id, keyed by channel
    decided_reads = {}
    strand_converter = {1: "+", -1: "-"}

    read_id = ""

    # TODO: partial-ise / lambda unblock to take the unblock duration
    if dry_run:
        decision_dict = {
            "stop_receiving": client.stop_receiving_read,
            "proceed": None,
            "unblock": client.stop_receiving_read,
        }
    else:
        decision_dict = {
            "stop_receiving": client.stop_receiving_read,
            "proceed": None,
            "unblock": lambda c, n: client.unblock_read(c, n, unblock_duration, read_id),
        }
    decision_str = ""
    below_threshold = False
    exceeded_threshold = False

    cl = setup_logger("DEC", log_file=chunk_log)
    pf = setup_logger("PAF", log_file="paflog.paf")
    l_string = (
        "client_iteration",
        "read_in_loop",
        "read_id",
        "channel",
        "read_number",
        "seq_len",
        "counter",
        "mode",
        "decision",
        "condition",
        "min_threshold",
        "count_threshold",
        "start_analysis",
        "end_analysis",
        "timestamp",
    )
    cl.debug("\t".join(l_string))
    l_string = "\t".join(("{}" for _ in l_string))
    loop_counter = 0
    while client.is_running:
        if live_file.is_file():
            # We may want to update the reference under certain conditions here.
            run_info, conditions, new_reference = get_run_info(live_file, flowcell_size)
            if new_reference != reference:
                logger.info("Reloading mapper")
                # We need to update our mapper client.
                mapper = CustomMapper(new_reference)
                logger.info("Reloaded mapper")
                if reference:
                    logger.info("Deleting old mmi {}".format(reference))
                    # We now delete the old mmi file.
                    Path(reference).unlink()
                    logger.info("Old mmi deleted.")
            reference = new_reference

        # TODO: consolidate logging to just one of the two loggers in use

        if not reference:
            time.sleep(throttle)
            continue
        loop_counter += 1
        t0 = timer()
        r = 0

        for read_info, read_id, seq_len, results in mapper.map_reads_2(
                caller.basecall_minknow(
                    reads=client.get_read_chunks(batch_size=batch_size, last=True),
                    signal_dtype=client.signal_dtype,
                    prev_signal=previous_signal,
                    decided_reads=decided_reads,
                )
        ):
            r += 1
            read_start_time = timer()
            channel, read_number = read_info
            if read_number not in tracker[channel]:
                tracker[channel].clear()
            tracker[channel][read_number] += 1

            mode = ""
            exceeded_threshold = False
            below_threshold = False

            def log_decision():
                cl.debug(
                    l_string.format(
                        loop_counter,
                        r,
                        read_id,
                        channel,
                        read_number,
                        seq_len,
                        tracker[channel][read_number],
                        mode,
                        getattr(conditions[run_info[channel]], mode, mode),
                        conditions[run_info[channel]].name,
                        below_threshold,
                        exceeded_threshold,
                        read_start_time,
                        timer(),
                        time.time(),
                    )
                )

            # Control channels
            if conditions[run_info[channel]].control:
                mode = "control"
                log_decision()
                client.stop_receiving_read(channel, read_number)
                continue

            # This is an analysis channel
            # Below minimum chunks
            if tracker[channel][read_number] <= conditions[run_info[channel]].min_chunks:
                below_threshold = True

            # Greater than or equal to maximum chunks
            if tracker[channel][read_number] >= conditions[run_info[channel]].max_chunks:
                exceeded_threshold = True

            # No mappings
            if not results:
                mode = "no_map"

            hits = set()
            for result in results:
                pf.debug("{}\t{}\t{}".format(read_id, seq_len, result))
                hits.add(result.ctg)

            if hits & conditions[run_info[channel]].targets:
                # Mappings and targets overlap
                coord_match = any(
                    between(r.r_st, c)
                    for r in results
                    for c in conditions[run_info[channel]]
                        .coords.get(strand_converter.get(r.strand), {})
                        .get(r.ctg, [])
                )
                if len(hits) == 1:
                    if coord_match:
                        # Single match that is within coordinate range
                        mode = "single_on"
                    else:
                        # Single match to a target outside coordinate range
                        mode = "single_off"
                elif len(hits) > 1:
                    if coord_match:
                        # Multiple matches with at least one in the correct region
                        mode = "multi_on"
                    else:
                        # Multiple matches to targets outside the coordinate range
                        mode = "multi_off"

            else:
                # No matches in mappings
                if len(hits) > 1:
                    # More than one, off-target, mapping
                    mode = "multi_off"
                elif len(hits) == 1:
                    # Single off-target mapping
                    mode = "single_off"

            # This is where we make our decision:
            # Get the associated action for this condition
            decision_str = getattr(conditions[run_info[channel]], mode)
            # decision is an alias for the functions "unblock" or "stop_receiving"
            decision = decision_dict[decision_str]

            # If max_chunks has been exceeded AND we don't want to keep sequencing we unblock
            if exceeded_threshold and decision_str != "stop_receiving":
                mode = "exceeded_max_chunks_unblocked"
                client.unblock_read(channel, read_number, unblock_duration, read_id)

            # TODO: WHAT IS GOING ON?!
            #  I think that this needs to change between enrichment and depletion
            # If under min_chunks AND any mapping mode seen we unblock
            # if below_threshold and mode in {"single_off", "multi_off"}:
            if below_threshold and mode in {
                "single_on",
                "single_off",
                "multi_on",
                "multi_off",
            }:
                mode = "below_min_chunks_unblocked"
                client.unblock_read(channel, read_number, unblock_duration, read_id)

            # proceed returns None, so we send no decision; otherwise unblock or stop_receiving
            elif decision is not None:
                decided_reads[channel] = read_id
                decision(channel, read_number)

            log_decision()

        t1 = timer()
        s1 = "Took {:.5f} to call and map {} reads"
        logger.info(s1.format(t1 - t0, r))
        # limit the rate at which we make requests
        if t0 + throttle > t1:
            time.sleep(throttle + t0 - t1)
    else:
        caller.disconnect()
        logger.info("Finished analysis of reads as client stopped.")