Exemplo n.º 1
0
def main(argv=sys.argv[1:]):
    """
    Entry point of the qcat command line tool.

    Parses the arguments, configures logging and then either lists the
    known kits or runs the demultiplexer through ``qcat_cli``. Any
    ``IOError`` or ``ValueError`` raised along the way is logged rather
    than propagated.

    :param argv: Command line arguments
    :type argv: list
    :return: None
    :rtype: NoneType
    """
    try:
        args = parse_args(argv=argv)
        qcat_config = config.get_default_config()

        # Translate the textual level (e.g. "info") into the logging constant.
        level_name = args.log.upper()
        log_level = getattr(logging, level_name, None)
        if not isinstance(log_level, int):
            raise ValueError('Invalid log level: %s' % level_name)
        logging.basicConfig(level=log_level, format='%(message)s')

        if args.list_kits:
            # Only print the kit table; "auto" and "DUAL" are left out.
            kit_descriptions = get_kits_info()
            for kit_name in sorted(kit_descriptions.keys()):
                if kit_name in ("auto", "DUAL"):
                    continue
                logging.info("{:<30}{}".format(kit_name,
                                               kit_descriptions[kit_name]))
            return

        mode = get_mode(args)
        # In simple mode the barcode set name takes the place of the kit.
        kit = args.SIMPLE_BARCODES if mode == "simple" else args.kit

        started = time.time()
        qcat_cli(reads_fq=args.fastq,
                 kit=kit,
                 mode=mode,
                 nobatch=args.nobatch,
                 out=args.barcode_dir,
                 min_qual=args.min_qual,
                 tsv=args.tsv,
                 output=args.output,
                 threads=args.threads,
                 trim=args.TRIM,
                 adapter_yaml=None,  # always None here
                 quiet=args.QUIET,
                 filter_barcodes=args.FILTER_BARCODES,
                 middle_adapter=args.DETECT_MIDDLE,
                 min_read_length=args.min_length,
                 qcat_config=qcat_config)
        elapsed = time.time() - started

        if not args.QUIET:
            logging.info("Demultiplexing finished in {0:.2f}s".format(elapsed))
    except (IOError, ValueError) as e:
        logging.error(e)
Exemplo n.º 2
0
def test_scanner_find_best_adapter_template():
    """Check find_best_adapter_template with no input and with an RBK001 read."""
    qcat_config = config.get_default_config()

    # Without adapter and read, both template index and end position are -1.
    template_idx, end_pos, _score = find_best_adapter_template(
        None, None, qcat_config)
    assert template_idx == -1
    assert end_pos == -1

    # An exact barcode-3 read scanned against the RBK001 adapter.
    rbk001 = get_adapter_by_name("RBK001")
    template_idx, end_pos, _score = find_best_adapter_template(
        rbk001, read_bc3_exact, qcat_config)
    assert template_idx == 0
    assert end_pos == 101
Exemplo n.º 3
0
def test_scanner_detect_barcode_generic():
    """Run BarcodeScannerEPI2ME.detect_barcode over a set of fixture reads."""
    qcat_config = config.get_default_config()
    detector = BarcodeScannerEPI2ME()

    def called_barcode(raw_read, pass_config):
        """Extract the alignment sequence and return the detected barcode.

        ``pass_config`` selects whether ``qcat_config`` is handed to
        ``detect_barcode`` explicitly or the call is made without it.
        """
        seq = extract_align_sequence(raw_read, False,
                                     qcat_config.max_align_length)
        if pass_config:
            result = detector.detect_barcode(seq, qcat_config=qcat_config)
        else:
            result = detector.detect_barcode(seq)
        return result['barcode']

    # Calls that pass the configuration explicitly.
    assert called_barcode(read, True).name == "barcode02"
    assert called_barcode(read_bc3_exact, True).name == "barcode03"
    assert called_barcode(read_bc3, True).name == "barcode03"

    # Calls that omit the qcat_config argument.
    assert called_barcode(real_bc03_porechop, False).name == "barcode03"
    assert called_barcode(read_nobc, False) is None
    assert called_barcode("", False) is None
    assert called_barcode(simulated_bc03, False).name == "barcode03"

    # The same read again with an explicit configuration (checked twice).
    assert called_barcode(real_bc03_porechop, True).name == "barcode03"
    assert called_barcode(real_bc03_porechop, True).name == "barcode03"
Exemplo n.º 4
0
def parse_args(argv):
    """
    Build the qcat argument parser and parse ``argv`` with it.

    Options are organised into general settings, a mutually exclusive set
    of demultiplexing modes, and EPI2ME / simple mode specific options.

    :param argv: Command line arguments
    :type argv: List
    :return: Parsed arguments as returned by ``ArgumentParser.parse_args``
    """
    # NOTE: the returned config is not consulted anywhere below.
    defaults = config.get_default_config()

    usage = ("Python command-line tool for demultiplexing Oxford Nanopore "
             "reads from FASTQ files")
    parser = ArgumentParser(description=usage,
                            formatter_class=RawDescriptionHelpFormatter)

    # Top-level options
    parser.add_argument("-V", '--version',
                        action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument('-l', "--log",
                        dest="log", type=str, default="INFO",
                        help="Print debug information")
    parser.add_argument("--quiet",
                        dest="QUIET", action='store_true',
                        help="Don't print summary")

    # General settings
    general = parser.add_argument_group('General settings')
    general.add_argument("-f", "--fastq",
                         type=str, dest="fastq",
                         help="Barcoded read file")
    general.add_argument('-b', "--barcode_dir",
                         dest="barcode_dir", type=str, default=None,
                         help="If specified, qcat will demultiplex reads "
                              "to this folder")
    general.add_argument('-o', "--output",
                         dest="output", type=str, default=None,
                         help="Output file trimmed reads will be written to "
                              "(default: stdout).")
    general.add_argument("--min-score",
                         dest="min_qual", type=check_minqual_arg,
                         default=None,
                         help="Minimum barcode score. Barcode calls with a "
                              "lower score will be discarded. "
                              "Must be between 0 and 100. (default: 60)")
    general.add_argument("--detect-middle",
                         dest="DETECT_MIDDLE", action='store_true',
                         help="Search for adapters in the whole read")
    general.add_argument('-t', "--threads",
                         dest="threads", type=int, default=1,
                         help="Number of threads. Only works with in guppy "
                              "mode")
    general.add_argument("--min-read-length",
                         dest="min_length", type=int, default=100,
                         help="Reads short than <min-read-length> after "
                              "trimming will be discarded.")
    general.add_argument("--tsv",
                         dest="tsv", action='store_true',
                         help="Prints a tsv file containing barcode "
                              "information each read to stdout.")
    general.add_argument("--trim",
                         dest="TRIM", action='store_true',
                         help="Remove adapter and barcode sequences from "
                              "reads.")
    general.add_argument("-k", "--kit",
                         dest="kit", choices=get_kits(), type=check_kit_arg,
                         default="auto",
                         help="Sequencing kit. Specifying the correct kit "
                              "will improve sensitivity and specificity and "
                              "runtime (default: auto)")
    general.add_argument("--list-kits",
                         dest="list_kits", action='store_true',
                         help="List all supported kits")

    # Demultiplexing modes: at most one of these flags may be given.
    modes = parser.add_argument_group("Demultiplexing modes")
    mode_choice = modes.add_mutually_exclusive_group()
    mode_choice.add_argument("--guppy",
                             dest="MODE_GUPPY", action='store_true',
                             help="Use Guppy's demultiplexing algorithm "
                                  "(default: false)")
    mode_choice.add_argument("--epi2me",
                             dest="MODE_EPI2ME", action='store_true',
                             help="Use EPI2ME's demultiplexing algorithm "
                                  "(default: true)")
    mode_choice.add_argument("--dual",
                             dest="MODE_DUAL", action='store_true',
                             help="Use dual barcoding algorithm")
    mode_choice.add_argument("--simple",
                             dest="MODE_SIMPLE", action='store_true',
                             help="Use simple demultiplexing algorithm. Only "
                                  "looks for barcodes, not for adapter "
                                  "sequences. Use only for testing "
                                  "purposes!")

    # EPI2ME specific options
    epi2me = parser.add_argument_group(
        'EPI2ME options (only valid with --epi2me)')
    epi2me.add_argument("--no-batch",
                        dest="nobatch", action='store_true',
                        help="Don't use information from multiple reads for "
                             "kit detection (default: false)")
    epi2me.add_argument("--filter-barcodes",
                        dest="FILTER_BARCODES", action='store_true',
                        help="Filter rare barcode calls when run in batch "
                             "mode")

    # Simple mode specific options
    simple = parser.add_argument_group(
        'Simple options (only valid with --simple)')
    simple.add_argument("--simple-barcodes",
                        dest="SIMPLE_BARCODES",
                        choices=['standard', 'extended'],
                        default="standard",
                        help="Use 12 (standard) or 96 (extended) barcodes "
                             "for demultiplexing")

    return parser.parse_args(argv)
Exemplo n.º 5
0
def main(argv=sys.argv[1:]):
    """
    Write per-read barcode calls for a FASTQ file to a TSV file.

    For every read in ``args.FASTQ`` the function determines a "true"
    barcode (taken from the read comment when it carries a ``truebc``
    entry, otherwise obtained from ``get_truebc`` using the aligner built
    from ``args.GENOMES``), runs the NBD10X barcode detector and a middle
    adapter scan, and writes one tab-separated row to ``args.TSV``.
    Progress dots are printed to stdout; processing stops once
    ``args.MAX`` reads have been handled.

    :param argv: Command line arguments
    :type argv: list
    :return: None
    :rtype: NoneType
    :raises Exception: if the alignment index cannot be loaded or built
    """
    args = parse_args(argv=argv)

    reference_file = args.GENOMES
    a = mp.Aligner(reference_file, preset="map-ont")  # load or build index
    if not a:
        raise Exception("ERROR: failed to load/build index")

    name = args.NAME
    f = args.FASTQ

    # The context manager guarantees the TSV is flushed and closed even if
    # detection or FASTQ parsing raises part-way through the file.
    with open(args.TSV, "w") as output_file:
        detector = BarcodeScanner.factory(kit="NBD10X", min_quality=0)

        print("dataset", "id", "length", "qscore", "score", "barcode",
              "truebc", "truebc_assignment", "middle_adapter",
              "non_matching_bc", "sequence",
              sep='\t', file=output_file)
        print(name, f)

        max_reads = int(args.MAX)
        with pysam.FastxFile(f) as fh:
            for i, entry in enumerate(fh, start=1):
                # Prefer a barcode recorded in the read comment; fall back
                # to deriving it from the reference alignment.
                info = _parse_reads_info(entry.comment)
                if 'truebc' in info:
                    truebc = info['truebc']
                    truebc_assignment = "Pre_assigned"
                else:
                    truebc, truebc_assignment = get_truebc(a, entry.sequence)

                # A fresh default config is fetched for every read, exactly
                # as before, in case the detector modifies it.
                c = config.get_default_config()
                result = detector.detect_barcode(entry.sequence,
                                                 qcat_config=c)
                middle_adapter = detector.scan_middle(entry.sequence,
                                                      "NBD10X", c)

                q_score = _compute_mean_qscore(
                    qstring_to_phred(entry.quality))

                bc = "none"
                score = 0.0
                if result['barcode']:
                    bc = result['barcode'].id
                    score = result['barcode_score']

                # Exit status 1002 is reported in the "non_matching_bc"
                # column.
                print(name, entry.name, len(entry.sequence), q_score, score,
                      bc, truebc, truebc_assignment, middle_adapter,
                      result['exit_status'] == 1002, entry.sequence,
                      sep='\t', file=output_file)

                # Progress indicator: one dot per 1000 reads, a line break
                # every 50000 reads.
                if i % 1000 == 0:
                    print(".", end="")
                if i % 50000 == 0:
                    print("")

                if i == max_reads:
                    print("Reach maximumg, stopping.")
                    break
        print("")

    print("finished")