示例#1
0
def open_output_files(
    args,
    default_outfile,
    file_opener: FileOpener,
    adapter_names: Sequence[str],
    adapter_names2: Sequence[str],
) -> OutputFiles:
    """
    Check the output-related options, open the output files and return them
    bundled in an OutputFiles instance.

    The option checks are done before any file is opened, so that a
    conflicting set of options does not leave files opened for writing behind.

    args -- parsed command-line options (accessed by attribute)
    default_outfile -- used as ``out`` when not demultiplexing and
        xopen_pair() returned None for args.output
    file_opener -- provides xopen_or_none() and xopen_pair()
    adapter_names, adapter_names2 -- forwarded to open_demultiplex_out()
        (adapter_names only) or open_combinatorial_out() (both)

    Which attributes of the result are set depends on the mode reported by
    determine_demultiplex_mode():

    - "normal": demultiplex_out, demultiplex_out2, untrimmed and untrimmed2
      are whatever open_demultiplex_out() returns; out, out2 and the
      combinatorial_* attributes are None.
    - "combinatorial": combinatorial_out, combinatorial_out2, untrimmed and
      untrimmed2 are whatever open_combinatorial_out() returns; out, out2 and
      the demultiplex_* attributes are None.
    - anything else: untrimmed/untrimmed2 and out/out2 are opened with
      xopen_pair(); the demultiplex_* and combinatorial_* attributes are None.

    Raise CommandLineError if more than one of --discard-trimmed,
    --discard-untrimmed and --untrimmed-output is used, or if
    --discard-trimmed is combined with demultiplexing.
    """

    # Validate first: nothing must be opened (mode "wb") if the options conflict
    if int(args.discard_trimmed) + int(args.discard_untrimmed) + int(
            args.untrimmed_output is not None) > 1:
        raise CommandLineError(
            "Only one of the --discard-trimmed, --discard-untrimmed "
            "and --untrimmed-output options can be used at the same time.")

    demultiplex_mode = determine_demultiplex_mode(args)
    if demultiplex_mode and args.discard_trimmed:
        raise CommandLineError(
            "Do not use --discard-trimmed when demultiplexing.")

    rest_file = file_opener.xopen_or_none(args.rest_file, "wb")
    info_file = file_opener.xopen_or_none(args.info_file, "wb")
    wildcard = file_opener.xopen_or_none(args.wildcard_file, "wb")

    # The too-short/too-long files are only opened when the corresponding
    # length filter is active
    too_short = too_short2 = None
    if args.minimum_length is not None:
        too_short, too_short2 = file_opener.xopen_pair(
            args.too_short_output, args.too_short_paired_output, "wb")

    too_long = too_long2 = None
    if args.maximum_length is not None:
        too_long, too_long2 = file_opener.xopen_pair(
            args.too_long_output, args.too_long_paired_output, "wb")

    if demultiplex_mode == "normal":
        out = out2 = None
        combinatorial_out = combinatorial_out2 = None
        demultiplex_out, demultiplex_out2, untrimmed, untrimmed2 = open_demultiplex_out(
            adapter_names, args, file_opener)
    elif demultiplex_mode == "combinatorial":
        # Internal sanity check; presumably determine_demultiplex_mode() only
        # reports "combinatorial" when both templates are present -- TODO confirm
        assert '{name1}' in args.output and '{name2}' in args.output
        assert '{name1}' in args.paired_output and '{name2}' in args.paired_output
        out = out2 = None
        demultiplex_out = demultiplex_out2 = None
        combinatorial_out, combinatorial_out2, untrimmed, untrimmed2 = open_combinatorial_out(
            adapter_names, adapter_names2, args, file_opener)
    else:
        combinatorial_out = combinatorial_out2 = None
        demultiplex_out = demultiplex_out2 = None
        untrimmed, untrimmed2 = file_opener.xopen_pair(
            args.untrimmed_output, args.untrimmed_paired_output, "wb")
        out, out2 = file_opener.xopen_pair(args.output, args.paired_output,
                                           "wb")
        if out is None:
            out = default_outfile

    return OutputFiles(
        rest=rest_file,
        info=info_file,
        wildcard=wildcard,
        too_short=too_short,
        too_short2=too_short2,
        too_long=too_long,
        too_long2=too_long2,
        untrimmed=untrimmed,
        untrimmed2=untrimmed2,
        out=out,
        out2=out2,
        demultiplex_out=demultiplex_out,
        demultiplex_out2=demultiplex_out2,
        combinatorial_out=combinatorial_out,
        combinatorial_out2=combinatorial_out2,
        force_fasta=args.fasta,
    )
示例#2
0
def open_output_files(args, default_outfile,
                      file_opener: FileOpener) -> OutputFiles:
    """
    Check the output-related options, open the output files and return them
    bundled in an OutputFiles instance.

    The option checks are done before any file is opened, so that a
    conflicting set of options does not leave files opened for writing behind.

    args -- parsed command-line options (accessed by attribute)
    default_outfile -- used as ``out`` when not demultiplexing and
        xopen_pair() returned None for args.output
    file_opener -- provides xopen_or_none() and xopen_pair()

    When demultiplexing (determine_demultiplex_mode() returns a true value),
    the untrimmed, untrimmed2, out and out2 attributes of the result are not
    opened files, but strings or None:

    - "normal" mode: out/out2 are the paths containing the '{name}' template;
      untrimmed/untrimmed2 are the explicitly given untrimmed paths or else
      the template with '{name}' replaced by 'unknown', and None if
      --discard-untrimmed is used.
    - "combinatorial" mode: out/out2 are args.output/args.paired_output as
      given; untrimmed/untrimmed2 are both the string 'unknown', or both None
      if --discard-untrimmed is used.

    Raise CommandLineError if more than one of --discard-trimmed,
    --discard-untrimmed and --untrimmed-output is used, if --discard-trimmed
    is combined with demultiplexing, or if combinatorial demultiplexing is
    combined with --untrimmed-output or --untrimmed-paired-output.
    """

    # Validate first: nothing must be opened (mode "wb") if the options conflict
    if int(args.discard_trimmed) + int(args.discard_untrimmed) + int(
            args.untrimmed_output is not None) > 1:
        raise CommandLineError(
            "Only one of the --discard-trimmed, --discard-untrimmed "
            "and --untrimmed-output options can be used at the same time.")

    demultiplex_mode = determine_demultiplex_mode(args)
    if demultiplex_mode and args.discard_trimmed:
        raise CommandLineError(
            "Do not use --discard-trimmed when demultiplexing.")

    if demultiplex_mode == "combinatorial" and (
            args.untrimmed_output or args.untrimmed_paired_output):
        raise CommandLineError(
            "Combinatorial demultiplexing (with {name1} and {name2})"
            " cannot be combined with --untrimmed-output or --untrimmed-paired-output"
        )

    rest_file = file_opener.xopen_or_none(args.rest_file, "wb")
    info_file = file_opener.xopen_or_none(args.info_file, "wb")
    wildcard = file_opener.xopen_or_none(args.wildcard_file, "wb")

    # The too-short/too-long files are only opened when the corresponding
    # length filter is active
    too_short = too_short2 = None
    if args.minimum_length is not None:
        too_short, too_short2 = file_opener.xopen_pair(
            args.too_short_output, args.too_short_paired_output, "wb")

    too_long = too_long2 = None
    if args.maximum_length is not None:
        too_long, too_long2 = file_opener.xopen_pair(
            args.too_long_output, args.too_long_paired_output, "wb")

    if demultiplex_mode == "normal":
        # Only paths are prepared here; no file is opened in this branch
        out = args.output
        untrimmed = args.output.replace('{name}', 'unknown')
        if args.untrimmed_output:
            untrimmed = args.untrimmed_output
        if args.discard_untrimmed:
            untrimmed = None

        if args.paired_output is not None:
            out2 = args.paired_output
            untrimmed2 = args.paired_output.replace('{name}', 'unknown')
            if args.untrimmed_paired_output:
                untrimmed2 = args.untrimmed_paired_output
            if args.discard_untrimmed:
                untrimmed2 = None
        else:
            untrimmed2 = out2 = None

        assert out is not None and '{name}' in out and (out2 is None
                                                        or '{name}' in out2)
    elif demultiplex_mode == "combinatorial":
        out = args.output
        out2 = args.paired_output
        if args.discard_untrimmed:
            untrimmed = untrimmed2 = None
        else:
            untrimmed = untrimmed2 = 'unknown'
    else:
        untrimmed, untrimmed2 = file_opener.xopen_pair(
            args.untrimmed_output, args.untrimmed_paired_output, "wb")
        out, out2 = file_opener.xopen_pair(args.output, args.paired_output,
                                           "wb")
        if out is None:
            out = default_outfile

    return OutputFiles(
        rest=rest_file,
        info=info_file,
        wildcard=wildcard,
        too_short=too_short,
        too_short2=too_short2,
        too_long=too_long,
        too_long2=too_long2,
        untrimmed=untrimmed,
        untrimmed2=untrimmed2,
        out=out,
        out2=out2,
        demultiplex=bool(demultiplex_mode),
        force_fasta=args.fasta,
    )
示例#3
0
def main(cmdlineargs, default_outfile=sys.stdout.buffer) -> Statistics:
    """
    Build a processing pipeline as described by the command-line arguments,
    run it and hand back the resulting Statistics object.

    default_outfile receives the trimmed reads when the ``-o`` option is
    not given.
    """
    started_at = time.time()
    parser = get_argument_parser()
    args, unknown_args = parser.parse_known_args(args=cmdlineargs)

    # Logging may use stdout only when the reads go to a named file;
    # otherwise stdout is needed for the results themselves
    log_to_stdout = not (args.output is None or args.output == "-"
                         or args.paired_output == "-")
    # If the root logger already has handlers, somebody else (unit tests,
    # for example) has configured logging and we leave it alone
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=log_to_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    log_header(cmdlineargs)
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if unknown_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(unknown_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # A value of zero requests automatic detection of the core count
    if args.cores == 0:
        cores = available_cpu_count()
    else:
        cores = args.cores
    opener_threads = 0 if cores == 1 else None
    file_opener = FileOpener(compression_level=args.compression_level,
                             threads=opener_threads)
    show_progress = sys.stderr.isatty() and not args.quiet
    progress = Progress() if show_progress else DummyProgress()
    paired = determine_paired(args)
    assert paired in (False, True)

    try:
        is_interleaved_input = args.interleaved and len(args.inputs) == 1
        input_filename, input_paired_filename = setup_input_files(
            args.inputs, paired, is_interleaved_input)
        check_arguments(args, paired)
        adapters, adapters2 = adapters_from_args(args)
        pipeline = pipeline_from_parsed_args(
            args, paired, file_opener, adapters, adapters2)
        adapter_names = [a.name for a in adapters]  # type: List[str]
        adapter_names2 = [a.name for a in adapters2]  # type: List[str]
        outfiles = open_output_files(
            args, default_outfile, file_opener, adapter_names, adapter_names2)
        inpaths = InputPaths(
            input_filename,
            path2=input_paired_filename,
            interleaved=is_interleaved_input,
        )
        runner = setup_runner(
            pipeline, inpaths, outfiles, progress, cores, args.buffer_size,
            file_opener)
    except CommandLineError as err:
        logger.debug("Command line error. Traceback:", exc_info=True)
        parser.error(str(err))
        return

    plural_suffix = 's' if cores > 1 else ''
    mode_names = {False: 'single-end', True: 'paired-end'}
    logger.info("Processing reads on %d core%s in %s mode ...", cores,
                plural_suffix, mode_names[pipeline.paired])
    try:
        with runner as r:
            stats = r.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as err:
        logger.debug("Command line error. Traceback:", exc_info=True)
        sys.exit("cutadapt: error: {}".format(err))

    elapsed = time.time() - started_at
    report = minimal_report if args.report == 'minimal' else full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))
    if profiler is not None:
        import pstats
        profiler.disable()
        profile_stats = pstats.Stats(profiler)
        profile_stats.sort_stats('time').print_stats(20)
    return stats
示例#4
0
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Entry point: build a processing pipeline from the command-line
    arguments and run it.

    cmdlineargs defaults to ``sys.argv[1:]`` when it is None.
    default_outfile receives the trimmed reads when the ``-o`` option is
    not given.
    """
    started_at = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args, unknown_args = parser.parse_known_args(args=cmdlineargs)

    # Logging may use stdout only when the reads go to a named file;
    # otherwise stdout is needed for the results themselves
    log_to_stdout = not (args.output is None or args.output == "-"
                         or args.paired_output == "-")
    # If the root logger already has handlers, somebody else (unit tests,
    # for example) has configured logging and we leave it alone
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=log_to_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")

    paired = determine_paired(args)
    assert paired in (False, True)

    # The header has to come out at this point, as some of the calls
    # further down produce log output of their own
    log_header(cmdlineargs)
    if unknown_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(unknown_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # A value of zero requests automatic detection of the core count
    if args.cores == 0:
        cores = available_cpu_count()
    else:
        cores = args.cores
    opener_threads = 0 if cores == 1 else None
    file_opener = FileOpener(compression_level=args.compression_level,
                             threads=opener_threads)
    show_progress = sys.stderr.isatty() and not args.quiet
    progress = Progress() if show_progress else DummyProgress()

    try:
        is_interleaved_input = args.interleaved and len(args.inputs) == 1
        input_filename, input_paired_filename = setup_input_files(
            args.inputs, paired, is_interleaved_input)
        check_arguments(args, paired)
        pipeline = pipeline_from_parsed_args(args, paired, file_opener)
        outfiles = open_output_files(args, default_outfile, file_opener)
        infiles = InputFiles(
            input_filename,
            file2=input_paired_filename,
            interleaved=is_interleaved_input,
        )
        runner = setup_runner(
            pipeline, infiles, outfiles, progress, cores, args.buffer_size)
    except CommandLineError as err:
        parser.error(str(err))
        return  # keeps IDEs from warning about unbound names below

    plural_suffix = 's' if cores > 1 else ''
    mode_names = {False: 'single-end', True: 'paired-end'}
    logger.info("Processing reads on %d core%s in %s mode ...", cores,
                plural_suffix, mode_names[pipeline.paired])
    try:
        with runner as r:
            stats = r.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as err:
        sys.exit("cutadapt: error: {}".format(err))

    elapsed = time.time() - started_at
    report = minimal_report if args.report == 'minimal' else full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))
    if profiler is not None:
        import pstats
        profiler.disable()
        profile_stats = pstats.Stats(profiler)
        profile_stats.sort_stats('time').print_stats(20)