def open_output_files(
    args,
    default_outfile,
    file_opener: FileOpener,
    adapter_names: Sequence[str],
    adapter_names2: Sequence[str],
) -> OutputFiles:
    """
    Check the output-related options, open the requested output files and
    return them bundled in an OutputFiles instance.

    All option checks that raise CommandLineError are done before the first
    file is opened. The files are opened in "wb" mode (which truncates
    existing files under standard open() semantics), so rejecting the command
    line only after opening would leave freshly opened files and unclosed
    handles behind.

    args is the parsed command line.

    default_outfile is used as the ``out`` attribute when not demultiplexing
    and opening ``args.output`` yielded None.

    file_opener is used to open all files.

    adapter_names is passed on to open_demultiplex_out() or
    open_combinatorial_out(); adapter_names2 only to open_combinatorial_out().

    In the returned OutputFiles, ``out`` and ``out2`` are None when
    demultiplexing. The outputs are then found in ``demultiplex_out`` and
    ``demultiplex_out2`` ("normal" mode) or in ``combinatorial_out`` and
    ``combinatorial_out2`` ("combinatorial" mode); the respective other pair
    is None. Without demultiplexing, both pairs are None.

    Raise CommandLineError if more than one of --discard-trimmed,
    --discard-untrimmed and --untrimmed-output is used, or if
    --discard-trimmed is combined with demultiplexing.
    """
    # Validate first: nothing gets opened if the command line is rejected.
    exclusive_option_count = (
        int(args.discard_trimmed)
        + int(args.discard_untrimmed)
        + int(args.untrimmed_output is not None)
    )
    if exclusive_option_count > 1:
        raise CommandLineError(
            "Only one of the --discard-trimmed, --discard-untrimmed "
            "and --untrimmed-output options can be used at the same time.")

    demultiplex_mode = determine_demultiplex_mode(args)
    if demultiplex_mode and args.discard_trimmed:
        raise CommandLineError("Do not use --discard-trimmed when demultiplexing.")

    rest_file = file_opener.xopen_or_none(args.rest_file, "wb")
    info_file = file_opener.xopen_or_none(args.info_file, "wb")
    wildcard = file_opener.xopen_or_none(args.wildcard_file, "wb")

    # The too-short/too-long outputs are only opened when the corresponding
    # length filter is active.
    too_short = too_short2 = None
    if args.minimum_length is not None:
        too_short, too_short2 = file_opener.xopen_pair(
            args.too_short_output, args.too_short_paired_output, "wb")

    too_long = too_long2 = None
    if args.maximum_length is not None:
        too_long, too_long2 = file_opener.xopen_pair(
            args.too_long_output, args.too_long_paired_output, "wb")

    if demultiplex_mode == "normal":
        out = out2 = None
        combinatorial_out = combinatorial_out2 = None
        demultiplex_out, demultiplex_out2, untrimmed, untrimmed2 = open_demultiplex_out(
            adapter_names, args, file_opener)
    elif demultiplex_mode == "combinatorial":
        # Internal consistency checks, not user-input validation
        assert '{name1}' in args.output and '{name2}' in args.output
        assert '{name1}' in args.paired_output and '{name2}' in args.paired_output
        out = out2 = None
        demultiplex_out = demultiplex_out2 = None
        combinatorial_out, combinatorial_out2, untrimmed, untrimmed2 = open_combinatorial_out(
            adapter_names, adapter_names2, args, file_opener)
    else:
        combinatorial_out = combinatorial_out2 = None
        demultiplex_out = demultiplex_out2 = None
        untrimmed, untrimmed2 = file_opener.xopen_pair(
            args.untrimmed_output, args.untrimmed_paired_output, "wb")
        out, out2 = file_opener.xopen_pair(args.output, args.paired_output, "wb")
        if out is None:
            out = default_outfile

    return OutputFiles(
        rest=rest_file,
        info=info_file,
        wildcard=wildcard,
        too_short=too_short,
        too_short2=too_short2,
        too_long=too_long,
        too_long2=too_long2,
        untrimmed=untrimmed,
        untrimmed2=untrimmed2,
        out=out,
        out2=out2,
        demultiplex_out=demultiplex_out,
        demultiplex_out2=demultiplex_out2,
        combinatorial_out=combinatorial_out,
        combinatorial_out2=combinatorial_out2,
        force_fasta=args.fasta,
    )
def open_output_files(args, default_outfile, file_opener: FileOpener) -> OutputFiles:
    """
    Check the output-related options, open the requested output files and
    return them bundled in an OutputFiles instance.

    All option checks that raise CommandLineError are done before the first
    file is opened. The files are opened in "wb" mode (which truncates
    existing files under standard open() semantics), so rejecting the command
    line only after opening would leave freshly opened files and unclosed
    handles behind.

    args is the parsed command line.

    default_outfile is used as the ``out`` attribute when not demultiplexing
    and opening ``args.output`` yielded None.

    file_opener is used to open all files.

    When demultiplexing, the untrimmed, untrimmed2, out and out2 attributes
    of the result are not opened files:

    - In "normal" mode, out and out2 are the path templates containing
      '{name}'. untrimmed and untrimmed2 are paths: the explicitly given
      untrimmed output if there is one, otherwise the template with '{name}'
      replaced by 'unknown'. They are None with --discard-untrimmed, and
      out2/untrimmed2 are None if there is no paired output.
    - In "combinatorial" mode, out and out2 are the values of args.output
      and args.paired_output; untrimmed and untrimmed2 are both the string
      'unknown', or None with --discard-untrimmed.

    Raise CommandLineError if more than one of --discard-trimmed,
    --discard-untrimmed and --untrimmed-output is used, if --discard-trimmed
    is combined with demultiplexing, or if combinatorial demultiplexing is
    combined with --untrimmed-output or --untrimmed-paired-output.
    """
    # Validate first: nothing gets opened if the command line is rejected.
    exclusive_option_count = (
        int(args.discard_trimmed)
        + int(args.discard_untrimmed)
        + int(args.untrimmed_output is not None)
    )
    if exclusive_option_count > 1:
        raise CommandLineError(
            "Only one of the --discard-trimmed, --discard-untrimmed "
            "and --untrimmed-output options can be used at the same time.")

    demultiplex_mode = determine_demultiplex_mode(args)
    if demultiplex_mode and args.discard_trimmed:
        raise CommandLineError("Do not use --discard-trimmed when demultiplexing.")
    if demultiplex_mode == "combinatorial" and (
        args.untrimmed_output or args.untrimmed_paired_output
    ):
        raise CommandLineError(
            "Combinatorial demultiplexing (with {name1} and {name2})"
            " cannot be combined with --untrimmed-output or --untrimmed-paired-output")

    rest_file = file_opener.xopen_or_none(args.rest_file, "wb")
    info_file = file_opener.xopen_or_none(args.info_file, "wb")
    wildcard = file_opener.xopen_or_none(args.wildcard_file, "wb")

    # The too-short/too-long outputs are only opened when the corresponding
    # length filter is active.
    too_short = too_short2 = None
    if args.minimum_length is not None:
        too_short, too_short2 = file_opener.xopen_pair(
            args.too_short_output, args.too_short_paired_output, "wb")

    too_long = too_long2 = None
    if args.maximum_length is not None:
        too_long, too_long2 = file_opener.xopen_pair(
            args.too_long_output, args.too_long_paired_output, "wb")

    if demultiplex_mode == "normal":
        out = args.output
        untrimmed = args.output.replace('{name}', 'unknown')
        if args.untrimmed_output:
            untrimmed = args.untrimmed_output
        if args.discard_untrimmed:
            untrimmed = None

        if args.paired_output is not None:
            out2 = args.paired_output
            untrimmed2 = args.paired_output.replace('{name}', 'unknown')
            if args.untrimmed_paired_output:
                untrimmed2 = args.untrimmed_paired_output
            if args.discard_untrimmed:
                untrimmed2 = None
        else:
            untrimmed2 = out2 = None

        # Internal consistency check, not user-input validation
        assert out is not None and '{name}' in out and (out2 is None or '{name}' in out2)
    elif demultiplex_mode == "combinatorial":
        out = args.output
        out2 = args.paired_output
        untrimmed = untrimmed2 = None if args.discard_untrimmed else 'unknown'
    else:
        untrimmed, untrimmed2 = file_opener.xopen_pair(
            args.untrimmed_output, args.untrimmed_paired_output, "wb")
        out, out2 = file_opener.xopen_pair(args.output, args.paired_output, "wb")
        if out is None:
            out = default_outfile

    return OutputFiles(
        rest=rest_file,
        info=info_file,
        wildcard=wildcard,
        too_short=too_short,
        too_short2=too_short2,
        too_long=too_long,
        too_long2=too_long2,
        untrimmed=untrimmed,
        untrimmed2=untrimmed2,
        out=out,
        out2=out2,
        demultiplex=bool(demultiplex_mode),
        force_fasta=args.fasta,
    )
def main(cmdlineargs, default_outfile=sys.stdout.buffer) -> Statistics:
    """
    Parse the command line, build the processing pipeline, run it, log the
    report and return the resulting Statistics object.

    cmdlineargs is the list of command-line arguments given to the parser.

    default_outfile is passed on to open_output_files(); it is the file to
    which trimmed reads are sent if the ``-o`` parameter is not used.

    Problems with the command line are reported through parser.error().
    While the pipeline runs, KeyboardInterrupt ends the process with exit
    status 130, BrokenPipeError with exit status 1, and input format errors
    (including EOFError) with a "cutadapt: error: ..." message.
    """
    started = time.time()
    parser = get_argument_parser()
    args, unknown_args = parser.parse_known_args(args=cmdlineargs)

    # Logging must not go to stdout if the results are sent there
    results_on_stdout = (
        args.output is None or args.output == "-" or args.paired_output == "-"
    )
    log_to_stdout = not results_on_stdout

    # Handlers may already exist when this function is called externally
    # (such as from unit tests); logging is only set up if there are none.
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=log_to_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    log_header(cmdlineargs)
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error("Options --quiet and --report cannot be used at the same time")

    if unknown_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(unknown_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # --cores=0 requests automatic detection
    cores = available_cpu_count() if args.cores == 0 else args.cores
    file_opener = FileOpener(
        compression_level=args.compression_level,
        threads=0 if cores == 1 else None,
    )
    # Real progress reporting only when stderr is a terminal and not --quiet
    progress = Progress() if sys.stderr.isatty() and not args.quiet else DummyProgress()

    paired = determine_paired(args)
    assert paired in (False, True)

    try:
        is_interleaved_input = args.interleaved and len(args.inputs) == 1
        input_filename, input_paired_filename = setup_input_files(
            args.inputs, paired, is_interleaved_input)
        check_arguments(args, paired)
        adapters, adapters2 = adapters_from_args(args)
        pipeline = pipeline_from_parsed_args(args, paired, file_opener, adapters, adapters2)
        adapter_names = [adapter.name for adapter in adapters]  # type: List[str]
        adapter_names2 = [adapter.name for adapter in adapters2]  # type: List[str]
        outfiles = open_output_files(
            args, default_outfile, file_opener, adapter_names, adapter_names2)
        inpaths = InputPaths(
            input_filename, path2=input_paired_filename, interleaved=is_interleaved_input)
        runner = setup_runner(
            pipeline, inpaths, outfiles, progress, cores, args.buffer_size, file_opener)
    except CommandLineError as error:
        logger.debug("Command line error. Traceback:", exc_info=True)
        parser.error(str(error))
        # parser.error() presumably does not return; the explicit return
        # keeps the code below from running with unbound names if it does.
        return

    mode_names = {False: 'single-end', True: 'paired-end'}
    plural_suffix = 's' if cores > 1 else ''
    logger.info(
        "Processing reads on %d core%s in %s mode ...",
        cores,
        plural_suffix,
        mode_names[pipeline.paired],
    )

    try:
        with runner as active_runner:
            stats = active_runner.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as error:
        logger.debug("Command line error. Traceback:", exc_info=True)
        sys.exit("cutadapt: error: {}".format(error))

    elapsed = time.time() - started
    make_report = minimal_report if args.report == 'minimal' else full_report
    # args.gc_content is divided by 100 before it is passed to the report
    logger.log(REPORT, '%s', make_report(stats, elapsed, args.gc_content / 100))

    if profiler is not None:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)
    return stats
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Parse the command line, build the processing pipeline, run it and log
    the report.

    cmdlineargs is the list of command-line arguments given to the parser;
    if it is None, ``sys.argv[1:]`` is used.

    default_outfile is passed on to open_output_files(); it is the file to
    which trimmed reads are sent if the ``-o`` parameter is not used.

    Problems with the command line are reported through parser.error().
    While the pipeline runs, KeyboardInterrupt ends the process with exit
    status 130, BrokenPipeError with exit status 1, and input format errors
    (including EOFError) with a "cutadapt: error: ..." message.
    """
    started = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args, unknown_args = parser.parse_known_args(args=cmdlineargs)

    # Logging must not go to stdout if the results are sent there
    results_on_stdout = (
        args.output is None or args.output == "-" or args.paired_output == "-"
    )
    log_to_stdout = not results_on_stdout

    # Handlers may already exist when this function is called externally
    # (such as from unit tests); logging is only set up if there are none.
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=log_to_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error("Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")

    paired = determine_paired(args)
    assert paired in (False, True)

    # The header is printed at this point because some of the functions
    # called below create logging output.
    log_header(cmdlineargs)

    if unknown_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(unknown_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # --cores=0 requests automatic detection
    cores = available_cpu_count() if args.cores == 0 else args.cores
    file_opener = FileOpener(
        compression_level=args.compression_level,
        threads=0 if cores == 1 else None,
    )
    # Real progress reporting only when stderr is a terminal and not --quiet
    progress = Progress() if sys.stderr.isatty() and not args.quiet else DummyProgress()

    try:
        is_interleaved_input = args.interleaved and len(args.inputs) == 1
        input_filename, input_paired_filename = setup_input_files(
            args.inputs, paired, is_interleaved_input)
        check_arguments(args, paired)
        pipeline = pipeline_from_parsed_args(args, paired, file_opener)
        outfiles = open_output_files(args, default_outfile, file_opener)
        infiles = InputFiles(
            input_filename, file2=input_paired_filename, interleaved=is_interleaved_input)
        runner = setup_runner(pipeline, infiles, outfiles, progress, cores, args.buffer_size)
    except CommandLineError as error:
        parser.error(str(error))
        return  # avoid IDE warnings below

    mode_names = {False: 'single-end', True: 'paired-end'}
    plural_suffix = 's' if cores > 1 else ''
    logger.info(
        "Processing reads on %d core%s in %s mode ...",
        cores,
        plural_suffix,
        mode_names[pipeline.paired],
    )

    try:
        with runner as active_runner:
            stats = active_runner.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as error:
        sys.exit("cutadapt: error: {}".format(error))

    elapsed = time.time() - started
    make_report = minimal_report if args.report == 'minimal' else full_report
    # args.gc_content is divided by 100 before it is passed to the report
    logger.log(REPORT, '%s', make_report(stats, elapsed, args.gc_content / 100))

    if profiler is not None:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)