def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Build the read-processing pipeline described by the command line, run it
    and log a report.

    cmdlineargs -- list of command-line arguments; ``sys.argv[1:]`` is used
        when this is None.
    default_outfile -- the file to which trimmed reads are sent if the ``-o``
        parameter is not used.

    Nothing is returned. Invalid arguments are reported through
    ``parser.error()``; an interrupt, a broken pipe or an input-format problem
    while the pipeline is running ends the program through ``sys.exit()``.
    """
    started_at = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args, unknown_args = parser.parse_known_args(args=cmdlineargs)

    # Log messages must not go to stdout when the results are sent there
    results_on_stdout = (
        args.output is None
        or args.output == "-"
        or args.paired_output == "-"
    )

    # An external caller (such as a unit test) may already have installed
    # logging handlers; configure logging only if there are none yet.
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=not results_on_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error("Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data."
        )

    paired = determine_paired(args)
    assert paired in (False, True)

    # The header has to come out before the calls below, some of which log
    log_header(cmdlineargs)

    if unknown_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(unknown_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')
    # --cores 0 requests as many cores as are available
    if args.cores == 0:
        cores = available_cpu_count()
    else:
        cores = args.cores

    file_opener = FileOpener(
        compression_level=args.compression_level,
        threads=0 if cores == 1 else None,
    )

    # A real progress display is used only on a terminal and without --quiet
    show_progress = sys.stderr.isatty() and not args.quiet
    progress = Progress() if show_progress else DummyProgress()

    try:
        interleaved_in = args.interleaved and len(args.inputs) == 1
        path1, path2 = setup_input_files(args.inputs, paired, interleaved_in)
        check_arguments(args, paired)
        pipeline = pipeline_from_parsed_args(args, paired, file_opener)
        outfiles = open_output_files(args, default_outfile, file_opener)
        infiles = InputFiles(path1, file2=path2, interleaved=interleaved_in)
        runner = setup_runner(
            pipeline, infiles, outfiles, progress, cores, args.buffer_size
        )
    except CommandLineError as err:
        parser.error(str(err))
        return  # avoid IDE warnings below

    mode_names = {False: 'single-end', True: 'paired-end'}
    logger.info(
        "Processing reads on %d core%s in %s mode ...",
        cores,
        's' if cores > 1 else '',
        mode_names[pipeline.paired],
    )

    try:
        with runner as active_runner:
            stats = active_runner.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as err:
        sys.exit("cutadapt: error: {}".format(err))

    elapsed = time.time() - started_at
    make_report = minimal_report if args.report == 'minimal' else full_report
    logger.log(REPORT, '%s', make_report(stats, elapsed, args.gc_content / 100))

    if profiler is None:
        return
    import pstats
    profiler.disable()
    pstats.Stats(profiler).sort_stats('time').print_stats(20)
def main(cmdlineargs, default_outfile=sys.stdout.buffer) -> Statistics:
    """
    Turn the command-line arguments into a processing pipeline, run it, log a
    report and return the resulting Statistics object.

    cmdlineargs -- list of command-line arguments to parse.
    default_outfile -- the file to which trimmed reads are sent if the ``-o``
        parameter is not used.

    Invalid arguments are reported through ``parser.error()``; an interrupt,
    a broken pipe or an input-format problem while the pipeline is running
    ends the program through ``sys.exit()``.
    """
    started_at = time.time()
    parser = get_argument_parser()
    args, unknown_args = parser.parse_known_args(args=cmdlineargs)

    # Log messages must not go to stdout when the results are sent there
    results_on_stdout = (
        args.output is None
        or args.output == "-"
        or args.paired_output == "-"
    )

    # An external caller (such as a unit test) may already have installed
    # logging handlers; configure logging only if there are none yet.
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=not results_on_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    log_header(cmdlineargs)
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error("Options --quiet and --report cannot be used at the same time")

    if unknown_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(unknown_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')
    # --cores 0 requests as many cores as are available
    if args.cores == 0:
        cores = available_cpu_count()
    else:
        cores = args.cores

    file_opener = FileOpener(
        compression_level=args.compression_level,
        threads=0 if cores == 1 else None,
    )

    # A real progress display is used only on a terminal and without --quiet
    show_progress = sys.stderr.isatty() and not args.quiet
    progress = Progress() if show_progress else DummyProgress()

    paired = determine_paired(args)
    assert paired in (False, True)

    try:
        interleaved_in = args.interleaved and len(args.inputs) == 1
        path1, path2 = setup_input_files(args.inputs, paired, interleaved_in)
        check_arguments(args, paired)
        adapters, adapters2 = adapters_from_args(args)
        pipeline = pipeline_from_parsed_args(
            args, paired, file_opener, adapters, adapters2
        )
        adapter_names = [adapter.name for adapter in adapters]  # type: List[str]
        adapter_names2 = [adapter.name for adapter in adapters2]  # type: List[str]
        outfiles = open_output_files(
            args, default_outfile, file_opener, adapter_names, adapter_names2
        )
        inpaths = InputPaths(path1, path2=path2, interleaved=interleaved_in)
        runner = setup_runner(
            pipeline, inpaths, outfiles, progress, cores, args.buffer_size, file_opener
        )
    except CommandLineError as err:
        logger.debug("Command line error. Traceback:", exc_info=True)
        parser.error(str(err))
        return

    mode_names = {False: 'single-end', True: 'paired-end'}
    logger.info(
        "Processing reads on %d core%s in %s mode ...",
        cores,
        's' if cores > 1 else '',
        mode_names[pipeline.paired],
    )

    try:
        with runner as active_runner:
            stats = active_runner.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as err:
        # NOTE(review): the debug text says "Command line error" although this
        # handler deals with input-format/EOF errors -- confirm the wording.
        logger.debug("Command line error. Traceback:", exc_info=True)
        sys.exit("cutadapt: error: {}".format(err))

    elapsed = time.time() - started_at
    make_report = minimal_report if args.report == 'minimal' else full_report
    logger.log(REPORT, '%s', make_report(stats, elapsed, args.gc_content / 100))

    if profiler is not None:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)
    return stats
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Main function that sets up a processing pipeline and runs it.

    cmdlineargs -- list of command-line arguments; ``sys.argv[1:]`` is used
        when this is None.
    default_outfile -- the file to which trimmed reads are sent if the ``-o``
        parameter is not used.

    Nothing is returned. Invalid arguments are reported through
    ``parser.error()``; an interrupt, a broken pipe or an input-format problem
    while the pipeline is running ends the program through ``sys.exit()``.
    The runner is closed whether or not running it succeeded.
    """
    start_time = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args = parser.parse_args(args=cmdlineargs)
    # log to stderr if results are to be sent to stdout
    log_to_stdout = args.output is not None and args.output != "-" and args.paired_output != "-"
    # Setup logging only if there are not already any handlers (can happen when
    # this function is being called externally such as from unit tests)
    if not logging.root.handlers:
        setup_logging(logger, stdout=log_to_stdout,
            quiet=args.quiet, minimal=args.report == 'minimal', debug=args.debug)
    if args.profile:
        import cProfile
        profiler = cProfile.Profile()
        profiler.enable()

    if args.quiet and args.report:
        parser.error("Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")
    paired = determine_paired_mode(args)
    assert paired in (False, True)

    # Print the header now because some of the functions below create logging output
    log_header(cmdlineargs)
    try:
        is_interleaved_input, is_interleaved_output = determine_interleaved(args)
        input_filename, input_paired_filename = input_files_from_parsed_args(
            args.inputs, paired, is_interleaved_input)
        pipeline = pipeline_from_parsed_args(args, paired, is_interleaved_output)
        outfiles = open_output_files(args, default_outfile, is_interleaved_output)
    except CommandLineError as e:
        parser.error(str(e))
        return  # avoid IDE warnings below

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')
    # --cores 0 requests as many cores as are available
    cores = available_cpu_count() if args.cores == 0 else args.cores
    if cores > 1:
        # The parallel runner is used only if it can write to these outputs
        if ParallelPipelineRunner.can_output_to(outfiles):
            runner_class = ParallelPipelineRunner
            runner_kwargs = dict(n_workers=cores, buffer_size=args.buffer_size)
        else:
            parser.error(
                'Running in parallel is currently not supported for '
                'the given combination of command-line parameters.\nThese '
                'options are not supported: --info-file, --rest-file, '
                '--wildcard-file, --format\n'
                'Also, demultiplexing is not supported.\n'
                'Omit --cores/-j to continue.')
            return  # avoid IDE warnings below
    else:
        runner_class = SerialPipelineRunner
        runner_kwargs = dict()
    infiles = InputFiles(input_filename, file2=input_paired_filename,
        interleaved=is_interleaved_input)
    # Show a real progress display only on a terminal and without --quiet
    if sys.stderr.isatty() and not args.quiet:
        progress = Progress()
    else:
        progress = DummyProgress()
    try:
        runner = runner_class(pipeline, infiles, outfiles, progress, **runner_kwargs)
    except (dnaio.UnknownFileFormat, IOError) as e:
        # parser.error() takes a message string, so convert the exception
        parser.error(str(e))
        return  # avoid IDE warnings below

    logger.info("Processing reads on %d core%s in %s mode ...",
        cores, 's' if cores > 1 else '',
        {False: 'single-end', True: 'paired-end'}[pipeline.paired])
    try:
        # Close the runner even if run() raises, so that it is not left open
        # when one of the handlers below exits or another exception propagates.
        try:
            stats = runner.run()
        finally:
            runner.close()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as e:
        sys.exit("cutadapt: error: {}".format(e))

    elapsed = time.time() - start_time
    if args.report == 'minimal':
        report = minimal_report
    else:
        report = full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))
    if args.profile:
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)