Exemplo n.º 1
0
def main(cmdlineargs=None, default_outfile=sys.stdout):
	"""
	Parse the command line, build the read-processing pipeline, run it and
	print a report.

	cmdlineargs is the list of command-line arguments; ``sys.argv[1:]`` is
	used when it is None.

	default_outfile is handed to open_output_files() and is the file to which
	trimmed reads are sent if the ``-o`` parameter is not used.

	Command-line problems are reported through ``parser.error``; an
	unsupported multi-core configuration, a broken pipe, an interrupt or a
	file format error end the process via ``sys.exit``.
	"""
	began = time.time()
	parser = get_option_parser()
	if cmdlineargs is None:
		cmdlineargs = sys.argv[1:]
	options, args = parser.parse_args(args=cmdlineargs)

	# Handlers may already exist when this function is invoked from other
	# code (unit tests, for instance); only configure logging when none do.
	if not logging.root.handlers:
		setup_logging(
			stdout=bool(options.output),
			quiet=options.quiet or options.report == 'minimal')
	if options.quiet and options.report:
		parser.error("Options --quiet and --report cannot be used at the same time")

	paired = determine_paired_mode(options)
	assert paired in (False, 'first', 'both')

	if paired == 'first':
		# Legacy mode: the user must not have chosen a pair filter.
		assert options.pair_filter is None
		pair_filter_mode = 'first'
	else:
		# Fall back to 'any' unless the user asked for something specific.
		pair_filter_mode = 'any' if options.pair_filter is None else options.pair_filter

	try:
		is_interleaved_input, is_interleaved_output = determine_interleaved(options, args)
		input_filename, input_paired_filename, quality_filename = input_files_from_parsed_args(
			args, paired, is_interleaved_input)
		pipeline = pipeline_from_parsed_args(
			options, paired, pair_filter_mode, quality_filename, is_interleaved_output)
		outfiles = open_output_files(options, default_outfile, is_interleaved_output)
	except CommandLineError as e:
		parser.error(e)
		return  # avoid IDE warnings below

	if options.cores < 0:
		parser.error('Value for --cores cannot be negative')
	# A value of 0 means "use however many CPUs available_cpu_count() reports".
	cores = options.cores
	if cores == 0:
		cores = available_cpu_count()

	if cores <= 1:
		runner = pipeline
	else:
		parallel_ok = (
			ParallelPipelineRunner.can_output_to(outfiles)
			and quality_filename is None
			and not options.colorspace
			and options.format is None
		)
		if parallel_ok:
			runner = ParallelPipelineRunner(pipeline, cores, options.buffer_size)
		else:
			logger.error('Running in parallel is currently not supported for '
				'the given combination of command-line parameters.\nThese '
				'options are not supported: --info-file, --rest-file, '
				'--wildcard-file, --untrimmed-output, '
				'--untrimmed-paired-output, --too-short-output, '
				'--too-short-paired-output, --too-long-output, '
				'--too-long-paired-output, --format, --colorspace')
			sys.exit(1)

	try:
		runner.set_input(
			input_filename,
			file2=input_paired_filename,
			qualfile=quality_filename,
			colorspace=options.colorspace,
			fileformat=options.format,
			interleaved=is_interleaved_input)
		runner.set_output(outfiles)
	except (seqio.UnknownFileType, IOError) as e:
		parser.error(e)

	# Only mention the interpreter implementation when it is not CPython.
	implementation = platform.python_implementation()
	if implementation == 'CPython':
		impl_note = ''
	else:
		impl_note = ' (' + implementation + ')'
	logger.info("This is cutadapt %s with Python %s%s", __version__,
		platform.python_version(), impl_note)
	logger.info("Command line parameters: %s", " ".join(cmdlineargs))

	mode_names = {False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end'}
	plural = 's' if cores > 1 else ''
	logger.info("Processing reads on %d core%s in %s mode ...",
		cores, plural, mode_names[pipeline.paired])

	if pipeline.should_warn_legacy:
		legacy_notice = textwrap.wrap('Legacy mode is '
			'enabled. Read modification and filtering options *ignore* '
			'the second read. To switch to regular paired-end mode, '
			'provide the --pair-filter=any option or use any of the '
			'-A/-B/-G/-U/--interleaved options.')
		logger.warning('\n'.join(legacy_notice))

	try:
		stats = runner.run()
		runner.close()
	except KeyboardInterrupt:
		print("Interrupted", file=sys.stderr)
		sys.exit(130)
	except IOError as e:
		# A broken pipe ends the program with status 1; anything else propagates.
		if e.errno != errno.EPIPE:
			raise
		sys.exit(1)
	except (seqio.FormatError, seqio.UnknownFileType, EOFError) as e:
		sys.exit("cutadapt: error: {0}".format(e))

	elapsed = time.time() - began
	if options.quiet:
		return

	# The report goes to stderr when no -o output was given.
	if options.output is None:
		stat_file = sys.stderr
	else:
		stat_file = None
	with redirect_standard_output(stat_file):
		gc_fraction = options.gc_content / 100
		if options.report == 'minimal':
			print_minimal_report(stats, elapsed, gc_fraction)
		else:
			print_report(stats, elapsed, gc_fraction)
Exemplo n.º 2
0
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Entry point: parse arguments, assemble the pipeline and runner, process
    the reads and log a report.

    cmdlineargs is the list of command-line arguments; ``sys.argv[1:]`` is
    used when it is None.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used.
    """
    began = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args, leftover_args = parser.parse_known_args(args=cmdlineargs)

    # Logging may go to stdout only when no result is written there.
    log_to_stdout = not (
        args.output is None or args.output == "-" or args.paired_output == "-"
    )
    # Handlers may already exist when this function is invoked from other
    # code (unit tests, for instance); only configure logging when none do.
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=log_to_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")

    paired = determine_paired(args)
    assert paired in (False, True)

    # The header is logged at this point because later setup steps may
    # themselves produce log output.
    log_header(cmdlineargs)
    if leftover_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(leftover_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # A value of 0 means "use however many CPUs available_cpu_count() reports".
    cores = args.cores
    if cores == 0:
        cores = available_cpu_count()
    opener_threads = 0 if cores == 1 else None
    file_opener = FileOpener(
        compression_level=args.compression_level, threads=opener_threads)

    show_progress = sys.stderr.isatty() and not args.quiet
    progress = Progress() if show_progress else DummyProgress()

    try:
        is_interleaved_input = args.interleaved and len(args.inputs) == 1
        input_filename, input_paired_filename = setup_input_files(
            args.inputs, paired, is_interleaved_input)
        check_arguments(args, paired)
        pipeline = pipeline_from_parsed_args(args, paired, file_opener)
        outfiles = open_output_files(args, default_outfile, file_opener)
        infiles = InputFiles(
            input_filename,
            file2=input_paired_filename,
            interleaved=is_interleaved_input,
        )
        runner = setup_runner(
            pipeline, infiles, outfiles, progress, cores, args.buffer_size)
    except CommandLineError as e:
        parser.error(str(e))
        return  # avoid IDE warnings below

    mode_names = {False: 'single-end', True: 'paired-end'}
    plural = 's' if cores > 1 else ''
    logger.info("Processing reads on %d core%s in %s mode ...", cores,
                plural, mode_names[pipeline.paired])

    try:
        with runner as active_runner:
            stats = active_runner.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as e:
        sys.exit("cutadapt: error: {}".format(e))

    elapsed = time.time() - began
    report = minimal_report if args.report == 'minimal' else full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))

    if profiler is None:
        return
    # Profiling was requested: stop collecting and print the top 20 entries.
    import pstats
    profiler.disable()
    pstats.Stats(profiler).sort_stats('time').print_stats(20)
Exemplo n.º 3
0
def main(cmdlineargs, default_outfile=sys.stdout.buffer) -> Statistics:
    """
    Build a processing pipeline from the given command-line arguments, run
    it, log a report and return the resulting Statistics object.

    cmdlineargs is the list of command-line arguments to parse.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used.
    """
    began = time.time()
    parser = get_argument_parser()
    args, leftover_args = parser.parse_known_args(args=cmdlineargs)

    # Logging may go to stdout only when no result is written there.
    log_to_stdout = not (
        args.output is None or args.output == "-" or args.paired_output == "-"
    )
    # Handlers may already exist when this function is invoked from other
    # code (unit tests, for instance); only configure logging when none do.
    if not logging.root.handlers:
        setup_logging(
            logger,
            stdout=log_to_stdout,
            quiet=args.quiet,
            minimal=args.report == 'minimal',
            debug=args.debug,
        )
    log_header(cmdlineargs)
    profiler = setup_profiler_if_requested(args.profile)

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if leftover_args:
        warn_if_en_dashes(cmdlineargs)
        parser.error("unrecognized arguments: " + " ".join(leftover_args))

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')

    # A value of 0 means "use however many CPUs available_cpu_count() reports".
    cores = args.cores
    if cores == 0:
        cores = available_cpu_count()
    opener_threads = 0 if cores == 1 else None
    file_opener = FileOpener(
        compression_level=args.compression_level, threads=opener_threads)

    show_progress = sys.stderr.isatty() and not args.quiet
    progress = Progress() if show_progress else DummyProgress()

    paired = determine_paired(args)
    assert paired in (False, True)

    try:
        is_interleaved_input = args.interleaved and len(args.inputs) == 1
        input_filename, input_paired_filename = setup_input_files(
            args.inputs, paired, is_interleaved_input)
        check_arguments(args, paired)
        adapters, adapters2 = adapters_from_args(args)
        pipeline = pipeline_from_parsed_args(
            args, paired, file_opener, adapters, adapters2)
        # The adapter names are passed on to open_output_files().
        adapter_names = [adapter.name for adapter in adapters]  # type: List[str]
        adapter_names2 = [adapter.name for adapter in adapters2]  # type: List[str]
        outfiles = open_output_files(
            args, default_outfile, file_opener, adapter_names, adapter_names2)
        inpaths = InputPaths(
            input_filename,
            path2=input_paired_filename,
            interleaved=is_interleaved_input,
        )
        runner = setup_runner(
            pipeline, inpaths, outfiles, progress, cores, args.buffer_size,
            file_opener)
    except CommandLineError as e:
        logger.debug("Command line error. Traceback:", exc_info=True)
        parser.error(str(e))
        return

    mode_names = {False: 'single-end', True: 'paired-end'}
    plural = 's' if cores > 1 else ''
    logger.info("Processing reads on %d core%s in %s mode ...", cores,
                plural, mode_names[pipeline.paired])

    try:
        with runner as active_runner:
            stats = active_runner.run()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except BrokenPipeError:
        sys.exit(1)
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as e:
        logger.debug("Command line error. Traceback:", exc_info=True)
        sys.exit("cutadapt: error: {}".format(e))

    elapsed = time.time() - began
    report = minimal_report if args.report == 'minimal' else full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))

    if profiler is not None:
        # Profiling was requested: stop collecting and print the top 20 entries.
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)
    return stats
Exemplo n.º 4
0
def main(cmdlineargs=None, default_outfile=sys.stdout.buffer):
    """
    Main function that sets up a processing pipeline and runs it.

    cmdlineargs is the list of command-line arguments; ``sys.argv[1:]`` is
    used when it is None.

    default_outfile is the file to which trimmed reads are sent if the ``-o``
    parameter is not used.

    Command-line problems are reported through ``parser.error``. An
    unsupported multi-core configuration, a broken pipe, an interrupt or a
    file format error end the process via ``sys.exit`` with a non-zero
    status (or an error message).
    """
    start_time = time.time()
    parser = get_argument_parser()
    if cmdlineargs is None:
        cmdlineargs = sys.argv[1:]
    args = parser.parse_args(args=cmdlineargs)
    # log to stderr if results are to be sent to stdout
    log_to_stdout = args.output is not None and args.output != "-" and args.paired_output != "-"
    # Setup logging only if there are not already any handlers (can happen when
    # this function is being called externally such as from unit tests)
    if not logging.root.handlers:
        setup_logging(stdout=log_to_stdout,
                      quiet=args.quiet,
                      minimal=args.report == 'minimal',
                      debug=args.debug)

    # Always bind the name so the check at the end of the function does not
    # depend on args.profile being re-evaluated consistently.
    profiler = None
    if args.profile:
        import cProfile
        profiler = cProfile.Profile()
        profiler.enable()

    if args.quiet and args.report:
        parser.error(
            "Options --quiet and --report cannot be used at the same time")

    if args.colorspace:
        parser.error(
            "These colorspace-specific options are no longer supported: "
            "--colorspace, -c, -d, --double-encode, -t, --trim-primer, "
            "--strip-f3, --maq, --bwa, --no-zero-cap. "
            "Use Cutadapt 1.18 or earlier to work with colorspace data.")

    paired = determine_paired_mode(args)
    assert paired in (False, True)

    # Print the header now because some of the functions below create logging output
    log_header(cmdlineargs)
    try:
        is_interleaved_input, is_interleaved_output = determine_interleaved(
            args)
        input_filename, input_paired_filename = input_files_from_parsed_args(
            args.inputs, paired, is_interleaved_input)
        pipeline = pipeline_from_parsed_args(args, paired,
                                             is_interleaved_output)
        outfiles = open_output_files(args, default_outfile,
                                     is_interleaved_output)
    except CommandLineError as e:
        parser.error(str(e))
        return  # avoid IDE warnings below

    if args.cores < 0:
        parser.error('Value for --cores cannot be negative')
    # A value of 0 means "use however many CPUs available_cpu_count() reports".
    cores = available_cpu_count() if args.cores == 0 else args.cores
    if cores > 1:
        if ParallelPipelineRunner.can_output_to(outfiles):
            runner_class = ParallelPipelineRunner
            runner_kwargs = dict(n_workers=cores, buffer_size=args.buffer_size)
        else:
            logger.error(
                'Running in parallel is currently not supported for '
                'the given combination of command-line parameters.\nThese '
                'options are not supported: --info-file, --rest-file, '
                '--wildcard-file, --untrimmed-output, '
                '--untrimmed-paired-output, --too-short-output, '
                '--too-short-paired-output, --too-long-output, '
                '--too-long-paired-output, --format\n'
                'Also, demultiplexing is not supported.\n'
                'Omit --cores/-j to continue.')
            # This is a failure: terminate with a non-zero status instead of
            # returning normally, which would look like success to the caller.
            sys.exit(1)
    else:
        runner_class = SerialPipelineRunner
        runner_kwargs = dict()
    infiles = InputFiles(input_filename,
                         file2=input_paired_filename,
                         interleaved=is_interleaved_input)
    try:
        runner = runner_class(pipeline, infiles, outfiles, **runner_kwargs)
    except (dnaio.UnknownFileFormat, IOError) as e:
        parser.error(str(e))
        return  # avoid IDE warnings below

    logger.info("Processing reads on %d core%s in %s mode ...", cores,
                's' if cores > 1 else '', {
                    False: 'single-end',
                    True: 'paired-end'
                }[pipeline.paired])
    try:
        stats = runner.run()
        runner.close()
    except KeyboardInterrupt:
        print("Interrupted", file=sys.stderr)
        sys.exit(130)
    except IOError as e:
        # A broken pipe ends the program with status 1; anything else propagates.
        if e.errno == errno.EPIPE:
            sys.exit(1)
        raise
    except (dnaio.FileFormatError, dnaio.UnknownFileFormat, EOFError) as e:
        sys.exit("cutadapt: error: {}".format(e))

    elapsed = time.time() - start_time
    if args.report == 'minimal':
        report = minimal_report
    else:
        report = full_report
    logger.log(REPORT, '%s', report(stats, elapsed, args.gc_content / 100))
    if profiler is not None:
        # Profiling was requested: stop collecting and print the top 20 entries.
        import pstats
        profiler.disable()
        pstats.Stats(profiler).sort_stats('time').print_stats(20)