def stipulate(args):
    """
    Build the ordered list of read modifiers to run with cutadapt 2.7.

    Intended to be called once. The modifiers are appended in this order,
    each one only when its option is set on ``args``:

    1. ``NextseqQualityTrimmer`` when ``args.nextseq_trim`` is not None
    2. ``QualityTrimmer`` when ``args.quality_cutoff`` is not None
    3. ``AdapterCutter`` when at least one adapter was parsed
    4. ``NEndTrimmer`` when ``args.trim_n`` is truthy
    5. whatever ``add_unconditional_cutters`` appends for ``args.cut``

    The resulting list is printed and then returned.
    """
    parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    parsed_adapters = parser.parse_multi(args.adapters)
    warn_duplicate_adapters(parsed_adapters)

    steps = []
    register = steps.append

    if args.nextseq_trim is not None:
        register(NextseqQualityTrimmer(args.nextseq_trim, args.phred64))

    if args.quality_cutoff is not None:
        parsed_cutoffs = parse_cutoffs(args.quality_cutoff)
        first_cutoff, second_cutoff = parsed_cutoffs[0], parsed_cutoffs[1]
        register(QualityTrimmer(first_cutoff, second_cutoff, args.phred64))

    if parsed_adapters:
        register(AdapterCutter(parsed_adapters, args.times, args.action))

    if args.trim_n:
        register(NEndTrimmer())

    # The helper receives the list's append callable rather than a pipeline object.
    add_unconditional_cutters(register, args.cut)

    print("modifiers (cutadapt):", steps)
    return steps
def adapters_from_args(args) -> Tuple[List[Adapter], List[Adapter]]:
    """
    Parse the adapter specifications found on the parsed arguments.

    ``args.adapters`` and ``args.adapters2`` are each parsed with one shared
    ``AdapterParser`` configured from the error rate, overlap, wildcard and
    indel options on ``args``. Both results are passed to
    ``warn_duplicate_adapters``. When ``args.debug`` equals ``"trace"``,
    ``enable_debug()`` is called on every parsed adapter.

    Returns:
        A tuple ``(adapters, adapters2)`` with the two parsed adapter lists.

    Raises:
        CommandLineError: if parsing raises ``FileNotFoundError``,
            ``ValueError`` or ``InvalidCharacter``. The original exception
            is attached as ``__cause__``.
    """
    adapter_parser = AdapterParser(
        max_errors=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = adapter_parser.parse_multi(args.adapters)
        adapters2 = adapter_parser.parse_multi(args.adapters2)
    except (FileNotFoundError, ValueError, InvalidCharacter) as e:
        # Chain explicitly so the underlying parse error stays visible
        # as the direct cause of the command-line error.
        raise CommandLineError(e) from e

    warn_duplicate_adapters(adapters)
    warn_duplicate_adapters(adapters2)

    if args.debug == "trace":
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    return adapters, adapters2
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Set up a processing pipeline from parsed command-line arguments.

    Args:
        args: Parsed command-line namespace. Note that ``args.action`` is
            rewritten in place from the string ``'none'`` to ``None``.
        paired: Truthy when a paired-end pipeline should be built; selects
            ``PairedEndPipeline`` over ``SingleEndPipeline``.
        is_interleaved_output: Only forwarded to ``check_arguments``.

    Returns:
        The configured pipeline instance (``SingleEndPipeline`` or
        ``PairedEndPipeline``).

    Raises:
        CommandLineError: if adapter parsing raises ``FileNotFoundError`` or
            ``ValueError``, if ``PairedAdapterCutter`` rejects the adapters
            given with ``--pair-adapters``, or if two minimum/maximum lengths
            are given for single-end data. Where an underlying exception
            exists it is attached as ``__cause__``.
    """
    check_arguments(args, paired, is_interleaved_output)

    # Normalise the textual 'none' action to None (mutates the caller's args).
    if args.action == 'none':
        args.action = None

    adapter_parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = adapter_parser.parse_multi(args.adapters)
        adapters2 = adapter_parser.parse_multi(args.adapters2)
    except (FileNotFoundError, ValueError) as e:
        # Chain explicitly so the underlying parse error is kept as the cause.
        raise CommandLineError(e) from e
    warn_duplicate_adapters(adapters)
    warn_duplicate_adapters(adapters2)
    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        pair_filter_mode = 'any' if args.pair_filter is None else args.pair_filter
        pipeline = PairedEndPipeline(pair_filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2 or not adapters) and (
        args.discard_untrimmed
        or args.untrimmed_output
        or args.untrimmed_paired_output
    ):
        pipeline.override_untrimmed_pair_filter = True

    add_unconditional_cutters(pipeline, args.cut, args.cut2, paired)

    # Modifiers registered through pipeline_add go through add_both() when
    # paired, otherwise through the single-end add().
    pipeline_add = pipeline.add_both if paired else pipeline.add

    if args.nextseq_trim is not None:
        pipeline_add(
            NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(QualityTrimmer(cutoffs[0], cutoffs[1], args.quality_base))

    if args.pair_adapters:
        try:
            cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as e:
            raise CommandLineError("--pair-adapters: " + str(e)) from e
        pipeline.add_paired_modifier(cutter)
    else:
        adapter_cutter, adapter_cutter2 = None, None
        if adapters:
            adapter_cutter = AdapterCutter(adapters, args.times, args.action)
        if adapters2:
            adapter_cutter2 = AdapterCutter(adapters2, args.times, args.action)
        if paired:
            # One of the two cutters may be None when only one read has adapters.
            if adapter_cutter or adapter_cutter2:
                pipeline.add(adapter_cutter, adapter_cutter2)
        else:
            if adapter_cutter:
                pipeline.add(adapter_cutter)

    for modifier in modifiers_applying_to_both_ends_if_paired(args):
        pipeline_add(modifier)

    # Set filtering parameters
    # Minimum/maximum length
    for attr in 'minimum_length', 'maximum_length':
        param = getattr(args, attr)
        if param is not None:
            lengths = parse_lengths(param)
            if not paired and len(lengths) == 2:
                raise CommandLineError(
                    'Two minimum or maximum lengths given for single-end data')
            if paired and len(lengths) == 1:
                # A single value applies to both reads of the pair.
                lengths = (lengths[0], lengths[0])
            setattr(pipeline, attr, lengths)
    pipeline.max_n = args.max_n
    pipeline.discard_casava = args.discard_casava
    pipeline.discard_trimmed = args.discard_trimmed
    pipeline.discard_untrimmed = args.discard_untrimmed

    return pipeline