Пример #1
0
def stipulate(args):
    """
    REQUIRED TO CREATE ITERABLE FUNCTIONS TO RUN IN CUTADAPT 2.7. THIS FUNCTION IS CALLED ONLY ONE TIME. 
    """
    modifiers = []
    pipeline_add = modifiers.append
    adapter_parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    adapters = adapter_parser.parse_multi(args.adapters)
    warn_duplicate_adapters(adapters)

    if args.nextseq_trim is not None:
        pipeline_add(NextseqQualityTrimmer(args.nextseq_trim, args.phred64))
    if args.quality_cutoff is not None:
        cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(QualityTrimmer(cutoffs[0], cutoffs[1], args.phred64))

    adapter_cutter = None
    if adapters:
        adapter_cutter = AdapterCutter(adapters, args.times, args.action)
        pipeline_add(adapter_cutter)
    if args.trim_n:
        pipeline_add(NEndTrimmer())
    add_unconditional_cutters(pipeline_add, args.cut)

    print("modifiers (cutadapt):", modifiers)
    return modifiers
Пример #2
0
def adapters_from_args(args) -> Tuple[List[Adapter], List[Adapter]]:
    adapter_parser = AdapterParser(
        max_errors=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = adapter_parser.parse_multi(args.adapters)
        adapters2 = adapter_parser.parse_multi(args.adapters2)
    except (FileNotFoundError, ValueError, InvalidCharacter) as e:
        raise CommandLineError(e)
    warn_duplicate_adapters(adapters)
    warn_duplicate_adapters(adapters2)
    if args.debug == "trace":
        for adapter in adapters + adapters2:
            adapter.enable_debug()
    return adapters, adapters2
Пример #3
0
def pipeline_from_parsed_args(args, paired, is_interleaved_output):
    """
    Setup a processing pipeline from parsed command-line arguments.

    If there are any problems parsing the arguments, a CommandLineError is raised.

    Return an instance of Pipeline (SingleEndPipeline or PairedEndPipeline)
    """
    check_arguments(args, paired, is_interleaved_output)
    if args.action == 'none':
        args.action = None

    adapter_parser = AdapterParser(
        max_error_rate=args.error_rate,
        min_overlap=args.overlap,
        read_wildcards=args.match_read_wildcards,
        adapter_wildcards=args.match_adapter_wildcards,
        indels=args.indels,
    )
    try:
        adapters = adapter_parser.parse_multi(args.adapters)
        adapters2 = adapter_parser.parse_multi(args.adapters2)
    except (FileNotFoundError, ValueError) as e:
        raise CommandLineError(e)
    warn_duplicate_adapters(adapters)
    warn_duplicate_adapters(adapters2)
    if args.debug:
        for adapter in adapters + adapters2:
            adapter.enable_debug()

    # Create the processing pipeline
    if paired:
        pair_filter_mode = 'any' if args.pair_filter is None else args.pair_filter
        pipeline = PairedEndPipeline(pair_filter_mode)
    else:
        pipeline = SingleEndPipeline()

    # When adapters are being trimmed only in R1 or R2, override the pair filter mode
    # as using the default of 'any' would regard all read pairs as untrimmed.
    if paired and (not adapters2
                   or not adapters) and (args.discard_untrimmed
                                         or args.untrimmed_output
                                         or args.untrimmed_paired_output):
        pipeline.override_untrimmed_pair_filter = True

    add_unconditional_cutters(pipeline, args.cut, args.cut2, paired)

    pipeline_add = pipeline.add_both if paired else pipeline.add

    if args.nextseq_trim is not None:
        pipeline_add(
            NextseqQualityTrimmer(args.nextseq_trim, args.quality_base))
    if args.quality_cutoff is not None:
        cutoffs = parse_cutoffs(args.quality_cutoff)
        pipeline_add(QualityTrimmer(cutoffs[0], cutoffs[1], args.quality_base))

    if args.pair_adapters:
        try:
            cutter = PairedAdapterCutter(adapters, adapters2, args.action)
        except PairedAdapterCutterError as e:
            raise CommandLineError("--pair-adapters: " + str(e))
        pipeline.add_paired_modifier(cutter)
    else:
        adapter_cutter, adapter_cutter2 = None, None
        if adapters:
            adapter_cutter = AdapterCutter(adapters, args.times, args.action)
        if adapters2:
            adapter_cutter2 = AdapterCutter(adapters2, args.times, args.action)
        if paired:
            if adapter_cutter or adapter_cutter2:
                pipeline.add(adapter_cutter, adapter_cutter2)
        else:
            if adapter_cutter:
                pipeline.add(adapter_cutter)

    for modifier in modifiers_applying_to_both_ends_if_paired(args):
        pipeline_add(modifier)

    # Set filtering parameters
    # Minimum/maximum length
    for attr in 'minimum_length', 'maximum_length':
        param = getattr(args, attr)
        if param is not None:
            lengths = parse_lengths(param)
            if not paired and len(lengths) == 2:
                raise CommandLineError(
                    'Two minimum or maximum lengths given for single-end data')
            if paired and len(lengths) == 1:
                lengths = (lengths[0], lengths[0])
            setattr(pipeline, attr, lengths)
    pipeline.max_n = args.max_n
    pipeline.discard_casava = args.discard_casava
    pipeline.discard_trimmed = args.discard_trimmed
    pipeline.discard_untrimmed = args.discard_untrimmed

    return pipeline