示例#1
0
 def __call__(self):
     """Configure a contaminant detector and run it.

     The detector is taken from ``self.detector`` when set. Otherwise it is
     chosen automatically: 'known' when known contaminants were loaded and
     only known contaminants are requested, 'heuristic' when at most 50000
     reads are sampled, and 'khmer' otherwise. The effective settings are
     stored under ``self.summary['detect']``.

     Returns:
         Whatever ``run_interruptible`` returns for the detector.

     Raises:
         ValueError: If the detector name is not one of 'known',
             'heuristic' or 'khmer'.
     """
     logger = logging.getLogger()

     kmer_size = self.kmer_size or 12
     n_reads = self.max_reads
     overrep_cutoff = 100
     include = self.include_contaminants or "all"

     # Known adapters are only needed when they may be reported.
     known_contaminants = None
     if include != 'unknown':
         known_contaminants = self.load_known_adapters()

     detector = self.detector
     if not detector:
         # NOTE(review): the comparison below assumes self.max_reads is a
         # number -- confirm it cannot be None here.
         if known_contaminants and include == 'known':
             detector = 'known'
         elif n_reads <= 50000:
             detector = 'heuristic'
         else:
             detector = 'khmer'

     detector_args = dict(known_contaminants=known_contaminants)

     if detector == 'known':
         logger.debug(
             "Detecting contaminants using the known-only algorithm")
         detector_class = KnownContaminantDetector
         detector_args['min_kmer_match_frac'] = self.min_kmer_match_frac
     elif detector == 'heuristic':
         logger.debug(
             "Detecting contaminants using the heuristic algorithm")
         detector_class = HeuristicDetector
         detector_args['min_frequency'] = self.min_frequency
         detector_args['min_contaminant_match_frac'] = \
             self.min_contaminant_match_frac
     elif detector == 'khmer':
         logger.debug(
             "Detecting contaminants using the kmer-based algorithm")
         detector_class = KhmerDetector
     else:
         # Previously an unrecognised name fell through and surfaced later
         # as an UnboundLocalError on detector_class.
         raise ValueError(
             "Unknown contaminant detector: {!r}".format(detector))

     summary_args = dict(
         kmer_size=kmer_size, n_reads=n_reads,
         overrep_cutoff=overrep_cutoff, include=include,
         past_end_bases=self.past_end_bases)
     detector_args.update(summary_args)

     if self.paired:
         detector = PairedDetector(detector_class, **detector_args)
     else:
         detector = detector_class(**detector_args)

     # summary_args is extended only after it was copied into
     # detector_args, so the summarized contaminants are reported but not
     # passed to the detector constructor.
     self.summary['detect'] = summary_args
     if known_contaminants:
         self.summary['detect']['known_contaminants'] = \
             known_contaminants.summarize()

     logger.info(
         "Detecting adapters and other potential contaminant "
         "sequences based on %d-mers in %d reads", kmer_size, n_reads)

     # Currently only single-threaded operation is supported.
     self.summary.update(mode='serial', threads=1)
     return run_interruptible(detector, self, raise_on_error=True)
示例#2
0
    def run(self):
        """Execute this pipeline and finalize it.

        The pipeline is run via ``run_interruptible`` and the resulting
        code is handed to ``self.terminate`` before being returned.

        Returns:
            The return code.
        """
        exit_code = run_interruptible(self)
        self.terminate(exit_code)
        return exit_code
示例#3
0
 def run(self):
     """Execute this pipeline and finalize it.

     The pipeline is run via ``run_interruptible`` and the resulting code
     is handed to ``self.terminate`` before being returned.

     Returns:
         The return code.
     """
     exit_code = run_interruptible(self)
     self.terminate(exit_code)
     return exit_code
示例#4
0
 def __call__(self):
     """Run the QC pipeline in serial or parallel mode.

     The pipeline class depends on ``self.paired``. Its keyword arguments
     are the quality settings plus anything in ``self.stats``. The chosen
     mode and thread count are recorded in ``self.summary``.

     Returns:
         The result of ``run_interruptible`` when ``self.threads`` is
         None, otherwise the result of ``self.run_parallel``.
     """
     qc_class = PairedEndQcPipeline if self.paired else SingleEndQcPipeline

     qc_kwargs = {
         'qualities': self.delivers_qualities,
         'quality_base': self.quality_base,
     }
     if self.stats:
         qc_kwargs.update(self.stats)

     # Guard clause: an explicit thread count selects the parallel path.
     if self.threads is not None:
         self.summary.update(mode='parallel', threads=self.threads)
         return self.run_parallel(qc_class, qc_kwargs)

     self.summary.update(mode='serial', threads=1)
     serial_pipeline = qc_class(**qc_kwargs)
     return run_interruptible(serial_pipeline, self)
示例#5
0
    def __call__(self):
        """Configure an error-rate estimator and run it.

        The estimator class is selected by ``self.algorithm`` ('quality' or
        'shadow') and wrapped in ``PairedErrorEstimator`` when
        ``self.paired`` is set. The estimator arguments are recorded under
        ``self.summary['errorrate']``.

        Returns:
            Whatever ``run_interruptible`` returns for the estimator.

        Raises:
            ValueError: If base qualities are not available, or if
                ``self.algorithm`` is not a recognised algorithm name.
        """
        if not self.delivers_qualities:
            raise ValueError(
                "Cannot estimate error rate without base qualities")

        if self.algorithm == 'quality':
            estimator_class = BaseQualityErrorEstimator
        elif self.algorithm == 'shadow':
            estimator_class = ShadowRegressionErrorEstimator
        else:
            # Previously an unrecognised name fell through and surfaced
            # later as an UnboundLocalError on estimator_class.
            raise ValueError(
                "Unknown error estimation algorithm: {!r}".format(
                    self.algorithm))

        estimator_args = dict(max_read_len=self.max_bases)
        if self.paired:
            estimator = PairedErrorEstimator(estimator_class=estimator_class,
                                             **estimator_args)
        else:
            estimator = estimator_class(**estimator_args)

        self.summary['errorrate'] = estimator_args

        # Currently only single-threaded operation is supported.
        self.summary.update(mode='serial', threads=1)
        return run_interruptible(estimator, self, raise_on_error=True)
示例#6
0
 def __call__(self):
     """Configure an error-rate estimator and run it.

     The estimator class is selected by ``self.algorithm`` ('quality' or
     'shadow') and wrapped in ``PairedErrorEstimator`` when ``self.paired``
     is set. The estimator arguments are recorded under
     ``self.summary['errorrate']``.

     Returns:
         Whatever ``run_interruptible`` returns for the estimator.

     Raises:
         ValueError: If base qualities are not available, or if
             ``self.algorithm`` is not a recognised algorithm name.
     """
     if not self.delivers_qualities:
         raise ValueError(
             "Cannot estimate error rate without base qualities")

     if self.algorithm == 'quality':
         estimator_class = BaseQualityErrorEstimator
     elif self.algorithm == 'shadow':
         estimator_class = ShadowRegressionErrorEstimator
     else:
         # Previously an unrecognised name fell through and surfaced later
         # as an UnboundLocalError on estimator_class.
         raise ValueError(
             "Unknown error estimation algorithm: {!r}".format(
                 self.algorithm))

     estimator_args = dict(max_read_len=self.max_bases)
     if self.paired:
         estimator = PairedErrorEstimator(
             estimator_class=estimator_class, **estimator_args)
     else:
         estimator = estimator_class(**estimator_args)

     self.summary['errorrate'] = estimator_args

     # Currently only single-threaded operation is supported.
     self.summary.update(mode='serial', threads=1)
     return run_interruptible(estimator, self, raise_on_error=True)
示例#7
0
文件: trim.py 项目: llllaaaa/atropos
def run_parallel(reader,
                 pipeline,
                 formatters,
                 writers,
                 threads=2,
                 timeout=30,
                 preserve_order=False,
                 input_queue_size=0,
                 result_queue_size=0,
                 use_writer_process=True,
                 compression=None):
    """Execute atropos in parallel mode.

    Args:
        reader: Iterator over batches of reads (most likely a
            BatchIterator).
        pipeline: Passed to every worker process; its
            ``modifiers.get_trimmer_classes()`` also seeds the aggregate
            summary.
        formatters: Passed to every worker process.
        writers: Passed to the result handler that writes the output.
        threads: Number of worker threads to use; additional threads are
            used for the main process and the writer process (if
            requested). Must be at least 2.
        timeout: Number of seconds after which waiting processes escalate
            their messages from DEBUG to ERROR. Values below
            ``RETRY_INTERVAL`` are raised to ``RETRY_INTERVAL``.
        preserve_order: Whether to preserve the input order of reads when
            writing (only valid when ``use_writer_process=True``).
        input_queue_size: Max number of items that can be in the input
            queue, or 0 for no limit (be warned that this could explode
            memory usage).
        result_queue_size: Max number of items that can be in the result
            queue, or 0 for no limit (be warned that this could explode
            memory usage).
        use_writer_process: If True, a separate process is used to write
            results to disk. Otherwise, each worker writes its results to
            an output file with a '.N' extension, where N is the thread
            index. This is useful in cases where the I/O is the main
            bottleneck.
        compression: If "writer", the writer process performs data
            compression, otherwise the worker processes perform
            compression. When None, "writer" is chosen if a writer process
            is used and system compression is available, else "worker".

    Returns:
        A tuple ``(rc, report, details)``: the return code from
        ``run_interruptible``, the finished summary (None unless the
        return code is 0), and a dict with the execution mode and the
        number of threads.

    Raises:
        ValueError: If ``threads`` is less than 2.
    """
    logger = logging.getLogger()
    logger.debug(
        "Starting atropos in parallel mode with threads=%s, timeout=%s",
        threads, timeout)

    if threads < 2:
        raise ValueError("'threads' must be >= 2")

    # Reserve a thread for the writer process if it will be doing the
    # compression and if one is available.
    if compression is None:
        if use_writer_process and can_use_system_compression():
            compression = "writer"
        else:
            compression = "worker"
    if compression == "writer" and threads > 2:
        threads -= 1

    timeout = max(timeout, RETRY_INTERVAL)

    # Queue by which batches of reads are sent to worker processes
    input_queue = Queue(input_queue_size)
    # Queue by which results are sent from the worker processes to the
    # writer process
    result_queue = Queue(result_queue_size)
    # Queue for processes to send summary information back to main process
    summary_queue = Queue(threads)
    # Aggregate summary
    summary = Summary(trimmer_classes=pipeline.modifiers.get_trimmer_classes())

    if use_writer_process:
        worker_result_handler = QueueResultHandler(result_queue)
        if compression == "writer":
            worker_result_handler = WorkerResultHandler(worker_result_handler)
        else:
            worker_result_handler = CompressingWorkerResultHandler(
                worker_result_handler)

        # Shared variable for communicating with writer process
        writer_control = Control(CONTROL_ACTIVE)
        # The writer receives already-compressed data when the workers
        # are the ones compressing.
        if preserve_order:
            writer_result_handler = OrderPreservingWriterResultHandler(
                writers, compressed=compression == "worker")
        else:
            writer_result_handler = WriterResultHandler(
                writers, compressed=compression == "worker")
        writer_process = ResultProcess(writer_result_handler, result_queue,
                                       writer_control, timeout)
        writer_process.start()
    else:
        worker_result_handler = WorkerResultHandler(
            WriterResultHandler(writers, use_suffix=True))

    # Start worker processes, reserve a thread for the reader process,
    # which we will get back after it completes
    worker_args = (input_queue, summary_queue, timeout, worker_result_handler,
                   pipeline, formatters)
    worker_processes = launch_workers(threads - 1, TrimWorkerProcess,
                                      worker_args)

    def ensure_alive():
        """Raise if a worker or the writer process is no longer running."""
        ensure_processes(worker_processes)
        if (use_writer_process
                and not (writer_process.is_alive()
                         and writer_control.check_value(CONTROL_ACTIVE))):
            raise Exception("Writer process exited")

    def _run(worker_processes):
        """Feed all batches to the workers and collect their summaries."""
        # Add batches of reads to the input queue. Provide a timeout callback
        # to check that subprocesses are alive.
        num_batches = enqueue_all(enumerate(reader, 1), input_queue, timeout,
                                  ensure_alive)
        logger.debug("Main loop complete; saw %s batches", num_batches)

        # Tell the worker processes no more input is coming
        enqueue_all((None, ) * threads, input_queue, timeout, ensure_alive)

        # Tell the writer process the max number of batches to expect
        if use_writer_process:
            writer_control.set_value(num_batches)

        # Now that the reader process is done, it essentially
        # frees up another thread to use for a worker
        worker_processes.extend(
            launch_workers(1,
                           TrimWorkerProcess,
                           worker_args,
                           offset=threads - 1))

        # Wait for all summaries to be available on queue
        def summary_timeout_callback():
            # Best effort: a failed liveness check is only logged so that
            # waiting for the summaries continues.
            try:
                ensure_processes(
                    worker_processes,
                    "Workers are still alive and haven't returned summaries: {}",
                    alive=False)
            except Exception as e:
                logger.error(e)

        wait_on(lambda: summary_queue.full(),
                wait_message="Waiting on worker summaries {}",
                timeout=timeout,
                wait=True,
                timeout_callback=summary_timeout_callback)

        # Process summary information from worker processes
        logger.debug("Processing summary information from worker processes")
        seen_summaries = set()
        seen_batches = set()

        def summary_fail_callback():
            # NOTE(review): range(1, threads) may not match the indices
            # assigned by launch_workers -- confirm against that helper.
            missing_summaries = set(range(1, threads)) - seen_summaries
            # The original referenced an undefined name here, which raised
            # NameError instead of reporting the missing summaries.
            raise Exception("Missing summaries from processes {}".format(
                ",".join(str(s) for s in missing_summaries)))

        for _ in range(1, threads + 1):
            batch = dequeue(summary_queue, fail_callback=summary_fail_callback)
            worker_index, worker_batches, process_stats, adapter_stats = batch
            if process_stats is None or adapter_stats is None:
                raise Exception(
                    "Worker process {} died unexpectedly".format(worker_index))
            logger.debug("Processing summary for worker %s", worker_index)
            seen_summaries.add(worker_index)
            seen_batches |= worker_batches
            summary.add_process_stats(process_stats)
            summary.add_adapter_stats(adapter_stats)

        # Check if any batches were missed
        if num_batches > 0:
            missing_batches = set(range(1, num_batches + 1)) - seen_batches
            if missing_batches:
                raise Exception("Workers did not process batches {}".format(
                    ",".join(str(b) for b in missing_batches)))

        if use_writer_process:
            # Wait for writer to complete
            wait_on_process(writer_process, timeout)

    # Sentinel: stays None if run_interruptible raises, so that the cleanup
    # below terminates the processes instead of failing with a NameError
    # that would mask the original exception.
    rc = None
    try:
        rc = run_interruptible(_run, worker_processes)
    finally:
        # notify all processes that they should stop
        logger.debug("Exiting all processes")

        def kill(process):
            # Return codes 0 and 1 get a wait (with terminate=True passed
            # to wait_on_process); anything else terminates right away.
            if rc is not None and rc <= 1:
                wait_on_process(process, timeout, terminate=True)
            elif process.is_alive():
                process.terminate()

        for process in worker_processes:
            kill(process)
        if use_writer_process:
            kill(writer_process)

    report = summary.finish() if rc == 0 else None
    details = dict(mode='parallel', threads=threads)
    return (rc, report, details)
示例#8
0
 def __call__(self):
     """Assemble the trimming pipeline from ``self.options`` and run it.

     The method proceeds in stages: parse adapters, validate that some
     operation was requested, register read modifiers, register filters
     and output formatters, build the record handler, and finally run
     either serially or in parallel. The order in which modifiers, filters
     and formatters are added follows the statement order below.

     Returns:
         The result of ``run_interruptible`` when ``options.threads`` is
         None, otherwise the result of ``self.run_parallel``.

     Raises:
         ValueError: If none of the checked trimming/filtering options was
             given, or if the 'insert' aligner is selected without exactly
             one 3' adapter for each read.
     """
     options = self.options
     match_probability = RandomMatchProbability()
     
     # Create Adapters
     
     has_adapters1 = options.adapters or options.anywhere or options.front
     has_adapters2 = options.adapters2 or options.anywhere2 or options.front2
     
     # Both names start out bound to the same empty list; within this
     # method they are only rebound or read, never mutated in place.
     adapters1 = adapters2 = []
     if has_adapters1 or has_adapters2:
         adapter_cache = super().load_known_adapters()
         parser_args = dict(
             colorspace=options.colorspace,
             max_error_rate=options.error_rate,
             min_overlap=options.overlap,
             read_wildcards=options.match_read_wildcards,
             adapter_wildcards=options.match_adapter_wildcards,
             indels=options.indels, indel_cost=options.indel_cost,
             cache=adapter_cache, gc_content=options.gc_content,
             match_probability=match_probability, alphabet=options.alphabet)
         # max_rmp is only passed when set, so the parser's own default
         # applies otherwise.
         if options.adapter_max_rmp:
             parser_args['max_rmp'] = options.adapter_max_rmp
         adapter_parser = AdapterParser(**parser_args)
         
         if has_adapters1:
             adapters1 = adapter_parser.parse_multi(
                 options.adapters, options.anywhere, options.front)
         if has_adapters2:
             adapters2 = adapter_parser.parse_multi(
                 options.adapters2, options.anywhere2, options.front2)
         
         if options.cache_adapters:
             adapter_cache.save()
     
     # Create Modifiers
     
     # Reject the run when no adapter and none of the other checked
     # trimming/filtering options was requested.
     # TODO: can this be replaced with an argparse required group?
     if (
             not adapters1 and not adapters2 and
             not options.quality_cutoff and
             options.nextseq_trim is None and
             options.cut == [] and options.cut2 == [] and
             options.cut_min == [] and options.cut_min2 == [] and
             (
                 options.minimum_length is None or
                 options.minimum_length <= 0) and
             options.maximum_length == sys.maxsize and not options.trim_n and
             not self.has_qualfile and options.max_n is None and
             (not options.paired or options.overwrite_low_quality is None)):
         raise ValueError(
             "You need to provide at least one adapter sequence.")
     
     # The insert aligner needs exactly one adapter per read, and that
     # adapter's `where` must be BACK.
     if (
             options.aligner == 'insert' and any(
                 not a or len(a) != 1 or a[0].where != BACK
                 for a in (adapters1, adapters2))):
         raise ValueError(
             "Insert aligner requires a single 3' adapter for each read")
     
     if options.debug:
         for adapter in adapters1 + adapters2:
             adapter.enable_debug()
     
     if options.paired:
         modifiers = PairedEndModifiers(options.paired)
     else:
         modifiers = SingleEndModifiers()
     
     # Register the order-configurable operations in the sequence given by
     # options.op_order; an operation is skipped when its option is unset.
     for oper in options.op_order:
         if oper == 'W' and options.overwrite_low_quality:
             lowq, highq, window = options.overwrite_low_quality
             modifiers.add_modifier(
                 OverwriteRead,
                 worse_read_min_quality=lowq, better_read_min_quality=highq,
                 window_size=window, base=options.quality_base)
             
         elif oper == 'A' and (adapters1 or adapters2):
             # TODO: generalize this using some kind of factory class
             if options.aligner == 'insert':
                 # Use different base probabilities if we're trimming
                 # bisulfite data.
                 # TODO: this doesn't seem to help things, so commenting it
                 # out for now
                 #if options.bisulfite:
                 #   base_probs = dict(match_prob=0.33, mismatch_prob=0.67)
                 # else:
                 #   base_probs = dict(match_prob=0.25, mismatch_prob=0.75)
                 modifiers.add_modifier(
                     InsertAdapterCutter,
                     adapter1=adapters1[0], adapter2=adapters2[0],
                     action=options.action,
                     mismatch_action=options.correct_mismatches,
                     max_insert_mismatch_frac=\
                         options.insert_match_error_rate,
                     max_adapter_mismatch_frac=\
                         options.insert_match_adapter_error_rate,
                     match_probability=match_probability,
                     insert_max_rmp=options.insert_max_rmp,
                     read_wildcards=options.match_read_wildcards,
                     adapter_wildcards=options.match_adapter_wildcards)
             else:
                 # A read without adapters gets None instead of an
                 # argument dict.
                 a1_args = dict(
                     adapters=adapters1,
                     times=options.times,
                     action=options.action) if adapters1 else None
                 a2_args = dict(
                     adapters=adapters2,
                     times=options.times,
                     action=options.action) if adapters2 else None
                 modifiers.add_modifier_pair(AdapterCutter, a1_args, a2_args)
         elif oper == 'C' and (options.cut or options.cut2):
             modifiers.add_modifier_pair(
                 UnconditionalCutter,
                 dict(lengths=options.cut),
                 dict(lengths=options.cut2))
         elif oper == 'G' and (options.nextseq_trim is not None):
             modifiers.add_modifier(
                 NextseqQualityTrimmer,
                 cutoff=options.nextseq_trim,
                 base=options.quality_base)
         elif oper == 'Q' and options.quality_cutoff:
             modifiers.add_modifier(
                 QualityTrimmer,
                 cutoff_front=options.quality_cutoff[0],
                 cutoff_back=options.quality_cutoff[1],
                 base=options.quality_base)
     
     # options.bisulfite is either a preset name (str) or an indexable
     # value whose items are keyword-argument mappings for MinCutter
     # (item 0 for read 1, item 1 for read 2).
     if options.bisulfite:
         if isinstance(options.bisulfite, str):
             if "non-directional" in options.bisulfite:
                 modifiers.add_modifier(
                     NonDirectionalBisulfiteTrimmer,
                     rrbs=options.bisulfite=="non-directional-rrbs")
             elif options.bisulfite == "rrbs":
                 modifiers.add_modifier(RRBSTrimmer)
             elif options.bisulfite in ("epignome", "truseq"):
                 # Trimming leads to worse results
                 #modifiers.add_modifier(TruSeqBisulfiteTrimmer)
                 pass
             elif options.bisulfite == "swift":
                 modifiers.add_modifier(SwiftBisulfiteTrimmer)
         else:
             if options.bisulfite[0]:
                 modifiers.add_modifier(
                     MinCutter, read=1, **(options.bisulfite[0]))
             if len(options.bisulfite) > 1 and options.bisulfite[1]:
                 modifiers.add_modifier(
                     MinCutter, read=2, **(options.bisulfite[1]))
     
     # The remaining modifiers are added in this fixed order, after the
     # op_order-controlled ones.
     if options.trim_n:
         modifiers.add_modifier(NEndTrimmer)
     
     if options.cut_min or options.cut_min2:
         modifiers.add_modifier_pair(
             MinCutter,
             dict(lengths=options.cut_min),
             dict(lengths=options.cut_min2))
     
     if options.length_tag:
         modifiers.add_modifier(
             LengthTagModifier, length_tag=options.length_tag)
     
     if options.strip_suffix:
         modifiers.add_modifier(SuffixRemover, suffixes=options.strip_suffix)
     
     if options.prefix or options.suffix:
         modifiers.add_modifier(
             PrefixSuffixAdder, prefix=options.prefix, suffix=options.suffix)
     
     if options.double_encode:
         modifiers.add_modifier(DoubleEncoder)
     
     # Zero-capping is only registered when the input delivers qualities.
     if options.zero_cap and self.delivers_qualities:
         modifiers.add_modifier(
             ZeroCapper, quality_base=options.quality_base)
     
     if options.trim_primer:
         modifiers.add_modifier(PrimerTrimmer)
     
     if options.merge_overlapping:
         modifiers.add_modifier(
             MergeOverlapping,
             min_overlap=options.merge_min_overlap,
             error_rate=options.merge_error_rate,
             mismatch_action=options.correct_mismatches)
     
     # Create Filters and Formatters
     
     # NOTE(review): min_affected is presumably the number of reads in a
     # pair that must match for a filter to apply -- confirm in
     # FilterFactory.
     min_affected = 2 if options.pair_filter == 'both' else 1
     filters = Filters(FilterFactory(options.paired, min_affected))
     
     # Interleaved output uses a single destination; otherwise there may
     # be a separate paired output.
     output1 = output2 = None
     interleaved = False
     if options.interleaved_output:
         output1 = options.interleaved_output
         interleaved = True
     else:
         output1 = options.output
         output2 = options.paired_output
     
     seq_formatter_args = dict(
         qualities=self.delivers_qualities,
         colorspace=options.colorspace,
         interleaved=interleaved
     )
     formatters = Formatters(output1, seq_formatter_args)
     # Output paths collected here are handed to Writers(...) below.
     force_create = []
         
     if options.merge_overlapping:
         filters.add_filter(MergedReadFilter)
         if options.merged_output:
             formatters.add_seq_formatter(
                 MergedReadFilter, options.merged_output)
         
     if options.minimum_length is not None and options.minimum_length > 0:
         filters.add_filter(TooShortReadFilter, options.minimum_length)
         if options.too_short_output:
             formatters.add_seq_formatter(
                 TooShortReadFilter,
                 options.too_short_output, options.too_short_paired_output)
     
     # sys.maxsize is treated as "no maximum length set".
     if options.maximum_length < sys.maxsize:
         filters.add_filter(TooLongReadFilter, options.maximum_length)
         if options.too_long_output is not None:
             formatters.add_seq_formatter(
                 TooLongReadFilter,
                 options.too_long_output, options.too_long_paired_output)
     
     if options.max_n is not None:
         filters.add_filter(NContentFilter, options.max_n)
     
     if options.discard_trimmed:
         filters.add_filter(TrimmedFilter)
     
     # Register the formatter for reads that pass all filters. Non-stdout
     # destinations are added to force_create only when a writer process
     # is used.
     if not formatters.multiplexed:
         if output1 is not None:
             formatters.add_seq_formatter(NoFilter, output1, output2)
             if output1 != STDOUT and options.writer_process:
                 force_create.append(output1)
                 if output2 is not None:
                     force_create.append(output2)
         elif not (options.discard_trimmed and options.untrimmed_output):
             formatters.add_seq_formatter(NoFilter, options.default_outfile)
             if options.default_outfile != STDOUT and options.writer_process:
                 force_create.append(options.default_outfile)
     
     if options.discard_untrimmed or options.untrimmed_output:
         filters.add_filter(UntrimmedFilter)
     
     if not options.discard_untrimmed:
         if formatters.multiplexed:
             # Without an explicit untrimmed output, the output template is
             # filled in with the name 'unknown'.
             untrimmed = options.untrimmed_output or output1.format(
                 name='unknown')
             formatters.add_seq_formatter(UntrimmedFilter, untrimmed)
             formatters.add_seq_formatter(NoFilter, untrimmed)
         elif options.untrimmed_output:
             formatters.add_seq_formatter(
                 UntrimmedFilter,
                 options.untrimmed_output, options.untrimmed_paired_output)
     
     if options.rest_file:
         formatters.add_info_formatter(RestFormatter(options.rest_file))
     if options.info_file:
         formatters.add_info_formatter(InfoFormatter(options.info_file))
     if options.wildcard_file:
         formatters.add_info_formatter(
             WildcardFormatter(options.wildcard_file))
     
     if options.paired:
         mixin_class = PairedEndPipelineMixin
     else:
         mixin_class = SingleEndPipelineMixin
     writers = Writers(force_create)
     record_handler = RecordHandler(modifiers, filters, formatters)
     # Wrap the handler when statistics collection is requested.
     if options.stats:
         record_handler = StatsRecordHandlerWrapper(
             record_handler, options.paired, options.stats,
             qualities=self.delivers_qualities,
             quality_base=self.quality_base)
     
     logger = logging.getLogger()
     num_adapters = sum(len(a) for a in modifiers.get_adapters())
     # The mode label is looked up by options.paired, which is expected to
     # be False, 'first' or 'both' here.
     logger.info(
         "Trimming %s adapter%s with at most %.1f%% errors in %s mode ...",
         num_adapters,
         's' if num_adapters > 1 else '', options.error_rate * 100,
         {
             False: 'single-end',
             'first': 'paired-end legacy',
             'both': 'paired-end'
         }[options.paired])
     # In 'first' (legacy) paired mode, warn when read-2 modifiers or a
     # quality cutoff were requested.
     if (
             options.paired == 'first' and (
                 len(record_handler.modifiers.get_modifiers(read=2)) > 0 or
                 options.quality_cutoff)):
         logger.warning('\n'.join(textwrap.wrap(
             'Requested read modifications are applied only to the '
             'first read since backwards compatibility mode is enabled. '
             'To modify both reads, also use any of the -A/-B/-G/-U '
             'options. Use a dummy adapter sequence when necessary: '
             '-A XXX')))
     
     if options.threads is None:
         # Run single-threaded version
         result_handler = WorkerResultHandler(WriterResultHandler(writers))
         # Build the concrete pipeline class at runtime by combining the
         # single-/paired-end mixin with TrimPipeline.
         pipeline_class = type(
             'TrimPipelineImpl', (mixin_class, TrimPipeline), {})
         pipeline = pipeline_class(record_handler, result_handler)
         self.summary.update(mode='serial', threads=1)
         return run_interruptible(pipeline, self, raise_on_error=True)
     else:
         # Run multiprocessing version
         self.summary.update(mode='parallel', threads=options.threads)
         return self.run_parallel(record_handler, writers, mixin_class)
示例#9
0
    def __call__(self):
        options = self.options
        match_probability = RandomMatchProbability()

        # Create Adapters

        has_adapters1 = options.adapters or options.anywhere or options.front
        has_adapters2 = options.adapters2 or options.anywhere2 or options.front2

        adapters1 = adapters2 = []
        if has_adapters1 or has_adapters2:
            adapter_cache = super().load_known_adapters()
            parser_args = dict(
                colorspace=options.colorspace,
                max_error_rate=options.error_rate,
                min_overlap=options.overlap,
                read_wildcards=options.match_read_wildcards,
                adapter_wildcards=options.match_adapter_wildcards,
                indels=options.indels,
                indel_cost=options.indel_cost,
                cache=adapter_cache,
                gc_content=options.gc_content,
                match_probability=match_probability,
                alphabet=options.alphabet)
            if options.adapter_max_rmp:
                parser_args['max_rmp'] = options.adapter_max_rmp
            adapter_parser = AdapterParser(**parser_args)

            if has_adapters1:
                adapters1 = adapter_parser.parse_multi(options.adapters,
                                                       options.anywhere,
                                                       options.front)
            if has_adapters2:
                adapters2 = adapter_parser.parse_multi(options.adapters2,
                                                       options.anywhere2,
                                                       options.front2)

            if options.cache_adapters:
                adapter_cache.save()

        # Create Modifiers

        # TODO: can this be replaced with an argparse required group?
        if (not adapters1 and not adapters2 and not options.quality_cutoff
                and options.nextseq_trim is None and options.cut == []
                and options.cut2 == [] and options.cut_min == []
                and options.cut_min2 == [] and
            (options.minimum_length is None or options.minimum_length <= 0)
                and options.maximum_length == sys.maxsize
                and not options.trim_n and not self.has_qualfile
                and options.max_n is None and
            (not options.paired or options.overwrite_low_quality is None)):
            raise ValueError(
                "You need to provide at least one adapter sequence.")

        if (options.aligner == 'insert'
                and any(not a or len(a) != 1 or a[0].where != BACK
                        for a in (adapters1, adapters2))):
            raise ValueError(
                "Insert aligner requires a single 3' adapter for each read")

        if options.debug:
            for adapter in adapters1 + adapters2:
                adapter.enable_debug()

        if options.paired:
            modifiers = PairedEndModifiers(options.paired)
        else:
            modifiers = SingleEndModifiers()

        for oper in options.op_order:
            if oper == 'W' and options.overwrite_low_quality:
                lowq, highq, window = options.overwrite_low_quality
                modifiers.add_modifier(OverwriteRead,
                                       worse_read_min_quality=lowq,
                                       better_read_min_quality=highq,
                                       window_size=window,
                                       base=options.quality_base)

            elif oper == 'A' and (adapters1 or adapters2):
                # TODO: generalize this using some kind of factory class
                if options.aligner == 'insert':
                    # Use different base probabilities if we're trimming
                    # bisulfite data.
                    # TODO: this doesn't seem to help things, so commenting it
                    # out for now
                    #if options.bisulfite:
                    #   base_probs = dict(match_prob=0.33, mismatch_prob=0.67)
                    # else:
                    #   base_probs = dict(match_prob=0.25, mismatch_prob=0.75)
                    modifiers.add_modifier(
                        InsertAdapterCutter,
                        adapter1=adapters1[0], adapter2=adapters2[0],
                        action=options.action,
                        mismatch_action=options.correct_mismatches,
                        max_insert_mismatch_frac=\
                            options.insert_match_error_rate,
                        max_adapter_mismatch_frac=\
                            options.insert_match_adapter_error_rate,
                        match_probability=match_probability,
                        insert_max_rmp=options.insert_max_rmp,
                        read_wildcards=options.match_read_wildcards,
                        adapter_wildcards=options.match_adapter_wildcards)
                else:
                    a1_args = dict(
                        adapters=adapters1,
                        times=options.times,
                        action=options.action) if adapters1 else None
                    a2_args = dict(
                        adapters=adapters2,
                        times=options.times,
                        action=options.action) if adapters2 else None
                    modifiers.add_modifier_pair(AdapterCutter, a1_args,
                                                a2_args)
            elif oper == 'C' and (options.cut or options.cut2):
                modifiers.add_modifier_pair(UnconditionalCutter,
                                            dict(lengths=options.cut),
                                            dict(lengths=options.cut2))
            elif oper == 'G' and (options.nextseq_trim is not None):
                modifiers.add_modifier(NextseqQualityTrimmer,
                                       cutoff=options.nextseq_trim,
                                       base=options.quality_base)
            elif oper == 'Q' and options.quality_cutoff:
                modifiers.add_modifier(QualityTrimmer,
                                       cutoff_front=options.quality_cutoff[0],
                                       cutoff_back=options.quality_cutoff[1],
                                       base=options.quality_base)

        if options.bisulfite:
            if isinstance(options.bisulfite, str):
                if "non-directional" in options.bisulfite:
                    modifiers.add_modifier(
                        NonDirectionalBisulfiteTrimmer,
                        rrbs=options.bisulfite == "non-directional-rrbs")
                elif options.bisulfite == "rrbs":
                    modifiers.add_modifier(RRBSTrimmer)
                elif options.bisulfite in ("epignome", "truseq"):
                    # Trimming leads to worse results
                    #modifiers.add_modifier(TruSeqBisulfiteTrimmer)
                    pass
                elif options.bisulfite == "swift":
                    modifiers.add_modifier(SwiftBisulfiteTrimmer)
            else:
                if options.bisulfite[0]:
                    modifiers.add_modifier(MinCutter,
                                           read=1,
                                           **(options.bisulfite[0]))
                if len(options.bisulfite) > 1 and options.bisulfite[1]:
                    modifiers.add_modifier(MinCutter,
                                           read=2,
                                           **(options.bisulfite[1]))

        if options.trim_n:
            modifiers.add_modifier(NEndTrimmer)

        if options.cut_min or options.cut_min2:
            modifiers.add_modifier_pair(MinCutter,
                                        dict(lengths=options.cut_min),
                                        dict(lengths=options.cut_min2))

        if options.length_tag:
            modifiers.add_modifier(LengthTagModifier,
                                   length_tag=options.length_tag)

        if options.strip_suffix:
            modifiers.add_modifier(SuffixRemover,
                                   suffixes=options.strip_suffix)

        if options.prefix or options.suffix:
            modifiers.add_modifier(PrefixSuffixAdder,
                                   prefix=options.prefix,
                                   suffix=options.suffix)

        if options.double_encode:
            modifiers.add_modifier(DoubleEncoder)

        if options.zero_cap and self.delivers_qualities:
            modifiers.add_modifier(ZeroCapper,
                                   quality_base=options.quality_base)

        if options.trim_primer:
            modifiers.add_modifier(PrimerTrimmer)

        if options.merge_overlapping:
            modifiers.add_modifier(MergeOverlapping,
                                   min_overlap=options.merge_min_overlap,
                                   error_rate=options.merge_error_rate,
                                   mismatch_action=options.correct_mismatches)

        # Create Filters and Formatters

        min_affected = 2 if options.pair_filter == 'both' else 1
        filters = Filters(FilterFactory(options.paired, min_affected))

        output1 = output2 = None
        interleaved = False
        if options.interleaved_output:
            output1 = options.interleaved_output
            interleaved = True
        else:
            output1 = options.output
            output2 = options.paired_output

        seq_formatter_args = dict(qualities=self.delivers_qualities,
                                  colorspace=options.colorspace,
                                  interleaved=interleaved)
        formatters = Formatters(output1, seq_formatter_args)
        force_create = []

        if options.merge_overlapping:
            filters.add_filter(MergedReadFilter)
            if options.merged_output:
                formatters.add_seq_formatter(MergedReadFilter,
                                             options.merged_output)

        if options.minimum_length is not None and options.minimum_length > 0:
            filters.add_filter(TooShortReadFilter, options.minimum_length)
            if options.too_short_output:
                formatters.add_seq_formatter(TooShortReadFilter,
                                             options.too_short_output,
                                             options.too_short_paired_output)

        if options.maximum_length < sys.maxsize:
            filters.add_filter(TooLongReadFilter, options.maximum_length)
            if options.too_long_output is not None:
                formatters.add_seq_formatter(TooLongReadFilter,
                                             options.too_long_output,
                                             options.too_long_paired_output)

        if options.max_n is not None:
            filters.add_filter(NContentFilter, options.max_n)

        if options.discard_trimmed:
            filters.add_filter(TrimmedFilter)

        if not formatters.multiplexed:
            if output1 is not None:
                formatters.add_seq_formatter(NoFilter, output1, output2)
                if output1 != STDOUT and options.writer_process:
                    force_create.append(output1)
                    if output2 is not None:
                        force_create.append(output2)
            elif not (options.discard_trimmed and options.untrimmed_output):
                formatters.add_seq_formatter(NoFilter, options.default_outfile)
                if options.default_outfile != STDOUT and options.writer_process:
                    force_create.append(options.default_outfile)

        if options.discard_untrimmed or options.untrimmed_output:
            filters.add_filter(UntrimmedFilter)

        if not options.discard_untrimmed:
            if formatters.multiplexed:
                untrimmed = options.untrimmed_output or output1.format(
                    name='unknown')
                formatters.add_seq_formatter(UntrimmedFilter, untrimmed)
                formatters.add_seq_formatter(NoFilter, untrimmed)
            elif options.untrimmed_output:
                formatters.add_seq_formatter(UntrimmedFilter,
                                             options.untrimmed_output,
                                             options.untrimmed_paired_output)

        if options.rest_file:
            formatters.add_info_formatter(RestFormatter(options.rest_file))
        if options.info_file:
            formatters.add_info_formatter(InfoFormatter(options.info_file))
        if options.wildcard_file:
            formatters.add_info_formatter(
                WildcardFormatter(options.wildcard_file))

        if options.paired:
            mixin_class = PairedEndPipelineMixin
        else:
            mixin_class = SingleEndPipelineMixin
        writers = Writers(force_create)
        record_handler = RecordHandler(modifiers, filters, formatters)
        if options.stats:
            record_handler = StatsRecordHandlerWrapper(
                record_handler,
                options.paired,
                options.stats,
                qualities=self.delivers_qualities,
                quality_base=self.quality_base)

        logger = logging.getLogger()
        num_adapters = sum(len(a) for a in modifiers.get_adapters())
        logger.info(
            "Trimming %s adapter%s with at most %.1f%% errors in %s mode ...",
            num_adapters, 's' if num_adapters > 1 else '',
            options.error_rate * 100, {
                False: 'single-end',
                'first': 'paired-end legacy',
                'both': 'paired-end'
            }[options.paired])
        if (options.paired == 'first'
                and (len(record_handler.modifiers.get_modifiers(read=2)) > 0
                     or options.quality_cutoff)):
            logger.warning('\n'.join(
                textwrap.wrap(
                    'Requested read modifications are applied only to the '
                    'first read since backwards compatibility mode is enabled. '
                    'To modify both reads, also use any of the -A/-B/-G/-U '
                    'options. Use a dummy adapter sequence when necessary: '
                    '-A XXX')))

        if options.threads is None:
            # Run single-threaded version
            result_handler = WorkerResultHandler(WriterResultHandler(writers))
            pipeline_class = type('TrimPipelineImpl',
                                  (mixin_class, TrimPipeline), {})
            pipeline = pipeline_class(record_handler, result_handler)
            self.summary.update(mode='serial', threads=1)
            return run_interruptible(pipeline, self, raise_on_error=True)
        else:
            # Run multiprocessing version
            self.summary.update(mode='parallel', threads=options.threads)
            return self.run_parallel(record_handler, writers, mixin_class)