def run(self, threshold=3.0):
    """Cluster the reads into OTUs with usearch7, then map the reads back.

    The steps run in order, each one consuming the file produced by the
    previous one: `self.reads` is dereplicated into `self.derep`, sorted
    by size into `self.sorted` (dropping singletons), clustered into
    `self.centers`, the centers are renamed, and finally the original
    reads are mapped against the centers into `self.readmap`.

    `threshold` is the OTU radius in percent. It is passed unchanged to
    usearch7 as `-otu_radius_pct`, and the identity used for the final
    mapping step is derived from it as a fraction: (100 - threshold) / 100.

    Nothing is returned; every output location is handed to the external
    commands as an argument.
    """
    # Dereplicate #
    sh.usearch7("--derep_fulllength", self.reads,
                '-output', self.derep,
                '-sizeout')
    # Order by size and kill singletons #
    sh.usearch7("--sortbysize", self.derep,
                '-output', self.sorted,
                '-minsize', 2)
    # Compute the centers #
    sh.usearch7("--cluster_otus", self.sorted,
                '-otus', self.centers,
                '-otu_radius_pct', threshold)
    # Rename the centers #
    self.centers.rename_with_num('OTU_')
    # Map the reads back to the centers #
    # Divide by a float literal: with an integer threshold, Python 2 integer
    # division would otherwise truncate the identity down to 0.
    identity = (100 - threshold) / 100.0
    sh.usearch7("-usearch_global", self.reads,
                '-db', self.centers,
                '-strand', 'plus',
                '-id', identity,
                '-uc', self.readmap)
def run(self, threshold=None):
    """Cluster the reads into OTUs with usearch7, then map the reads back.

    The steps run in order, each one consuming the file produced by the
    previous one: `self.reads` is dereplicated into `self.derep`, sorted
    by size into `self.sorted` (dropping singletons), clustered into
    `self.centers`, the centers are renamed, and finally the original
    reads are mapped against the centers into `self.readmap`.

    `threshold` is the OTU radius in percent. When it is None the value
    of `self.threshold` is used instead. It is passed unchanged to
    usearch7 as `-otu_radius_pct`, and the identity used for the final
    mapping step is derived from it as a fraction: (100 - threshold) / 100.

    Nothing is returned; every output location is handed to the external
    commands as an argument.
    """
    # Optional threshold #
    if threshold is None:
        threshold = self.threshold
    # Divide by a float literal: with an integer threshold, Python 2 integer
    # division would otherwise truncate the identity down to 0.
    identity = (100 - threshold) / 100.0
    # Dereplicate #
    # `usearch7 --derep_fulllength ... -sizeout` is deliberately not used
    # here: the 32bit uparse version runs out of memory on this step.
    sh.fasta_make_unique(self.reads, self.derep)
    # Order by size and kill singletons #
    sh.usearch7("--sortbysize", self.derep,
                '-output', self.sorted,
                '-minsize', 2)
    # Compute the centers #
    sh.usearch7("--cluster_otus", self.sorted,
                '-otus', self.centers,
                '-otu_radius_pct', threshold)
    # Rename the centers #
    self.centers.rename_with_num('OTU-')
    # Map the reads back to the centers #
    sh.usearch7("-usearch_global", self.reads,
                '-db', self.centers,
                '-strand', 'plus',
                '-id', identity,
                '-uc', self.readmap)
# Internal modules # import illumitag from plumbing.common import natural_sort from plumbing.autopaths import AutoPaths, FilePath from plumbing.cache import property_cached, LazyString from fasta import FASTA, SizesFASTA from illumitag.clustering.otu import OTUs from illumitag.clustering.taxonomy.crest import CrestTaxonomy from illumitag.clustering.taxonomy.rdp import RdpTaxonomy from illumitag.clustering.source.seqenv_wrapper import Seqenv # Third party modules # import sh, pandas # Constants # uparse_version = LazyString(lambda: sh.usearch7('-version').stdout[8:].strip('\n')) ############################################################################### class UparseOTUs(OTUs): """Will use uparse to create OTU clusters from a given FASTA file http://www.nature.com/doifinder/10.1038/nmeth.2604""" short_name = 'uparse' title = 'UPARSE denovo picking' article = "http://www.nature.com/doifinder/10.1038/nmeth.2604" version = uparse_version threshold = 3.0 all_paths = """ /derep.fasta /sorted.fasta