class StaphAureusN315Recipe(BaseRecipe):
    '''
    Recipe for the Staphylococcus aureus N315 genome.

    Saves two results to the repo: the gzipped nucleotide fasta
    (NCBI nuccore record NC_002745.2, per the source URL) and every
    file whose path starts with the "staph_aureus_n315.bt2" prefix.
    '''

    def __init__(self):
        super(StaphAureusN315Recipe, self).__init__()
        # NOTE(review): the third argument is presumably the URL the
        # fasta is fetched from -- confirm against SourceFile.
        self.source = SourceFile(
            self.repo,
            "staph_aureus_n315.fna.gz",
            "https://www.ncbi.nlm.nih.gov/nuccore/NC_002745.2?report=fasta&log$=seqview&format=text")
        self.bt2 = ConstructedFile(self.repo, "staph_aureus_n315.bt2")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'staph_aureus_n315'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['gz_fasta_nucl', 'bt2_index']

    def resultSchema(self):
        '''
        Map each result key to its file type(s).

        'bt2' is a list of six 'bt2_index' entries.
        '''
        return {
            'fasta': 'gz_fasta_nucl',
            'bt2': ['bt2_index'] * 6
        }

    def makeRecipe(self):
        '''Resolve the tracked files and save them under their schema keys.'''
        self.source.resolve()
        self.repo.saveFiles(self, 'fasta', self.source.filepath())

        self.bt2.resolve()
        # glob yields matches in arbitrary, filesystem-dependent order;
        # sort so the index files are always passed in the same order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)
Пример #2
0
class Uniref90Recipe(BaseRecipe):
    '''
    Recipe for uniref90 with diamond index.

    Only the "uniref90_annotated.1.1.dmnd" file is tracked and saved;
    the "uniref90.faa.gz" source file is currently disabled.

    NOTE(review): fileTypes/resultSchema still declare a 'fasta'
    entry although nothing is saved under that key -- confirm this
    is intended.
    '''

    def __init__(self):
        super(Uniref90Recipe, self).__init__()
        # SourceFile(self.repo, "uniref90.faa.gz") is intentionally
        # not created while the fasta source is disabled.
        dmndName = "uniref90_annotated.1.1.dmnd"
        self.dmnd = ConstructedFile(self.repo, dmndName)

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'uniref90'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['gz_fasta_aa', 'dmnd-db']

    def resultSchema(self):
        '''Map each result key to its file type.'''
        schema = {}
        schema['fasta'] = 'gz_fasta_aa'
        schema['dmnd'] = 'dmnd-db'
        return schema

    def makeRecipe(self):
        '''Resolve the dmnd file and save it under the 'dmnd' key.'''
        # No 'fasta' result is saved here: the source file is disabled.
        dmnd = self.dmnd
        dmnd.resolve()
        self.repo.saveFiles(self, 'dmnd', dmnd.filepath())
Пример #3
0
class CARDRecipe(BaseRecipe):
    '''
    Recipe for the comprehensive antibiotic resistance database (CARD).

    Tracks three files: the gzipped protein fasta ("card.faa.gz"), the
    shortbred marker fasta ("card.shortbred_markers.faa") and the dmnd
    database ("card.dmnd").
    '''

    def __init__(self):
        super(CARDRecipe, self).__init__()
        self.source = SourceFile(self.repo, "card.faa.gz")
        self.sbred = ConstructedFile(
            self.repo, "card.shortbred_markers.faa")
        self.dmnd = ConstructedFile(self.repo, "card.dmnd")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'card'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['gz_fasta_aa', 'fasta_aa', 'dmnd-db']

    def resultSchema(self):
        '''Map each result key to its file type.'''
        return dict([
            ('fasta', 'gz_fasta_aa'),
            ('sbred', 'fasta_aa'),
            ('dmnd', 'dmnd-db'),
        ])

    def makeRecipe(self):
        '''Resolve each tracked file in turn and save it under its key.'''
        steps = (
            ('fasta', self.source),
            ('sbred', self.sbred),
            ('dmnd', self.dmnd),
        )
        for key, tracked in steps:
            tracked.resolve()
            self.repo.saveFiles(self, key, tracked.filepath())
Пример #4
0
class HG38UCSCGenomeRecipe(BaseRecipe):
    '''
    Recipe for the hg38 genome from UCSC.

    This genome is used in the CAP because:
    - hg38 is the most recent as of December 2017
    - it includes alt contigs. Bad for variant calling, good for filtering
    - it uses human readable chromosome names
    - it uses the rCRS mitochondrial sequence

    Saves the gzipped nucleotide fasta and every file whose path
    starts with the "hg38_ucsc.bt2" prefix.
    '''

    def __init__(self):
        super(HG38UCSCGenomeRecipe, self).__init__()
        # NOTE(review): the third argument is presumably the URL the
        # fasta is fetched from -- confirm against SourceFile.
        self.source = SourceFile(
            self.repo,
            "hg38_ucsc.fna.gz",
            "http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz")
        self.bt2 = ConstructedFile(self.repo, "hg38_ucsc.bt2")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'hg38_ucsc'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['gz_fasta_nucl', 'bt2_index']

    def resultSchema(self):
        '''
        Map each result key to its file type(s).

        'bt2' is a list of six 'bt2_index' entries.
        '''
        return {
            'fasta': 'gz_fasta_nucl',
            'bt2': ['bt2_index'] * 6
        }

    def makeRecipe(self):
        '''Resolve the tracked files and save them under their schema keys.'''
        self.source.resolve()
        self.repo.saveFiles(self, 'fasta', self.source.filepath())

        self.bt2.resolve()
        # glob yields matches in arbitrary, filesystem-dependent order;
        # sort so the index files are always passed in the same order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)
Пример #5
0
class MeganRecipe(BaseRecipe):
    '''
    Recipe for the megan tables.

    Tracks the "prot_acc2tax-May2017.abin" source file and the
    constructed "blast2lca" file; both are typed 'megan_table'.
    '''

    def __init__(self):
        super(MeganRecipe, self).__init__()
        self.acc2taxa = SourceFile(
            self.repo, "prot_acc2tax-May2017.abin")
        self.blast2lca = ConstructedFile(
            self.repo, "blast2lca")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'megan'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['megan_table']

    def resultSchema(self):
        '''Map each result key to its file type.'''
        return {
            'acc2taxa': 'megan_table',
            'blast2lca': 'megan_table',
        }

    def makeRecipe(self):
        '''Resolve each tracked file in turn and save it under its key.'''
        for key, tracked in (('acc2taxa', self.acc2taxa),
                             ('blast2lca', self.blast2lca)):
            tracked.resolve()
            self.repo.saveFiles(self, key, tracked.filepath())
Пример #6
0
class NRProteinRecipe(BaseRecipe):
    '''
    Recipe for the nr protein fasta with its dmnd database.

    Tracks the gzipped protein fasta ("nr.faa.gz") and the
    constructed "nr.dmnd" file.
    '''

    def __init__(self):
        super(NRProteinRecipe, self).__init__()
        self.fasta = SourceFile(self.repo, "nr.faa.gz")
        self.dmnd = ConstructedFile(self.repo, "nr.dmnd")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'nr_protein'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['gz_fasta_aa', 'dmnd-db']

    def resultSchema(self):
        '''Map each result key to its file type.'''
        return {
            'fasta': 'gz_fasta_aa',
            'dmnd': 'dmnd-db',
        }

    def makeRecipe(self):
        '''Resolve the fasta, then the dmnd file, saving each under its key.'''
        fastaFile = self.fasta
        fastaFile.resolve()
        self.repo.saveFiles(self, 'fasta', fastaFile.filepath())

        dmndFile = self.dmnd
        dmndFile.resolve()
        self.repo.saveFiles(self, 'dmnd', dmndFile.filepath())
Пример #7
0
class MethyltransferaseRecipe(BaseRecipe):
    '''
    Recipe for the methyltransferase protein fasta with its dmnd
    database.

    Tracks the "methyls.faa" source file and the constructed
    "methyls.dmnd" file.

    NOTE(review): 'fasta' is declared as 'gz_fasta_aa' although the
    file name "methyls.faa" carries no .gz suffix -- confirm whether
    the type should be 'fasta_aa'.
    '''

    def __init__(self):
        super(MethyltransferaseRecipe, self).__init__()
        self.fasta = SourceFile(self.repo, "methyls.faa")
        self.dmnd = ConstructedFile(self.repo, "methyls.dmnd")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'methyl'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['gz_fasta_aa', 'dmnd-db']

    def resultSchema(self):
        '''Map each result key to its file type.'''
        schema = {}
        schema['fasta'] = 'gz_fasta_aa'
        schema['dmnd'] = 'dmnd-db'
        return schema

    def makeRecipe(self):
        '''Resolve each tracked file in turn and save it under its key.'''
        for key, tracked in (('fasta', self.fasta), ('dmnd', self.dmnd)):
            tracked.resolve()
            self.repo.saveFiles(self, key, tracked.filepath())
class CommonMacrobialRecipe(BaseRecipe):
    '''
    Recipe for database of common macrobial genomes.

    Saves the gzipped nucleotide fasta ("macrobes.fna.gz") and every
    file whose path starts with the "common_macrobial.bt2" prefix.
    '''

    def __init__(self):
        super(CommonMacrobialRecipe, self).__init__()
        self.source = SourceFile(self.repo, "macrobes.fna.gz")
        self.bt2 = ConstructedFile(self.repo, "common_macrobial.bt2")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'common_macrobial'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['gz_fasta_nucl', 'bt2_index']

    def resultSchema(self):
        '''
        Map each result key to its file type(s).

        'bt2' is a list of six 'bt2_index' entries.
        '''
        return {'fasta': 'gz_fasta_nucl', 'bt2': ['bt2_index'] * 6}

    def makeRecipe(self):
        '''Resolve the tracked files and save them under their schema keys.'''
        self.source.resolve()
        self.repo.saveFiles(self, 'fasta', self.source.filepath())

        self.bt2.resolve()
        # glob yields matches in arbitrary, filesystem-dependent order;
        # sort so the index files are always passed in the same order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)
Пример #9
0
class MegaresRecipe(BaseRecipe):
    '''
    Recipe for megares.

    Saves the nucleotide fasta ("megares.fa"), the "megares.csv"
    file and every file whose path starts with the "megares.bt2"
    prefix.
    '''

    def __init__(self):
        super(MegaresRecipe, self).__init__()
        self.fasta = SourceFile(self.repo, "megares.fa")
        self.bt2 = ConstructedFile(self.repo, "megares.bt2")
        self.csv = SourceFile(self.repo, "megares.csv")

    def name(self):
        '''Return the identifier string of this recipe.'''
        return 'megares'

    def fileTypes(self):
        '''Return the file type names referenced by resultSchema.'''
        return ['fasta_nucl', 'bt2_index', 'csv']

    def resultSchema(self):
        '''
        Map each result key to its file type(s).

        'bt2' is a list of six 'bt2_index' entries.
        '''
        return {'fasta': 'fasta_nucl', 'bt2': ['bt2_index'] * 6, 'csv': 'csv'}

    def makeRecipe(self):
        '''Resolve the tracked files and save them under their schema keys.'''
        self.fasta.resolve()
        self.repo.saveFiles(self, 'fasta', self.fasta.filepath())

        self.csv.resolve()
        self.repo.saveFiles(self, 'csv', self.csv.filepath())

        self.bt2.resolve()
        # glob yields matches in arbitrary, filesystem-dependent order;
        # sort so the index files are always passed in the same order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)