示例#1
0
    def __workerThread(self, ubiquityThreshold, singleCopyThreshold,
                       minGenomes, colocatedDistThreshold,
                       colocatedGenomeThreshold, metadata, queueIn, queueOut):
        """Build a marker set for every lineage received on the input queue.

        Lineages are read from ``queueIn`` until a ``None`` sentinel arrives.
        For each lineage exactly one tuple
        ``(lineage, colocatedSets, number of genome ids)`` is put on
        ``queueOut``; ``colocatedSets`` is ``None`` when fewer than
        ``minGenomes`` genome ids were found for the lineage.

        Args:
            ubiquityThreshold: forwarded to ``buildMarkerSet``.
            singleCopyThreshold: forwarded to ``buildMarkerSet``.
            minGenomes: minimum number of genome ids a lineage needs before
                a marker set is built for it.
            colocatedDistThreshold: forwarded to ``buildMarkerSet``.
            colocatedGenomeThreshold: not used by this method; kept so the
                signature stays compatible with existing callers.
            metadata: forwarded to ``img.genomeIdsByTaxonomy``.
            queueIn: queue of lineage names terminated by ``None``.
            queueOut: queue that receives the result tuples.
        """

        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        markerSetBuilder = MarkerSetBuilder()

        while True:
            lineage = queueIn.get(block=True, timeout=None)
            # None is the shutdown sentinel; test identity, not equality.
            if lineage is None:
                break

            # 'Universal' is looked up under the 'prokaryotes' taxonomy.
            taxonomy = 'prokaryotes' if lineage == 'Universal' else lineage
            genomeIds = img.genomeIdsByTaxonomy(taxonomy, metadata)

            colocatedSets = None
            if len(genomeIds) >= minGenomes:
                markerSet = markerSetBuilder.buildMarkerSet(
                    genomeIds, ubiquityThreshold, singleCopyThreshold,
                    colocatedDistThreshold)
                colocatedSets = markerSet.markerSet

            # allow results to be processed or written to file
            queueOut.put((lineage, colocatedSets, len(genomeIds)))
    def __workerThread(self, ubiquityThreshold, singleCopyThreshold,
                       minGenomes,
                       colocatedDistThreshold, colocatedGenomeThreshold,
                       metadata,
                       queueIn, queueOut):
        """Process each lineage taken from the input queue.

        The loop pulls lineage names from ``queueIn`` and stops when it
        receives ``None``. Each lineage produces one result tuple on
        ``queueOut``: ``(lineage, colocatedSets, number of genome ids)``.
        ``colocatedSets`` is the ``markerSet`` attribute of the object
        returned by ``buildMarkerSet``, or ``None`` if the lineage has fewer
        than ``minGenomes`` genome ids.

        Args:
            ubiquityThreshold: passed through to ``buildMarkerSet``.
            singleCopyThreshold: passed through to ``buildMarkerSet``.
            minGenomes: smallest number of genome ids for which a marker
                set is built.
            colocatedDistThreshold: passed through to ``buildMarkerSet``.
            colocatedGenomeThreshold: unused here; retained for interface
                compatibility.
            metadata: passed through to ``img.genomeIdsByTaxonomy``.
            queueIn: source queue of lineage names, ended by ``None``.
            queueOut: destination queue for result tuples.
        """

        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        markerSetBuilder = MarkerSetBuilder()

        while True:
            lineage = queueIn.get(block=True, timeout=None)
            # Identity comparison is the correct test for the None sentinel.
            if lineage is None:
                break

            # The 'Universal' pseudo-lineage is queried as 'prokaryotes'.
            if lineage == 'Universal':
                taxonomy = 'prokaryotes'
            else:
                taxonomy = lineage
            genomeIds = img.genomeIdsByTaxonomy(taxonomy, metadata)

            if len(genomeIds) >= minGenomes:
                markerSet = markerSetBuilder.buildMarkerSet(
                    genomeIds, ubiquityThreshold, singleCopyThreshold,
                    colocatedDistThreshold)
                colocatedSets = markerSet.markerSet
            else:
                # too few genomes: report the lineage without a marker set
                colocatedSets = None

            # allow results to be processed or written to file
            queueOut.put((lineage, colocatedSets, len(genomeIds)))
示例#3
0
    def __getUniversalMarkerGenes(self, phyloUbiquityThreshold,
                                  phyloSingleCopyThreshold, outputGeneDir):
        """Find the marker genes common to the Archaea and Bacteria lineages.

        For each of the two lineages the genome ids are fetched with
        ``img.genomeIdsByTaxonomy`` and marker genes are built with
        ``buildMarkerGenes``; a lineage with no genome ids is skipped. The
        intersection of the per-lineage marker genes is written (as its
        ``str()`` form) to ``phylo_marker_set.txt`` inside ``outputGeneDir``.

        Args:
            phyloUbiquityThreshold: forwarded to ``buildMarkerGenes``.
            phyloSingleCopyThreshold: forwarded to ``buildMarkerGenes``.
            outputGeneDir: directory that receives ``phylo_marker_set.txt``.

        Returns:
            Tuple ``(allTrustedGenomeIds, universalMarkerGenes)``: the union
            of genome ids over the processed lineages, and the set of marker
            genes shared by all processed lineages (an empty set when every
            lineage was skipped).
        """
        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        markerSetBuilder = MarkerSetBuilder()

        metadata = img.genomeMetadata()

        allTrustedGenomeIds = set()
        phyloMarkerGenes = {}
        for lineage in ['Archaea', 'Bacteria']:
            # get all genomes in lineage
            print('\nIdentifying all ' + lineage + ' genomes.')
            trustedGenomeIds = img.genomeIdsByTaxonomy(lineage, metadata)
            print('  Trusted genomes in lineage: ' +
                  str(len(trustedGenomeIds)))
            if len(trustedGenomeIds) < 1:
                print(
                    '  Skipping lineage due to insufficient number of genomes.'
                )
                continue

            allTrustedGenomeIds.update(trustedGenomeIds)

            print('  Building marker set.')
            phyloMarkerGenes[lineage] = markerSetBuilder.buildMarkerGenes(
                trustedGenomeIds, phyloUbiquityThreshold,
                phyloSingleCopyThreshold)

        # universal marker genes: intersect into a fresh set so the
        # per-lineage sets returned by the builder are not modified, and
        # fall back to an empty set when every lineage was skipped (the
        # previous None result made the len() call below raise TypeError).
        markerGeneSets = list(phyloMarkerGenes.values())
        if markerGeneSets:
            universalMarkerGenes = set(markerGeneSets[0]).intersection(
                *markerGeneSets[1:])
        else:
            universalMarkerGenes = set()

        # the context manager closes the file even if the write fails
        with open(os.path.join(outputGeneDir, 'phylo_marker_set.txt'),
                  'w') as fout:
            fout.write(str(universalMarkerGenes))

        print('')
        print('  Universal marker genes: ' + str(len(universalMarkerGenes)))

        return allTrustedGenomeIds, universalMarkerGenes
    def __getUniversalMarkerGenes(self, phyloUbiquityThreshold, phyloSingleCopyThreshold, outputGeneDir):
        """Compute the marker genes shared by Archaea and Bacteria.

        Genome ids for each lineage come from ``img.genomeIdsByTaxonomy``;
        lineages without any genome ids are skipped. Marker genes are built
        per lineage with ``buildMarkerGenes`` and then intersected. The
        ``str()`` form of the intersection is written to
        ``phylo_marker_set.txt`` in ``outputGeneDir``.

        Progress messages use single-argument ``print(...)`` calls, which
        produce the same output under Python 2 and Python 3.

        Args:
            phyloUbiquityThreshold: passed through to ``buildMarkerGenes``.
            phyloSingleCopyThreshold: passed through to ``buildMarkerGenes``.
            outputGeneDir: directory in which ``phylo_marker_set.txt`` is
                created.

        Returns:
            Tuple ``(allTrustedGenomeIds, universalMarkerGenes)``: all genome
            ids of the processed lineages, and the marker genes present in
            every processed lineage (empty set if none was processed).
        """
        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        markerSetBuilder = MarkerSetBuilder()

        metadata = img.genomeMetadata()

        allTrustedGenomeIds = set()
        phyloMarkerGenes = {}
        for lineage in ['Archaea', 'Bacteria']:
            # get all genomes in lineage
            print('\nIdentifying all ' + lineage + ' genomes.')
            trustedGenomeIds = img.genomeIdsByTaxonomy(lineage, metadata)
            print('  Trusted genomes in lineage: ' + str(len(trustedGenomeIds)))
            if len(trustedGenomeIds) < 1:
                print('  Skipping lineage due to insufficient number of genomes.')
                continue

            allTrustedGenomeIds.update(trustedGenomeIds)

            print('  Building marker set.')
            markerGenes = markerSetBuilder.buildMarkerGenes(
                trustedGenomeIds, phyloUbiquityThreshold,
                phyloSingleCopyThreshold)
            phyloMarkerGenes[lineage] = markerGenes

        # universal marker genes: start from a copy of the first lineage's
        # genes so the builder's own result is never mutated in place
        universalMarkerGenes = None
        for markerGenes in phyloMarkerGenes.values():
            if universalMarkerGenes is None:
                universalMarkerGenes = set(markerGenes)
            else:
                universalMarkerGenes.intersection_update(markerGenes)

        # every lineage was skipped: report an empty set instead of letting
        # len(None) raise TypeError further down
        if universalMarkerGenes is None:
            universalMarkerGenes = set()

        # 'with' guarantees the file is closed even when the write raises
        with open(os.path.join(outputGeneDir, 'phylo_marker_set.txt'), 'w') as fout:
            fout.write(str(universalMarkerGenes))

        print('')
        print('  Universal marker genes: ' + str(len(universalMarkerGenes)))

        return allTrustedGenomeIds, universalMarkerGenes