def run(self, outputFile):
    """Build the IMG gene count table and write it to a file.

    Collects the prokaryotic genome IDs reported by ``IMG``, drops those
    flagged as having missing data, builds the gene count table for the
    remaining genomes, prints summary figures for the 'pfam00318' entry,
    and writes ``str(countTable)`` to ``outputFile``.

    Args:
        outputFile: Path of the file to create or overwrite with the
            string form of the count table.
    """
    img = IMG()

    print('Identifying all IMG prokaryotic genomes with valid data.')
    metadata = img.genomeMetadata()
    genomeIds = img.genomeIdsByTaxonomy('prokaryotes', metadata)
    genomeMissingData = img.genomesWithMissingData(genomeIds)
    genomeIds -= genomeMissingData

    # Single-argument, parenthesised prints produce the same output under
    # the Python 2 print statement and the Python 3 print function.
    print(' Identified %d valid genomes.' % (len(genomeIds)))

    print('Calculating gene copy number for each genome.')
    countTable = img.geneCountTable(genomeIds)

    # NOTE(review): countTable['pfam00318'] is presumably a mapping of
    # genome ID -> count; confirm against IMG.geneCountTable. `.items()` is
    # used because `.iteritems()` only exists on Python 2 dicts.
    counts = [count for _, count in countTable['pfam00318'].items()]

    print(len(genomeIds))
    print(len(counts))
    print(mean(counts))

    # The context manager closes the file even if the write raises.
    with open(outputFile, 'w') as fout:
        fout.write(str(countTable))

    print('Gene count dictionary to: ' + outputFile)
def run(self, outputFile):
    """Compute per-genome gene counts from IMG and save the table.

    Steps, in order:
      1. Ask ``IMG`` for genome metadata and the prokaryotic genome IDs.
      2. Remove the IDs that ``IMG`` reports as having missing data.
      3. Build the gene count table for the remaining genomes.
      4. Print the number of genomes, and the number and mean of the
         counts stored under 'pfam00318'.
      5. Write ``str(countTable)`` to ``outputFile``.

    Args:
        outputFile: Path of the file to create or overwrite with the
            string form of the count table.
    """
    img = IMG()

    print('Identifying all IMG prokaryotic genomes with valid data.')
    metadata = img.genomeMetadata()
    genomeIds = img.genomeIdsByTaxonomy('prokaryotes', metadata)
    genomeMissingData = img.genomesWithMissingData(genomeIds)
    genomeIds -= genomeMissingData
    print(' Identified %d valid genomes.' % (len(genomeIds)))

    print('Calculating gene copy number for each genome.')
    countTable = img.geneCountTable(genomeIds)

    # `.iteritems()` was removed from dict in Python 3; `.items()` yields
    # the same (key, value) pairs on both major versions.
    # NOTE(review): assumes countTable['pfam00318'] is a mapping of
    # genome ID -> count; confirm against IMG.geneCountTable.
    counts = [count for _, count in countTable['pfam00318'].items()]

    print(len(genomeIds))
    print(len(counts))
    print(mean(counts))

    # Use a context manager so the handle is released even when the
    # write fails part-way through.
    with open(outputFile, 'w') as fout:
        fout.write(str(countTable))

    print('Gene count dictionary to: ' + outputFile)