def test_consensusEcGraph_difference(self):
    """Compare two ways of finding EC numbers exclusive to Enterobacteriales.

    The set of EC numbers present in the Enterobacteriales consensus but
    absent from the Gammaproteobacteria consensus is computed twice:

    1. directly from ``consensusEcGraph(...).getECs()``;
    2. indirectly, by collecting ``ecNumbers`` of every enzyme in
       ``collectiveEnzymeGraphByEcConsensus(...)``.

    The test asserts that the first set has the fixed expected size (87),
    that the second set has the same size, and that both sets contain
    exactly the same elements.
    """
    FEV_KEGG.startProcessPool()

    enterobacteriales_organisms_abbreviations = [
        'eco', 'ses', 'sfl', 'ent', 'esa', 'kpn', 'cko', 'ype', 'spe', 'buc'
    ]
    enterobacteriales_organisms = Organism.Group(
        organismAbbreviations=enterobacteriales_organisms_abbreviations)

    gammaproteobacteria_organisms_abbreviations = [
        'hin', 'mht', 'xcc', 'vch', 'pae', 'acb', 'son', 'pha', 'amc', 'lpn',
        'ftu', 'aha'
    ]
    # extend with the sub-set, because they are also part of the set
    gammaproteobacteria_organisms_abbreviations.extend(
        enterobacteriales_organisms_abbreviations)
    gammaproteobacteria_organisms = Organism.Group(
        organismAbbreviations=gammaproteobacteria_organisms_abbreviations)

    # --- Variant 1: EC numbers taken directly from the consensus EC graphs.
    enterobacteriales_EC_graph = enterobacteriales_organisms.consensusEcGraph(
        noMultifunctional=True)
    gammaproteobacteria_EC_graph = gammaproteobacteria_organisms.consensusEcGraph(
        noMultifunctional=True)

    enterobacteriales_EC_set = enterobacteriales_EC_graph.getECs()
    gammaproteobacteria_EC_set = gammaproteobacteria_EC_graph.getECs()

    only_enterobacteriales_EC_set = enterobacteriales_EC_set.difference(
        gammaproteobacteria_EC_set)

    result = len(only_enterobacteriales_EC_set)
    print(str(result) + ' results')
    self.assertEqual(result, 87)

    # --- Variant 2: EC numbers collected from the enzymes of the
    # consensus-based collective enzyme graphs.
    enterobacteriales_enzyme_graph = enterobacteriales_organisms.collectiveEnzymeGraphByEcConsensus(
        noMultifunctional=True)
    gammaproteobacteria_enzyme_graph = gammaproteobacteria_organisms.collectiveEnzymeGraphByEcConsensus(
        noMultifunctional=True)

    # NOTE(review): noMultifunctional=True was already requested above; the
    # explicit removal is kept because the original test performs it.
    enterobacteriales_enzyme_graph.removeMultifunctionalEnzymes()
    gammaproteobacteria_enzyme_graph.removeMultifunctionalEnzymes()

    enterobacteriales_enzymes = enterobacteriales_enzyme_graph.getEnzymes()
    gammaproteobacteria_enzymes = gammaproteobacteria_enzyme_graph.getEnzymes()

    enterobacteriales_EC_set_2 = set()
    for enzyme in enterobacteriales_enzymes:
        enterobacteriales_EC_set_2.update(enzyme.ecNumbers)

    gammaproteobacteria_EC_set_2 = set()
    for enzyme in gammaproteobacteria_enzymes:
        gammaproteobacteria_EC_set_2.update(enzyme.ecNumbers)

    only_enterobacteriales_EC_set_2 = enterobacteriales_EC_set_2.difference(
        gammaproteobacteria_EC_set_2)

    result2 = len(only_enterobacteriales_EC_set_2)
    print(str(result2) + ' results')
    self.assertEqual(result2, result)

    # --- Both variants must yield exactly the same EC numbers.
    differing_ecs = sorted(
        str(ec) for ec in only_enterobacteriales_EC_set_2.symmetric_difference(
            only_enterobacteriales_EC_set))

    result3 = len(differing_ecs)
    print(str(result3) + ' results')
    # Print the mismatching EC numbers *before* asserting: once the assertion
    # fails nothing after it runs, so printing afterwards could never show
    # the diagnostic output when it is actually needed.
    for ecString in differing_ecs:
        print(ecString)
    self.assertEqual(result3, 0)
'pha', 'pin', 'plu', 'ppr', 'rma', 'saz', 'sde', 'sdn', 'shm', 'tcx', 'vfi', 'vvu', 'xca' ] organisms = representativeOrganisms output.append('Representative:') elif i == 2: #- 2. get group of organisms 'Gammaproteobacteria', excluding unclassified organisms = taxonomy.getOrganismAbbreviationsByPath( 'Gammaproteobacteria', exceptPaths='unclassified', oneOrganismPerSpecies=False) output.append('\nGammaproteobacteria without unclassified:') group = Organism.Group(organisms) #- REPEAT for varying majority-percentages: for percentage in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 1]: #- calculate EC numbers occuring in group's core metabolism ourECnumbers = group.majorityEcGraph( majorityPercentage=percentage, noMultifunctional=False).getECs() #- reduce set of EC numbers to first three levels ourECnumbers = EcNumber.insertWildcards(ourECnumbers, keepLevels=3, allowHigherWildcards=False) #- overlap Poot-Hernandez' set with ours and print amount of EC numbers inside the intersection and falling off either side
""" from FEV_KEGG.Evolution.Events import SimpleGeneDuplication, ChevronGeneDuplication, NeofunctionalisedECs,\ NeofunctionalisedEnzymes from FEV_KEGG.Evolution.Taxonomy import NCBI import FEV_KEGG.KEGG.Organism as Organism if __name__ == '__main__': output = [] #- get NCBI taxonomy tree taxonomy = NCBI.getTaxonomy() #- get group of organisms 'Archaea/Thaumarchaeota' group = Organism.Group( taxonomy.getOrganismAbbreviationsByPath('Archaea/Thaumarchaeota', oneOrganismPerSpecies=False)) #- get supergroup of organisms 'Archaea' supergroup = Organism.Group( taxonomy.getOrganismAbbreviationsByPath('Archaea', oneOrganismPerSpecies=False)) #- calculate new EC numbers occuring in group's core metabolism compared to supergroup's core metabolism newECs = group.consensusEcGraph( noMultifunctional=True).getECs().difference( supergroup.consensusEcGraph(noMultifunctional=True).getECs()) output.append('new EC numbers: ' + str(len(newECs))) #- calculate neofunctionalised EC numbers in group's core metabolism descendantEnzymeGraph = group.collectiveEnzymeGraphByEcConsensus(
'2.5.1.15', '2.5.1.19', '2.5.1.7', '2.5.1.9', '2.6.1.16', '2.7.1.107', '2.7.1.130', '2.7.1.23', '2.7.1.24', '2.7.1.26', '2.7.1.33', '2.7.2.3', '2.7.4.6', '2.7.4.8', '2.7.4.9', '2.7.6.3', '2.7.7.18', '2.7.7.2', '2.7.7.23', '2.7.7.27', '2.7.7.3', '2.7.7.38', '2.7.7.41', '2.7.8.5', '2.7.8.8', '3.1.3.45', '3.5.4.16', '3.5.4.25', '3.5.4.26', '3.6.1.1', '3.6.1.34', '3.6.1.45', '4.1.1.36', '4.1.1.65', '4.1.2.13', '4.1.2.16', '4.1.2.25', '4.2.1.10', '4.2.1.11', '4.6.1.3', '4.6.1.4', '5.1.1.3', '5.3.1.1', '5.3.1.13', '6.3.2.12', '6.3.2.13', '6.3.2.15', '6.3.2.4', '6.3.2.5', '6.3.2.8', '6.3.2.9' ] theirECnumbers = set() for string in theirECnumberStrings: theirECnumbers.add(EcNumber(string)) #- get group of organisms 'Escherichia coli' eco = Organism.Organism('eco') #- calculate EC numbers occuring in eco's core metabolism ourECnumbersWithWildcard = eco.substanceEcGraph( noMultifunctional=True).getECs() ourECnumbers = EcNumber.removeWildcards(ourECnumbersWithWildcard) #- overlap Almaas' set with ours and print amount of EC numbers inside the intersection and falling off either side onlyInTheirs = theirECnumbers.difference(ourECnumbers) inBoth = theirECnumbers.intersection(ourECnumbers) onlyInOurs = ourECnumbers.difference(theirECnumbers) output.append( str(len(onlyInTheirs)) + '\t' + str(len(inBoth)) + '\t' + str(len(onlyInOurs)))
'2.7.4.6', '2.7.4.8', '2.7.4.9', '2.7.6.3', '2.7.7.18', '2.7.7.2', '2.7.7.23', '2.7.7.27', '2.7.7.3', '2.7.7.38', '2.7.7.41', '2.7.8.5', '2.7.8.8', '3.1.3.45', '3.5.4.16', '3.5.4.25', '3.5.4.26', '3.6.1.1', '3.6.1.34', '3.6.1.45', '4.1.1.36', '4.1.1.65', '4.1.2.13', '4.1.2.16', '4.1.2.25', '4.2.1.10', '4.2.1.11', '4.6.1.3', '4.6.1.4', '5.1.1.3', '5.3.1.1', '5.3.1.13', '6.3.2.12', '6.3.2.13', '6.3.2.15', '6.3.2.4', '6.3.2.5', '6.3.2.8', '6.3.2.9' ] theirECnumbers = set() for string in theirECnumberStrings: theirECnumbers.add(EcNumber(string)) #- get group of organisms 'Escherichia coli' taxonomy = NCBI.getTaxonomy() group = Organism.Group( taxonomy.getOrganismAbbreviationsByPath('Escherichia coli', oneOrganismPerSpecies=False)) #- REPEAT for varying majority-percentages: for percentage in [100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 1]: #- calculate EC numbers occuring in group's core metabolism ourECnumbersWithWildcard = group.majorityEcGraph( majorityPercentage=percentage, noMultifunctional=True).getECs() ourECnumbers = EcNumber.removeWildcards(ourECnumbersWithWildcard) #- overlap Almaas' set with ours and print amount of EC numbers inside the intersection and falling off either side onlyInTheirs = theirECnumbers.difference(ourECnumbers) inBoth = theirECnumbers.intersection(ourECnumbers) onlyInOurs = ourECnumbers.difference(theirECnumbers)