def test():
    """Exercise the claim helpers against a three-taxon toy taxonomy.

    Creates a fresh taxonomy containing 'Mouse', 'Dog' and 'Mammal',
    applies ``new_claims`` with make_claims, checks ``expectations`` and
    then ``new_claims`` with test_claims, and finally hands ``surprises``
    to find_surprises.

    Returns:
        The combined outcome of both test_claims calls.  Previously the
        result for ``expectations`` was overwritten by the result for
        ``new_claims``, so a failed expectation could never show up in the
        return value.
    """
    tax = Taxonomy.newTaxonomy()
    tax.newTaxon('Mouse', 'species', 'about:blank')
    tax.newTaxon('Dog', 'species', 'about:blank')
    tax.newTaxon('Mammal', 'class', 'about:blank')

    # Claims that are asserted into the taxonomy before anything is checked.
    new_claims = [
        Has_child('Mammal', 'Mouse', 'about:blank'),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Muus', 'Mouse', True),
    ]

    # Claims checked with test_claims once new_claims have been made.
    expectations = [
        Has_child('Mammal', 'Mouse'),
        Has_child('Mammal', With_ancestor('Mouse', 'Mammal')),
        Has_child(With_descendant('Mammal', 'Mouse'), 'Mouse'),
        Whether_same('Mammal', 'Mammal', True),
        Whether_same('Mammal', 'Meemmal', False),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Mus', 'Mouse', True),
        Whether_same('Mus', 'Horse', False),
    ]

    # Claims passed to find_surprises (presumably ones that should NOT
    # hold -- confirm against find_surprises).
    surprises = [
        Has_child('Mouse', 'Mammal'),
        Has_child('Mammal', 'Dog'),  # fails
        Whether_same('Mammal', 'Meemmal', True),
        Whether_same('Mammal', 'Mammal', False),
    ]

    make_claims(tax, new_claims)

    # Run both checks unconditionally, in the original order, so any
    # reporting they do is unchanged; only the combination of results is new.
    # NOTE(review): assumes test_claims returns a truthy value on success.
    expectations_ok = test_claims(tax, expectations)
    new_claims_ok = test_claims(tax, new_claims)

    find_surprises(tax, surprises)
    return expectations_ok and new_claims_ok
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.smasher import Taxonomy

# Start from an empty model taxonomy; the sources below are merged into it.
tax = Taxonomy.newTaxonomy()

# Homonym-resolution skeleton (not really used here).
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
tax.setSkeleton(skel)

# NCBI subset.  analyzeOTUs sets flags on questionable taxa
# ("unclassified" and so on) to allow the option of suppression downstream.
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# GBIF subset.  analyzeMajorRankConflicts sets the "major_rank_conflict"
# flag when intermediate ranks are missing (e.g. a family that's a child
# of a class).
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system with tab-delimited files.
tax.edit('t/edits/')

# Example of referring to a taxon.
fam = tax.taxon('Phellinaceae')
from org.opentreeoflife.smasher import Taxonomy
from org.opentreeoflife.smasher import Reportx
import taxonomies

# Empty taxonomy carrying the skeleton; report() uses it to mark divisions.
ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)


def report(tax, tag):
    """Mark and check divisions on `tax`, then write its MRCA report.

    The report goes to '<tag>-mrca-report.tsv'.
    """
    ott.markDivisions(tax)
    # Reportx.bogotypes(tax)
    taxonomies.checkDivisions(tax)
    report_path = tag + '-mrca-report.tsv'
    Reportx.report(tax, report_path)


# Hand-operated switch: only the IRMNG report runs at present.  The else
# branch holds the full set of reports and is currently disabled.
if True:
    report(taxonomies.loadIrmng(), 'irmng')
else:
    silva = taxonomies.loadSilva()
    ott.notSame(silva.taxon('Ctenophora', 'Coscinodiscophytina'),
                skel.taxon('Ctenophora'))
    report(silva, 'silva')

    report(taxonomies.loadH2007(), 'h2007')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
    report(taxonomies.loadOtt(), 'ott')
# Jython script to build the Open Tree reference taxonomy
# coding=utf-8

# Unless specified otherwise issues are in the reference-taxonomy repo:
# https://github.com/OpenTreeOfLife/reference-taxonomy/issues/...

import sys

from org.opentreeoflife.smasher import Taxonomy
import taxonomies

# feed/misc/ has to be on the path before chromista_spreadsheet is imported.
sys.path.append("feed/misc/")
from chromista_spreadsheet import fixChromista

ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)

# ----- SILVA microbial taxonomy -----

def doSilva():
    """Load SILVA and rename its parent/child homonyms."""
    silva = taxonomies.loadSilva()

    # - Deal with parent/child homonyms in SILVA -
    # Arbitrary choices here to eliminate ambiguities down the road when
    # NCBI gets merged.  (If the homonym is retained, then the merge
    # algorithm will have no way to choose between them, and refuse to
    # match either.  It will then create a third homonym.)
    #
    # Each row is (name, context, replacement name).  The rows are applied
    # strictly in order: the second lookup relies on the name introduced
    # by the first rename.
    homonym_renames = [
        ('Intramacronucleata', 'Intramacronucleata', 'Intramacronucleata inf.'),
        ('Spirotrichea', 'Intramacronucleata inf.', 'Spirotrichea inf.'),
        ('Cyanobacteria', 'Bacteria', 'Cyanobacteria sup.'),
    ]
    for (name, context, replacement) in homonym_renames:
        silva.taxon(name, context).rename(replacement)
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.smasher import Taxonomy

# Start from an empty model taxonomy; the sources below are merged into it.
tax = Taxonomy.newTaxonomy()

# Homonym-resolution skeleton (not really used here).
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
tax.setSkeleton(skel)

# NCBI subset.  analyzeOTUs sets flags on questionable taxa
# ("unclassified" and so on) to allow the option of suppression downstream.
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# GBIF subset.  analyzeMajorRankConflicts sets the "major_rank_conflict"
# flag when intermediate ranks are missing (e.g. a family that's a child
# of a class).
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system with tab-delimited files.
tax.edit('t/edits/')

# Example of referring to a taxon
def create_ott():
    """Assemble the OTT taxonomy from its sources and assign OTT ids.

    The source taxonomies are prepared and absorbed in a fixed order
    (silva, h2007, fungi, lamiales, malacostraca, ncbi,
    worms_sans_malacostraca, fungorum_sans_fungi, gbif, irmng).  The merged
    result is then patched and deforested, a number of ids are forced by
    hand, and the remaining ids are assigned from the previous OTT version
    in 'tax/prev_ott/'.

    Returns:
        The assembled taxonomy.
    """
    ott = Taxonomy.newTaxonomy()

    # There ought to be tests for all of these...
    for name in names_of_interest:
        ott.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    # ---- Merge the sources.  The order of the absorb calls matters. ----
    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)
    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)
    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)
    get_default_extinct_info_from_gbif(gbif, ott)
    check_invariants(ott)

    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.
    # Each row is (name, second argument to ott.taxon, id to force).
    forced_ids = [
        ('Tipuloidea', 'Diptera', '722875'),
        ('Saccharomycetes', 'Saccharomycotina', '989999'),
        ('Phaeosphaeria', 'Ascomycota', '5486272'),
        ('Synedra acus', 'Eukaryota', '992764'),
        ('Epiphloea', 'Halymeniaceae', '5342325'),
        ('Hessea', 'Archaeplastida', '600099'),
        ('Morganella', 'Arthropoda', '6400'),
        ('Rhynchonelloidea', 'Rhynchonellidae', '5316010'),
        ('Epiphloea', 'Lichinales', '5342482'),
        ('Morganella', 'Fungi', '973932'),
        ('Parmeliaceae', 'Lecanorales', '305904'),
    ]
    # The loop variable used to be called `id`, which shadowed the builtin.
    for (inf, sup, forced_id) in forced_ids:
        tax = ott.taxon(inf, sup)
        if tax is not None:
            tax.setId(forced_id)

    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939')

    # NCBI
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n is None:
            print('** No NCBI taxon %s - %s' % (ncbi_id, name))
            continue
        im = ott.image(n)
        if im is None:
            print('** NCBI %s not mapped - %s' % (ncbi_id, name))
        else:
            im.setId(ott_id)

    # NOTE(review): unlike the NCBI loop above, the two ott.image(...) calls
    # below are not checked for None before setId is called -- confirm that
    # failing loudly here is intended.

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Force the id for Trichosporon when the fungi source has it
    trich = fungi.maybeTaxon('Trichosporon')
    if trich is not None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
def create_ott():
    """Assemble the OTT taxonomy from its sources and assign OTT ids.

    The source taxonomies are prepared and absorbed in a fixed order
    (silva, h2007, fungi, lamiales, malacostraca, ncbi,
    worms_sans_malacostraca, fungorum_sans_fungi, gbif, irmng).  The merged
    result is then patched and deforested, a number of ids are forced by
    hand, and the remaining ids are assigned from the previous OTT version
    in 'tax/prev_ott/'.

    Returns:
        The assembled taxonomy.
    """
    ott = Taxonomy.newTaxonomy()

    # There ought to be tests for all of these...
    for name in names_of_interest:
        ott.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    # ---- Merge the sources.  The order of the absorb calls matters. ----
    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)
    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)
    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)
    get_default_extinct_info_from_gbif(gbif, ott)
    check_invariants(ott)

    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.
    # Each row is (name, second argument to ott.taxon, id to force).
    forced_ids = [
        ('Tipuloidea', 'Diptera', '722875'),
        ('Saccharomycetes', 'Saccharomycotina', '989999'),
        ('Phaeosphaeria', 'Ascomycota', '5486272'),
        ('Synedra acus', 'Eukaryota', '992764'),
        ('Epiphloea', 'Halymeniaceae', '5342325'),
        ('Hessea', 'Archaeplastida', '600099'),
        ('Morganella', 'Arthropoda', '6400'),
        ('Rhynchonelloidea', 'Rhynchonellidae', '5316010'),
        ('Epiphloea', 'Lichinales', '5342482'),
        ('Morganella', 'Fungi', '973932'),
        ('Parmeliaceae', 'Lecanorales', '305904'),
    ]
    # The loop variable used to be called `id`, which shadowed the builtin.
    for (inf, sup, forced_id) in forced_ids:
        tax = ott.taxon(inf, sup)
        if tax is not None:
            tax.setId(forced_id)

    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939')

    # NCBI
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n is None:
            print('** No NCBI taxon %s - %s' % (ncbi_id, name))
            continue
        im = ott.image(n)
        if im is None:
            print('** NCBI %s not mapped - %s' % (ncbi_id, name))
        else:
            im.setId(ott_id)

    # NOTE(review): unlike the NCBI loop above, the two ott.image(...) calls
    # below are not checked for None before setId is called -- confirm that
    # failing loudly here is intended.

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Force the id for Trichosporon when the fungi source has it
    trich = fungi.maybeTaxon('Trichosporon')
    if trich is not None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott