# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.taxa import Taxonomy
import csv
import sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# One (short name, taxonomy object, column label) triple per source.
# NOTE: every taxonomy is loaded eagerly, at module import time.
taxonomies = [('fung', load_fung(), 'Index Fungorum'),
              ('ncbi', load_ncbi(), 'NCBI'),
              ('gbif', load_gbif(), 'GBIF'),
              ('irmng', load_irmng(), 'IRMNG'),
              ('ott', Taxonomy.getTaxonomy('tax/ott/'), 'OTT 2.9'),
              ]


def main():
    """Read the taxon names to report on and hand them to write_counts.

    The names come from the first column of 'order-counts-orders.csv'
    (its first row is treated as a header and skipped); 'Fungi' is always
    placed at the front of the list.
    """
    taxa = ['Fungi']
    # 'with' guarantees the input file is closed even if reading fails.
    with open('order-counts-orders.csv', 'r') as infile:
        reader = csv.reader(infile)
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            # csv yields [] for blank lines; there is no name to take there.
            if row:
                taxa.append(row[0])
    write_counts(taxa)


def write_counts(taxa):
    # Output goes to 'order-counts.csv'; the header gets three columns
    # ('bin', 'sp', 'tip') per source taxonomy after the 'order' column.
    # NOTE(review): the remainder of this function is not visible in this
    # chunk -- nothing is written to or closed on outfile in what is shown.
    outfile = open('order-counts.csv', 'w')
    writer = csv.writer(outfile)
    header = ['order']
    for (name, taxonomy, label) in taxonomies:
        header += [label + ' bin', label + ' sp', label + ' tip']
def prepare_gbif(ott):
    """Load GBIF, add it to `ott` as a source, and apply hand-made fixes.

    First hides the GBIF subtrees that were requested to be suppressed,
    then issues explicit same/notSame directives on `ott` for taxa where
    the comments below record that automatic alignment goes wrong.

    ott: the taxonomy being assembled; receives the GBIF source and all
         alignment directives.
    Returns the GBIF taxonomy object.
    """
    gbif = taxonomies.load_gbif()
    ott.addSource(gbif)

    # ---- Suppression ----

    gbif.taxon('Viruses').hide()

    # David Hibbett asked for Fungi to be suppressed
    gbif_fungi = gbif.taxon('Fungi')
    gbif_fungi.hideDescendantsToRank('species')

    # Laura Katz asked for these to be suppressed
    for prokaryote in ('Bacteria', 'Archaea'):
        gbif.taxon(prokaryote, 'life').hideDescendants()

    # ---- Alignment ----

    #ott.same(gbif.taxon('Cyanobacteria'), silva.taxon('Cyanobacteria','Cyanobacteria')) #'D88288/#3'

    # Ciliophora: automatic alignment picks the wrong node.
    # was: ott.same(ncbi.taxon('5878'), gbif.taxon('10'))
    ott_ciliophora = ott.taxon('Ciliophora', 'Alveolata')
    gbif_ciliophora = gbif.taxon('10')  # in Protozoa
    ott.same(ott_ciliophora, gbif_ciliophora)
    # Not needed?
    # ott.same(ott.taxon('Ciliophora', 'Ascomycota'), gbif.taxon('3269382')) # in Fungi

    # Foraminifera: automatic alignment picks the wrong node here too.
    # NCBI says ncbi:29178 is in Rhizaria in Eukaryota and contains
    # Allogromida (which is not in GBIF).
    # OTT 2.8 has 936399 = in Retaria (which isn't in NCBI)
    # extinct_inherited ? - no good.
    # GBIF 389 is in Protozoa... but it contains nothing!!  The id is the
    # only way to identify it.   amoeboid ...
    ott_forams = ott.taxon('Foraminifera', 'Rhizaria')
    gbif_forams = gbif.taxon('389')  # Foraminifera gbif:4983431
    ott.same(ott_forams, gbif_forams)

    # Tetrasphaera is a messy multi-way homonym
    #### Check: was ncbi.taxon
    ott_tetrasphaera = ott.taxon('Tetrasphaera', 'Intrasporangiaceae')
    gbif_tetrasphaera = gbif.taxon('Tetrasphaera', 'Intrasporangiaceae')
    ott.same(ott_tetrasphaera, gbif_tetrasphaera)

    # Bad alignments to NCBI
    # #### THESE NEED TO BE CHECKED - was ncbi.taxon

    # Labyrinthomorpha (synonym for Labyrinthulomycetes)
    # No longer in GBIF... the one in IRMNG is a Cambrian sponge-like thing
    # ott.notSame(ott.taxon('Labyrinthomorpha', 'Stramenopiles'), gbif.taxon('Labyrinthomorpha'))

    # ott.notSame(ott.taxon('Ophiurina', 'Echinodermata'), gbif.taxon('Ophiurina','Ophiurinidae'))
    #  taken care of in taxonomies.py

    # There is a test for this.  The GBIF taxon no longer exists.
    # ott.notSame(ott.taxon('Rhynchonelloidea', 'Brachiopoda'), gbif.taxon('Rhynchonelloidea'))

    # Neoptera: there are tests.  Seems OK
    ott_neoptera = ott.taxonThatContains('Neoptera', 'Lepidoptera')
    gbif_neoptera = gbif.taxon('Neoptera', 'Diptera')
    ott.notSame(ott_neoptera, gbif_neoptera)

    # ott.notSame(gbif.taxon('Tipuloidea', 'Chiliocyclidae'), ott.taxon('Tipuloidea', 'Diptera')) # genus Tipuloidea
    #  taken care of in taxonomies.py

    # ### CHECK: was silva.taxon
    # SILVA = GN013951 = Tetrasphaera (bacteria)
    # The OTT node is looked up again here, as in the original sequence.
    ott_tetrasphaera_again = ott.taxon('Tetrasphaera', 'Intrasporangiaceae')
    gbif_gorkadinium = gbif.taxon('Gorkadinium', 'Dinophyta')  # = Tetrasphaera in Protozoa
    ott.notSame(ott_tetrasphaera_again, gbif_gorkadinium)

    # Rick Ree 2014-03-28 https://github.com/OpenTreeOfLife/reference-taxonomy/issues/37
    # ### CHECK: was ncbi.taxon
    ott_calothrix = ott.taxon('Calothrix', 'Rivulariaceae')
    gbif_calothrix = gbif.taxon('Calothrix', 'Rivulariaceae')
    ott.same(ott_calothrix, gbif_calothrix)

    ott_chlorella = ott.taxon('Chlorella', 'Chlorellaceae')
    gbif_chlorella = gbif.taxon('Chlorella', 'Chlorellaceae')
    ott.same(ott_chlorella, gbif_chlorella)

    ott_myrmecia = ott.taxon('Myrmecia', 'Microthamniales')
    gbif_myrmecia = gbif.taxon('Myrmecia', 'Microthamniales')
    ott.same(ott_myrmecia, gbif_myrmecia)

    # JAR 2014-04-18 attempt to resolve ambiguous alignment of
    # Trichosporon in IF and GBIF based on common member
    # ott.same(fungorum.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'),
    #          gbif.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'))
    # doesn't work.  brute force.
    # was: ott.same(fungorum.taxon('10296'), gbif.taxon('2518163')) = ott:364222
    ##### RECOVER THIS IF NECESSARY
    # ott.same(fungi.taxon('10296'), ott.taxon('364222'))

    # Hygrocybe: obviously the same genus, can't tell what's going on
    # if:17806 = Hygrocybe = ott:282216
    # #### CHECK: was fungi
    # (GBIF lookup comes first for this directive.)
    gbif_hygrocybe = gbif.taxon('Hygrocybe')
    ott_hygrocybe = ott.taxon('Hygrocybe', 'Hygrophoraceae')
    ott.same(gbif_hygrocybe, ott_hygrocybe)

    # JAR 2014-04-23 More sample contamination in SILVA 115
    # redundant
    # ott.same(gbif.taxon('Lamprospora'), fungi.taxon('Lamprospora'))

    # JAR 2014-04-23 IF update fallout
    # ### CHECK: was ncbi.taxon
    gbif_penicillium = gbif.taxonThatContains('Penicillium', 'Penicillium expansum')
    ott_penicillium = ott.taxonThatContains('Penicillium', 'Penicillium expansum')
    ott.same(gbif_penicillium, ott_penicillium)

    # https://github.com/OpenTreeOfLife/feedback/issues/45
    # ### CHECK: was ncbi.taxon
    # Deliberately switched off; kept so it can be re-enabled.
    if False:
        gbif_choano = gbif.taxon('Choanoflagellida')
        ott_choano = ott.taxon('Choanoflagellida', 'Opisthokonta')
        ott.same(gbif_choano, ott_choano)

    return gbif
# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.smasher import Taxonomy
import csv
import sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# One (short name, taxonomy object, column label) triple per source.
# NOTE: every taxonomy is loaded eagerly, at module import time.
taxonomies = [
    ("fung", load_fung(), "Index Fungorum"),
    ("ncbi", load_ncbi(), "NCBI"),
    ("gbif", load_gbif(), "GBIF"),
    ("irmng", load_irmng(), "IRMNG"),
    ("ott", Taxonomy.getTaxonomy("tax/ott/"), "OTT 2.9"),
]


def main():
    """Read the taxon names to report on and hand them to write_counts.

    The names come from the first column of "order-counts-orders.csv"
    (its first row is treated as a header and skipped); "Fungi" is always
    placed at the front of the list.
    """
    taxa = ["Fungi"]
    # 'with' guarantees the input file is closed even if reading fails.
    with open("order-counts-orders.csv", "r") as infile:
        reader = csv.reader(infile)
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        for row in reader:
            # csv yields [] for blank lines; there is no name to take there.
            if row:
                taxa.append(row[0])
    write_counts(taxa)


def write_counts(taxa):
    # Output goes to "order-counts.csv" through a csv writer.
    # NOTE(review): the remainder of this function is not visible in this
    # chunk -- nothing is written to or closed on outfile in what is shown.
    outfile = open("order-counts.csv", "w")
    writer = csv.writer(outfile)
def prepare_gbif(ott):
    """Load GBIF, point it at `ott` as its target, and apply hand-made fixes.

    First hides the GBIF subtrees that were requested to be suppressed,
    then issues explicit same/notSame directives on `ott` for taxa where
    the comments below record that automatic alignment goes wrong.

    ott: the taxonomy being assembled; set as the GBIF taxonomy's target
         and used for all alignment directives.
    Returns the GBIF taxonomy object.
    """
    gbif = taxonomies.load_gbif()
    gbif.setTarget(ott)

    # ---- Suppression ----

    gbif.taxon('Viruses').hide()

    # David Hibbett asked for Fungi to be suppressed
    gbif_fungi = gbif.taxon('Fungi')
    gbif_fungi.hideDescendantsToRank('species')

    # Laura Katz asked for these to be suppressed
    for prokaryote in ('Bacteria', 'Archaea'):
        gbif.taxon(prokaryote, 'life').hideDescendants()

    # ---- Alignment ----

    #ott.same(gbif.taxon('Cyanobacteria'), silva.taxon('Cyanobacteria','Cyanobacteria')) #'D88288/#3'

    # Ciliophora: automatic alignment picks the wrong node.
    # was: ott.same(ncbi.taxon('5878'), gbif.taxon('10'))
    ott_ciliophora = ott.taxon('Ciliophora', 'Alveolata')
    gbif_ciliophora = gbif.taxon('10')  # in Protozoa
    ott.same(ott_ciliophora, gbif_ciliophora)
    # Not needed?
    # ott.same(ott.taxon('Ciliophora', 'Ascomycota'), gbif.taxon('3269382')) # in Fungi

    # Foraminifera: automatic alignment picks the wrong node here too.
    # NCBI says ncbi:29178 is in Rhizaria in Eukaryota and contains
    # Allogromida (which is not in GBIF).
    # OTT 2.8 has 936399 = in Retaria (which isn't in NCBI)
    # extinct_inherited ? - no good.
    # GBIF 389 is in Protozoa... but it contains nothing!!  The id is the
    # only way to identify it.   amoeboid ...
    ott_forams = ott.taxon('Foraminifera', 'Rhizaria')
    gbif_forams = gbif.taxon('389')  # Foraminifera gbif:4983431
    ott.same(ott_forams, gbif_forams)

    # Tetrasphaera is a messy multi-way homonym
    #### Check: was ncbi.taxon
    ott_tetrasphaera = ott.taxon('Tetrasphaera', 'Intrasporangiaceae')
    gbif_tetrasphaera = gbif.taxon('Tetrasphaera', 'Intrasporangiaceae')
    ott.same(ott_tetrasphaera, gbif_tetrasphaera)

    # Bad alignments to NCBI
    # #### THESE NEED TO BE CHECKED - was ncbi.taxon

    # Labyrinthomorpha (synonym for Labyrinthulomycetes)
    # No longer in GBIF... the one in IRMNG is a Cambrian sponge-like thing
    # ott.notSame(ott.taxon('Labyrinthomorpha', 'Stramenopiles'), gbif.taxon('Labyrinthomorpha'))

    # ott.notSame(ott.taxon('Ophiurina', 'Echinodermata'), gbif.taxon('Ophiurina','Ophiurinidae'))
    #  taken care of in taxonomies.py

    # There is a test for this.  The GBIF taxon no longer exists.
    # ott.notSame(ott.taxon('Rhynchonelloidea', 'Brachiopoda'), gbif.taxon('Rhynchonelloidea'))

    # Neoptera: there are tests.  Seems OK
    ott_neoptera = ott.taxonThatContains('Neoptera', 'Lepidoptera')
    gbif_neoptera = gbif.taxon('Neoptera', 'Diptera')
    ott.notSame(ott_neoptera, gbif_neoptera)

    # ott.notSame(gbif.taxon('Tipuloidea', 'Chiliocyclidae'), ott.taxon('Tipuloidea', 'Diptera')) # genus Tipuloidea
    #  taken care of in taxonomies.py

    # ### CHECK: was silva.taxon
    # SILVA = GN013951 = Tetrasphaera (bacteria)
    # The OTT node is looked up again here, as in the original sequence.
    ott_tetrasphaera_again = ott.taxon('Tetrasphaera', 'Intrasporangiaceae')
    gbif_gorkadinium = gbif.taxon('Gorkadinium', 'Dinophyta')  # = Tetrasphaera in Protozoa
    ott.notSame(ott_tetrasphaera_again, gbif_gorkadinium)

    # Rick Ree 2014-03-28 https://github.com/OpenTreeOfLife/reference-taxonomy/issues/37
    # ### CHECK: was ncbi.taxon
    ott_calothrix = ott.taxon('Calothrix', 'Rivulariaceae')
    gbif_calothrix = gbif.taxon('Calothrix', 'Rivulariaceae')
    ott.same(ott_calothrix, gbif_calothrix)

    ott_chlorella = ott.taxon('Chlorella', 'Chlorellaceae')
    gbif_chlorella = gbif.taxon('Chlorella', 'Chlorellaceae')
    ott.same(ott_chlorella, gbif_chlorella)

    ott_myrmecia = ott.taxon('Myrmecia', 'Microthamniales')
    gbif_myrmecia = gbif.taxon('Myrmecia', 'Microthamniales')
    ott.same(ott_myrmecia, gbif_myrmecia)

    # JAR 2014-04-18 attempt to resolve ambiguous alignment of
    # Trichosporon in IF and GBIF based on common member
    # ott.same(fungorum.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'),
    #          gbif.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'))
    # doesn't work.  brute force.
    # was: ott.same(fungorum.taxon('10296'), gbif.taxon('2518163')) = ott:364222
    ##### RECOVER THIS IF NECESSARY
    # ott.same(fungi.taxon('10296'), ott.taxon('364222'))

    # Hygrocybe: obviously the same genus, can't tell what's going on
    # if:17806 = Hygrocybe = ott:282216
    # #### CHECK: was fungi
    # (GBIF lookup comes first for this directive.)
    gbif_hygrocybe = gbif.taxon('Hygrocybe')
    ott_hygrocybe = ott.taxon('Hygrocybe', 'Hygrophoraceae')
    ott.same(gbif_hygrocybe, ott_hygrocybe)

    # JAR 2014-04-23 More sample contamination in SILVA 115
    # redundant
    # ott.same(gbif.taxon('Lamprospora'), fungi.taxon('Lamprospora'))

    # JAR 2014-04-23 IF update fallout
    # ### CHECK: was ncbi.taxon
    gbif_penicillium = gbif.taxonThatContains('Penicillium', 'Penicillium expansum')
    ott_penicillium = ott.taxonThatContains('Penicillium', 'Penicillium expansum')
    ott.same(gbif_penicillium, ott_penicillium)

    # https://github.com/OpenTreeOfLife/feedback/issues/45
    # ### CHECK: was ncbi.taxon
    # Deliberately switched off; kept so it can be re-enabled.
    if False:
        gbif_choano = gbif.taxon('Choanoflagellida')
        ott_choano = ott.taxon('Choanoflagellida', 'Opisthokonta')
        ott.same(gbif_choano, ott_choano)

    return gbif