# Example #1
# 0
# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.taxa import Taxonomy
import csv, sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# Taxonomies to tabulate, one (short name, taxonomy object, display label)
# tuple per source.  Every taxonomy is loaded eagerly when this module runs.
taxonomies = [('fung', load_fung(), 'Index Fungorum'),
              ('ncbi', load_ncbi(), 'NCBI'),
              ('gbif', load_gbif(), 'GBIF'),
              ('irmng', load_irmng(), 'IRMNG'),
              ('ott', Taxonomy.getTaxonomy('tax/ott/'), 'OTT 2.9'),
          ]

def main():
    """Read the taxon names from 'order-counts-orders.csv' and tabulate them.

    The first CSV row is treated as a header and skipped.  The first column
    of every remaining non-empty row is appended to a list that starts with
    'Fungi', and that list is handed to write_counts().
    """
    taxa = ['Fungi']
    # 'with' closes the input file even if reading or parsing fails.
    with open('order-counts-orders.csv', 'r') as infile:
        reader = csv.reader(infile)
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        # csv.reader yields [] for blank lines; skip them instead of
        # failing with IndexError on row[0].
        taxa.extend(row[0] for row in reader if row)

    write_counts(taxa)

def write_counts(taxa):
    """Open 'order-counts.csv' for writing and build the CSV header row.

    The header starts with 'order' and gains three columns for each entry
    of the module-level `taxonomies` list: '<label> bin', '<label> sp' and
    '<label> tip'.

    NOTE(review): the body visible here stops once the header is built --
    the header is never written, `taxa` is never used and the output file
    is never closed.  The rest of the function appears to be missing;
    confirm against the original script.
    """
    outfile = open('order-counts.csv', 'w')
    writer = csv.writer(outfile)
    header = ['order']
    # `name` and `taxonomy` are unpacked but only `label` is used here.
    for (name, taxonomy, label) in taxonomies:
        header += [label + ' bin', label + ' sp', label + ' tip']
def prepare_gbif(ott):
    """Load the GBIF taxonomy, add it to `ott` as a source, and patch it up.

    After loading, some GBIF taxa are hidden (Viruses; the descendants of
    Fungi, Bacteria and Archaea), and a sequence of hand-written
    ott.same() / ott.notSame() directives is issued for taxa where,
    according to the comments below, automatic alignment makes the wrong
    choice.

    ott: the taxonomy being assembled; it receives the GBIF source and
        all of the alignment directives.

    Returns the loaded GBIF taxonomy object.

    NOTE(review): `taxonomies` is used here as a module
    (taxonomies.load_gbif); confirm it is imported as such in the file
    this function belongs to.
    """

    gbif = taxonomies.load_gbif()
    ott.addSource(gbif)

    gbif.taxon('Viruses').hide()

    # Fungi suppressed at David Hibbett's request
    gbif.taxon('Fungi').hideDescendantsToRank('species')

    # Suppressed at Laura Katz's request
    gbif.taxon('Bacteria','life').hideDescendants()
    gbif.taxon('Archaea','life').hideDescendants()

    # - Alignment -

    #ott.same(gbif.taxon('Cyanobacteria'), silva.taxon('Cyanobacteria','Cyanobacteria')) #'D88288/#3'

    # Automatic alignment makes the wrong choice for this one
    # ott.same(ncbi.taxon('5878'), gbif.taxon('10'))    # Ciliophora
    ott.same(ott.taxon('Ciliophora', 'Alveolata'), gbif.taxon('10'))  # in Protozoa
    # Not needed?
    # ott.same(ott.taxon('Ciliophora', 'Ascomycota'), gbif.taxon('3269382')) # in Fungi

    # Automatic alignment makes the wrong choice for this one
    # NCBI says ncbi:29178 is in Rhizaria in Eukaryota and contains Allogromida (which is not in GBIF)
    # OTT 2.8 has 936399 = in Retaria (which isn't in NCBI) extinct_inherited ? - no good.
    # GBIF 389 is in Protozoa... but it contains nothing!!  No way to identify it other than by id.
    #   amoeboid ...
    ott.same(ott.taxon('Foraminifera', 'Rhizaria'), gbif.taxon('389'))  # Foraminifera gbif:4983431

    # Tetrasphaera is a messy multi-way homonym
    #### Check: was ncbi.taxon
    ott.same(ott.taxon('Tetrasphaera','Intrasporangiaceae'), gbif.taxon('Tetrasphaera','Intrasporangiaceae'))

    # Bad alignments to NCBI
    # #### THESE NEED TO BE CHECKED - was ncbi.taxon

    # Labyrinthomorpha (synonym for Labyrinthulomycetes)
    # No longer in GBIF... the one in IRMNG is a Cambrian sponge-like thing
    # ott.notSame(ott.taxon('Labyrinthomorpha', 'Stramenopiles'), gbif.taxon('Labyrinthomorpha'))

    # ott.notSame(ott.taxon('Ophiurina', 'Echinodermata'), gbif.taxon('Ophiurina','Ophiurinidae'))
    #  taken care of in taxonomies.py

    # There is a test for this.  The GBIF taxon no longer exists.
    # ott.notSame(ott.taxon('Rhynchonelloidea', 'Brachiopoda'), gbif.taxon('Rhynchonelloidea'))

    # There are tests.  Seems OK
    ott.notSame(ott.taxonThatContains('Neoptera', 'Lepidoptera'), gbif.taxon('Neoptera', 'Diptera'))

    # ott.notSame(gbif.taxon('Tipuloidea', 'Chiliocyclidae'), ott.taxon('Tipuloidea', 'Diptera')) # genus Tipuloidea
    #  taken care of in taxonomies.py
    # ### CHECK: was silva.taxon
    # SILVA = GN013951 = Tetrasphaera (bacteria)

    ott.notSame(ott.taxon('Tetrasphaera', 'Intrasporangiaceae'),
                gbif.taxon('Gorkadinium', 'Dinophyta')) # = Tetrasphaera in Protozoa

    # Rick Ree 2014-03-28 https://github.com/OpenTreeOfLife/reference-taxonomy/issues/37
    # ### CHECK: was ncbi.taxon
    ott.same(ott.taxon('Calothrix', 'Rivulariaceae'), gbif.taxon('Calothrix', 'Rivulariaceae'))
    ott.same(ott.taxon('Chlorella', 'Chlorellaceae'), gbif.taxon('Chlorella', 'Chlorellaceae'))
    ott.same(ott.taxon('Myrmecia', 'Microthamniales'), gbif.taxon('Myrmecia', 'Microthamniales'))

    # JAR 2014-04-18 attempt to resolve ambiguous alignment of
    # Trichosporon in IF and GBIF based on common member
    # ott.same(fungorum.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'),
    #          gbif.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'))
    # doesn't work.  brute force.
    # was: ott.same(fungorum.taxon('10296'), gbif.taxon('2518163')) = ott:364222
    ##### RECOVER THIS IF NECESSARY
    # ott.same(fungi.taxon('10296'), ott.taxon('364222'))

    # Obviously the same genus, can't tell what's going on
    # if:17806 = Hygrocybe = ott:282216
    # #### CHECK: was fungi
    ott.same(gbif.taxon('Hygrocybe'), ott.taxon('Hygrocybe', 'Hygrophoraceae'))

    # JAR 2014-04-23 More sample contamination in SILVA 115
    # redundant
    # ott.same(gbif.taxon('Lamprospora'), fungi.taxon('Lamprospora'))

    # JAR 2014-04-23 IF update fallout
    # ### CHECK: was ncbi.taxon
    ott.same(gbif.taxonThatContains('Penicillium', 'Penicillium expansum'),
             ott.taxonThatContains('Penicillium', 'Penicillium expansum'))

    # https://github.com/OpenTreeOfLife/feedback/issues/45
    # ### CHECK: was ncbi.taxon
    # Disabled: the 'if False' guard means this directive is never executed.
    if False:
        ott.same(gbif.taxon('Choanoflagellida'),
                 ott.taxon('Choanoflagellida', 'Opisthokonta'))

    return gbif
# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.smasher import Taxonomy
import csv, sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# Taxonomies to tabulate, one (short name, taxonomy object, display label)
# tuple per source.  Every taxonomy is loaded eagerly when this module runs.
taxonomies = [
    ("fung", load_fung(), "Index Fungorum"),
    ("ncbi", load_ncbi(), "NCBI"),
    ("gbif", load_gbif(), "GBIF"),
    ("irmng", load_irmng(), "IRMNG"),
    ("ott", Taxonomy.getTaxonomy("tax/ott/"), "OTT 2.9"),
]


def main():
    """Read the taxon names from "order-counts-orders.csv" and tabulate them.

    The first CSV row is treated as a header and skipped.  The first column
    of every remaining non-empty row is appended to a list that starts with
    "Fungi", and that list is handed to write_counts().
    """
    taxa = ["Fungi"]
    # "with" closes the input file even if reading or parsing fails.
    with open("order-counts-orders.csv", "r") as infile:
        reader = csv.reader(infile)
        # Skip the header row; the default keeps an empty file from raising.
        next(reader, None)
        # csv.reader yields [] for blank lines; skip them instead of
        # failing with IndexError on row[0].
        taxa.extend(row[0] for row in reader if row)

    write_counts(taxa)


def write_counts(taxa):
    """Open "order-counts.csv" for writing and create a csv writer on it.

    NOTE(review): the body visible here stops right after the writer is
    created -- nothing is written, `taxa` is never used and the output file
    is never closed.  The rest of the function appears to be missing;
    confirm against the original script.
    """
    outfile = open("order-counts.csv", "w")
    writer = csv.writer(outfile)
# Example #4
# 0
def prepare_gbif(ott):
    """Load the GBIF taxonomy, point it at `ott`, and patch it up.

    After loading, `ott` is registered through gbif.setTarget(), some GBIF
    taxa are hidden (Viruses; the descendants of Fungi, Bacteria and
    Archaea), and a sequence of hand-written ott.same() / ott.notSame()
    directives is issued for taxa where, according to the comments below,
    automatic alignment makes the wrong choice.

    ott: the taxonomy being assembled; it is set as the GBIF taxonomy's
        target and receives all of the alignment directives.

    Returns the loaded GBIF taxonomy object.

    NOTE(review): `taxonomies` is used here as a module
    (taxonomies.load_gbif); confirm it is imported as such in the file
    this function belongs to.
    """

    gbif = taxonomies.load_gbif()
    gbif.setTarget(ott)

    gbif.taxon('Viruses').hide()

    # Fungi suppressed at David Hibbett's request
    gbif.taxon('Fungi').hideDescendantsToRank('species')

    # Suppressed at Laura Katz's request
    gbif.taxon('Bacteria', 'life').hideDescendants()
    gbif.taxon('Archaea', 'life').hideDescendants()

    # - Alignment -

    #ott.same(gbif.taxon('Cyanobacteria'), silva.taxon('Cyanobacteria','Cyanobacteria')) #'D88288/#3'

    # Automatic alignment makes the wrong choice for this one
    # ott.same(ncbi.taxon('5878'), gbif.taxon('10'))    # Ciliophora
    ott.same(ott.taxon('Ciliophora', 'Alveolata'),
             gbif.taxon('10'))  # in Protozoa
    # Not needed?
    # ott.same(ott.taxon('Ciliophora', 'Ascomycota'), gbif.taxon('3269382')) # in Fungi

    # Automatic alignment makes the wrong choice for this one
    # NCBI says ncbi:29178 is in Rhizaria in Eukaryota and contains Allogromida (which is not in GBIF)
    # OTT 2.8 has 936399 = in Retaria (which isn't in NCBI) extinct_inherited ? - no good.
    # GBIF 389 is in Protozoa... but it contains nothing!!  No way to identify it other than by id.
    #   amoeboid ...
    ott.same(ott.taxon('Foraminifera', 'Rhizaria'),
             gbif.taxon('389'))  # Foraminifera gbif:4983431

    # Tetrasphaera is a messy multi-way homonym
    #### Check: was ncbi.taxon
    ott.same(ott.taxon('Tetrasphaera', 'Intrasporangiaceae'),
             gbif.taxon('Tetrasphaera', 'Intrasporangiaceae'))

    # Bad alignments to NCBI
    # #### THESE NEED TO BE CHECKED - was ncbi.taxon

    # Labyrinthomorpha (synonym for Labyrinthulomycetes)
    # No longer in GBIF... the one in IRMNG is a Cambrian sponge-like thing
    # ott.notSame(ott.taxon('Labyrinthomorpha', 'Stramenopiles'), gbif.taxon('Labyrinthomorpha'))

    # ott.notSame(ott.taxon('Ophiurina', 'Echinodermata'), gbif.taxon('Ophiurina','Ophiurinidae'))
    #  taken care of in taxonomies.py

    # There is a test for this.  The GBIF taxon no longer exists.
    # ott.notSame(ott.taxon('Rhynchonelloidea', 'Brachiopoda'), gbif.taxon('Rhynchonelloidea'))

    # There are tests.  Seems OK
    ott.notSame(ott.taxonThatContains('Neoptera', 'Lepidoptera'),
                gbif.taxon('Neoptera', 'Diptera'))

    # ott.notSame(gbif.taxon('Tipuloidea', 'Chiliocyclidae'), ott.taxon('Tipuloidea', 'Diptera')) # genus Tipuloidea
    #  taken care of in taxonomies.py
    # ### CHECK: was silva.taxon
    # SILVA = GN013951 = Tetrasphaera (bacteria)

    ott.notSame(ott.taxon('Tetrasphaera', 'Intrasporangiaceae'),
                gbif.taxon('Gorkadinium',
                           'Dinophyta'))  # = Tetrasphaera in Protozoa

    # Rick Ree 2014-03-28 https://github.com/OpenTreeOfLife/reference-taxonomy/issues/37
    # ### CHECK: was ncbi.taxon
    ott.same(ott.taxon('Calothrix', 'Rivulariaceae'),
             gbif.taxon('Calothrix', 'Rivulariaceae'))
    ott.same(ott.taxon('Chlorella', 'Chlorellaceae'),
             gbif.taxon('Chlorella', 'Chlorellaceae'))
    ott.same(ott.taxon('Myrmecia', 'Microthamniales'),
             gbif.taxon('Myrmecia', 'Microthamniales'))

    # JAR 2014-04-18 attempt to resolve ambiguous alignment of
    # Trichosporon in IF and GBIF based on common member
    # ott.same(fungorum.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'),
    #          gbif.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'))
    # doesn't work.  brute force.
    # was: ott.same(fungorum.taxon('10296'), gbif.taxon('2518163')) = ott:364222
    ##### RECOVER THIS IF NECESSARY
    # ott.same(fungi.taxon('10296'), ott.taxon('364222'))

    # Obviously the same genus, can't tell what's going on
    # if:17806 = Hygrocybe = ott:282216
    # #### CHECK: was fungi
    ott.same(gbif.taxon('Hygrocybe'), ott.taxon('Hygrocybe', 'Hygrophoraceae'))

    # JAR 2014-04-23 More sample contamination in SILVA 115
    # redundant
    # ott.same(gbif.taxon('Lamprospora'), fungi.taxon('Lamprospora'))

    # JAR 2014-04-23 IF update fallout
    # ### CHECK: was ncbi.taxon
    ott.same(gbif.taxonThatContains('Penicillium', 'Penicillium expansum'),
             ott.taxonThatContains('Penicillium', 'Penicillium expansum'))

    # https://github.com/OpenTreeOfLife/feedback/issues/45
    # ### CHECK: was ncbi.taxon
    # Disabled: the 'if False' guard means this directive is never executed.
    if False:
        ott.same(gbif.taxon('Choanoflagellida'),
                 ott.taxon('Choanoflagellida', 'Opisthokonta'))

    return gbif