def doit():
    """Merge the Ruggiero and OTT taxonomies into one union and dump it.

    Both sources are loaded first; a new union taxonomy (idspace 'ott')
    then absorbs Ruggiero followed by OTT, and the result is dumped to
    scratch/compare_Ruggiero/ with '\\t' as the second dump argument.
    """
    sources = [
        Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug'),
        Taxonomy.getTaxonomy('tax/ott/', 'ott'),
    ]
    merged = UnionTaxonomy.newTaxonomy('ott')
    # Keep the original absorption order: Ruggiero first, then OTT.
    for source in sources:
        merged.absorb(source)
    merged.dump('scratch/compare_Ruggiero/', '\t')
def conflict(spec1, space1, spec2, space2):

    # Reference tree
    ref = Taxonomy.getTaxonomy(spec1, space1)

    # Input tree
    input = Taxonomy.getTaxonomy(spec2, space2)

    a = AlignmentByName(input, ref)
    a.align();

    if False:
        for node in input.taxa():
            print node, a.getTaxon(node)

    print 'Conflict analysis'
    ca = ConflictAnalysis(input, ref, a, False)
    print '  input root:', ca.inputRoot
    print '  ref root:', ca.refRoot
    print '  induced root:', ca.inducedRoot
    print '  ingroup:', ca.ingroup
    print '  induced ingroup:', ca.inducedIngroup
    print '  map size:', ca.map.size()
    print '  comap size:', ca.comap.size()

    mapped_tip_count = 0
    unmapped_tip_count = 0
    none_count = 0

    rel_counts = {}

    if ca.inducedRoot != None:
        for node in ca.ingroup.descendants(True):
            if node.hasChildren():
                art = ca.articulation(node)
                if art != None:
                    n = art.disposition.name
                    print node, n, art.witness
                    rel_counts[n] = rel_counts.get(n, 0) + 1
                else:
                    print node, 'no articulation'
                    none_count += 1
            elif a.getTaxon(node) != None:
                mapped_tip_count += 1
            else:
                unmapped_tip_count += 1
                print node, 'unmapped'
    else:
        print 'no induced root!'

    print
    for n in rel_counts:
        print '%s: %s' % (n, rel_counts[n])
    print 'Mapped tips:', mapped_tip_count
    print 'Unmapped tips:', unmapped_tip_count
    print 'Other:', none_count
示例#3
0
def tst(noise, target, source):
    print '##', noise
    sep = Taxonomy.getRawTaxonomy('tax/skel/', 'ott')
    t = Taxonomy.getRawTaxonomy(target, 'target')
    s = Taxonomy.getRawTaxonomy(source, 'source')
    u = combine(sep, t, s, blustery)
    u.dumpChoices('/tmp/align_tests_choices.tsv')
    subprocess.call(['cat', '/tmp/align_tests_choices.tsv'])
    if False:
        u.dumpLog('/tmp/align_tests_log.tsv')
        subprocess.call(['cat', '/tmp/align_tests_log.tsv'])
    print
def load_tree(path):
    tree = Taxonomy.getTaxonomy(path, 'ott')
    count = 0
    for id in tree.allIds():
        count += 1
    print count, 'ids'
    return tree
def doit(tax_path, ids_path):

    ott = Taxonomy.getRawTaxonomy(tax_path, 'ott')

    all_nodes = {}

    with open(ids_path, 'r') as infile:
        reader = csv.reader(infile, delimiter='\t')
        otu_count = 0
        for row in reader:
            id = row[0]
            if otu_count % 50000 == 0: print otu_count, id
            otu_count += 1
            node = ott.lookupId(id)
            if node != None:
                all_nodes[node.id] = node

    print 'OTT taxa assigned to OTUs:', len(all_nodes)

    prefix_to_count = {}
    ott_count = 0

    for id in all_nodes:
        node = all_nodes[id]
        ott_count += 1
        for qid in node.sourceIds:
            prefix = qid.prefix
            count = prefix_to_count.get(prefix, 0)
            prefix_to_count[prefix] = count + 1

    print 'OTT ids assigned to OTUs:', otu_count
    for prefix in prefix_to_count:
        print prefix, prefix_to_count[prefix]
def load_fung():
    fung = Taxonomy.getTaxonomy('tax/fung/', 'if')

    fung.analyzeMajorRankConflicts()

    # 2014-04-14 Bad Fungi homonyms in new version of IF.  90156 is the good one.
    # 90154 has no descendants
    if fung.maybeTaxon('90154') != None:
        print 'Removing Fungi 90154'
        fung.taxon('90154').prune(this_source)
    # 90155 is "Nom. inval." and has no descendants
    if fung.maybeTaxon('90155') != None:
        print 'Removing Fungi 90155'
        fung.taxon('90155').prune(this_source)

    fix_basal(fung)

    # smush folds sibling taxa that have the same name.
    # fung.smush()

    if True:
        patch_fung(fung)
    else:
        try:
            patch_fung(fung)
        except:
            print '**** Exception in patch_fung'

    fung.smush()

    return fung
示例#7
0
def compare(t1, t2):
    print 'comparing', t1, 'to', t2
    retired = 0
    became_hidden = 0
    became_unhidden = 0
    became_extinct = 0
    became_unextinct = 0
    became_suppressed = 0
    became_unsuppressed = 0
    kept = 0
    novel = 0
    tax1 = Taxonomy.getTaxonomy(t1, 'x')
    tax1.inferFlags()
    tax2 = Taxonomy.getTaxonomy(t2, 'x')
    tax2.inferFlags()
    for taxon in tax1.taxa():
        probe = tax2.lookupId(taxon.id)
        if probe == None:
            retired += 1
        elif probe.isAnnotatedHidden() and not taxon.isAnnotatedHidden():
            became_hidden += 1
        elif not probe.isAnnotatedHidden() and taxon.isAnnotatedHidden():
            became_unhidden += 1
        elif probe.isExtinct() and not taxon.isExtinct():
            became_extinct += 1
        elif not probe.isExtinct() and taxon.isExtinct():
            became_unextinct += 1
        elif probe.isHidden() and not taxon.isHidden():
            became_suppressed += 1
        elif not probe.isHidden() and taxon.isHidden():
            became_unsuppressed += 1
        else:
            kept += 1
    for taxon in tax2.taxa():
        if tax1.lookupId(taxon.id) == None:
            novel += 1
    print
    print 'id retired:', retired
    print 'newly hidden:', became_hidden
    print 'no longer hidden:', became_unhidden
    print 'newly extinct:', became_extinct
    print 'no longer extinct:', became_unextinct
    print 'newly otherwise suppressed:', became_suppressed
    print 'no longer otherwise suppressed:', became_unsuppressed
    print 'new:', novel
    print 'no change in status:', kept
def compare(t1, t2):
    print 'comparing', t1, 'to', t2
    retired = 0
    became_hidden = 0
    became_unhidden = 0
    became_extinct = 0
    became_unextinct = 0
    became_suppressed = 0
    became_unsuppressed = 0
    kept = 0
    novel = 0
    tax1 = Taxonomy.getTaxonomy(t1, 'x')
    tax1.inferFlags()
    tax2 = Taxonomy.getTaxonomy(t2, 'x')
    tax2.inferFlags()
    for taxon in tax1.taxa():
        probe = tax2.lookupId(taxon.id)
        if probe == None:
            retired += 1
        elif probe.isAnnotatedHidden() and not taxon.isAnnotatedHidden():
            became_hidden += 1
        elif not probe.isAnnotatedHidden() and taxon.isAnnotatedHidden():
            became_unhidden += 1
        elif probe.isExtinct() and not taxon.isExtinct():
            became_extinct += 1
        elif not probe.isExtinct() and taxon.isExtinct():
            became_unextinct += 1
        elif probe.isHidden() and not taxon.isHidden():
            became_suppressed += 1
        elif not probe.isHidden() and taxon.isHidden():
            became_unsuppressed += 1
        else:
            kept += 1
    for taxon in tax2.taxa():
        if tax1.lookupId(taxon.id) == None:
            novel += 1
    print
    print 'id retired:', retired
    print 'newly hidden:', became_hidden
    print 'no longer hidden:', became_unhidden
    print 'newly extinct:', became_extinct
    print 'no longer extinct:', became_unextinct
    print 'newly otherwise suppressed:', became_suppressed
    print 'no longer otherwise suppressed:', became_unsuppressed
    print 'new:', novel
    print 'no change in status:', kept
def report(dir, idspace):
    """Write the homonym report for the taxonomy stored under tax/<dir>/.

    The report goes to <report_dir>/<dir>-homonym-report.csv; report_dir
    (a name defined outside this function) is created if it is missing.
    """
    taxonomy_path = os.path.join('tax', dir, '')
    tax = Taxonomy.getRawTaxonomy(taxonomy_path, idspace)
    # Other reports, currently disabled:
    # tax.smush()
    # HomonymReport.homonymDensityReport(tax, dir + '-density-report.csv')
    # HomonymReport.homonymUncertaintyReport(tax, 'reports/' + dir + '-uncertainty-report.csv')
    if not os.path.isdir(report_dir):
        os.makedirs(report_dir)
    report_path = os.path.join(report_dir, dir + '-homonym-report.csv')
    HomonymReport.homonymReport(tax, report_path)
示例#10
0
def report(dir, idspace):
    """Write the homonym report for the taxonomy stored under tax/<dir>/.

    The report goes to <report_dir>/<dir>-homonym-report.csv; report_dir
    (a name defined outside this function) is created if it is missing.
    """
    taxonomy_path = os.path.join('tax', dir, '')
    tax = Taxonomy.getRawTaxonomy(taxonomy_path, idspace)
    # Other reports, currently disabled:
    # tax.smush()
    # HomonymReport.homonymDensityReport(tax, dir + '-density-report.csv')
    # HomonymReport.homonymUncertaintyReport(tax, 'reports/' + dir + '-uncertainty-report.csv')
    if not os.path.isdir(report_dir):
        os.makedirs(report_dir)
    report_path = os.path.join(report_dir, dir + '-homonym-report.csv')
    HomonymReport.homonymReport(tax, report_path)
def load_silva():
    """Load the SILVA taxonomy from tax/silva/, apply renames and patches."""
    tax = Taxonomy.getTaxonomy('tax/silva/', 'silva')

    # Used in studies pg_2448,pg_2783,pg_2753, seen deprecated on 2015-07-20
    renames = [
        ('AF364847', 'Pantoea ananatis LMG 20103'),  # ncbi:706191
        ('EF690403', 'Pantoea ananatis B1-9'),       # ncbi:1048262
    ]
    for accession, new_name in renames:
        tax.taxon(accession).rename(new_name)

    patch_silva(tax)

    return tax
def load_gbif():
    """Load the GBIF taxonomy from tax/gbif/, normalize and patch it."""
    tax = Taxonomy.getTaxonomy('tax/gbif/', 'gbif')
    tax.smush()

    # In GBIF, a rank skipped for some children but not for others means
    # the rank-skipped children are incertae sedis.  Mark them so.
    tax.analyzeMajorRankConflicts()

    # Creates a Eukaryota node.
    fix_basal(tax)
    animalia = tax.taxon('Animalia')
    animalia.synonym('Metazoa')

    patch_gbif(tax)
    return tax
def load_ncbi():
    """Load the NCBI taxonomy from tax/ncbi/, patch it and flag its taxa."""
    tax = Taxonomy.getTaxonomy('tax/ncbi/', 'ncbi')
    fix_SAR(tax)

    plants = tax.taxon('Viridiplantae')
    plants.rename('Chloroplastida')
    patch_ncbi(tax)

    # analyzeOTUs sets flags on questionable taxa ("unclassified", hybrids,
    # and so on) so that they can optionally be suppressed downstream.
    tax.analyzeOTUs()
    tax.analyzeContainers()

    return tax
示例#14
0
def doit(ott, sep, outpath, conpath):
    """Produce the overall table (outpath) and source breakdown (conpath).

    sep is accepted but not used here.  The Ruggiero conflict analysis is
    switched off.
    """
    # Was: os.path.isdir('out/ruggiero')
    include_ruggiero = False

    if include_ruggiero:
        ruggiero = Taxonomy.getRawTaxonomy('out/ruggiero/', 'rug')
        # Prepare for conflict analysis
        # oh no, we really need a separation taxonomy to do that.
        alignment = AlignmentByName(ruggiero, ott)
        alignment.align()
        # The analysis object is built but not consulted afterwards.
        ConflictAnalysis(ruggiero, ott, alignment, True)

    overall_table(ott, outpath)
    source_breakdown_table(ott, conpath)
def doit(ott, sep, outpath, conpath):
    """Produce the overall table (outpath) and source breakdown (conpath).

    sep is accepted but not used here.  The Ruggiero conflict analysis is
    switched off.
    """
    # Was: os.path.isdir('out/ruggiero')
    include_ruggiero = False

    if include_ruggiero:
        ruggiero = Taxonomy.getRawTaxonomy('out/ruggiero/', 'rug')
        # Prepare for conflict analysis
        # oh no, we really need a separation taxonomy to do that.
        alignment = AlignmentByName(ruggiero, ott)
        alignment.align()
        # The analysis object is built but not consulted afterwards.
        ConflictAnalysis(ruggiero, ott, alignment, True)

    overall_table(ott, outpath)
    source_breakdown_table(ott, conpath)
def load_h2007():
    """Load the h2007 Newick tree and fix spellings and synonyms in it."""
    tree = Taxonomy.getNewick('feed/h2007/tree.tre', 'h2007')

    # 2014-04-08 Misspelling
    misspelled = 'Chaetothryriomycetidae'
    if tree.maybeTaxon(misspelled) != None:
        tree.taxon(misspelled).rename('Chaetothyriomycetidae')

    # Either rename the old spelling, or record it as a synonym of the
    # correctly spelled taxon.
    if tree.maybeTaxon('Asteriniales') == None:
        tree.taxon('Asterinales').synonym('Asteriniales')
    else:
        tree.taxon('Asteriniales').rename('Asterinales')

    # h2007/if synonym https://github.com/OpenTreeOfLife/reference-taxonomy/issues/40
    urocystales = tree.taxon('Urocystales')
    urocystales.synonym('Urocystidales')

    return tree
def load_worms():
    """Load the WoRMS taxonomy from tax/worms/ and apply local fixes."""
    tax = Taxonomy.getTaxonomy('tax/worms/', 'worms')
    tax.smush()

    biota = tax.taxon('Biota')
    biota.rename('life')
    animalia = tax.taxon('Animalia')
    animalia.synonym('Metazoa')

    fix_basal(tax)

    # 2015-02-17 According to WoRMS web site.  Occurs in pg_1229
    communis = 'Scenedesmus communis'
    if tax.maybeTaxon(communis) != None:
        tax.taxon(communis).synonym('Scenedesmus caudata')

    # See NCBI
    tax.taxon('Millericrinida').extant()

    # Help to match up with IRMNG
    tax.taxon('Ochrophyta').synonym('Heterokontophyta')

    # Second smush: Gracilimesus gorbunovi, pg_1783
    tax.smush()

    return tax
# counts number of taxa with rank=family in a given taxon

from org.opentreeoflife.taxa import Taxonomy, Rank
import argparse

# Command line: the taxon name is required; the taxonomy location is
# optional and defaults to the path that used to be hard-coded here.
parser = argparse.ArgumentParser(
    description='count the taxa with rank=family within a given taxon')
parser.add_argument('taxonname',
    help='name of taxon to count'
    )
parser.add_argument('--ott-path',
    default='/Users/karen/Documents/opentreeoflife/data/ott/ott2.9draft12/',
    help='directory containing the OTT taxonomy to load'
    )
args = parser.parse_args()

name = args.taxonname
ott_path = args.ott_path
ott = Taxonomy.getTaxonomy(ott_path, 'ott')
def count_families(taxon):
    count = 0
    with open('families.txt','w') as f:
        for t in taxon.descendants(False):
            if t.rank == Rank.FAMILY_RANK:
                f.write("{n}\n".format(n=t.name))
                count += 1
    f.close()
    return count
print "number families: ",count_families(ott.taxon(name))
示例#19
0
def tst(target, source, want):
    """Build one combine() test case and append it to the global `tests`.

    The appended tuple is (target taxonomy, source taxonomy, combined
    result, want).
    """
    target_tax = Taxonomy.getRawTaxonomy(target, 'target')
    source_tax = Taxonomy.getRawTaxonomy(source, 'source')
    combined = combine(target_tax, source_tax, blustery)
    # `tests` is only mutated, never rebound, so no `global` is required.
    tests.append((target_tax, source_tax, combined, want))
示例#20
0
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from in IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# The union taxonomy is used only as a holder for the preferred-id lists.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng', 'ott', 'name', 'synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        sources = taxon.sourceIds
        # Keep only taxa whose single source id comes from IRMNG.
        if len(sources) != 1:
            continue
        only_source = sources[0]
        if only_source.prefix != 'irmng':
            continue
        # Keep only ids that appear in the preferred-id lists.
        probe = union.importantIds.lookupId(taxon.id)
        if probe == None:
            continue
        if probe.inSynthesis:
            marker = 'synthesis'
        else:
            marker = ''
        writer.writerow([only_source.id, taxon.id, taxon.name, marker])
示例#21
0
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy
from claim import Has_child

# Create the (initially empty) model taxonomy that the sources below are
# absorbed into.
tax = UnionTaxonomy()

# Establish homonym-resolution skeleton (not really used here)
# skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
# tax.setSkeleton(skel)


# Add NCBI subset to the model taxonomy
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
# analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
#  to allow the option of suppression downstream
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# Add GBIF subset to the model taxonomy (absorbed after NCBI)
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
# analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
# intermediate ranks are missing (e.g. a family that's a child of a
# class)
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system with tab-delimited files
tax.edit('t/edits/')
示例#22
0
import sys
from org.opentreeoflife.taxa import Taxonomy, Newick

source = sys.argv[1]    # Name of directory containing original taxonomy (must end in /)

# Taxonomy to be walked, loaded in raw form with idspace 'ott'.
ott = Taxonomy.getRawTaxonomy(source, 'ott')

# Module-level counters; recur() declares all three as globals.
count = 0
grafts = 0
non_tip_grafts = 0

# Seen = seen idspaces among ancestors.
# Returns set of seen idspaces.

def recur(taxon, seen):
    """Visit taxon and its children, printing a line for each 'resolve'.

    seen holds the idspaces already met among the ancestors of taxon.

    NOTE(review): this definition looks cut off in this file.  It has no
    return statement although the recursive call's result (`under`) is
    used, `all` is assigned but not used in the visible part, and the
    helpers empty/adjoin/intersectp are not defined in view.
    """
    global count, grafts, non_tip_grafts
    count += 1

    # idspace (source) of taxon
    space = taxon.sourceIds.get(0).prefix

    all = empty()
    # Idspaces seen by the children: the ancestors' plus this taxon's.
    seen_child = adjoin(space, seen)
    for child in taxon.getChildren():
        under = recur(child, seen_child)
        child_space = child.sourceIds.get(0).prefix
        if child_space != space:
            # A graft or resolution.
            if intersectp(under, seen):
                # A resolution.
                print 'resolve', child, taxon, child.rank
def load_irmng():
    """Load the IRMNG taxonomy from tax/irmng/ and apply curated fixes.

    After smush / analyzeMajorRankConflicts / fix_basal, a sequence of
    hand-written prune, absorb, extinct and extant patches is applied in
    the order written below.  Returns the patched taxonomy.
    """
    irmng = Taxonomy.getTaxonomy('tax/irmng/', 'irmng')
    irmng.smush()
    irmng.analyzeMajorRankConflicts()

    fix_basal(irmng)
    irmng.taxon('Animalia').synonym('Metazoa')

    # JAR 2014-04-26 Flush all 'Unaccepted' taxa
    irmng.taxon('Unaccepted', 'life').prune(this_source)

    # Fixes

    # Neopithecus (extinct) occurs in two places.  Flush one, mark the other
    irmng.taxon('1413316').prune(this_source) #Neopithecus in Mammalia
    irmng.taxon('1413315').extinct() #Neopithecus in Primates (Pongidae)

    # RR #50
    # irmng.taxon('Saxo-Fridericia').rename('Saxofridericia')
    # irmng.taxon('Saxofridericia').absorb(irmng.taxon('Saxo-fridericia'))
    saxo = irmng.maybeTaxon('1063899')
    if saxo != None:
        saxo.absorb(irmng.taxon('1071613'))

    # Romina 2014-04-09
    # IRMNG has EIGHT different Trichodermas.  (Four are synonyms of other things.)
    # 1307461 = Trichoderma Persoon 1794, in Hypocreaceae
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/86
    irmng.taxon('Hypocrea').absorb(irmng.taxon('1307461'))

    # JAR 2015-06-28
    # The synonym Ochrothallus multipetalus -> Niemeyera multipetala
    # is no good; it interferes with correct processing of Ochrothallus
    # multipetalus.  We could remove the synonym, but instead remove its
    # target because no synonym-removal command is available.
    irmng.taxon('Niemeyera multipetala').prune(this_source)

    # The next two lookups are guarded: the prune is skipped when taxon()
    # returns None.
    tip = irmng.taxon('Tipuloidea', 'Hemiptera')  # irmng:1170022
    if tip != None:
        tip.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    oph = irmng.taxon('Ophiurina', 'Ophiurinidae') # irmng:1346026
    if oph != None:
        oph.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    # NCBI synonymizes Pelecypoda = Bivalvia
    irmng.taxon('Bivalvia').absorb(irmng.taxon('Pelecypoda')) # bogus order
    # hmm
    irmng.taxon('Bivalvia').extant()

    # This one was mapping to Blattodea, and making it extinct.
    # Caused me a couple of hours of grief.
    # My guess is it's because its unique child Sinogramma is in Blattodea in GBIF.
    # Wikipedia says it's paraphyletic.
    irmng.taxon('Blattoptera', 'Insecta').prune('https://en.wikipedia.org/wiki/Blattoptera')

    # 2015-07-25 Found while trying to figure out why Theraphosidae was marked extinct.
    # NCBI thinks that Theraphosidae and Aviculariidae are the same.
    irmng.taxon('Aviculariidae').extant()

    # 2015-07-25 Extra Dipteras are confusing new division logic.  Barren genus
    irmng.taxon('1323521').prune(this_source)

    # 2015-09-10 This one is unclassified (Diptera) and is leading to confusion with two other Steinias.
    irmng.taxon('1299622').prune(this_source)

    # 2015-09-11 https://github.com/OpenTreeOfLife/feedback/issues/74
    # Lymnea is a snail, not a shark
    irmng.taxon('1317416').prune(this_source)

    # 2015-10-12 JAR checked IRMNG online and this taxon (Ctenophora in Chelicerata) did not exist
    if irmng.maybeTaxon('1279363') != None:
        irmng.taxon('1279363').prune(this_source)

    return irmng
def load_713():
    """Load and return the taxonomy stored in tax/713/ (idspace 'study713')."""
    return Taxonomy.getTaxonomy('tax/713/', 'study713')
    * copied = total number of nodes originating from this source (copied)
    * aligned = number of source nodes aligned and copied
    * absorbed = number of source nodes absorbed (not copied)
    * conflict = number of inconsistent source nodes (not copied)
    """

def dump_table_as_csv(table, outfile):
    """Write every row of table to outfile in CSV form (for Pensoft)."""
    csv.writer(outfile).writerows(table)

def max_depth(node):
    """Return the number of edges on the longest path from node to a tip.

    A node without children has depth 0.
    """
    child_depths = [max_depth(child) + 1 for child in node.getChildren()]
    if not child_depths:
        return 0
    return max(child_depths)

if __name__ == '__main__':

    # Four positional arguments are required; fail with a usage line
    # instead of an IndexError traceback when any is missing.
    if len(sys.argv) < 5:
        sys.stderr.write('usage: %s taxpath seppath outpath conpath\n'
                         % sys.argv[0])
        sys.exit(1)

    taxpath = sys.argv[1]
    seppath = sys.argv[2]
    outpath = sys.argv[3]  # general report, JSON
    conpath = sys.argv[4]  # contributions, CSV
    sep = Taxonomy.getRawTaxonomy(seppath, 'ott')
    ott = Taxonomy.getRawTaxonomy(taxpath, 'ott')
    ott.inferFlags()

    doit(ott, sep, outpath, conpath)
示例#26
0
import sys

from org.opentreeoflife.taxa import Taxonomy, Rank

ott = Taxonomy.getRawTaxonomy(sys.argv[1], 'ott')

# Look for splitting:
#   Suppose X, Y are distinct in GBIF, but both align to X in NCBI,
#   because NCBI says Y a synonym of X.
#   Then we have X in NCBI with GBIF X and Y aligning to it, and
#   Y a synonym via NCBI but not via GBIF.
#   So GBIF X is a source for X, and GBIF Y is in sources for Y-synonym of X.

for X in ott.taxa():
    # Species only
    if X.rank != Rank.SPECIES_RANK: continue

    xid = X.sourceIds[0].id

    # Look for Y, a synonym of X...
    for Y in X.getSynonyms():

        yids = [qid.id for qid in Y.sourceIds]

        # that has same source as X...
        if not xid in yids:
            continue

        # but, an alignment from Y
        for yid in yids:
def tst(target, source, want):
    """Build one combine() test case and append it to the global `tests`.

    The appended tuple is (target taxonomy, source taxonomy, combined
    result, want).
    """
    target_tax = Taxonomy.getRawTaxonomy(target, 'target')
    source_tax = Taxonomy.getRawTaxonomy(source, 'source')
    combined = combine(target_tax, source_tax, blustery)
    # `tests` is only mutated, never rebound, so no `global` is required.
    tests.append((target_tax, source_tax, combined, want))
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from in IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# The union taxonomy is used only as a holder for the preferred-id lists.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng','ott','name','synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        sources = taxon.sourceIds
        # Keep only taxa whose single source id comes from IRMNG.
        if len(sources) != 1:
            continue
        only_source = sources[0]
        if only_source.prefix != 'irmng':
            continue
        # Keep only ids that appear in the preferred-id lists.
        probe = union.importantIds.lookupId(taxon.id)
        if probe == None:
            continue
        if probe.inSynthesis:
            marker = 'synthesis'
        else:
            marker = ''
        writer.writerow([only_source.id, taxon.id, taxon.name, marker])
示例#29
0
def assemble():
    """Build the 'aster' model taxonomy and dump it to t/tax/aster/.

    Merges the NCBI and GBIF aster subsets into a new union taxonomy,
    applies the TSV edits and a test claim, adds and prunes some test taxa,
    carries ids over from t/tax/prev_aster/, processes additions, assigns
    new ids, checks the result and writes it out.  The steps run in the
    order written.
    """

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    # Names passed to tax.watch().
    for name in [
            'Pentaphragma ellipticum',
            'Lachnophyllum',
            'Sipolisia',
            'Cicerbita bourgaei',
            'Adenophora triphylla',
            'Artemisia vulgaris',
            'Carlina libanotica',
    ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)

    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    #  to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    # Claims to proclaim against the taxonomy; has_parent, taxon and
    # proclaim are names defined outside this function.
    props = [has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')]

    for prop in props:
        print proclaim(tax, prop)

    # Made-up genus with two species, used for testing.
    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature: a second taxon named "Opentreeia sp. C" is
    # created, attached to the genus and then pruned.
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)  # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
                        small, big, small_tax.id, small_id)
                    show_interloper(small_node, small_id, ott)

            else:
                print '** More than one taxon named %s is in %s' % (small, big)
                print '  ', small_nodes

    infile.close()


def show_interloper(small_node, small_id, ott):
    """Print diagnostic lines about small_node and the id small_id.

    If small_node is not its own taxon() it is reported as a synonym.  The
    holder of small_id in ott is then reported, or its absence.

    Each print takes one parenthesised, pre-formatted string, so the output
    is the same whether print is a statement (Python 2 / Jython) or a
    function (Python 3).
    """
    accepted = small_node.taxon()
    if small_node != accepted:
        print('   %s is a synonym for %s' % (small_node.name, accepted.name))
    probe = ott.lookupId(small_id)
    if probe is not None:
        print('   Id %s belongs to %s' % (small_id, probe))
    else:
        print('   (There is no taxon with id %s)' % small_id)


if __name__ == '__main__':
    if len(sys.argv) == 3:
        inclusions = sys.argv[1]
        taxname = sys.argv[2]
    else:
        print 'ignoring supplied args', sys.argv
        inclusions = 'inclusions.csv'
        taxname = 'tax/ott/'
    check(inclusions, Taxonomy.getTaxonomy(taxname, 'ott'))
import sys

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import AlignmentByName
from org.opentreeoflife.conflict import ConflictAnalysis

# Load the Ruggiero taxonomy and write it out as one line of Newick.
rug = Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug')

with open('scratch/Ruggiero.tre', 'w') as outfile:
    newick_text = rug.toNewick(False)
    for piece in (newick_text, '\n'):
        outfile.write(piece)
def create_ott():
    """Assemble and return the OTT union taxonomy.

    Source taxonomies are prepared and absorbed in the fixed order written
    below (silva, h2007, fungi, lamiales, malacostraca, ncbi, remaining
    worms, remaining Index Fungorum, gbif, irmng).  The union is then
    patched and deforested, selected ids are forced, and ids are carried
    over from tax/prev_ott/.
    """

    ott = UnionTaxonomy.newTaxonomy()

    # There ought to be tests for all of these...

    for name in names_of_interest:
        ott.eventlogger.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    # prepare_fungorum returns two parts; only the first is absorbed here.
    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    # prepare_worms also returns two parts; the second is absorbed after NCBI.
    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -----------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.

    # Each entry is (inf, sup, id): the taxon found by ott.taxon(inf, sup)
    # gets that id; entries for which the lookup returns None are skipped.
    for (inf, sup, id) in [
            ('Tipuloidea', 'Diptera', '722875'),
            ('Saccharomycetes', 'Saccharomycotina', '989999'),
            ('Phaeosphaeria', 'Ascomycota', '5486272'),
            ('Synedra acus','Eukaryota','992764'),
            ('Epiphloea','Halymeniaceae','5342325'),
            ('Hessea','Archaeplastida','600099'),
            ('Morganella','Arthropoda','6400'),
            ('Rhynchonelloidea','Rhynchonellidae','5316010'),
            ('Epiphloea', 'Lichinales', '5342482'),
            ('Morganella', 'Fungi', '973932'),
            ('Parmeliaceae', 'Lecanorales', '305904'),
    ]:
        tax = ott.taxon(inf, sup)
        if tax != None:
            tax.setId(id)

    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939') # NCBI

    # Force ids for listed NCBI taxa, via the image of each in the union.
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n != None:
            im = ott.image(n)
            if im != None:
                im.setId(ott_id)
            else:
                print '** NCBI %s not mapped - %s' % (ncbi_id, name)
        else:
            print '** No NCBI taxon %s - %s' % (ncbi_id, name)

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Foo
    trich = fungi.maybeTaxon('Trichosporon')
    if trich != None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess


    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
import sys, codecs
from org.opentreeoflife.taxa import Taxonomy, Newick

source = sys.argv[1]    # Name of directory containing original taxonomy (must end in /)
name = sys.argv[2]      # Name of taxon to extract
dest = sys.argv[3]      # Directory to store result (must end in /)

# A destination ending in '.tre' is written as Newick text, not as a dump.
if dest.endswith('.tre'):
    writes_newick = True
elif dest.endswith('/'):
    writes_newick = False
else:
    print >>sys.stderr, 'Invalid taxonomy destination (need / or .tre)', dest
    sys.exit(1)

selection = Taxonomy.getRawTaxonomy(source, 'foo').select(name)

if not writes_newick:
    selection.dump(dest)
else:
    with codecs.open(dest, 'w', 'utf-8') as outfile:
        newick_text = Newick.toNewick(selection, Newick.USE_NAMES_AND_IDS)
        outfile.write(newick_text)
        outfile.write('\n')
示例#34
0
def assemble():
    """Build the 'aster' model taxonomy and dump it to t/tax/aster/.

    Merges the NCBI and GBIF aster subsets into a new union taxonomy,
    applies the TSV edits and a test claim, adds and prunes some test taxa,
    carries ids over from t/tax/prev_aster/, processes additions, assigns
    new ids, checks the result and writes it out.  The steps run in the
    order written.
    """

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    # Names passed to tax.watch().
    for name in ['Pentaphragma ellipticum',
                 'Lachnophyllum',
                 'Sipolisia',
                 'Cicerbita bourgaei',
                 'Adenophora triphylla',
                 'Artemisia vulgaris',
                 'Carlina libanotica',
    ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)


    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    #  to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    # Claims to proclaim against the taxonomy; has_parent, taxon and
    # proclaim are names defined outside this function.
    props = [
        has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')
    ]

    for prop in props:
        print proclaim(tax, prop)

    # Made-up genus with two species, used for testing.
    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature: a second taxon named "Opentreeia sp. C" is
    # created, attached to the genus and then pruned.
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)    # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
示例#35
0
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# Create the union taxonomy, named 'dwh', that this script works on.
dwh = UnionTaxonomy.newTaxonomy('dwh')

# Tell smasher which separation taxonomy to use as the skeleton.
dwh.setSkeleton(Taxonomy.getTaxonomy('tax/separation/', 'separation'))

# Source taxonomies, numbered 1-13.
# NOTE(review): presumably this numbering is the intended merge priority
# order -- confirm against the rest of the script.
# 1. trunk
# 2. ictv
# 3. IOC
# 4. ASW
# 5. ODO
# 6. BOM
# 7. ERE
# 8. ONY
# 9. EET
# 10. NCBI
# 11. WOR
# 12. CLP
# 13. COL

# Load the source taxonomies.  Each call takes the directory of the source
# and a short name for it as the second argument.

trunk = Taxonomy.getTaxonomy('t/tax/2018_12/dynamichierarchytrunk2018-11-21/',
                             'trunk')
ictv = Taxonomy.getTaxonomy(
    't/tax/2018_12/ICTV-virus_taxonomy-with-higherClassification/', 'ictv')
IOC = Taxonomy.getTaxonomy('t/tax/2018_12/ioc-birdlist/', 'IOC')
示例#36
0
# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.taxa import Taxonomy
import csv, sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# One (short name, loaded taxonomy, column label) triple per taxonomy.
# Every taxonomy is loaded right here, when the module is executed.
# write_counts() derives its ' bin' / ' sp' / ' tip' header columns from
# the label.
taxonomies = [('fung', load_fung(), 'Index Fungorum'),
              ('ncbi', load_ncbi(), 'NCBI'),
              ('gbif', load_gbif(), 'GBIF'),
              ('irmng', load_irmng(), 'IRMNG'),
              ('ott', Taxonomy.getTaxonomy('tax/ott/'), 'OTT 2.9'),
          ]

def main():
    """Read the orders to report on and pass them to write_counts.

    The first column of every data row in 'order-counts-orders.csv' is
    taken as a taxon name; the header row is skipped.  'Fungi' is always
    the first entry of the resulting list.
    """
    taxa = ['Fungi']
    # The with block closes the file even if reading a row raises.
    with open('order-counts-orders.csv', 'r') as infile:
        reader = csv.reader(infile)
        next(reader, None)   # skip the header row; tolerate an empty file
        for row in reader:
            taxa.append(row[0])

    write_counts(taxa)

def write_counts(taxa):
    outfile = open('order-counts.csv', 'w')
    writer = csv.writer(outfile)
    header = ['order']
    for (name, taxonomy, label) in taxonomies:
        header += [label + ' bin', label + ' sp', label + ' tip']
示例#37
0
    * conflict = number of inconsistent source nodes (not copied)
    """


def dump_table_as_csv(table, outfile):
    """Write each row of *table* to *outfile* in CSV form (for Pensoft)."""
    csv.writer(outfile).writerows(table)


def max_depth(node):
    """Return the length of the longest downward path starting at *node*.

    A node without children has depth 0; otherwise the depth is one more
    than the deepest child's depth.
    """
    child_depths = [max_depth(kid) + 1 for kid in node.getChildren()]
    if not child_depths:
        return 0
    return max(child_depths)


# Command-line entry point.  Expects four positional arguments:
#   taxpath seppath outpath conpath
if __name__ == '__main__':

    taxpath = sys.argv[1]  # taxonomy to report on
    seppath = sys.argv[2]  # second taxonomy, passed to doit as 'sep'
    outpath = sys.argv[3]  # general report, JSON
    conpath = sys.argv[4]  # contributions, CSV
    # Both taxonomies are loaded with getRawTaxonomy under the 'ott' name.
    sep = Taxonomy.getRawTaxonomy(seppath, 'ott')
    ott = Taxonomy.getRawTaxonomy(taxpath, 'ott')
    ott.inferFlags()

    doit(ott, sep, outpath, conpath)
示例#38
0
def create_ott(ott_spec):
    """Build the 'ott' union taxonomy, dump it, and return it.

    Steps, in order: check that the amendments clone and the id list are
    accessible, create the union taxonomy and set its skeleton, merge the
    sources, apply the TSV edits and amendments, assign ids (retained first,
    then additions, then newly minted), deforestate, run integrity checks,
    load the preferred-id lists and dump to the path derived from *ott_spec*.

    Exits the process with status 1 if the amendments clone directory does
    not exist.
    """

    # Fail fast
    additions_clone_path = os.path.join(access_head('amendments'),
                                        'amendments-1')
    if not os.path.isdir(additions_clone_path):
        print '# cannot find', additions_clone_path
        sys.exit(1)

    # The file is opened only to prove it is readable; nothing is read here.
    with open(os.path.join(access_head('idlist'), 'by_qid.csv'),
              'r') as infile:
        print '# can access idlist'

    ott_path = management.source_path(ott_spec)

    ott = UnionTaxonomy.newTaxonomy('ott')

    # Would be nice if there were tests for all of these...
    for name in names_of_interest:
        ott.eventLogger.namesOfInterest.add(name)

    ott.setSkeleton(Taxonomy.getTaxonomy('curation/separation/', 'separation'))

    # These are particularly hard cases; create alignment targets up front
    adjustments.deal_with_polysemies(ott)

    # Align and merge each source in sequence
    merge_sources(ott)

    # "Old" patch system
    TsvEdits.edit(ott, 'curation/edits/')

    # consider try: ... except: print '**** Exception in patch_ott'
    amendments.patch_ott(ott)

    # End of topology changes.  Now assign ids.
    retain_ids(ott, access_source('ott-PREVIOUS'),
               os.path.join(access_head('idlist'), 'by_qid.csv'))

    # Apply the additions (which already have ids assigned).
    # This has to happen *after* ids are assigned, since additions use OTT
    # ids to identify parents.
    print '-- Processing additions --'
    Addition.processAdditions(additions_clone_path, ott)

    # Mint ids for new nodes
    # NOTE(review): new_taxa_path is not assigned in this function;
    # presumably a module-level global -- confirm.
    print '-- Minting new ids --'
    ott.assignNewIds(new_taxa_path)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # data structure integrity checks
    ott.check()

    # For deprecated id report (dump)
    ott.loadPreferredIds('ids_that_are_otus.tsv', False)
    ott.loadPreferredIds('ids_in_synthesis.tsv', True)

    ott.dump(ott_path)

    record_ott_sources(ott_spec)

    return ott
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# Create the union taxonomy, named 'dwh', that this script works on.
dwh = UnionTaxonomy.newTaxonomy('dwh')

# Tell smasher which separation taxonomy to use as the skeleton.
dwh.setSkeleton(Taxonomy.getTaxonomy('tax/separation/', 'separation'))

# Source taxonomies, numbered 1-13.
# NOTE(review): presumably this numbering is the intended merge priority
# order -- confirm against the rest of the script.
# 1. trunk
# 2. ictv
# 3. IOC
# 4. ASW
# 5. ODO
# 6. BOM
# 7. ERE
# 8. ONY
# 9. EET
# 10. NCBI
# 11. WOR
# 12. CLP
# 13. COL

# Load the source taxonomies.  Each one lives in a directory under
# t/tax/2018_12/ named after the short name passed as second argument.

trunk = Taxonomy.getTaxonomy('t/tax/2018_12/trunk/', 'trunk')
ictv = Taxonomy.getTaxonomy('t/tax/2018_12/ictv/', 'ictv')
IOC = Taxonomy.getTaxonomy('t/tax/2018_12/IOC/', 'IOC')
ASW = Taxonomy.getTaxonomy('t/tax/2018_12/ASW/', 'ASW')
ODO = Taxonomy.getTaxonomy('t/tax/2018_12/ODO/', 'ODO')
示例#40
0
def merge_sources(ott):
    """Align and merge every source taxonomy into *ott*, one after another.

    Merge order: SILVA, Hibbett 2007, the Fungi part of Index Fungorum,
    Lamiales (study713), the Malacostraca / Cnidaria / Mollusca parts of
    WoRMS, NCBI, the rest of WoRMS, GBIF, IRMNG.  Sources merged earlier
    take priority (see the WoRMS comment below).  Most sources are adjusted
    through the adjustments module before being aligned, and each alignment
    is handed to align_and_merge.

    Several reporting / debugging calls are interleaved with the merges.
    Nothing is returned; *ott* is modified in place.
    """

    # Genbank - this is a kludge to make sure it's in the dependencies list.
    # But eventually it ought to be handled in this file, not in the silva
    # import script.
    access_head('genbank')

    # SILVA
    silva = load_taxonomy('silva')
    adjustments.adjust_silva(silva)
    silva_to_ott = adjustments.align_silva(silva, ott)
    align_and_merge(silva_to_ott)

    # Hibbett 2007
    h2007 = Taxonomy.getTaxonomy('curation/h2007/tree.tre', 'h2007')
    adjustments.adjust_h2007(h2007)
    h2007_to_ott = ott.alignment(h2007)
    align_and_merge(h2007_to_ott)

    # Index Fungorum
    fungorum = load_taxonomy('fung')
    adjustments.adjust_fung(fungorum)
    # fungorum_sans_fungi is only referenced by the commented-out merge
    # further down.
    (fungi, fungorum_sans_fungi) = split_taxonomy(fungorum, 'Fungi')
    align_and_merge(adjustments.align_fungi(fungi, ott))

    # Connect IF families to Hibbett 2007 orders
    adjustments.link_to_h2007(ott)

    # Look for orders that have no children in OTT
    report_on_h2007(h2007, h2007_to_ott, '#')

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = Taxonomy.getTaxonomy('curation/lamiales/', 'study713')
    adjustments.adjust_lamiales(lamiales)
    align_and_merge(adjustments.align_lamiales(lamiales, ott))

    # WoRMS
    # higher priority to Worms for Malacostraca, Cnidaria, Mollusca
    #  so we split out
    # those clades from worms and absorb them before NCBI
    worms = load_taxonomy('worms')
    adjustments.adjust_worms(worms)
    # Malacostraca instead of Decapoda because M. is in the separation taxonomy
    (malacostraca,
     worms_sans_malacostraca) = split_taxonomy(worms, 'Malacostraca')
    align_and_merge(ott.alignment(malacostraca))
    (cnidaria, worms_sans_cnidaria) = split_taxonomy(worms_sans_malacostraca,
                                                     'Cnidaria')
    align_and_merge(ott.alignment(cnidaria))
    (mollusca, low_priority_worms) = split_taxonomy(worms_sans_cnidaria,
                                                    'Mollusca')
    align_and_merge(ott.alignment(mollusca))

    # NCBI
    ncbi = load_taxonomy('ncbi')
    adjustments.adjust_ncbi(ncbi)

    # analyzeOTUs sets flags on questionable taxa (hybrid, metagenomes,
    #  etc) to allow the option of suppression downstream
    ncbi.analyzeOTUs()

    ncbi_to_ott = adjustments.align_ncbi(ncbi, silva, ott)
    align_and_merge(ncbi_to_ott)

    # Look for orders that have no children in OTT
    report_on_h2007(h2007, h2007_to_ott, '#')

    # Reporting
    # Get mapping from NCBI to OTT, derived via SILVA and Genbank.
    mappings = load_ncbi_to_silva(
        os.path.join(management.resource_path('silva'), 'ncbi_to_silva.tsv'),
        ncbi, silva, silva_to_ott)
    compare_ncbi_to_silva(mappings, silva_to_ott)

    # Low-priority WoRMS
    # This is suboptimal, but the names are confusing the division logic
    a = adjustments.align_worms(low_priority_worms, ott)
    align_and_merge(a)

    # The rest of Index Fungorum.  (Maybe not a good idea to use this.
    # These taxa are all in GBIF.)
    # align_and_merge(adjustments.align_fungorum_sans_fungi(fungorum_sans_fungi, ott))

    # GBIF
    gbif = load_taxonomy('gbif')
    adjustments.adjust_gbif(gbif)
    gbif_to_ott = adjustments.align_gbif(gbif, ott)
    align_and_merge(gbif_to_ott)

    # http://dx.doi.org/10.1016/j.ympev.2004.12.019 "Eccrinales
    # (Trichomycetes) are not fungi, but a clade of protists at the
    # early divergence of animals and fungi"
    debug_divisions('Enterobryus cingaloboli', gbif, ott)

    # Cylindrocarpon is now Neonectria
    cyl = gbif_to_ott.image(gbif.taxon('Cylindrocarpon', 'Ascomycota'))
    if cyl != None:
        cyl.setId('51754')

    # IRMNG
    irmng = load_taxonomy('irmng')
    adjustments.adjust_irmng(irmng)
    # The alignment is computed first, then hide_irmng runs, and only then
    # is the alignment merged.
    a = adjustments.align_irmng(irmng, ott)
    hide_irmng(irmng)
    align_and_merge(a)

    # Misc fixups
    report_on_h2007(h2007, h2007_to_ott, '**')

    get_default_extinct_info_from_gbif(
        os.path.join(management.resource_path('gbif'), 'paleo.tsv'), gbif,
        gbif_to_ott)
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport

# Module-level state shared by report(): a union taxonomy whose skeleton is
# the taxonomy loaded from tax/skel/.
# NOTE(review): presumably the accessibility setting mentioned at the top of
# this script is needed because UnionTaxonomy() is not a public
# constructor -- confirm.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)


def report(tax, tag):
    """Write the homonym report for *tax* to reports/<tag>-homonym-report.tsv.

    union.markDivisionsFromSkeleton is called on *tax* with the module-level
    skeleton before the report is generated.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    report_path = 'reports/' + tag + '-homonym-report.tsv'
    HomonymReport.homonymReport(tax, report_path)


# Only the first branch runs: it reports on the taxonomy in tax/ott/.
# The else branch is disabled code that would report on each source
# taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # NOTE(review): this calls loadFung(), the same loader as the 'if' report
    # on the next line, but tags the output 'worms'.  Looks like a copy-paste
    # slip where a WoRMS loader was intended -- confirm.
    report(taxonomies.loadFung(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
# counts number of taxa with rank=family in a given taxon

from org.opentreeoflife.taxa import Taxonomy, Rank
import argparse

# Command line: a single positional argument naming the taxon to inspect.
# The description shown by --help states what this script does: it counts
# family-rank taxa.
parser = argparse.ArgumentParser(
    description='count the taxa of rank family within a given taxon')
parser.add_argument('taxonname', help='name of taxon to count')
args = parser.parse_args()

name = args.taxonname
# NOTE(review): hard-coded, machine-specific location of the OTT dump.
ott_path = '/Users/karen/Documents/opentreeoflife/data/ott/ott2.9draft12/'
ott = Taxonomy.getTaxonomy(ott_path, 'ott')


def count_families(taxon):
    """Count the family-rank taxa among taxon.descendants(False).

    The name of every matching taxon is written on its own line to
    'families.txt' in the current directory; the file is overwritten on
    each call.

    Returns the number of family-rank taxa found.
    """
    count = 0
    # The with block closes the file on exit, so no explicit close() follows.
    with open('families.txt', 'w') as f:
        for t in taxon.descendants(False):
            if t.rank == Rank.FAMILY_RANK:
                f.write("{n}\n".format(n=t.name))
                count += 1
    return count


# Look up the taxon named on the command line and print its family count.
print "number families: ", count_families(ott.taxon(name))
示例#43
0
def load_taxonomy(spec):
    """Load the taxonomy for source *spec*.

    The location comes from access_head(spec); the second argument given to
    Taxonomy.getTaxonomy is the source's "ott_idspace" property.
    """
    location = access_head(spec)
    idspace = management.get_property(spec, "ott_idspace")
    return Taxonomy.getTaxonomy(location, idspace)
        small = row[0]
        big = row[1]
        small_id = row[2]

        small_tax = ott.maybeTaxon(small_id)
        if small_tax == None:
            small_tax = ott.maybeTaxon(small)
            if small_tax == None:
                print '** No unique taxon with id %s or name %s' % (small_id, small)
            else:
                print '** %s is %s, not %s' % (small, small_tax.id, small_id)
        else:
            look = ott.maybeTaxon(small, big)
            if look == None:
                print '** %s=%s not under %s' % (small, small_id, big)
                small_tax.show()
            elif look != small_tax:
                print '** The %s that descends from %s is %s, not %s' % (small, big, look.id, small_id)
            if small_tax.isHidden():
                print '%s (%s) is hidden' % (small, small_id)

    infile.close()

if __name__ == '__main__':
    taxname = 'tax/ott/'
    if len(sys.argv) > 1:
        taxname = sys.argv[1]
    else:
        print sys.argv
    check(Taxonomy.getTaxonomy(taxname))
示例#45
0
def retain_ids(ott, prev_path, by_qid):
    """Assign OTT ids to taxa in *ott*, re-using existing ids where possible.

    In order:
      1. ad hoc ids for NCBI taxa listed in
         ncbi_ott_assignments.ncbi_assignments_list;
      2. a hand-maintained table of forced (name, ancestor, id) assignments
         plus a few one-off fixes;
      3. ids carried over from the previous OTT version loaded from
         *prev_path* (after two edits to that taxonomy);
      4. retain_ids_from_list(ott, by_qid).

    Nothing is returned; *ott* is modified in place.
    """

    # ad hoc assignments specifically for NCBI taxa, based on NCBI id

    for (ncbi_id, ott_id, name) in ncbi_ott_assignments.ncbi_assignments_list:
        im = ott.lookupQid(QualifiedId('ncbi', ncbi_id))
        if im == None:
            print '* ncbi:%s not found in OTT - %s' % (ncbi_id, name)
        else:
            # A name mismatch is reported, but the id is added regardless.
            if im.name != name:
                print '* ncbi:%s name is %s, but expected %s' % (ncbi_id,
                                                                 im.name, name)
            im.addId(ott_id)

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the deprecated.tsv file after a
    # series of smasher runs.

    # Each entry is (taxon name, name of a containing taxon, OTT id).
    # Entries whose taxon is not found are silently skipped.
    for (inf, sup, id) in [
        ('Tipuloidea', 'Diptera', '722875'),
        ('Saccharomycetes', 'Saccharomycotina', '989999'),
        ('Phaeosphaeria', 'Ascomycota', '5486272'),
        ('Synedra acus', 'Eukaryota', '992764'),
        ('Hessea', 'Archaeplastida', '600099'),
        ('Morganella', 'Arthropoda', '6400'),
        ('Rhynchonelloidea', 'Rhynchonellidae', '5316010'),
        ('Morganella', 'Fungi', '973932'),
        ('Parmeliaceae', 'Lecanorales', '305904'),
        ('Cordana', 'Ascomycota', '946160'),
        ('Pseudofusarium', 'Ascomycota', '655794'),
        ('Marssonina', 'Dermateaceae', '372158'),  # ncbi:324777
        ('Marssonia', 'Lamiales', '5512668'),  # gbif:7268388
            # ('Gloeosporium', 'Pezizomycotina', '75019'),  # synonym for Marssonina
        ('Escherichia coli', 'Enterobacteriaceae', '474506'),  # ncbi:562
            # ('Dischloridium', 'Trichocomaceae', '895423'),
        ('Exaiptasia pallida', 'Cnidaria', '135923'),
        ('Choanoflagellida', 'Holozoa', '202765'),
        ('Billardiera', 'Lamiales', '798963'),
        ('Trachelomonas grandis', 'Bacteria', '58035'),  # study ot_91 Tr46259
        ('Hypomyzostoma', 'Myzostomida',
         '552744'),  # was incorrectly in Annelida
        ('Gyromitus', 'SAR', '696946'),
        ('Pseudogymnoascus destructans', 'Pezizomycotina', '428163'),
            # ('Amycolicicoccus subflavus', 'Mycobacteriaceae', '541768'),  # ncbi:639313
            # ('Pohlia', 'Foraminifera', '5325989')  - NO
        ('Pohlia', 'Amphibia', '5325989'),  # irmng:1311321
        ('Phyllanthus', 'Pentapetalae', '452944'),  # pg_25 @josephwb = 5509975
    ]:
        tax = ott.maybeTaxon(inf, sup)
        if tax != None:
            tax.setId(id)

    # NOTE(review): unlike the loop above, this lookup is not guarded.  It
    # relies on the Phyllanthus entry having been applied; presumably
    # ott.taxon() fails when the id is absent -- confirm.
    ott.taxon('452944').addId('5509975')

    # ott.taxon('474506') ...

    # NOTE(review): the result is used without a None check, whereas the
    # Trichosporon lookup below checks for None -- confirm which is right.
    ott.taxonThatContains('Rhynchonelloidea',
                          'Sphenarina').setId('795939')  # NCBI

    # Trichosporon is a mess, because it occurs 3 times in NCBI.
    trich = ott.taxonThatContains('Trichosporon', 'Trichosporon cutaneum')
    if trich != None:
        trich.setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # --------------------
    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getRawTaxonomy(prev_path, 'ott')

    # Edit the id source taxonomy to optimize id coverage

    # Kludge to undo lossage in OTT 2.9
    # When the first source id is from ncbi and the second from silva, the
    # leading ncbi id is dropped.
    for taxon in ids.taxa():
        if (len(taxon.sourceIds) >= 2 and taxon.sourceIds[0].prefix == "ncbi"
                and taxon.sourceIds[1].prefix == "silva"):
            taxon.sourceIds.remove(taxon.sourceIds[0])

    # OTT 2.9 has both Glaucophyta and Glaucophyceae...
    # this creates an ambiguity when aligning.
    # Need to review this; maybe they *should* be separate taxa.
    g1 = ids.maybeTaxon('Glaucophyta')
    g2 = ids.maybeTaxon('Glaucophyceae')
    if g1 != None and g2 != None and g1 != g2:
        g1.absorb(g2)

    # Assign old ids to nodes in the new version
    ott.carryOverIds(ids)  # Align & copy ids

    print '-- Checking id list'
    retain_ids_from_list(ott, by_qid)
import sys

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import AlignmentByName
from org.opentreeoflife.conflict import ConflictAnalysis

rug = Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug')

# Save the Ruggiero taxonomy as Newick text followed by a newline.
with open('scratch/Ruggiero.tre', 'w') as outfile:
    newick = rug.toNewick(False)
    outfile.write(newick)
    outfile.write('\n')

# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport 

# Module-level state shared by report(): a union taxonomy whose skeleton is
# the taxonomy loaded from tax/skel/.
# NOTE(review): presumably the accessibility setting mentioned at the top of
# this script is needed because UnionTaxonomy() is not a public
# constructor -- confirm.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)

def report(tax, tag):
    """Produce the homonym report for *tax*, naming the output file after *tag*.

    First calls union.markDivisionsFromSkeleton on *tax* with the
    module-level skeleton, then writes reports/<tag>-homonym-report.tsv.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    destination = 'reports/' + tag + '-homonym-report.tsv'
    HomonymReport.homonymReport(tax, destination)

# Only the first branch runs: it reports on the taxonomy in tax/ott/.
# The else branch is disabled code that would report on each source
# taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # NOTE(review): this calls loadFung(), the same loader as the 'if' report
    # on the next line, but tags the output 'worms'.  Looks like a copy-paste
    # slip where a WoRMS loader was intended -- confirm.
    report(taxonomies.loadFung(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
                small_node = small_nodes[0]
                small_tax = small_node.taxon()
                if small_id != '' and small_tax != small_id_tax:
                    print '** The id of %s in %s is %s (expected %s)' % (small, big, small_tax.id, small_id)
                    show_interloper(small_node, small_id, ott)

            else:
                print '** More than one taxon named %s is in %s' % (small, big)
                print '  ', small_nodes

    infile.close()

def show_interloper(small_node, small_id, ott):
    if small_node != small_node.taxon():
        print '   %s is a synonym for %s' % (small_node.name, small_node.taxon().name)
    probe = ott.lookupId(small_id)
    if probe != None:
        print '   Id %s belongs to %s' % (small_id, probe)
    else:
        print '   (There is no taxon with id %s)' % small_id

# Command-line entry point.  With exactly two arguments they are taken as
# the inclusions file and the taxonomy location; with any other argument
# count the defaults below are used and whatever was supplied is ignored.
if __name__ == '__main__':
    if len(sys.argv) == 3:
        inclusions = sys.argv[1]
        taxname = sys.argv[2]
    else:
        # This message is also printed when no arguments were given at all.
        print 'ignoring supplied args', sys.argv
        inclusions = 'inclusions.csv'
        taxname = 'tax/ott/'
    check(inclusions, Taxonomy.getTaxonomy(taxname, 'ott'))