Exemplo n.º 1
0
def genTaxTree(resolver, namesdict, logger, taxonomy=None, draw=False):
    """Return a Phylo tree built from a TaxonNamesResolver instance.

    Args:
        resolver: object whose ``retrieve(key)`` returns parallel lists for
            'classification_path_ranks', 'query_name' and
            'classification_path'.
        namesdict: dict keyed by name; only query names present in it
            (after whitespace has been replaced by '_') are kept.
        logger: logger-like object; the names that were not found in
            ``namesdict`` are reported through ``logger.debug``.
        taxonomy: optional sequence forwarded to ``TaxDict``. When given,
            its length + 1 is used as the outgroup branch length.
        draw: if true, print the tree with ``Phylo.draw_ascii``.

    Returns:
        The tree parsed by ``Phylo.read`` from the generated Newick string,
        with an additional tip named 'outgroup'.
    """
    ranks = resolver.retrieve('classification_path_ranks')
    qnames = resolver.retrieve('query_name')
    lineages = resolver.retrieve('classification_path')
    # replace whitespace with '_' for taxon tree (raw string: '\s' is a regex
    # class, not a Python string escape)
    qnames = [re.sub(r"\s", "_", name) for name in qnames]
    # partition query positions into resolved (by index) and unresolved
    # (by name) in a single pass
    resolved_idx = []
    unresolved_names = []
    for i, name in enumerate(qnames):
        if name in namesdict:
            resolved_idx.append(i)
        else:
            unresolved_names.append(name)
    ranks = [ranks[i] for i in resolved_idx]
    lineages = [lineages[i] for i in resolved_idx]
    idents = [qnames[i] for i in resolved_idx]
    # each name is prefixed by a space, exactly as the message was built before
    statement = "Unresolved names: " + "".join(
        " " + name for name in unresolved_names)
    logger.debug(statement)
    # make taxdict
    taxdict = TaxDict(idents=idents, ranks=ranks, lineages=lineages,
                      taxonomy=taxonomy)
    # make treestring
    treestring = taxTree(taxdict)
    # outgroup branch length; 22 is default_taxonomy + 1 in tnr
    d = len(taxonomy) + 1 if taxonomy else 22
    # add outgroup: drop the last character of the tree string (presumably
    # the terminating ';' -- confirm against taxTree) and re-wrap it
    treestring = '({0},outgroup:{1});'.format(treestring[:-1], float(d))
    tree = Phylo.read(StringIO(treestring), "newick")
    if draw:
        Phylo.draw_ascii(tree)
    return tree
Exemplo n.º 2
0
# EXPLORE TAXDICT
# a dictionary for each ident with: 'lineage', 'taxref', 'ident', 'cident' and
#  'rank' (+ 'extra')
# NOTE(review): the keys used below must match the idents taxdict was built
#  with -- confirm against the code that creates taxdict
# the lineage
taxdict['Homo sapiens']['lineage']  # N.B. not all lineages are named, ''
# the ident is the same format as lineage e.g. it could be an ID
taxdict['Homo sapiens']['ident']
# the 'cident' (Contextual Ident), the highest named taxonomic group unique to
#  this ident among all other idents
taxdict['Arabidopsis thaliana']['cident']  # A. thaliana is the only plant
# the 'taxref', a holder of 'ident' and taxonomic position. Requires printing
#  e.g. C. tytonis could only be resolved to the genus level (22/01/2015)
print(taxdict['Chlorotalpa tytonis']['taxref'])
# check the taxonomy
print(taxdict.taxonomy)
# check the hierarchy, a dictionary of taxrefs ranked and grouped in the form:
#  {'rank':[([taxref1, taxref2, ....],'lineage1'),
#           ([taxref3, taxref4, ....],'lineage2'), ....]}
print(taxdict.hierarchy)
# we've also added an extra data slot
taxdict['Homo sapiens']['extra']

# CREATE TREE
# use the taxdict to create a Newick string
treestring = taxTree(taxdict)

# SAVE TREE
# 'outfile' rather than 'file', which shadows the Python 2 builtin
with open('example.tre', 'w') as outfile:
    outfile.write(treestring)
Exemplo n.º 3
0
# EXPLORE TAXDICT
# a dictionary for each ident with: 'lineage', 'taxref', 'ident', 'cident' and
#  'rank' (+ 'extra')
# NOTE(review): the keys used below must match the idents taxdict was built
#  with -- confirm against the code that creates taxdict
# the lineage
taxdict['Homo sapiens']['lineage']  # N.B. not all lineages are named, ''
# the ident is the same format as lineage e.g. it could be an ID
taxdict['Homo sapiens']['ident']
# the 'cident' (Contextual Ident), the highest named taxonomic group unique to
#  this ident among all other idents
taxdict['Arabidopsis thaliana']['cident']  # A. thaliana is the only plant
# the 'taxref', a holder of 'ident' and taxonomic position. Requires printing
#  e.g. C. tytonis could only be resolved to the genus level (22/01/2015)
print(taxdict['Chlorotalpa tytonis']['taxref'])
# check the taxonomy
print(taxdict.taxonomy)
# check the hierarchy, a dictionary of taxrefs ranked and grouped in the form:
#  {'rank':[([taxref1, taxref2, ....],'lineage1'),
#           ([taxref3, taxref4, ....],'lineage2'), ....]}
print(taxdict.hierarchy)
# we've also added an extra data slot
taxdict['Homo sapiens']['extra']

# CREATE TREE
# use the taxdict to create a Newick string
treestring = taxTree(taxdict)

# SAVE TREE
# 'outfile' rather than 'file', which shadows the Python 2 builtin
with open('example.tre', 'w') as outfile:
    outfile.write(treestring)