Example #1
def add_taxon2(taxon):
    # Assumes: from taxon_names_resolver import Resolver,
    # from django.db import IntegrityError, and a Django Taxon model.
    try:
        resolver = Resolver(terms=[taxon])
        resolver.main()
    except Exception:
        return taxon + ' not found by Global Names Resolver.'

    print(resolver.__dict__)

    # Lineage names and their corresponding ranks for the single query term.
    taxon_list = resolver.retrieve('classification_path')[0]
    print(taxon_list)
    rank_list = resolver.retrieve('classification_path_ranks')[0]
    print(rank_list)

    # Make sure a root node exists to anchor the tree.
    try:
        node = Taxon.objects.get(name='root')
    except Taxon.DoesNotExist:
        node = Taxon.objects.create(name='root')

    # Walk down the lineage, creating each missing node under the current parent.
    for i in range(len(taxon_list)):
        if rank_list[i] != '':
            try:
                node = Taxon.objects.create(name=taxon_list[i], rank=rank_list[i], parent=node)
                print(taxon_list[i] + ' added to local database.')
            except IntegrityError:
                node = Taxon.objects.get(name=taxon_list[i])
                print(taxon_list[i] + ' not added to local database; already there.')
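
A minimal usage sketch, assuming add_taxon2 is defined as above inside a configured Django project (e.g. run from "manage.py shell"); the species names here are illustrative only.

# Illustrative only: populate the local Taxon tree from a hypothetical name list.
for name in ['Ailurus fulgens', 'Ailuropoda melanoleuca']:
    message = add_taxon2(name)
    if message:  # a string is returned only when resolution fails
        print(message)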
Example #2
def add_taxon(taxon):
    # Skip resolution entirely if the taxon is already stored locally.
    try:
        Taxon.objects.get(name=taxon)
        return taxon + ' is already in local database.'
    except Taxon.DoesNotExist:
        try:
            resolver = Resolver(terms=[taxon])
            resolver.main()
        except Exception:
            return taxon + ' not found by Global Names Resolver.'

    # Lineage names and their corresponding ranks for the single query term.
    taxon_list = resolver.retrieve('classification_path')[0]
    rank_list = resolver.retrieve('classification_path_ranks')[0]

    # Create any missing lineage nodes, each parented to the node above it.
    for i in range(len(taxon_list)):
        try:
            node = Taxon.objects.get(name=taxon_list[i])
        except Taxon.DoesNotExist:
            if i == 0:
                node = Taxon.objects.create(name=taxon_list[i], rank=rank_list[i], parent=None)
            else:
                node = Taxon.objects.create(name=taxon_list[i], rank=rank_list[i], parent=node)
    return (taxon_list, rank_list)
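
Unlike add_taxon2 above, add_taxon returns the resolved lineage and ranks on success and a message string otherwise. A short, illustrative sketch of consuming that return value (the query name is just an example):

# Illustrative only: add one taxon and report what was resolved.
result = add_taxon('Ailurus fulgens')
if isinstance(result, str):
    print(result)  # already in the local database, or not resolvable
else:
    taxon_list, rank_list = result
    for name, rank in zip(taxon_list, rank_list):
        print(rank, name)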
Example #3
def run(wd=os.getcwd(), logger=logging.getLogger('')):
    # PRINT STAGE
    logger.info("Stage 1: Names resolution")

    # DIRS
    outdir = os.path.join(wd, '1_names')
    temp_dir = os.path.join(wd, 'tempfiles')
    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # INPUT
    with open(os.path.join(temp_dir, "paradict.p"), "rb") as file:
        paradict = pickle.load(file)
    with open(os.path.join(temp_dir, "terms.p"), "rb") as file:
        terms = pickle.load(file)

    # PARAMETERS
    outgroupid = paradict["outgroupid"]
    ntools.etools.Entrez.email = paradict["email"]
    minspecies = int(paradict["minspecies"])
    taxonomy = paradict["taxonomic_constraint"]
    taxonomy = taxonomy.split('-')
    ntools.logger = logger

    # PROCESS
    logger.info('Searching for taxids ....')
    logger.info('------TaxonNamesResolver:Start------')
    try:
        parentid = paradict["parentid"]
    except KeyError:
        parentid = False
    if len(terms) < minspecies:
        raise TooFewSpeciesError
    resolver = Resolver(terms=terms, datasource="NCBI", taxon_id=parentid,
                        logger=logger)
    resolver.main()
    if len(resolver.retrieve('query_name')) < minspecies:
        raise TooFewSpeciesError
    logger.info('------TaxonNamesResolver:End------')
    logger.info("Generating names dictionary ....")
    namesdict, allrankids, parentid = ntools.genNamesDict(resolver=resolver,
                                                          parentid=parentid,
                                                          logger=logger)
    logger.info("Finding an outgroup ....")
    namesdict = ntools.getOutgroup(namesdict=namesdict, parentid=parentid,
                                   outgroupid=outgroupid, logger=logger)
    # add outgroup ids to allrankids
    allrankids.extend(namesdict['outgroup']['txids'])
    logger.info('Generating taxonomic tree ....')
    taxontree = ntools.genTaxTree(resolver=resolver, namesdict=namesdict,
                                  taxonomy=taxonomy, logger=logger)

    # OUTPUT
    # remove temp TNR folder
    shutil.rmtree("resolved_names")
    # write out changes to hidden pickled files
    with open(os.path.join(temp_dir, "namesdict.p"), "wb") as file:
        pickle.dump(namesdict, file)
    with open(os.path.join(temp_dir, "allrankids.p"), "wb") as file:
        pickle.dump(allrankids, file)
    # write namesdict as csv
    ntools.writeNamesDict(outdir, namesdict)
    # write taxon tree
    ntools.Phylo.write(taxontree, os.path.join(outdir, "taxontree.tre"),
                       "newick")

    # FINISH MESSAGE
    logger.info('Stage finished. Resolved [{0}] names including outgroup.'.
                format(len(namesdict.keys())))
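
A hedged sketch of how this stage might be invoked; it assumes an earlier stage has already written the pickled inputs ("paradict.p" and "terms.p") into a tempfiles/ directory under the working directory, and the logger name and configuration here are purely illustrative.

import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('names_stage')  # hypothetical logger name
run(wd=os.getcwd(), logger=logger)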
Example #4
from taxon_names_resolver import Resolver

# EXAMPLE NAMES
terms = [
    'Homo sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta',
    'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens',
    'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus'
]

# RESOLVE
# pass the terms, the datasource and the logger (optional)
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
#  guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
#  term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
#  lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
#  species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s;
# it can be anything, as long as it is in the same order as the terms
extra = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
# create a taxonomy specifying the names and order of 'ranks'. N.B. this is the
#  default and is based on NCBI's taxonomy.
taxonomy = [
    'subspecies', 'species', 'subgenus', 'genus', 'tribe', 'subfamily',
Example #5
from taxon_names_resolver import Resolver, taxTree

# EXAMPLE NAMES
terms = ['Homo sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta',
         'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens',
         'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus']

# RESOLVE
# pass the terms, the datasource and the logger (optional)
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
#  guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
#  term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
#  lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
#  species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s;
# it can be anything, as long as it is in the same order as the terms
extra = [1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
# create a taxonomy specifying the names and order of 'ranks'. N.B. this is the
#  default and is based on NCBI's taxonomy.
taxonomy = ['subspecies', 'species', 'subgenus', 'genus', 'tribe', 'subfamily',
            'family', 'superfamily', 'parvorder', 'infraorder', 'suborder',