Пример #1
0
def add_taxon2(taxon):
    """Resolve `taxon` and store its classification path in the local database.

    The name is looked up with ``Resolver``. Every entry of the first returned
    classification path that has a non-empty rank is stored as a ``Taxon``,
    parented on the previously stored entry; the chain starts at a ``Taxon``
    named ``'root'``, which is created if it does not exist yet.

    Args:
        taxon: Name to resolve.

    Returns:
        A "not found" message string if the resolver raises or yields no
        classification path, otherwise ``None``.
    """
    not_found = taxon + ' not found by Global Names Resolver.'

    try:
        resolver = Resolver(terms=[taxon])
        resolver.main()
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return not_found

    print(resolver.__dict__)

    # An empty result would make the ``[0]`` lookups below raise IndexError;
    # report it the same way as a failed resolution instead.
    paths = resolver.retrieve('classification_path')
    if not paths:
        return not_found
    taxon_list = paths[0]
    print(taxon_list)

    rank_paths = resolver.retrieve('classification_path_ranks')
    if not rank_paths:
        return not_found
    rank_list = rank_paths[0]
    print(rank_list)

    # Every stored lineage hangs off a single 'root' node.
    node, _ = Taxon.objects.get_or_create(name='root')

    for name, rank in zip(taxon_list, rank_list):
        if rank == '':
            # Entries without a rank are skipped; their children attach to
            # the last stored ancestor.
            continue
        try:
            node = Taxon.objects.create(name=name, rank=rank, parent=node)
        except IntegrityError:
            # Treated as "already stored": reuse the existing row as parent.
            node = Taxon.objects.get(name=name)
            print(name + ' not added to local database; already there.')
        else:
            print(name + ' added to local database.')
Пример #2
0
def add_taxon(taxon):
    """Resolve `taxon` and store its classification path in the local database.

    Nothing is resolved if a ``Taxon`` with this name already exists.
    Otherwise the name is looked up with ``Resolver`` and every entry of the
    first returned classification path is fetched or created as a ``Taxon``.
    The first entry is created without a parent; each later entry is created
    with the preceding entry of the path as its parent.

    Args:
        taxon: Name to resolve.

    Returns:
        A message string if the taxon is already stored, or if the resolver
        raises or yields no classification path; otherwise the tuple
        ``(taxon_list, rank_list)`` that was stored.
    """
    try:
        Taxon.objects.get(name=taxon)
    except Taxon.DoesNotExist:
        pass
    else:
        return taxon + ' is already in local database.'

    not_found = taxon + ' not found by Global Names Resolver.'
    try:
        resolver = Resolver(terms=[taxon])
        resolver.main()
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return not_found

    # An empty result would make the ``[0]`` lookups raise IndexError;
    # report it the same way as a failed resolution instead.
    paths = resolver.retrieve('classification_path')
    rank_paths = resolver.retrieve('classification_path_ranks')
    if not paths or not rank_paths:
        return not_found
    taxon_list = paths[0]
    rank_list = rank_paths[0]

    # ``node`` always holds the previous entry of the path (existing or newly
    # created) so it can serve as the parent of the next one. The original
    # indexed the result of ``get()`` (a TypeError on a model instance) and
    # never kept the created row, leaving ``node`` stale or unbound.
    node = None
    for name, rank in zip(taxon_list, rank_list):
        node, _ = Taxon.objects.get_or_create(
            name=name, defaults={'rank': rank, 'parent': node})
    return (taxon_list, rank_list)
Пример #3
0
def run(wd=None, logger=logging.getLogger('')):
    """Run stage 1 (names resolution) inside working directory `wd`.

    Reads the pickled ``paradict.p`` and ``terms.p`` from ``<wd>/tempfiles``,
    resolves the terms against the NCBI datasource with ``Resolver``, then
    uses ``ntools`` to build the names dictionary, pick an outgroup and
    generate the taxonomic tree. Results are written as ``namesdict.p`` and
    ``allrankids.p`` in ``<wd>/tempfiles`` and as a names file plus
    ``taxontree.tre`` in ``<wd>/1_names``.

    Args:
        wd: Working directory. Defaults to the current working directory at
            call time.
        logger: Logger for progress messages; also assigned to
            ``ntools.logger`` and passed to ``Resolver`` and ``ntools``.

    Raises:
        TooFewSpeciesError: If fewer than ``minspecies`` terms were loaded, or
            fewer than ``minspecies`` query names can be retrieved from the
            resolver after it has run.
    """
    if wd is None:
        # Resolved per call: a ``wd=os.getcwd()`` default is evaluated only
        # once, when the module is imported.
        wd = os.getcwd()

    # PRINT STAGE
    logger.info("Stage 1: Names resolution")

    # DIRS
    outdir = os.path.join(wd, '1_names')
    temp_dir = os.path.join(wd, 'tempfiles')
    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # INPUT
    # NOTE(review): pickle.load can execute arbitrary code from the file.
    # These are presumably written by an earlier stage -- confirm they are
    # never user-supplied.
    with open(os.path.join(temp_dir, "paradict.p"), "rb") as handle:
        paradict = pickle.load(handle)
    with open(os.path.join(temp_dir, "terms.p"), "rb") as handle:
        terms = pickle.load(handle)

    # PARAMETERS
    outgroupid = paradict["outgroupid"]
    ntools.etools.Entrez.email = paradict["email"]
    minspecies = int(paradict["minspecies"])
    taxonomy = paradict["taxonomic_constraint"].split('-')
    ntools.logger = logger
    # "parentid" is optional; False is handed to Resolver when it is absent.
    try:
        parentid = paradict["parentid"]
    except KeyError:
        parentid = False

    # PROCESS
    logger.info('Searching for taxids ....')
    logger.info('------TaxonNamesResolver:Start------')
    if len(terms) < minspecies:
        raise TooFewSpeciesError
    resolver = Resolver(terms=terms, datasource="NCBI", taxon_id=parentid,
                        logger=logger)
    resolver.main()
    if len(resolver.retrieve('query_name')) < minspecies:
        raise TooFewSpeciesError
    logger.info('------TaxonNamesResolver:End------')
    logger.info("Generating names dictionary ....")
    namesdict, allrankids, parentid = ntools.genNamesDict(resolver=resolver,
                                                          parentid=parentid,
                                                          logger=logger)
    logger.info("Finding an outgroup ....")
    namesdict = ntools.getOutgroup(namesdict=namesdict, parentid=parentid,
                                   outgroupid=outgroupid, logger=logger)
    # add outgroup ids to allrankids
    allrankids.extend(namesdict['outgroup']['txids'])
    logger.info('Generating taxonomic tree ....')
    taxontree = ntools.genTaxTree(resolver=resolver, namesdict=namesdict,
                                  taxonomy=taxonomy, logger=logger)

    # OUTPUT
    # remove temp TNR folder (path is relative to the process's current
    # directory, not to `wd`)
    shutil.rmtree("resolved_names")
    # write out changes to hidden pickled files
    with open(os.path.join(temp_dir, "namesdict.p"), "wb") as handle:
        pickle.dump(namesdict, handle)
    with open(os.path.join(temp_dir, "allrankids.p"), "wb") as handle:
        pickle.dump(allrankids, handle)
    # write namesdict as csv
    ntools.writeNamesDict(outdir, namesdict)
    # write taxon tree
    ntools.Phylo.write(taxontree, os.path.join(outdir, "taxontree.tre"),
                       "newick")

    # FINISH MESSAGE
    logger.info('Stage finished. Resolved [{0}] names including outgroup.'.
                format(len(namesdict.keys())))
        print('No names file provided!')
        print('Type `TaxonNamesResolver.py -h` for help.')
        sys.exit()
    if not os.path.isfile(args.names):
        print('[{0}] could not be found!'.format(args.names))
        sys.exit()
    print('\n' + description + '\n')
    if args.datasource:
        datasource = args.datasource
    else:
        datasource = 'NCBI'
    # simple logging, no levels, duplicate to console if verbose
    logfile = 'log.txt'
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    loghandler = logging.FileHandler(logfile, 'a')
    loghandler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(loghandler)
    if args.verbose:
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(console)
    # log system info
    logSysInfo()
    resolver = Resolver(args.names, datasource, args.taxonid)
    resolver.main()
    resolver.write()
    logEndTime()
    if not args.verbose:
        print('\nComplete\n')
        print('No names file provided!')
        print('Type `TaxonNamesResolver.py -h` for help.')
        sys.exit()
    if not os.path.isfile(args.names):
        print('[{0}] could not be found!'.format(args.names))
        sys.exit()
    print('\n' + description + '\n')
    if args.datasource:
        datasource = args.datasource
    else:
        datasource = 'NCBI'
    # simple logging, no levels, duplicate to console if verbose
    logfile = 'log.txt'
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    loghandler = logging.FileHandler(logfile, 'a')
    loghandler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(loghandler)
    if args.verbose:
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(console)
    # log system info
    logSysInfo()
    resolver = Resolver(args.names, datasource, args.taxonid)
    resolver.main()
    resolver.write()
    logEndTime()
    if not args.verbose:
        print('\nComplete\n')
Пример #6
0
# Example: resolve a list of taxon names and pull out the per-name lineages
# and ranks from the resolver.

# PACKAGES
from taxon_names_resolver import Resolver
from taxon_names_resolver import TaxDict
from taxon_names_resolver import taxTree

# EXAMPLE NAMES
# 'Homo sapiens' had been mangled to 'H**o sapiens' by a text filter, which
# would have sent the wrong query string to the resolver.
terms = [
    'Homo sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta',
    'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens',
    'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus'
]

# RESOLVE
# pass the terms, the datasource and the logger (optional)
# NOTE(review): `logger` is not defined in the visible part of this script;
# it has to be set up before this point -- confirm.
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
#  guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
#  term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
#  lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
#  species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s
Пример #7
0
# NOTE(review): `console` and `logger` are created earlier in this script,
# outside the visible part -- confirm.
console.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(console)

# PACKAGES
from taxon_names_resolver import Resolver
from taxon_names_resolver import TaxDict
from taxon_names_resolver import taxTree

# EXAMPLE NAMES
# 'Homo sapiens' had been mangled to 'H**o sapiens' by a text filter, which
# would have sent the wrong query string to the resolver.
terms = ['Homo sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta',
         'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens',
         'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus']

# RESOLVE
# pass the terms, the datasource and the logger (optional)
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
#  guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
#  term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
#  lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
#  species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s