def add_taxon2(taxon):
    """Resolve *taxon* and store its classification path in the local database.

    The name is looked up with ``Resolver``. Every entry of the first
    returned classification path that has a non-empty rank is stored as a
    ``Taxon`` whose parent is the previously stored entry; the chain starts
    at a ``Taxon`` named ``'root'``, which is created if it does not exist.
    The resolver state, the path and the ranks are printed along the way.

    Args:
        taxon: Name to resolve.

    Returns:
        ``"<taxon> not found by Global Names Resolver."`` if the resolver
        raised; otherwise ``None``.
    """
    try:
        resolver = Resolver(terms=[taxon])
        resolver.main()
    except Exception:
        # Any resolver failure is reported as "not found". Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return taxon + ' not found by Global Names Resolver.'
    print(resolver.__dict__)
    taxon_list = resolver.retrieve('classification_path')[0]
    print(taxon_list)
    rank_list = resolver.retrieve('classification_path_ranks')[0]
    print(rank_list)

    # All stored lineages hang off a single shared root node.
    node, _ = Taxon.objects.get_or_create(name='root')

    # assumes taxon_list and rank_list are parallel lists -- TODO confirm
    for name, rank in zip(taxon_list, rank_list):
        if rank == '':
            # Entries without a rank are not stored.
            continue
        try:
            node = Taxon.objects.create(name=name, rank=rank, parent=node)
            print(name + ' added to local database.')
        except IntegrityError:
            # Creation was rejected, which is treated as "already stored"
            # (presumably a uniqueness constraint on name -- confirm against
            # the model). Reuse the stored row as parent of the next entry.
            node = Taxon.objects.get(name=name)
            print(name + ' not added to local database; already there.')
def add_taxon(taxon):
    """Add *taxon* and its classification path to the local database.

    If a ``Taxon`` named *taxon* is already stored nothing is changed.
    Otherwise the name is resolved with ``Resolver`` and the first returned
    classification path is walked in order: entries that are already stored
    are reused, missing ones are created with the previous entry as parent
    (the first entry gets ``parent=None``).

    Args:
        taxon: Name to add.

    Returns:
        ``"<taxon> is already in local database."`` if the name is stored,
        ``"<taxon> not found by Global Names Resolver."`` if the resolver
        raised, otherwise the tuple ``(taxon_list, rank_list)``.
    """
    try:
        Taxon.objects.get(name=taxon)
    except Taxon.DoesNotExist:
        pass
    else:
        return taxon + ' is already in local database.'

    try:
        resolver = Resolver(terms=[taxon])
        resolver.main()
    except Exception:
        # Any resolver failure is reported as "not found". Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return taxon + ' not found by Global Names Resolver.'

    taxon_list = resolver.retrieve('classification_path')[0]
    rank_list = resolver.retrieve('classification_path_ranks')[0]

    # ``node`` always holds the most recently visited path entry so that the
    # next missing entry is attached to the right parent. It starts as None
    # so that the first entry of the path becomes a parentless node.
    node = None
    # assumes taxon_list and rank_list are parallel lists -- TODO confirm
    for name, rank in zip(taxon_list, rank_list):
        try:
            # ``get`` returns a single instance; it must not be subscripted.
            node = Taxon.objects.get(name=name)
        except Taxon.DoesNotExist:
            node = Taxon.objects.create(name=name, rank=rank, parent=node)
    return (taxon_list, rank_list)
def run(wd=os.getcwd(), logger=logging.getLogger('')):
    """Run stage 1 (names resolution) inside the working directory *wd*.

    Reads ``tempfiles/paradict.p`` and ``tempfiles/terms.p`` from *wd*,
    resolves the terms with ``Resolver`` against the NCBI datasource, builds
    the names dictionary, picks an outgroup and generates a taxonomic tree.

    Outputs:
        * ``tempfiles/namesdict.p`` and ``tempfiles/allrankids.p`` (pickles)
        * the names dictionary written by ``ntools.writeNamesDict`` and
          ``taxontree.tre`` (newick) in ``1_names`` (created if missing)

    The ``resolved_names`` directory is removed afterwards; note that this
    path is relative to the process's current directory, not to *wd*.

    Args:
        wd: Working directory. NOTE: the default is evaluated once, when the
            function is defined, not on each call.
        logger: Logger used for progress messages; also installed as
            ``ntools.logger``.

    Raises:
        TooFewSpeciesError: If fewer than ``minspecies`` terms were given, or
            fewer than ``minspecies`` query names come back from the resolver.
    """
    # PRINT STAGE
    logger.info("Stage 1: Names resolution")

    # DIRS
    outdir = os.path.join(wd, '1_names')
    temp_dir = os.path.join(wd, 'tempfiles')
    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # INPUT
    # NOTE: pickle.load can execute arbitrary code; these files are
    # presumably written by an earlier stage of the same run -- never point
    # this at untrusted data.
    with open(os.path.join(temp_dir, "paradict.p"), "rb") as handle:
        paradict = pickle.load(handle)
    with open(os.path.join(temp_dir, "terms.p"), "rb") as handle:
        terms = pickle.load(handle)

    # PARAMETERS
    outgroupid = paradict["outgroupid"]
    ntools.etools.Entrez.email = paradict["email"]
    minspecies = int(paradict["minspecies"])
    taxonomy = paradict["taxonomic_constraint"].split('-')
    ntools.logger = logger

    # PROCESS
    logger.info('Searching for taxids ....')
    logger.info('------TaxonNamesResolver:Start------')
    try:
        parentid = paradict["parentid"]
    except KeyError:
        # "parentid" is optional; False is passed on when it is absent.
        parentid = False
    if len(terms) < minspecies:
        raise TooFewSpeciesError
    resolver = Resolver(terms=terms, datasource="NCBI", taxon_id=parentid,
                        logger=logger)
    resolver.main()
    if len(resolver.retrieve('query_name')) < minspecies:
        raise TooFewSpeciesError
    logger.info('------TaxonNamesResolver:End------')

    logger.info("Generating names dictionary ....")
    namesdict, allrankids, parentid = ntools.genNamesDict(
        resolver=resolver, parentid=parentid, logger=logger)

    logger.info("Finding an outgroup ....")
    namesdict = ntools.getOutgroup(namesdict=namesdict, parentid=parentid,
                                   outgroupid=outgroupid, logger=logger)
    # add outgroup ids to allrankids
    allrankids.extend(namesdict['outgroup']['txids'])

    logger.info('Generating taxonomic tree ....')
    taxontree = ntools.genTaxTree(resolver=resolver, namesdict=namesdict,
                                  taxonomy=taxonomy, logger=logger)

    # OUTPUT
    # remove temp TNR folder
    shutil.rmtree("resolved_names")
    # write out changes to hidden pickled files
    with open(os.path.join(temp_dir, "namesdict.p"), "wb") as handle:
        pickle.dump(namesdict, handle)
    with open(os.path.join(temp_dir, "allrankids.p"), "wb") as handle:
        pickle.dump(allrankids, handle)
    # write namesdict as csv
    ntools.writeNamesDict(outdir, namesdict)
    # write taxon tree
    ntools.Phylo.write(taxontree, os.path.join(outdir, "taxontree.tre"),
                       "newick")

    # FINISH MESSAGE
    logger.info('Stage finished. Resolved [{0}] names including outgroup.'.
                format(len(namesdict.keys())))
print('No names file provided!') print('Type `TaxonNamesResolver.py -h` for help.') sys.exit() if not os.path.isfile(args.names): print('[{0}] could not be found!'.format(args.names)) sys.exit() print('\n' + description + '\n') if args.datasource: datasource = args.datasource else: datasource = 'NCBI' # simple logging, no levels, duplicate to console if verbose logfile = 'log.txt' logger = logging.getLogger('') logger.setLevel(logging.INFO) loghandler = logging.FileHandler(logfile, 'a') loghandler.setFormatter(logging.Formatter('%(message)s')) logger.addHandler(loghandler) if args.verbose: console = logging.StreamHandler() console.setFormatter(logging.Formatter('%(message)s')) logger.addHandler(console) # log system info logSysInfo() resolver = Resolver(args.names, datasource, args.taxonid) resolver.main() resolver.write() logEndTime() if not args.verbose: print('\nComplete\n')
# PACKAGES
from taxon_names_resolver import Resolver
from taxon_names_resolver import TaxDict
from taxon_names_resolver import taxTree

# EXAMPLE NAMES
terms = [
    'Homo sapiens',
    'Gorilla gorilla',
    'Pongo pongo',
    'Macca mulatta',
    'Mus musculus',
    'Ailuropoda melanoleuca',
    'Ailurus fulgens',
    'Chlorotalpa tytonis',
    'Arabidopsis thaliana',
    'Bacillus subtilus'
]

# RESOLVE
# pass the terms, the datasource and the logger (optional)
# NOTE(review): `logger` is not created in this snippet -- presumably it is
# set up earlier in the file; confirm.
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
# guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
# term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
# lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
# species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s
# Finish configuring the console handler and attach it to the logger that is
# passed to the Resolver below.
console.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(console)

# PACKAGES
from taxon_names_resolver import Resolver
from taxon_names_resolver import TaxDict
from taxon_names_resolver import taxTree

# EXAMPLE NAMES
terms = ['Homo sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta',
         'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens',
         'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus']

# RESOLVE
# pass the terms, the datasource and the logger (optional)
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
# guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
# term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
# lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
# species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s