print('No names file provided!')
        print('Type `TaxonNamesResolver.py -h` for help.')
        sys.exit()
    if not os.path.isfile(args.names):
        print('[{0}] could not be found!'.format(args.names))
        sys.exit()
    print('\n' + description + '\n')
    if args.datasource:
        datasource = args.datasource
    else:
        datasource = 'NCBI'
    # simple logging, no levels, duplicate to console if verbose
    logfile = 'log.txt'
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)
    loghandler = logging.FileHandler(logfile, 'a')
    loghandler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(loghandler)
    if args.verbose:
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(console)
    # log system info
    logSysInfo()
    resolver = Resolver(args.names, datasource, args.taxonid)
    resolver.main()
    resolver.write()
    logEndTime()
    if not args.verbose:
        print('\nComplete\n')
Пример #2
0
def run(wd=os.getcwd(), logger=logging.getLogger('')):
    """Run stage 1 (names resolution) inside the working directory *wd*.

    Loads the pickled parameter dictionary and search terms from
    ``<wd>/tempfiles``, resolves the terms against the NCBI datasource with
    ``Resolver``, builds the names dictionary, adds an outgroup and generates
    a taxonomic tree. The names dictionary and ``taxontree.tre`` are written
    to ``<wd>/1_names``; ``namesdict.p`` and ``allrankids.p`` are written to
    ``<wd>/tempfiles``.

    Args:
        wd: Working directory holding the ``tempfiles`` folder. Note that the
            default is evaluated once, when the function is defined, not on
            each call.
        logger: Logger that receives progress messages; it is also handed to
            ``ntools`` and ``Resolver``.

    Raises:
        TooFewSpeciesError: If fewer than ``minspecies`` terms were supplied,
            or fewer than ``minspecies`` names remain after resolution.
    """
    # PRINT STAGE
    logger.info("Stage 1: Names resolution")

    # DIRS
    outdir = os.path.join(wd, '1_names')
    temp_dir = os.path.join(wd, 'tempfiles')
    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # INPUT
    # NOTE(review): pickle.load can execute arbitrary code from a tampered
    #  file -- presumably these files are written by an earlier stage of the
    #  same pipeline; confirm they never come from an untrusted source.
    with open(os.path.join(temp_dir, "paradict.p"), "rb") as infile:
        paradict = pickle.load(infile)
    with open(os.path.join(temp_dir, "terms.p"), "rb") as infile:
        terms = pickle.load(infile)

    # PARAMETERS
    outgroupid = paradict["outgroupid"]
    ntools.etools.Entrez.email = paradict["email"]
    minspecies = int(paradict["minspecies"])
    taxonomy = paradict["taxonomic_constraint"]
    taxonomy = taxonomy.split('-')
    ntools.logger = logger

    # PROCESS
    logger.info('Searching for taxids ....')
    logger.info('------TaxonNamesResolver:Start------')
    # parentid is optional; only a missing key means "not given", any other
    #  failure (or a keyboard interrupt) must not be silently swallowed
    try:
        parentid = paradict["parentid"]
    except KeyError:
        parentid = False
    if len(terms) < minspecies:
        raise TooFewSpeciesError
    resolver = Resolver(terms=terms, datasource="NCBI", taxon_id=parentid,
                        logger=logger)
    resolver.main()
    # names that failed to resolve may leave too few species to continue
    if len(resolver.retrieve('query_name')) < minspecies:
        raise TooFewSpeciesError
    logger.info('------TaxonNamesResolver:End------')
    logger.info("Generating names dictionary ....")
    namesdict, allrankids, parentid = ntools.genNamesDict(resolver=resolver,
                                                          parentid=parentid,
                                                          logger=logger)
    logger.info("Finding an outgroup ....")
    namesdict = ntools.getOutgroup(namesdict=namesdict, parentid=parentid,
                                   outgroupid=outgroupid, logger=logger)
    # add outgroup ids to allrankids
    allrankids.extend(namesdict['outgroup']['txids'])
    logger.info('Generating taxonomic tree ....')
    taxontree = ntools.genTaxTree(resolver=resolver, namesdict=namesdict,
                                  taxonomy=taxonomy, logger=logger)

    # OUTPUT
    # remove temp TNR folder
    shutil.rmtree("resolved_names")
    # write out changes to hidden pickled files
    with open(os.path.join(temp_dir, "namesdict.p"), "wb") as outfile:
        pickle.dump(namesdict, outfile)
    with open(os.path.join(temp_dir, "allrankids.p"), "wb") as outfile:
        pickle.dump(allrankids, outfile)
    # write namesdict as csv
    ntools.writeNamesDict(outdir, namesdict)
    # write taxon tree
    ntools.Phylo.write(taxontree, os.path.join(outdir, "taxontree.tre"),
                       "newick")

    # FINISH MESSAGE
    logger.info('Stage finished. Resolved [{0}] names including outgroup.'.
                format(len(namesdict)))
Пример #3
0
# PACKAGES
import logging

from taxon_names_resolver import Resolver
from taxon_names_resolver import TaxDict
from taxon_names_resolver import taxTree

# EXAMPLE NAMES
# NOTE(review): some names look misspelled (e.g. 'Macca mulatta',
#  'Bacillus subtilus') -- presumably deliberate, to exercise the resolver;
#  confirm before correcting them
terms = [
    'Homo sapiens', 'Gorilla gorilla', 'Pongo pongo', 'Macca mulatta',
    'Mus musculus', 'Ailuropoda melanoleuca', 'Ailurus fulgens',
    'Chlorotalpa tytonis', 'Arabidopsis thaliana', 'Bacillus subtilus'
]

# LOGGER
# nothing else in this script defines a logger, so use the root logger
logger = logging.getLogger('')

# RESOLVE
# pass the terms, the datasource and the logger (optional)
resolver = Resolver(terms=terms, datasource="NCBI", logger=logger)
resolver.main()  # resolve!

# CREATE TAXDICT
# extract the unique names for each term ('idents', query_name is best as it is
#  guaranteed to be unique)
idents = resolver.retrieve('query_name')
# extract the lists of names for all known parental taxonomic groups for each
#  term ('lineages', e.g. Homo, Primate, Mammalia)
lineages = resolver.retrieve('classification_path')
# for Taxonomic IDs instead of names, use:
#  lineages = resolver.retrieve('classification_path_ids')
# extract the lists of corresponding rank names for 'lineages' ('ranks', e.g.
#  species, genus etc.) for each entity
ranks = resolver.retrieve('classification_path_ranks')
# optional extra data slots are also possible, for example a list of 1s and 0s