Пример #1
0
  def test_ncbiquery(self):
    """Exercise taxid/name translation and descendant queries of NCBITaxa.

    Runs against the database at DATABASE_PATH and checks:
      * taxid -> scientific name translation,
      * name -> taxid translation (each name maps to a list of taxids),
      * get_descendant_taxa() with and without intermediate nodes and
        with a rank limit.
    """
    ncbi = NCBITaxa(dbfile=DATABASE_PATH)

    # Taxids are passed as strings; the result is indexed by int below.
    id2name = ncbi.get_taxid_translator(['9606', '7507'])
    self.assertEqual(id2name[7507], 'Mantis religiosa')
    self.assertEqual(id2name[9606], 'Homo sapiens')

    # 'homo sapiens' is queried in lower case on purpose: the result is
    # expected to be keyed by the spelling that was passed in.
    name2id = ncbi.get_name_translator(['Mantis religiosa', 'homo sapiens'])
    self.assertEqual(name2id['Mantis religiosa'], [7507])
    self.assertEqual(name2id['homo sapiens'], [9606])

    # One name may map to several taxids; compare as sets because the
    # order of the returned list is not part of what is asserted.
    name2id = ncbi.get_name_translator(['Bacteria'])
    self.assertEqual(set(name2id['Bacteria']), {2, 629395})

    out = ncbi.get_descendant_taxa("9605", intermediate_nodes=True)
    self.assertEqual(set(out), {1425170, 741158, 63221, 9606})

    out = ncbi.get_descendant_taxa("9605", intermediate_nodes=False)
    self.assertEqual(set(out), {1425170, 741158, 63221})

    out = ncbi.get_descendant_taxa("9605", intermediate_nodes=False, rank_limit="species")
    self.assertEqual(set(out), {9606, 1425170})
Пример #2
0
    def test_ncbiquery(self):
        """Check taxid -> name and name -> taxid translation of NCBITaxa.

        Runs against the database at DATABASE_PATH.
        """
        ncbi = NCBITaxa(dbfile=DATABASE_PATH)

        # Taxids are passed as strings; the result is indexed by int below.
        id2name = ncbi.get_taxid_translator(["9606", "7507"])
        self.assertEqual(id2name[7507], "Mantis religiosa")
        self.assertEqual(id2name[9606], "Homo sapiens")

        # "homo sapiens" is queried in lower case on purpose: the result is
        # expected to be keyed by the spelling that was passed in.
        # NOTE(review): these assertions expect a single taxid per name; if
        # the installed ete release returns a list per name the expected
        # values would be [7507] / [9606] -- confirm against that release.
        name2id = ncbi.get_name_translator(["Mantis religiosa", "homo sapiens"])
        self.assertEqual(name2id["Mantis religiosa"], 7507)
        self.assertEqual(name2id["homo sapiens"], 9606)
Пример #3
0
def getNcbiTaxonomy():
    """Write the named NCBI lineage of each organism to OUTPUT_FILE.

    Every name in ORGANISM_NAMES_LIST is translated to its taxids; for each
    taxid one line is written holding the tab-separated names of its
    lineage. Lines are emitted in the order of ORGANISM_NAMES_LIST.

    Names the translator did not resolve are reported on stderr and
    skipped instead of aborting the whole run with a KeyError.
    """
    import sys

    ncbi = NCBITaxa()
    nameToTaxIdList = ncbi.get_name_translator(ORGANISM_NAMES_LIST)
    with open(OUTPUT_FILE, "w") as outputFile:
        for name in ORGANISM_NAMES_LIST:
            taxIds = nameToTaxIdList.get(name)
            if not taxIds:
                # The translator result only holds the names it resolved.
                sys.stderr.write("No NCBI taxid found for %r; skipping\n" % (name,))
                continue
            for eachId in taxIds:
                lineage = ncbi.get_lineage(str(eachId))
                names = ncbi.get_taxid_translator(lineage)
                outputFile.write("\t".join([names[taxid] for taxid in lineage]) + "\n")
Пример #4
0
from pandas import DataFrame
from Bio import SeqIO
from pandas import Index
from ete2 import NCBITaxa

# Hard-coded input locations and the prefix used to build genome keys.
data_path = "/home/moritz/people/MoreData/genomes/img_od1s"
img_fasta = "/home/moritz/people/MoreData/raw_imgs/od1s.fasta"
img_xls = "/home/moritz/people/MoreData/raw_imgs/od1s.xls"
name = "parcu_from_img_"
taxDb = NCBITaxa()

# Tab-separated table: first row is the header, first column the index.
contigs = DataFrame.from_csv(img_xls, sep="\t", header=0, index_col=0)

# All genomes share one taxid: the first id of the first entry returned
# for 'Candidatus Parcubacteria'.
parcu_hits = taxDb.get_name_translator(['Candidatus Parcubacteria'])
manual_taxo = parcu_hits.values()[0][0]


def _long_name(genome_id):
    """Return 'Genome' of the first row whose 'Genome ID' is *genome_id*."""
    rows = contigs.loc[contigs['Genome ID'] == genome_id]
    return rows['Genome'].iloc[0]


# One metadata record per distinct genome id, keyed by prefix + id.
metadata = dict(
    (
        name + str(g),
        {
            'IMG_ID': g,
            'name': name + str(g),
            'species_taxid': manual_taxo,
            'long_name': _long_name(g),
        },
    )
    for g in set(contigs['Genome ID'])
)

# Start every genome with an empty list of sequence records.
seq_dict = dict((key, []) for key in metadata)

# The i-th FASTA record is assigned to the genome of the i-th table row.
with open(img_fasta, "r") as fasta_handle:
    for idx, record in enumerate(SeqIO.parse(fasta_handle, "fasta")):
        genome_id = contigs.iloc[idx]['Genome ID']
        seq_dict[name + str(genome_id)].append(record)
Пример #5
0
def run(args):
    """Resolve the search terms to NCBI taxids and print the requested report.

    Attributes read from *args*:
      search              -- iterable of taxids and/or names (required;
                             the process exits with -1 when it is empty)
      taxonomy, info      -- output mode; taxonomy is the default and wins
                             when both are set
      fuzzy               -- passed to the fuzzy name lookup for names
                             that had no exact match
      full_lineage, rank_limit, collapse_subspecies
                          -- forwarded to NCBITaxa.get_topology()

    Integer-like search terms are used as taxids directly; everything
    else is stripped and translated as a name.
    """
    import sys
    from ete2 import NCBITaxa

    # Dump the taxonomy by default when no output mode was requested.
    if not args.taxonomy and not args.info:
        args.taxonomy = True

    ncbi = NCBITaxa()

    # Used as a set of taxids (values are always None).
    all_taxids = {}
    all_names = set()

    if not args.search:
        log.error('Search terms should be provided (i.e. --search) ')
        sys.exit(-1)
    for n in args.search:
        try:
            all_taxids[int(n)] = None
        except ValueError:
            all_names.add(n.strip())

    # translate names
    name2tax = ncbi.get_name_translator(all_names)
    for hit in name2tax.values():
        # A translator value is accepted either as a single taxid or as a
        # list of taxids, so both result shapes end up as plain dict keys.
        taxids = hit if isinstance(hit, (list, tuple, set)) else [hit]
        for taxid in taxids:
            all_taxids[taxid] = None

    # Book-keeping for fuzzy matches. These were previously never created,
    # so the first successful fuzzy match raised NameError.
    name2realname = {}
    name2score = {}

    not_found_names = all_names - set(name2tax.keys())
    if args.fuzzy and not_found_names:
        log.warn("%s unknown names", len(not_found_names))
        for name in not_found_names:
            tax, realname, sim = ncbi.get_fuzzy_name_translation(
                name, args.fuzzy)
            if tax:
                all_taxids[tax] = None
                name2tax[name] = tax
                name2realname[name] = realname
                name2score[name] = "Fuzzy:%0.2f" % sim

    if args.taxonomy:
        log.info("Dumping NCBI taxonomy of %d taxa..." % (len(all_taxids)))
        t = ncbi.get_topology(list(all_taxids),
                              intermediate_nodes=args.full_lineage,
                              rank_limit=args.rank_limit,
                              collapse_subspecies=args.collapse_subspecies)

        id2name = ncbi.get_taxid_translator([n.name for n in t.traverse()])
        for n in t.traverse():
            # Keep the raw taxid before the node name is rewritten below.
            n.add_features(taxid=n.name)
            n.add_features(sci_name=str(id2name.get(int(n.name), "?")))
            n.name = "%s - %s" % (id2name.get(int(n.name), n.name), n.name)
            lineage = ncbi.get_lineage(n.taxid)
            n.add_features(
                named_lineage='|'.join(ncbi.translate_to_names(lineage)))
        dump(t,
             features=[
                 "taxid", "name", "rank", "bgcolor", "sci_name",
                 "collapse_subspecies", "named_lineage"
             ])
    elif args.info:
        # Single-argument print(...) behaves the same as a print statement
        # and as the print function.
        print('# ' + '\t'.join(
            ["Taxid", "Sci.Name", "Rank", "Named Lineage", "Taxid Lineage"]))
        translator = ncbi.get_taxid_translator(all_taxids)
        ranks = ncbi.get_rank(all_taxids)
        for taxid, name in translator.items():
            lineage = ncbi.get_lineage(taxid)
            named_lineage = ','.join(ncbi.translate_to_names(lineage))
            lineage_string = ','.join(map(str, lineage))
            print('\t'.join([
                str(taxid), name,
                ranks.get(taxid, ''), named_lineage, lineage_string
            ]))
Пример #6
0
def run(args):
    """Resolve the search terms to NCBI taxids and print the requested report.

    Attributes read from *args*:
      search              -- iterable of taxids and/or names (required;
                             the process exits with -1 when it is empty)
      tree, descendants, info
                          -- output mode, checked in that order; tree is
                             the default when none is set
      fuzzy               -- passed to the fuzzy name lookup for names
                             that had no exact match
      full_lineage, rank_limit, collapse_subspecies
                          -- forwarded to the NCBITaxa queries

    Integer-like search terms are used as taxids directly; everything
    else is stripped and translated as a name.
    """
    import sys
    from ete2 import NCBITaxa

    # dump tree by default
    if not args.tree and not args.info and not args.descendants:
        args.tree = True

    ncbi = NCBITaxa()

    # Used as a set of taxids (values are always None).
    all_taxids = {}
    all_names = set()

    if not args.search:
        log.error('Search terms should be provided (i.e. --search) ')
        sys.exit(-1)
    for n in args.search:
        try:
            all_taxids[int(n)] = None
        except ValueError:
            all_names.add(n.strip())

    # translate names
    name2tax = ncbi.get_name_translator(all_names)
    for hit in name2tax.values():
        # A translator value is accepted either as a single taxid or as a
        # list of taxids, so both result shapes end up as plain dict keys.
        taxids = hit if isinstance(hit, (list, tuple, set)) else [hit]
        for taxid in taxids:
            all_taxids[taxid] = None

    # Book-keeping for fuzzy matches. These were previously never created,
    # so the first successful fuzzy match raised NameError.
    name2realname = {}
    name2score = {}

    not_found_names = all_names - set(name2tax.keys())
    if args.fuzzy and not_found_names:
        log.warn("%s unknown names", len(not_found_names))
        fuzzy_resolved = set()
        for name in not_found_names:
            tax, realname, sim = ncbi.get_fuzzy_name_translation(name, args.fuzzy)
            if tax:
                all_taxids[tax] = None
                name2tax[name] = tax
                name2realname[name] = realname
                name2score[name] = "Fuzzy:%0.2f" %sim
                fuzzy_resolved.add(name)
        # Names rescued by the fuzzy lookup must not be reported as
        # untranslatable below.
        not_found_names = not_found_names - fuzzy_resolved

    if not_found_names:
        log.warn("[%s] could not be translated into taxids!" %','.join(not_found_names))

    if args.tree:
        if len(all_taxids) == 1:
            # A single query dumps the tree of its descendants.
            target_taxid = next(iter(all_taxids))
            log.info("Dumping NCBI descendants tree for %s" %(target_taxid))
            t = ncbi.get_descendant_taxa(target_taxid,
                                         collapse_subspecies=args.collapse_subspecies,
                                         rank_limit=args.rank_limit,
                                         return_tree=True)
        else:
            log.info("Dumping NCBI taxonomy of %d taxa..." %(len(all_taxids)))
            t = ncbi.get_topology(list(all_taxids),
                                  intermediate_nodes=args.full_lineage,
                                  rank_limit=args.rank_limit,
                                  collapse_subspecies=args.collapse_subspecies)

        id2name = ncbi.get_taxid_translator([n.name for n in t.traverse()])
        for n in t.traverse():
            # Keep the raw taxid before the node name is rewritten below.
            n.add_features(taxid=n.name)
            n.add_features(sci_name=str(id2name.get(int(n.name), "?")))
            n.name = "%s - %s" %(id2name.get(int(n.name), n.name), n.name)
            lineage = ncbi.get_lineage(n.taxid)
            n.add_features(named_lineage = '|'.join(ncbi.translate_to_names(lineage)))
        dump(t, features=["taxid", "name", "rank", "bgcolor", "sci_name",
                          "collapse_subspecies", "named_lineage"])
    elif args.descendants:
        log.info("Dumping NCBI taxonomy of %d taxa..." %(len(all_taxids)))
        # Single-argument print(...) behaves the same as a print statement
        # and as the print function.
        print('# ' + '\t'.join(["Taxid", "Sci.Name", "Rank", "descendant_taxids", "descendant_names"]))
        translator = ncbi.get_taxid_translator(all_taxids)
        ranks = ncbi.get_rank(all_taxids)
        for taxid in all_taxids:
            descendants = ncbi.get_descendant_taxa(taxid,
                                                   collapse_subspecies=args.collapse_subspecies,
                                                   rank_limit=args.rank_limit)
            # The fallback is stringified so that join() never sees an int
            # when a taxid has no known name.
            print('\t'.join([str(taxid), translator.get(taxid, str(taxid)), ranks.get(taxid, ''),
                             '|'.join(map(str, descendants)),
                             '|'.join(map(str, ncbi.translate_to_names(descendants)))]))

    elif args.info:
        print('# ' + '\t'.join(["Taxid", "Sci.Name", "Rank", "Named Lineage", "Taxid Lineage"]))
        translator = ncbi.get_taxid_translator(all_taxids)

        ranks = ncbi.get_rank(all_taxids)
        for taxid, name in translator.items():
            lineage = ncbi.get_lineage(taxid)
            named_lineage = ','.join(ncbi.translate_to_names(lineage))
            lineage_string = ','.join(map(str, lineage))
            print('\t'.join([str(taxid), name, ranks.get(taxid, ''), named_lineage, lineage_string]))
Пример #7
0
from ete2 import NCBITaxa
from ete2 import Tree, TreeStyle, AttrFace

ncbi = NCBITaxa()

# Read one taxon name per line. The context manager closes the file as soon
# as the names are loaded instead of leaving the handle open.
# NOTE: `input` shadows the builtin; the name is kept because it is a
# module-level variable of this script.
with open("db/example_input", "r") as names_file:
    input = [l.rstrip("\n") for l in names_file]

# Translate the names to taxids and build the tree connecting them.
# NOTE(review): this passes the translator values straight to
# get_topology(), i.e. it assumes one taxid per name -- confirm against the
# installed ete release.
taxid = ncbi.get_name_translator(input)
tree  = ncbi.get_topology(taxid.values())

#print tree.get_ascii(attributes=["sci_name", "rank", "taxid"])

# custom layout: adds "rank" on top of branches, and sci_name as tip names
def my_layout(node):
    """Decorate *node* with its rank and, for leaves, its scientific name.

    A node with a truthy ``rank`` attribute gets a small "rank" face above
    its branch; a leaf additionally gets a "sci_name" face to the right of
    its branch. Nodes that match neither condition are left untouched.
    """
    if getattr(node, "rank", None):
        node.add_face(
            AttrFace("rank", fsize=7, fgcolor="indianred"),
            column=0,
            position="branch-top",
        )

    if not node.is_leaf():
        return

    node.add_face(
        AttrFace("sci_name", fsize=9, fgcolor="steelblue"),
        column=0,
        position="branch-right",
    )

# Rendering setup: default leaf names are switched off and my_layout is
# installed as the layout function.
ts = TreeStyle()
ts.show_leaf_name = False
ts.layout_fn = my_layout

# Write the styled tree to tree.pdf.
tree.render("tree.pdf", tree_style=ts)
Пример #8
0
from ete2 import NCBITaxa
from ete2 import Tree, TreeStyle, AttrFace

ncbi = NCBITaxa()

# Read one taxon name per line. The context manager closes the file as soon
# as the names are loaded instead of leaving the handle open.
# NOTE: `input` shadows the builtin; the name is kept because it is a
# module-level variable of this script.
with open("db/example_input", "r") as names_file:
    input = [l.rstrip("\n") for l in names_file]

# Translate the names to taxids and build the tree connecting them.
# NOTE(review): this passes the translator values straight to
# get_topology(), i.e. it assumes one taxid per name -- confirm against the
# installed ete release.
taxid = ncbi.get_name_translator(input)
tree = ncbi.get_topology(taxid.values())

#print tree.get_ascii(attributes=["sci_name", "rank", "taxid"])


# custom layout: adds "rank" on top of branches, and sci_name as tip names
def my_layout(node):
    """Decorate *node* with its rank and, for leaves, its scientific name.

    A node with a truthy ``rank`` attribute gets a small "rank" face above
    its branch; a leaf additionally gets a "sci_name" face to the right of
    its branch. Nodes that match neither condition are left untouched.
    """
    if getattr(node, "rank", None):
        node.add_face(
            AttrFace("rank", fsize=7, fgcolor="indianred"),
            column=0,
            position="branch-top",
        )

    if not node.is_leaf():
        return

    node.add_face(
        AttrFace("sci_name", fsize=9, fgcolor="steelblue"),
        column=0,
        position="branch-right",
    )


# Rendering setup: default leaf names are switched off and my_layout is
# installed as the layout function.
ts = TreeStyle()
ts.show_leaf_name = False
ts.layout_fn = my_layout

# Write the styled tree to tree.pdf.
tree.render("tree.pdf", tree_style=ts)