def construct_tree(taxon, db, includespecies, taxalist=None):
    """Build a tree of ``node.Node`` objects from the ``taxonomy`` table.

    The root is looked up from ``taxon``; descendants are then collected by
    repeatedly querying for rows whose ``parent_ncbi_id`` is the node being
    expanded.

    Args:
        taxon: String identifying the root. An all-digit string is matched
            against ``ncbi_id``; anything else is matched against ``name``
            (restricted to rows whose ``node_rank`` is not ``'species'``).
        db: Database location handed to ``sqlite3.connect``.
        includespecies: When false, rows with ``node_rank = 'species'`` are
            excluded from the child queries.
        taxalist: Optional path to a text file. Its lines are passed to
            ``get_all_included`` and only ids present in the result are kept.

    Returns:
        The root node, or ``None`` when no row matches ``taxon``. If several
        rows match, the node built for the last one is returned.
    """
    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()

        includelist = None
        if taxalist is not None:
            with open(taxalist, "r") as taxa_file:
                # Lines are passed on verbatim (trailing newline included);
                # presumably get_all_included normalises them -- TODO confirm.
                wanted = set(taxa_file)
            includelist = get_all_included(wanted, c)

        if taxon.isdigit():
            c.execute("select ncbi_id from taxonomy where ncbi_id = ?", (taxon, ))
        else:
            c.execute(
                "select ncbi_id from taxonomy where name = ? and node_rank != 'species'",
                (taxon, ))

        rt = None
        nodes = {}  # ncbi id (as str) -> node
        stack = []
        for row in c:
            root_id = str(row[0])
            stack.append(root_id)
            rt = node.Node()
            rt.label = taxon + "_" + root_id
            rt.data["id"] = root_id
            nodes[root_id] = rt

        if includespecies:
            child_query = "select ncbi_id,name,name_class,edited_name from taxonomy where parent_ncbi_id = ?"
        else:
            child_query = "select ncbi_id,name,name_class,edited_name from taxonomy where parent_ncbi_id = ? and node_rank != 'species'"

        done = set()
        while stack:
            cur = stack.pop()
            if cur in done:
                continue
            done.add(cur)
            c.execute(child_query, (cur, ))
            for row in c:
                tid = str(row[0])
                if includelist is not None and tid not in includelist:
                    continue
                # Every returned row queues the child for expansion, but only
                # the "scientific name" row creates the node itself.
                stack.append(tid)
                if str(row[2]) == "scientific name":
                    nn = node.Node()
                    nn.label = clean_name(str(row[3])) + "_" + tid
                    nn.data["id"] = tid
                    nodes[tid] = nn
                    nn.parent = nodes[cur]
                    nodes[cur].add_child(nn)
        return rt
    finally:
        # The connection is local to this call; release it even on errors.
        conn.close()
os.system(cmd) return newalns if __name__ == "__main__": if len(sys.argv) != 2: print "python " + sys.argv[0] + " startdir" sys.exit(0) cld = sys.argv[1] #take off the trailing slash if there is one if cld[-1] == "/": cld = cld[0:-1] count = 0 tree = node.Node() nodes = {} firstnode = True #build a tree from the directory for root, dirs, files in os.walk(cld, topdown=True): if "clusters" in root: continue if "clusters" in dirs: if firstnode == True: tree.label = root.split("/")[-1] firstnode = False nodes[root.split("/")[-1]] = tree nd = nodes[root.split("/")[-1]] nd.data["dir"] = root nd.data["names"] = set()
def construct_tree_only_ids(baseid, c, ids):
    """Build a name-labelled tree below ``baseid`` restricted to ``ids``.

    Children are found by querying the ``taxonomy`` table for rows whose
    ``parent_ncbi_id`` is the node being expanded; a child is kept only if
    its id is in the collection returned by ``get_all_included(ids, c)``.
    After the tree is built, every node that has children gets an empty
    label, and unlabelled non-root nodes with exactly one child are spliced
    out so that the child hangs directly off the grandparent.

    Args:
        baseid: Id of the root taxon; converted with ``str()`` wherever it
            is used, so integer ids are accepted as well as strings.
        c: Database cursor with ``execute`` and row iteration.
        ids: Collection of ids handed to ``get_all_included``.

    Returns:
        The root ``node.Node``.
    """
    root_id = str(baseid)
    includelist = get_all_included(ids, c)

    rt = node.Node()
    rt.label = root_id + "_" + root_id
    rt.data["id"] = root_id
    nodes = {root_id: rt}  # ncbi id (as str) -> node

    stack = [root_id]
    done = set()
    while stack:
        cur = stack.pop()
        if cur in done:
            continue
        done.add(cur)
        c.execute(
            "select ncbi_id,name,name_class,edited_name from taxonomy where parent_ncbi_id = ?",
            (cur, ))
        for row in c:
            tid = str(row[0])
            if includelist is not None and tid not in includelist:
                continue
            # Every returned row queues the child for expansion, but only
            # the "scientific name" row creates the node itself.
            stack.append(tid)
            if str(row[2]) == "scientific name":
                nn = node.Node()
                nn.label = clean_name(str(row[1]))
                nn.data["id"] = tid
                nodes[tid] = nn
                nn.parent = nodes[cur]
                nodes[cur].add_child(nn)

    # Only childless nodes keep their labels.
    for nd in rt.iternodes():
        if len(nd.children) != 0:
            nd.label = ""

    # Splice out unlabelled single-child nodes one at a time. The traversal
    # is restarted after each change because the tree was just modified.
    while True:
        for nd in rt.iternodes():
            if nd.parent is not None and len(nd.children) == 1 and nd.label == "":
                par = nd.parent
                only_child = nd.children[0]
                par.remove_child(nd)
                par.add_child(only_child)
                break
        else:
            # A full pass found nothing left to splice.
            break
    return rt
sys.stderr.write("here\n") while len(diffnms) > 0: for j in diffnms: going = True cn = diffnds[j] while going: par = cn.parent pln = set(par.lvsnms()).intersection(rootnms) if len(pln) > 0: amrca = tree_utils.get_mrca_wnms(pln,tree1) #if VERBOSE: # sys.stderr.write("add at this node"+" "+par.get_newick_repr(False)+" "+amrca.get_newick_repr(False)+"\n") if len(pln) == 1: amrca = tree1.get_leaf_by_name(list(pln)[0]) #print "f",amrca.get_newick_repr(True) nn = node.Node() if EDITLEN: nn.length = amrca.length/2. nn.height = amrca.height+amrca.length/2. amrca.length = nn.length amrca.parent.add_child(nn) amrca.parent.remove_child(amrca) nn.add_child(amrca) amrca = nn for k in par.children: if len(set(k.lvsnms()).intersection(rootnms)) > 0: continue else: #tree_utils.set_heights(amrca) #print "a",k.get_newick_repr(True),amrca.length,amrca.get_newick_repr(True),amrca.height if EDITLEN:
def construct_tree_only_ids(baseid, c, ids):
    """Build an id-labelled constraint tree below ``baseid`` limited to ``ids``.

    Children are found by querying the ``taxonomy`` table for rows whose
    ``parent_ncbi_id`` is the node being expanded; a child is kept only if
    its id is in the collection returned by ``get_all_included(ids, c)``.

    Behaviour also depends on names that are not defined in this function
    and must exist at module level: ``noinclude``, ``stopat``, ``useonly``
    and ``useonlylist``.

    * A "scientific name" row normally becomes a new node under its parent.
      When the parent id is in ``ids``, or the name contains "incertae",
      "unidentified" or "unplaced", no node is attached and the child id is
      aliased to the parent's node instead.
    * When ``noinclude`` is true, rows whose rank equals ``stopat`` get no
      node of their own; their id is aliased to an existing node.
    * When ``useonly`` is true, internal nodes whose rank is not in
      ``useonlylist`` are removed and their children re-attached to the
      removed node's parent.

    Finally, every node with children gets an empty label, unlabelled
    non-root nodes with one child are spliced out, and a root with a single
    child is replaced by that child.

    Args:
        baseid: Id of the root taxon; converted with ``str()`` wherever it
            is used, so integer ids are accepted as well as strings.
        c: Database cursor with ``execute`` and row iteration.
        ids: Collection of ids; passed to ``get_all_included`` and also
            tested for membership with the string id being expanded.

    Returns:
        The root ``node.Node`` of the resulting tree.
    """
    root_id = str(baseid)
    includelist = get_all_included(ids, c)

    rt = node.Node()
    rt.label = root_id + "_" + root_id
    rt.data["id"] = root_id
    nodes = {root_id: rt}  # ncbi id (as str) -> node that represents it

    stack = [root_id]
    done = set()
    while stack:
        cur = stack.pop()
        if cur in done:
            continue
        done.add(cur)
        c.execute(
            "select ncbi_id,name_class,name,node_rank from taxonomy where parent_ncbi_id = ?",
            (cur, ))
        for row in c:
            tid = str(row[0])
            if includelist is not None and tid not in includelist:
                continue
            stack.append(tid)
            if str(row[1]) == "scientific name" and (
                    noinclude == False or str(row[3]) != stopat):
                nn = node.Node()
                nn.label = str(tid)
                nn.data["id"] = tid
                nn.data["rank"] = str(row[3])
                nodes[tid] = nn
                if (cur in ids or "incertae" in str(row[2])
                        or "unidentified" in str(row[2])
                        or "unplaced" in str(row[2])):
                    # Should lose these and not constrain: the new node is
                    # dropped and the id points at the parent's node instead.
                    nodes[tid] = nodes[cur]
                else:
                    nn.parent = nodes[cur]
                    nodes[cur].add_child(nn)
            elif (noinclude == True and str(row[3]) == stopat) and str(
                    row[1]) == "scientific name":
                # Rows at the stop rank never get their own node.
                if cur in ids:
                    nodes[tid] = nodes[cur].parent
                else:
                    nodes[tid] = nodes[cur]

    if useonly:
        # Remove any constraint that isn't in the list.
        toremove = set()
        for nd in rt.iternodes():
            if len(nd.children) == 0 or nd == rt:
                continue
            if nd.data["rank"] not in useonlylist:
                toremove.add(nd)
        for nd in toremove:
            par = nd.parent
            if par is None:
                continue
            par.remove_child(nd)
            nd.parent = None
            for child in nd.children:
                par.add_child(child)
                child.parent = par

    # Only childless nodes keep their labels.
    for nd in rt.iternodes():
        if len(nd.children) != 0:
            nd.label = ""

    # Splice out unlabelled single-child nodes one at a time. The traversal
    # is restarted after each change because the tree was just modified.
    while True:
        for nd in rt.iternodes():
            if nd.parent is not None and len(nd.children) == 1 and nd.label == "":
                par = nd.parent
                only_child = nd.children[0]
                par.remove_child(nd)
                par.add_child(only_child)
                break
        else:
            # A full pass found nothing left to splice.
            break

    # The splice above never touches the root, so drop a single-child root here.
    if len(rt.children) == 1:
        rt = rt.children[0]
    return rt