def parse_trees(e, otus):
    """
    Build one Storage record per NeXML <tree> element found in *e*.

    *e* is a nexml document parsed by etree.  *otus* is indexed by the
    value of a node element's "otu" attribute; the object found there
    must expose a ``label`` attribute.

    Each returned record carries ``attrib`` (the tree element's
    attributes), ``nodes`` (node-element id -> Node) and ``root`` (the
    single parentless Node).  Nodes are numbered from 1 through ``id``
    in the order the root yields them when iterated.
    """
    from ivy.tree import Node

    trees = []
    for block in e.findall(NEXML + "trees"):
        for tree_el in block.findall(NEXML + "tree"):
            tree = Storage()
            tree.attrib = Storage(tree_el.attrib)
            tree.nodes = {}

            # One Node per <node>; only nodes that reference an otu
            # are marked as leaves and labelled.
            for node_el in tree_el.findall(NEXML + "node"):
                attrs = node_el.attrib
                node = Node()
                if attrs.get("otu"):
                    node.isleaf = True
                    node.otu = otus[attrs["otu"]]
                    node.label = node.otu.label
                tree.nodes[attrs["id"]] = node

            # Each <edge> hangs its target node under its source node.
            for edge_el in tree_el.findall(NEXML + "edge"):
                attrs = edge_el.attrib
                child = tree.nodes[attrs["target"]]
                parent = tree.nodes[attrs["source"]]
                raw_length = attrs.get("length")
                if raw_length:
                    child.length = float(raw_length)
                parent.add_child(child)

            parentless = [x for x in tree.nodes.values() if not x.parent]
            assert len(parentless) == 1
            top = parentless[0]
            top.isroot = True
            for number, member in enumerate(top, 1):
                member.id = number
            tree.root = top
            trees.append(tree)
    return trees
def parse_trees(e, otus):
    """
    Convert every <tree> of a NeXML document into a Storage record.

    *e* is a nexml document parsed by etree; *otus* is looked up with
    each node element's "otu" attribute and must return an object with
    a ``label``.

    Returns a list of Storage objects with ``attrib``, ``nodes``
    (element id -> Node) and ``root``.
    """
    from ivy.tree import Node

    # Lazily walk <trees>/<tree> in document order.
    tree_elements = (
        te
        for tb in e.findall(NEXML + "trees")
        for te in tb.findall(NEXML + "tree")
    )

    results = []
    for te in tree_elements:
        record = Storage()
        record.attrib = Storage(te.attrib)
        record.nodes = {}

        for node_element in te.findall(NEXML + "node"):
            tip_or_internal = Node()
            if node_element.attrib.get("otu"):
                tip_or_internal.isleaf = True
                tip_or_internal.otu = otus[node_element.attrib["otu"]]
                tip_or_internal.label = tip_or_internal.otu.label
            record.nodes[node_element.attrib["id"]] = tip_or_internal

        for edge_element in te.findall(NEXML + "edge"):
            info = edge_element.attrib
            target = record.nodes[info["target"]]
            source = record.nodes[info["source"]]
            length_text = info.get("length")
            if length_text:
                target.length = float(length_text)
            source.add_child(target)

        # Exactly one node may be left without a parent: the root.
        candidates = [c for c in record.nodes.values() if not c.parent]
        assert len(candidates) == 1
        root_node = candidates[0]
        root_node.isroot = True
        for position, member in enumerate(root_node):
            member.id = position + 1
        record.root = root_node
        results.append(record)
    return results
def nextbacktree(t, i, q=None, lim=0, maxdepth=None):
    """
    Create a tree of ivy.tree.Nodes from table t, with records having
    fields next, back, parent, and depth.

    t        -- table object; its ``_db`` attribute is used to query
    i        -- id of the record that becomes the root (coerced to int)
    q        -- optional extra query applied before the interval filter
    lim      -- if truthy, passed as ``limitby=(0, lim)``
    maxdepth -- if truthy, only records with depth <= maxdepth are
                fetched, and records exactly at maxdepth are marked as
                leaves

    Returns the root Node.  Every node carries ``rec`` and ``id``;
    non-root nodes also carry ``next`` and ``back``.
    """
    db = t._db
    root_rec = t[int(i)]

    select_args = dict(orderby=t.next)
    if lim:
        select_args["limitby"] = (0, lim)

    # Descendants lie strictly inside the root's (next, back) interval.
    inside = (t.next > root_rec.next) & (t.back < root_rec.back)
    if maxdepth:
        inside &= (t.depth <= maxdepth)
    recs = db(q)(inside).select(**select_args)

    root = Node()
    root.isroot = True
    root.rec = root_rec
    root.id = root_rec.id

    i2n = {root_rec.id: root}
    for rec in recs:
        n = Node()
        n.rec = rec
        n.id = rec.id
        n.next = rec.next
        n.back = rec.back
        # With a depth cut-off, the tips are the records sitting on the
        # cut-off; the depth must come from the record, because the
        # fresh Node has not been given one.  Without a cut-off, a tip
        # is a record whose interval is empty (back == next + 1).
        if maxdepth:
            is_tip = rec.depth == maxdepth
        else:
            is_tip = rec.next == rec.back - 1
        if is_tip:
            n.isleaf = True
        i2n[rec.id] = n
        # Rows are ordered by next, so the parent is already in i2n.
        i2n[rec.parent].add_child(n)
    return root
def buildtree(t, otu_id2data):
    """
    Build an ivy Node tree from a Nexson tree mapping *t*.

    *t* either provides 'nodeById' / 'edgeBySourceId' (newer Nexson
    1.2.1) or the lists 'node' / 'edge' (older Nexson 1.0.0).
    *otu_id2data* maps otu ids to otu mappings; otus missing from it
    are tolerated and leave the leaf without otu, label or taxid.

    Side effect: each node mapping in *t* gets a 'node' key holding its
    Node.

    Returns the root Node (the one whose data has a truthy '@root'),
    after ladderizing and indexing it.

    Raises ValueError if no node is flagged '@root'.
    """
    from ivy.tree import Node

    if 'nodeById' in t:  # newer Nexson 1.2.1
        node_id2data = t['nodeById']
    else:  # older Nexson 1.0.0
        node_id2data = {}
        for n in t['node']:
            node_id2data[n['@id']] = n

    root = None
    for i, d in node_id2data.items():
        n = Node()
        n.snode_id = i
        n.taxid = None
        if d.get('@root'):
            n.isroot = True
            root = n
        oid = d.get('@otu')
        if oid:
            n.isleaf = True
            try:
                n.otu = otu_id2data[oid]
                n.label = (n.otu.get('^ot:ottTaxonName') or
                           n.otu.get('^ot:originalLabel'))
                n.taxid = n.otu.get('^ot:ottId')
            except KeyError:
                # Deliberate best effort: an otu id absent from
                # otu_id2data leaves the leaf unlabeled.
                pass
        n.nexson_id = i
        d['node'] = n

    if root is None:
        # Fail with a clear message instead of an AttributeError on
        # None further down.
        raise ValueError("no node is flagged '@root' in tree")

    if 'edgeBySourceId' in t:  # newer Nexson 1.2.1
        edges_by_source_id = t['edgeBySourceId']
    else:  # older Nexson 1.0.0
        edges_by_source_id = {}
        for e in t['edge']:
            edges_by_source_id.setdefault(e['@source'], {})[e['@id']] = e

    for nid, ed in edges_by_source_id.items():
        n = node_id2data[nid]
        for e in ed.values():
            cd = node_id2data[e['@target']]
            c = cd['node']
            c.length = e.get('@length')
            n['node'].add_child(c)

    root.tree_nexson = t
    root.stree = t['treeid']
    root.ladderize()
    ivy.tree.index(root)
    return root
def getSTree( db, treeId ):
    """
    Assemble the Node tree for the snode rows of tree *treeId*.

    Rows are read in ``next`` order with a left join on taxon, and the
    hierarchy is rebuilt from the (next, back) intervals using a stack
    of open ancestors.  Each node gets id, next, back, length, label,
    taxon (the joined taxon name) and depth (number of ancestors).

    Returns the root Node (the row whose next == 1), or None when the
    query yields no rows.
    """
    root = None
    parentStack = []
    rows = db( db.snode.tree == treeId ).select(
        db.snode.ALL, db.taxon.ALL,
        left = db.taxon.on( db.taxon.id == db.snode.taxon ),
        orderby = db.snode.next ).as_list()

    for snodeRow in rows:
        snode = snodeRow['snode']
        node = Node()
        node.id = snode['id']
        node.next = snode['next']
        node.back = snode['back']
        node.length = snode['length']
        node.label = snode['label']
        node.taxon = snodeRow['taxon']['name']

        # A leaf encloses nothing, so back == next + 1.  (The previous
        # test, next - back == 1, could never hold since next < back.)
        if node.back - node.next == 1:
            node.isleaf = True

        if node.next == 1:
            node.isroot = True
            node.depth = len( parentStack )
            root = node
            parentStack.append( node )
            continue

        # Close every ancestor whose interval ended before this node.
        while node.back > parentStack[ -1 ].back:
            parentStack.pop()

        # Depth is measured after popping, so it counts only the real
        # ancestors and not leftovers from the previous subtree.
        node.depth = len( parentStack )
        parentStack[ -1 ].add_child( node )
        parentStack.append( node )

    return root
def getGTree( db, treeId ):
    """
    Assemble the Node tree for the unpruned gnode rows of tree *treeId*.

    Rows are read in ``next`` order and the hierarchy is rebuilt from
    the (next, back) intervals using a stack of open ancestors.  Each
    node gets id, next, back, label and depth (number of ancestors).

    Returns the root Node (the row whose next == 1), or None when the
    query yields no rows.
    """
    root = None
    parentStack = []
    rows = db( ( db.gnode.tree == treeId ) &
               ( db.gnode.pruned == False ) ).select(
        db.gnode.ALL, orderby = db.gnode.next ).as_list()

    for gnodeRow in rows:
        node = Node()
        node.id = gnodeRow['id']
        node.next = gnodeRow['next']
        node.back = gnodeRow['back']
        node.label = gnodeRow['label']

        # A leaf encloses nothing, so back == next + 1.  (The previous
        # test, next - back == 1, could never hold since next < back.)
        if node.back - node.next == 1:
            node.isleaf = True

        if node.next == 1:
            node.isroot = True
            node.depth = len( parentStack )
            root = node
            parentStack.append( node )
            continue

        # Close every ancestor whose interval ended before this node.
        while node.back > parentStack[ -1 ].back:
            parentStack.pop()

        # Depth is measured after popping, so it counts only the real
        # ancestors and not leftovers from the previous subtree.
        node.depth = len( parentStack )
        parentStack[ -1 ].add_child( node )
        parentStack.append( node )

    return root
def getNodeFromRowData( row ):
    """
    Return a new Node filled from the first six items of *row*.

    Positions 0..5 are stored as id, next, back, length, label and
    taxon, in that order.
    """
    fieldNames = ( 'id', 'next', 'back', 'length', 'label', 'taxon' )
    node = Node()
    for position, fieldName in enumerate( fieldNames ):
        setattr( node, fieldName, row[ position ] )
    return node
def getNodeFromRowData(row):
    """
    Build a Node from a positional row:
    (id, next, back, length, label, taxon).
    """
    node = Node()
    (node.id, node.next, node.back,
     node.length, node.label, node.taxon) = (
        row[0], row[1], row[2], row[3], row[4], row[5])
    return node
def reindex_from_parents(t):
    """
    Rebuild the tree stored in table *t* from its ``parent`` links and
    write fresh next/back/depth values back to every record.

    All records with id > 0 are loaded.  A record whose parent is None
    becomes the root.  The tree is passed to ``index`` and each node's
    next, back and depth are then saved with ``update_record``.

    Returns the root Node.
    """
    db = t._db

    # Every node starts out as a leaf; gaining a child clears the flag.
    nodes_by_id = {}
    for rec in db(t.id > 0).select():
        nodes_by_id[rec.id] = Node(rec=rec, isleaf=True)

    root = None
    for node in nodes_by_id.values():
        if node.rec.parent is None:
            root = node
            continue
        parent = nodes_by_id[node.rec.parent]
        parent.add_child(node)
        parent.isleaf = False

    index(root)
    for node in root:
        node.rec.update_record(next=node.next, back=node.back,
                               depth=node.depth)
    return root
def getGClade( db, rootRec, collapsedNodeRecs ):
    """
    Assemble the Node tree for the gnode clade rooted at *rootRec*.

    Selects the rows of rootRec's tree whose (next, back) interval lies
    within rootRec's.  Rows strictly inside the interval of any record
    in *collapsedNodeRecs* (read as rec['gnode']['next'/'back']) are
    excluded.  Rows are read in ``next`` order and the hierarchy is
    rebuilt with a stack of open ancestors.

    Each node gets id, next, back, label, text (same as label) and
    depth (number of ancestors within the clade).

    Returns the clade's root Node, or None when no row matches.
    """
    cladeRoot = None
    parentStack = []

    query = db( ( db.gnode.tree == rootRec.tree ) &
                ( db.gnode.next >= rootRec.next ) &
                ( db.gnode.back <= rootRec.back ) )

    for rec in collapsedNodeRecs:
        collapsed = rec['gnode']
        query = query( ~( ( db.gnode.next > collapsed['next'] ) &
                          ( db.gnode.back < collapsed['back'] ) ) )

    rows = query.select( db.gnode.ALL, orderby = db.gnode.next ).as_list()
    for gnodeRow in rows:
        node = Node()
        node.id = gnodeRow['id']
        node.next = gnodeRow['next']
        node.back = gnodeRow['back']
        node.label = gnodeRow['label']
        node.text = node.label

        # A leaf encloses nothing, so back == next + 1.  (The previous
        # test, next - back == 1, could never hold since next < back.)
        if node.back - node.next == 1:
            node.isleaf = True

        if node.next == rootRec.next:
            node.depth = len( parentStack )
            cladeRoot = node
            parentStack.append( node )
            continue

        # Close every ancestor whose interval ended before this node.
        while node.back > parentStack[ -1 ].back:
            parentStack.pop()

        # Depth is measured after popping, so it counts only the real
        # ancestors and not leftovers from the previous subtree.
        node.depth = len( parentStack )
        node.parent = parentStack[ -1 ]
        parentStack[ -1 ].add_child( node )
        parentStack.append( node )

    return cladeRoot
def getSClade( db, rootRec, collapsedNodeRecs ):
    """
    Assemble the Node tree for the snode clade rooted at *rootRec*.

    Selects the rows of rootRec's tree whose (next, back) interval lies
    within rootRec's, left-joined with taxon.  Rows strictly inside the
    interval of any record in *collapsedNodeRecs* (read as
    rec['snode']['next'/'back']) are excluded.  Rows are read in
    ``next`` order and the hierarchy is rebuilt with a stack of open
    ancestors.

    Each node gets id, next, back, length, label, taxon (joined taxon
    name), text (the taxon if truthy, otherwise the label) and depth
    (number of ancestors within the clade).

    Returns the clade's root Node, or None when no row matches.
    """
    cladeRoot = None
    parentStack = []

    query = db( ( db.snode.tree == rootRec.tree ) &
                ( db.snode.next >= rootRec.next ) &
                ( db.snode.back <= rootRec.back ) )

    for rec in collapsedNodeRecs:
        collapsed = rec['snode']
        query = query( ~( ( db.snode.next > collapsed['next'] ) &
                          ( db.snode.back < collapsed['back'] ) ) )

    rows = query.select(
        db.snode.ALL, db.taxon.ALL,
        left = db.taxon.on( db.taxon.id == db.snode.taxon ),
        orderby = db.snode.next ).as_list()

    for snodeRow in rows:
        snode = snodeRow['snode']
        node = Node()
        node.id = snode['id']
        node.next = snode['next']
        node.back = snode['back']
        node.length = snode['length']
        node.label = snode['label']
        node.taxon = snodeRow['taxon']['name']
        node.text = node.taxon if node.taxon else node.label

        # A leaf encloses nothing, so back == next + 1.  (The previous
        # test, next - back == 1, could never hold since next < back.)
        if node.back - node.next == 1:
            node.isleaf = True

        if node.next == rootRec.next:
            node.depth = len( parentStack )
            cladeRoot = node
            parentStack.append( node )
            continue

        # Close every ancestor whose interval ended before this node.
        while node.back > parentStack[ -1 ].back:
            parentStack.pop()

        # Depth is measured after popping, so it counts only the real
        # ancestors and not leftovers from the previous subtree.
        node.depth = len( parentStack )
        node.parent = parentStack[ -1 ]
        parentStack[ -1 ].add_child( node )
        parentStack.append( node )

    return cladeRoot
def nextbacktree(t, i, q=None, lim=0, maxdepth=None):
    """
    Create a tree of ivy.tree.Nodes from table t, with records having
    fields next, back, parent, and depth.

    The record with id *i* becomes the root.  Its descendants are the
    records strictly inside its (next, back) interval, optionally
    narrowed by query *q*, capped at *lim* rows (when truthy) and at
    depth *maxdepth* (when truthy).

    Returns the root Node.
    """
    db = t._db
    top = t[int(i)]

    limits = dict(orderby=t.next)
    if lim:
        limits["limitby"] = (0, lim)

    q2 = (t.next > top.next) & (t.back < top.back)
    if maxdepth:
        q2 &= (t.depth <= maxdepth)
    recs = db(q)(q2).select(**limits)

    root = Node()
    root.isroot = True
    root.rec = top
    root.id = top.id

    i2n = {}
    i2n[top.id] = root
    for row in recs:
        n = Node()
        n.rec = row
        n.id = row.id
        n.next = row.next
        n.back = row.back
        # The depth test reads the record: the Node built just above
        # was never given a depth, so testing n.depth could not match
        # the cut-off.
        if (((not maxdepth) and (row.next == row.back - 1)) or
                (maxdepth and (row.depth == maxdepth))):
            n.isleaf = True
        i2n[row.id] = n
        # Ordered by next, so a parent always precedes its children.
        i2n[row.parent].add_child(n)
    return root
def parse(data, ttable=None, treename=None):
    """
    Parse a newick string.

    *data* is any file-like object that can be coerced into shlex, or
    a string (converted to StringIO)

    *ttable* is a dictionary mapping node labels in the newick string
     to other values.

    *treename* is stored on every created node as ``treename``.

    Returns: the root node.

    Raises ValueError for a missing or non-numeric branch length, and
    AssertionError when parentheses are unbalanced at the end of the
    description.
    """
    from ivy.tree import Node

    if type(data) in types.StringTypes:
        data = StringIO(data)

    tokens = Tokenizer(data)

    node = None
    lp = 0
    rp = 0

    previous = None

    ni = 0  # node id counter (preorder) - zero-based indexing
    li = 0  # leaf index counter
    ii = 0  # internal node index counter
    pi = 0  # postorder sequence
    while 1:
        token = tokens.get_token()
        if token == ';' or token == tokens.eof:
            assert lp == rp, \
                "unbalanced parentheses in tree description: (%s, %s)" \
                % (lp, rp)
            break

        # internal node
        elif token == '(':
            lp = lp + 1
            newnode = Node()
            newnode.ni = ni
            ni += 1
            newnode.isleaf = False
            newnode.ii = ii
            ii += 1
            newnode.treename = treename
            if node:
                # left follows the previous sibling's right, or the
                # parent's left for a first child
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                node.add_child(newnode)
            else:
                newnode.left = 1
                newnode.right = 2
            newnode.right = newnode.left + 1
            node = newnode

        elif token == ')':
            rp = rp + 1
            node = node.parent
            node.pi = pi
            pi += 1
            if node.children:
                node.right = node.children[-1].right + 1

        elif token == ',':
            node = node.parent
            if node.children:
                node.right = node.children[-1].right + 1

        # branch length
        elif token == ':':
            token = tokens.get_token()
            if token == '[':
                node.length_comment = tokens.parse_embedded_comment()
                token = tokens.get_token()

            if token == '':
                # Was a string exception, which is itself a TypeError
                # on Python >= 2.6; raise a real exception instead.
                raise ValueError(
                    'unexpected end-of-file (expecting branch length)')
            try:
                brlen = float(token)
            except ValueError:
                raise ValueError("invalid literal for branch length, "
                                 "'%s'" % token)

            node.length = brlen

        # comment
        elif token == '[':
            node.comment = tokens.parse_embedded_comment()
            # slice instead of index so an empty comment is not an error
            if node.comment[:1] == '&':
                # metadata
                meta = META.findall(node.comment[1:])
                if meta:
                    node.meta = {}
                    for k, v in meta:
                        # NOTE(security): eval() executes text taken
                        # from the input; only parse trusted sources.
                        v = eval(v.replace('{', '(').replace('}', ')'))
                        node.meta[k] = v

        # leaf node or internal node label
        else:
            if previous != ')':  # leaf node
                if ttable:
                    try:
                        ttoken = (ttable.get(int(token)) or
                                  ttable.get(token))
                    except ValueError:
                        ttoken = ttable.get(token)
                    if ttoken:
                        token = ttoken
                newnode = Node()
                newnode.ni = ni
                ni += 1
                newnode.pi = pi
                pi += 1
                newnode.label = "_".join(token.split()).replace("'", "")
                newnode.isleaf = True
                newnode.li = li
                li += 1
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                newnode.right = newnode.left + 1
                newnode.treename = treename
                node.add_child(newnode)
                node = newnode
            else:  # label
                if ttable:
                    node.label = ttable.get(token, token)
                else:
                    node.label = token

        previous = token
    node.isroot = True
    return node
def parse(data, ttable=None, treename=None):
    """
    Parse a newick string.

    *data* is any file-like object that can be coerced into shlex, or
    a string (converted to StringIO)

    *ttable* is a dictionary mapping node labels in the newick string
     to other values.

    *treename* is copied onto each node that is created.

    Returns: the root node.

    Raises ValueError when a branch length is missing or is not a
    number; AssertionError when the description ends with unbalanced
    parentheses.
    """
    from ivy.tree import Node

    if type(data) in types.StringTypes:
        data = StringIO(data)

    tokens = Tokenizer(data)

    node = None
    lp = 0
    rp = 0
    previous = None

    ni = 0  # node id counter (preorder) - zero-based indexing
    li = 0  # leaf index counter
    ii = 0  # internal node index counter
    pi = 0  # postorder sequence
    while 1:
        token = tokens.get_token()
        if token == ';' or token == tokens.eof:
            assert lp == rp, \
                "unbalanced parentheses in tree description: (%s, %s)" \
                % (lp, rp)
            break

        # internal node
        elif token == '(':
            lp = lp + 1
            newnode = Node()
            newnode.ni = ni
            ni += 1
            newnode.isleaf = False
            newnode.ii = ii
            ii += 1
            newnode.treename = treename
            if node:
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                node.add_child(newnode)
            else:
                # very first node of the description
                newnode.left = 1
                newnode.right = 2
            newnode.right = newnode.left + 1
            node = newnode

        elif token == ')':
            # step back up to the node being closed and number it
            rp = rp + 1
            node = node.parent
            node.pi = pi
            pi += 1
            if node.children:
                node.right = node.children[-1].right + 1

        elif token == ',':
            node = node.parent
            if node.children:
                node.right = node.children[-1].right + 1

        # branch length
        elif token == ':':
            token = tokens.get_token()
            if token == '[':
                node.length_comment = tokens.parse_embedded_comment()
                token = tokens.get_token()

            if not (token == ''):
                try:
                    brlen = float(token)
                except ValueError:
                    raise ValueError("invalid literal for branch length, "
                                     "'%s'" % token)
            else:
                # The old string exception cannot be raised on
                # Python >= 2.6 (it turns into a TypeError).
                raise ValueError(
                    'unexpected end-of-file (expecting branch length)')

            node.length = brlen

        # comment
        elif token == '[':
            node.comment = tokens.parse_embedded_comment()
            # [:1] rather than [0]: an empty comment must not raise
            if node.comment[:1] == '&':
                # metadata
                meta = META.findall(node.comment[1:])
                if meta:
                    node.meta = {}
                    for k, v in meta:
                        # NOTE(security): eval() on input text -- do
                        # not feed untrusted newick data through here.
                        v = eval(v.replace('{', '(').replace('}', ')'))
                        node.meta[k] = v

        # leaf node or internal node label
        else:
            if previous != ')':  # leaf node
                if ttable:
                    try:
                        ttoken = (ttable.get(int(token)) or
                                  ttable.get(token))
                    except ValueError:
                        ttoken = ttable.get(token)
                    if ttoken:
                        token = ttoken
                newnode = Node()
                newnode.ni = ni
                ni += 1
                newnode.pi = pi
                pi += 1
                newnode.label = "_".join(token.split()).replace("'", "")
                newnode.isleaf = True
                newnode.li = li
                li += 1
                if node.children:
                    newnode.left = node.children[-1].right + 1
                else:
                    newnode.left = node.left + 1
                newnode.right = newnode.left + 1
                newnode.treename = treename
                node.add_child(newnode)
                node = newnode
            else:  # label
                if ttable:
                    node.label = ttable.get(token, token)
                else:
                    node.label = token

        previous = token
    node.isroot = True
    return node