예제 #1
0
def tree_generate(node_name,
                  index,
                  pos,
                  dependency_list,
                  visited_list,
                  reverse=False):
    """Build the subtree rooted at the dependency node ``index``.

    ``visited_list`` is extended in place: first with ``index`` itself, then
    with every ``(index, name, pos)`` child triple once it has been handled.
    A child triple that is already recorded there is attached as a plain
    leaf (and this node's ``value`` is set to 1) instead of being expanded
    recursively.

    Note that ``reverse`` only affects the ordering of this node's direct
    children; the recursive calls do not forward it.

    :type index: int
    :type pos: str
    :type visited_list: list
    :type node_name: str
    :type dependency_list: list
    :return: the ``Node`` built for ``node_name``
    """
    root = Node(node_name, pos, index=index)
    visited_list += [index]

    # Collect (index, name, pos) for every dependency whose head is this node.
    child_triples = []
    for dep in dependency_list:
        if dep[0][0] == index:
            target = dep[1]
            child_triples.append((target[0], target[1], target[2]))

    for triple in child_triples:
        kid_index, kid_name, kid_pos = triple
        if triple not in visited_list:
            subtree = tree_generate(kid_name, kid_index, kid_pos,
                                    dependency_list, visited_list)
            root.addkid(subtree)
        else:
            root.value = 1
            root.addkid(Node(label=kid_name, index=kid_index, pos=kid_pos))
        visited_list += [triple]

    # Fold the children's values into this node once all kids are attached.
    kids = Node.get_children(root)
    if kids:
        root.value += sum(kid.value for kid in kids)

    root.children.sort(key=lambda kid: kid.label, reverse=reverse)
    return root
예제 #2
0
    def convert_parse_tree_to_zss_tree(self, tree_as_string, ignore_leaves=False):
        '''
        Convert a bracketed parse-tree string into a Node tree under a
        synthetic 'ROOT' node.

        With ignore_leaves set, the words of the sentence are left out and
        only the tag structure is represented.
        '''
        # Tokenise on parentheses (kept as tokens) and drop blank fragments.
        tokens = []
        for piece in re.split(r'([\(\)])', tree_as_string):
            piece = piece.strip()
            if piece:
                tokens.append(piece)

        # Discard the first two tokens and the last one (the outer wrapper).
        tokens = tokens[2:-1]

        root_node = Node('ROOT')
        stack = [root_node]
        for position, token in enumerate(tokens):
            if token == ')':
                # Closing bracket: the node opened by the matching '(' is done.
                stack.pop()
                continue
            if token != '(':
                continue

            following = tokens[position + 1]
            match = re.search(r'[A-Z]+[ ][A-Za-z]+', following)
            if match is None:
                # Not a "TAG word" pair: the whole token labels the node.
                node = Node(following)
            else:
                parts = match.group().split(' ')
                if ignore_leaves:
                    node = Node(parts[0])
                else:
                    node = Node(parts[0]).addkid(Node(parts[1]))

            # Attach to the currently open node, then make it the open node.
            stack[-1].addkid(node)
            stack.append(node)
        return root_node
예제 #3
0
    def convertTreeToEditDistanceFormat(self):
        """Build one Node per vertex of ``self.edgeList`` and link parents to children.

        Each edge is read as ``(weight, parent, child)``; the weight is
        ignored. Duplicate (parent, child) pairs are collapsed, and the
        remaining edges are processed in sorted (parent, child) order so that
        children are always attached in the same order from run to run.

        The previous implementation sorted the edges and then iterated over
        ``set(...)`` of them, which throws the ordering away again.

        :return: dict mapping each vertex to its Node object
        """
        nodeObjects = dict()

        # Drop the weights and deduplicate first, then sort: sorting has to be
        # the last step, because a set has no defined iteration order.
        uniqueEdges = {(edge[1], edge[2]) for edge in self.edgeList}

        for parent, child in sorted(uniqueEdges):
            # Create the Node objects lazily; only the parent gets appended to.
            if parent not in nodeObjects:
                nodeObjects[parent] = Node(str(parent), [])

            if child not in nodeObjects:
                nodeObjects[child] = Node(str(child), [])

            nodeObjects[parent].addkid(nodeObjects[child])

        return nodeObjects
예제 #4
0
def list_to_zsstree(input_list, label='0'):
    """Convert a nested two-element list structure into a Node tree.

    Anything that is not a list becomes a leaf. A list becomes an internal
    node whose two children are built from its first and second elements.
    The recursive calls do not pass ``label``, so every node below the root
    uses the default label.
    """
    root = Node(label)
    if isinstance(input_list, list):  # internal node
        root = root.addkid(list_to_zsstree(input_list[0]))
        root = root.addkid(list_to_zsstree(input_list[1]))
    # otherwise: leaf node, returned as is
    return root
예제 #5
0
def zss_similarity(node1, node2):
    """Return ``simple_distance`` between two trees described by dicts.

    Each argument must provide a ``'name'`` entry (used as the root label)
    and a ``'children'`` entry (passed straight to ``Node``). Despite the
    function name, the value returned is the distance itself.
    """
    trees = [Node(spec['name'], spec['children']) for spec in (node1, node2)]
    return simple_distance(trees[0], trees[1])
예제 #6
0
def loop_tree(dictionary, node):
    """Walk a nested dict and add a Node per entry to ``node``.

    For a key whose value is a dict, a Node labelled with the key is added
    and the function recurses into that dict; for any other value a Node
    labelled with the value is added. ``node`` is printed (followed by a
    separator line) on every call as a trace.

    The print calls and ``items()`` are written so the function runs the same
    on Python 2 and Python 3; the original used Python-2-only syntax.

    :param dictionary: nested dict to walk
    :param node: Node that receives the new children
    """
    print(node)
    print("----")
    for k, v in dictionary.items():
        if isinstance(v, dict):
            # NOTE(review): the recursion receives whatever addkid() returns.
            # If that is the parent itself (as in zss), nested entries end up
            # under ``node`` rather than under the new child - confirm intent.
            loop_tree(v, node.addkid(Node(k)))
        else:
            node.addkid(Node(v))
예제 #7
0
 def order_children(rootnode):
     """Recursively sort every node's children by label, in place."""
     if len(rootnode.children) > 0:
         # Sort this node's direct children first ...
         ordered = list(Node.get_children(rootnode))
         ordered.sort(key=lambda kid: kid.label)
         rootnode.children = ordered
         # ... then descend into each of them.
         for kid in Node.get_children(rootnode):
             order_children(kid)
예제 #8
0
def test_de():
    """Editing D into E should remove "b" and "c" and match "a" with "a"."""
    removals = [
        Operation(Operation.remove, Node(label), None) for label in ("b", "c")
    ]
    expected_ops = removals + [
        Operation(Operation.match, Node("a"), Node("a")),
    ]
    _, ops = simple_distance(D, E, return_operations=True)
    assert ops == expected_ops
예제 #9
0
 def helper(obj):
     """Turn a nested list ``[label, kid, kid, ...]`` into a Node tree.

     A non-list value becomes a leaf labelled with the value itself.
     """
     if not isinstance(obj, list):
         return Node(obj)
     head, rest = obj[0], obj[1:]
     node = Node(head)
     for kid in rest:
         node.addkid(helper(kid))
     return node
예제 #10
0
 def helper(node: Node, index: int):
     """Attach the dependents of entry ``index`` to ``node``, recursively.

     Reads the enclosing ``dep`` mapping: ``dep[str(index)]['deps']`` holds
     lists of dependent indices, and each dependent's ``'word'`` becomes the
     label of its Node.
     """
     grouped = dep[str(index)]['deps'].values()
     # Concatenate the per-relation lists into one flat list of indices.
     for child_index in sum(grouped, []):
         child = Node(dep[str(child_index)]['word'])
         # Build the child's own subtree before attaching it.
         helper(child, child_index)
         node.addkid(child)
예제 #11
0
파일: repair.py 프로젝트: iradicek/clara
 def treetostr(self, node):
     """Render an expression tree as text.

     A label that is a ``(kind, text)`` tuple is unpacked; nodes of kind
     ``'O'`` are rendered as ``text(arg, arg, ...)`` using their children,
     and every other node is rendered as its label alone.
     """
     label = Node.get_label(node)
     kind = None
     if isinstance(label, tuple):
         kind, label = label
     if kind != 'O':
         return label
     rendered_args = [
         self.treetostr(child) for child in Node.get_children(node)
     ]
     return '%s(%s)' % (label, ', '.join(rendered_args))
예제 #12
0
def dgl_tree_to_zzs_tree(tree, vocab_key_list, u):
    """Convert the subtree of graph ``tree`` that feeds into node ``u`` to Nodes.

    A node with no incoming edges becomes a leaf labelled via
    ``vocab_key_list`` and the node's ``'x'`` data; any other node becomes a
    "PAD" node whose children are built from the sources of its incoming
    edges.
    """
    if tree.in_degrees(u) != 0:
        parent = Node("PAD")
        sources = tree.in_edges(u)[0]
        for src in sources:
            parent.addkid(
                dgl_tree_to_zzs_tree(tree, vocab_key_list, int(src)))
        return parent
    return Node(vocab_key_list[tree.ndata['x'][u]])
예제 #13
0
 def treetostr(self, node):
     """Return the textual form of the expression tree rooted at ``node``.

     Tuple labels are split into ``(kind, text)``. Only kind ``'O'`` nodes
     are printed with a parenthesised, comma-separated argument list built
     from their children; all other nodes yield just the label.
     """
     text = Node.get_label(node)
     tag = None
     if isinstance(text, tuple):
         tag, text = text
     if tag == 'O':
         pieces = []
         for child in Node.get_children(node):
             pieces.append(self.treetostr(child))
         return '%s(%s)' % (text, ', '.join(pieces))
     return text
예제 #14
0
def construct_node(tree, level, threshold = 1000):
  """Build a Node tree from a dict with 'tagName' and optional 'children'.

  Recursion stops at nodes without a 'children' entry or when ``level``
  equals ``threshold``; such nodes are returned without children.
  """
  root = Node(tree['tagName'])
  is_leaf = 'children' not in tree or level == threshold
  if is_leaf:
    root.label = tree['tagName']
  else:
    for child in tree['children']:
      root.addkid(construct_node(child, level + 1, threshold))
  return root
예제 #15
0
 def build_tree_no_date(self, event):
     """Build a tree for ``event`` from its lower-cased non-date fields.

     The root is labelled with the event type; country, state, city and
     name are added as its children, in that order.
     """
     A = Node(event.event_type.lower())
     for field in (event.country, event.state, event.city, event.name):
         # Keep whatever addkid() returns, exactly like the chained form.
         A = A.addkid(Node(field.lower()))
     return A
예제 #16
0
        def transform(parent):
            """Build the Node tree for ``parent`` using the enclosing ``clause`` map.

            ``clause`` maps a parent to its children; the first element of
            each entry is used as the Node label. Entries that are not keys
            of ``clause`` become leaves.
            """
            if parent in clause:
                subtrees = [transform(child) for child in clause[parent]]
                return Node(parent[0], children=subtrees)
            return Node(parent[0])
예제 #17
0
def parsed_tree_to_zzs_tree(u):
    """Convert a parsed tree into Nodes, skipping single-child chains.

    A node without children becomes a leaf labelled with ``u.value``; a node
    with exactly one child is replaced by that child's conversion; a node
    with two children becomes a "PAD" node. More than two children trips the
    assertion.
    """
    arity = len(u.child)
    if arity == 0:
        return Node(u.value)
    if arity == 1:
        return parsed_tree_to_zzs_tree(u.child[0])

    assert arity == 2
    pad = Node("PAD")
    for position in (0, 1):
        pad.addkid(parsed_tree_to_zzs_tree(u.child[position]))
    return pad
예제 #18
0
def get_ztree(cn, ztp=None):
    """Mirror the ``Tree`` structure of ``cn`` as Nodes, ignoring non-Tree leaves.

    ``cn`` may be a bracketed string, in which case it is parsed with
    ``Tree.fromstring`` first. ``ztp`` is the Node that receives the
    children; when omitted, a new root labelled like ``cn`` is created.
    Returns that root Node.
    """
    if isinstance(cn, str):
        cn = Tree.fromstring(cn)
    root = Node(cn.label()) if ztp is None else ztp
    for child in (sub for sub in cn if isinstance(sub, Tree)):
        branch = Node(child.label())
        root.addkid(branch)
        get_ztree(child, branch)
    return root
예제 #19
0
def main(node,child):
    """Attach a JSON-like value ``child`` beneath ``node`` and return the result.

    * scalar: one leaf labelled with the value is added;
    * empty list: one leaf with an empty label is added;
    * dict: one sub-node per key, built recursively from the value;
    * list: one sub-node per position (labelled with the index), built
      recursively from the element.

    The original fell off the end for dicts and lists and so returned None,
    which made the recursive ``node.addkid(main(...))`` calls attach None for
    every nested container. ``node`` is now returned in those cases too.

    :param node: Node that receives the new children
    :param child: scalar, list or dict to convert
    :return: for scalars and empty lists, whatever ``node.addkid`` returns
             (presumably ``node`` itself, as in zss - the recursion already
             relies on that); otherwise ``node``
    """
    if isinstance(child, dict):
        for k, v in child.items():
            node.addkid(main(Node(k), v))
    elif isinstance(child, list):
        if len(child) == 0:
            return node.addkid(Node(''))
        for n, i in enumerate(child):
            node.addkid(main(Node(n), i))
    else:
        return node.addkid(Node(child))
    return node
예제 #20
0
def make_zss_tree(ast_node):
    """Convert a parse-tree node into a Node tree.

    The label is ``ast_node.data`` when that attribute exists, otherwise
    ``ast_node.type``. Children are converted recursively when the node has
    a ``children`` attribute.
    """
    label = ast_node.data if hasattr(ast_node, "data") else ast_node.type
    node = Node(label)
    if hasattr(ast_node, "children"):
        for subtree in map(make_zss_tree, ast_node.children):
            node.addkid(subtree)
    return node
예제 #21
0
def zss_code_distance(code_a, code_b):
    """Return the tree edit distance between the ASTs of two source strings.

    Each string is parsed with ``ast.parse`` and copied under a fresh "root"
    Node by ``zss_ast_visit``; the two trees are then compared with
    ``simple_distance`` using ``label_weight`` as the label distance.
    """
    zss_roots = []
    for source in (code_a, code_b):
        parsed = ast.parse(source)
        zss_root = Node("root")
        zss_ast_visit(parsed, zss_root)
        zss_roots.append(zss_root)

    first, second = zss_roots
    return simple_distance(first, second, label_dist=label_weight)
예제 #22
0
def make_html_zssgraph(parent, graph=None, ignore_comments=True):
  ''' Given a parsed HTML element, return a zss style tree of the DOM below it.

  parent:           element exposing ``.tag`` and ``.getchildren()``
  graph:            Node that receives the children of ``parent``; a new
                    Node labelled ``parent.tag`` is created when omitted
  ignore_comments:  skip children whose tag is not a string (comments)

  Returns ``graph``.

  Fixes over the previous version: each element's descendants are attached
  to that element's own Node (they used to be appended to ``graph``, which
  flattened the whole DOM under the root), and ``ignore_comments`` is now
  forwarded to the recursive calls.
  '''
  # Compare against None so a caller-supplied Node is never replaced.
  if graph is None:
    graph = Node(parent.tag)
  for node in parent.getchildren():
    # if the element is a comment, ignore it
    if ignore_comments and not isinstance(node.tag, basestring):
      continue
    kid = Node(node.tag)
    graph.addkid(kid)
    make_html_zssgraph(node, kid, ignore_comments)
  return graph
예제 #23
0
def convert(args, tree, label, height):
    """Convert the adjacency mapping ``tree`` into Nodes, starting at ``label``.

    Expansion stops once ``height`` equals ``args.tree_height2``. Only
    children that are themselves keys of ``tree`` are followed.

    :return: ``(node, deepest)`` where ``deepest`` is the largest height
             reached in this subtree
    """
    node = Node(label)
    if height == args.tree_height2:
        return (node, height)

    deepest = height
    for child in tree[label]:
        if child not in tree:
            continue
        kid, kid_height = convert(args, tree, child, height + 1)
        deepest = max(deepest, kid_height)
        node.addkid(kid)
    return (node, deepest)
예제 #24
0
def make_html_zssgraph(parent, graph=None, ignore_comments=True):
    ''' Given a parsed HTML element, return a zss style tree of the DOM below it.

    parent:           element exposing ``.tag`` and ``.getchildren()``
    graph:            Node that receives the children of ``parent``; a new
                      Node labelled ``parent.tag`` is created when omitted
    ignore_comments:  skip children whose tag is not a string (comments)

    Returns ``graph``.

    Fixes over the previous version: each element's descendants are attached
    to that element's own Node (they used to be appended to ``graph``, which
    flattened the whole DOM under the root), and ``ignore_comments`` is now
    forwarded to the recursive calls.
    '''
    # Compare against None so a caller-supplied Node is never replaced.
    if graph is None:
        graph = Node(parent.tag)
    for node in parent.getchildren():
        # if the element is a comment, ignore it
        if ignore_comments and not isinstance(node.tag, basestring):
            continue
        kid = Node(node.tag)
        graph.addkid(kid)
        make_html_zssgraph(node, kid, ignore_comments)
    return graph
예제 #25
0
파일: repair.py 프로젝트: iradicek/clara
 def totree(self, e):
     """Convert an expression into a Node tree with ``(kind, text)`` labels.

     ``Var`` maps to kind 'V', ``Const`` to 'C' and ``Op`` to 'O' with one
     child per argument. The operator name 'AssAdd' is recorded as 'Ass'.
     Any other input yields None.
     """
     for cls, tag in ((Var, 'V'), (Const, 'C')):
         if isinstance(e, cls):
             return Node((tag, str(e)))
     if not isinstance(e, Op):
         return None

     name = e.name
     name = 'Ass' if name == 'AssAdd' else name
     n = Node(('O', name))
     for arg in e.args:
         n.addkid(self.totree(arg))
     return n
예제 #26
0
def zss_code_ast_edit(code_a, code_b):
    """Return ``(cost, ops)`` for editing the AST of ``code_a`` into ``code_b``'s.

    Both sources are parsed with ``ast.parse`` and copied under a fresh
    "root" Node by ``zss_ast_visit``. ``simple_distance`` is then asked for
    the edit operations as well as the cost, using ``label_weight`` as the
    label distance.
    """
    def build(source):
        # Parse one source string and mirror its AST beneath a new root.
        parsed = ast.parse(source)
        zss_root = Node("root")
        zss_ast_visit(parsed, zss_root)
        return zss_root

    tree_a = build(code_a)
    tree_b = build(code_b)
    cost, ops = simple_distance(
        tree_a, tree_b, label_dist=label_weight, return_operations=True)
    return cost, ops
예제 #27
0
def tree_generate(node_name, index, pos, dependency_list, visited_list, reverse=False):
    """Build the subtree rooted at the dependency node ``index``.

    Each entry of ``dependency_list`` is indexed as ``d[0]`` (the head, whose
    first element is its index) and ``d[1]`` (the dependent, read as
    ``(index, name, pos)``).

    ``visited_list`` is mutated in place: the node's own ``index`` is
    appended first, and each child ``(index, name, pos)`` triple is appended
    after that child has been handled. The same list is shared by all
    recursive calls.

    ``reverse`` controls the sort direction of this node's direct children
    only; it is not passed on to the recursive calls.

    :type index: int
    :type pos: str
    :type visited_list: list
    :type node_name: str
    :type dependency_list: list
    :return: the Node created for ``node_name``
    """
    node = Node(node_name, pos, index=index)
    visited_list += [index]
    # (index, name, pos) of every dependent whose head is this node.
    kids_index_name_pos = [(d[1][0], d[1][1], d[1][2]) for d in dependency_list if d[0][0] == index]
    for x in kids_index_name_pos:
        if x in visited_list:
            # Already handled elsewhere: attach a plain leaf instead of
            # recursing, and reset this node's value to 1.
            node.value = 1
            node.addkid(Node(label=x[1], index=x[0], pos=x[2]))
        else:
            node.addkid(tree_generate(x[1], x[0], x[2], dependency_list, visited_list))
        visited_list += [x]

    # The loop contains no ``break``, so this ``else`` branch always runs
    # once the loop has finished.
    else:
        if Node.get_children(node):
            # Add the children's values to this node's value.
            node.value += sum([g.value for g in Node.get_children(node)])
            # print node.label, node.value, node.pos
    node.children.sort(key=lambda x: x.label, reverse=reverse)
    return node
예제 #28
0
파일: a1.py 프로젝트: Zhengzi/maple
def tree_edit_distance(s1, s2):
    """Return ``simple_distance`` between two comma-separated strings.

    Each string becomes a two-level tree: a root with an empty label whose
    children are the comma-separated items, in order.
    """
    roots = []
    for text in (s1, s2):
        root = Node("")
        for item in text.split(','):
            root.addkid(Node(item))
        roots.append(root)

    return simple_distance(roots[0], roots[1])
예제 #29
0
def mktree(node, child, count=0):
    """Add the contents of a nested list/dict structure to ``node``.

    * list: every element is processed in turn, with ``count`` incremented
      per element;
    * dict: a key whose value is a dict adds a Node labelled with the key and
      then processes that nested dict; any other value adds a Node labelled
      with the value;
    * anything else: nothing is added.

    ``count`` is printed on every call as a trace. The function always
    returns None.

    The original ``return``-ed from inside both loops, so only the first list
    element (and nothing after the first nested dict) was ever processed.

    :param node: Node that receives the new children
    :param child: list, dict or scalar to walk
    :param count: running element counter used for the trace output
    """
    print(count)
    if isinstance(child, list):
        for c in child:
            count += 1
            mktree(node, c, count)

    elif isinstance(child, dict):
        for k, v in child.items():
            if isinstance(v, dict):
                node.addkid(Node(k))
                # NOTE(review): the nested dict is still attached to ``node``
                # rather than to the Node just created for ``k`` - confirm
                # whether that flattening is intended.
                mktree(node, v, count)
            else:
                node.addkid(Node(v))
예제 #30
0
def to_zzzNode(E, root=0):
    """Convert the tree described by ``E`` into zss Nodes, breadth first.

    Children of a vertex are obtained with ``getChildren(E, vertex)`` and
    every Node is labelled with ``str(vertex)``.

    The traversal now starts from ``root``. The original seeded the queue
    with a hard-coded ``0``, so for any other ``root`` the returned Node
    carried the right label but the children of vertex 0.

    :param E: structure understood by ``getChildren``
    :param root: vertex to start from
    :return: the Node created for ``root``
    """
    from collections import deque
    from zss import Node
    A = Node(str(root))
    # FIFO of (vertex, Node) pairs still to expand; a deque pops from the
    # left in O(1).
    pending = deque([(root, A)])
    while pending:
        parent, node = pending.popleft()
        for child in getChildren(E, parent):
            kid = Node(str(child))
            node.addkid(kid)
            pending.append((child, kid))
    return A
    def syntax_similarity_conversation(self, documents1):
        """Syntax similarity of each document with the one that follows it.

        Every document is split into sentences and parsed. Each consecutive
        pair of documents is then scored as ``1 - mean(normalised tree edit
        distance)`` over all sentence pairs between the two documents.

        A document containing a sentence of more than 70 whitespace-separated
        tokens is not parsed; any pair involving such a document scores NaN.

        :param documents1: sequence of document strings
        :return: numpy array with ``len(documents1) - 1`` scores
        """
        # NOTE(review): ``numnodes`` is a module-level counter that is reset
        # here and read back after each ``convert_mytree`` call; presumably
        # ``convert_mytree`` updates it while converting - confirm.
        global numnodes
        documents1parsed = []

        # Detect sentences and parse them
        for d1 in tqdm(range(len(documents1))):
            tempsents = (self.sent_detector.tokenize(documents1[d1].strip()))
            for s in tempsents:
                if len(s.split()) > 70:
                    # Over-long sentence: mark the whole document as unusable.
                    documents1parsed.append("NA")
                    break
            else:
                # Reached only when no sentence triggered the ``break`` above.
                temp = list(self.parser.raw_parse_sents((tempsents)))
                for i in range(len(temp)):
                    # Keep the first parse of each sentence.
                    temp[i] = list(temp[i])[0]
                    temp[i] = ParentedTree.convert(temp[i])
                documents1parsed.append(list(temp))

        results = []
        for d1 in range(len(documents1parsed) - 1):
            d2 = d1 + 1
            if documents1parsed[d1] == "NA" or documents1parsed[d2] == "NA":
                results.append(float('NaN'))
                continue

            # One row per sentence of document d1, one column per sentence of d2.
            costMatrix = []
            for i in range(len(documents1parsed[d1])):
                numnodes = 0
                tempnode = Node(documents1parsed[d1][i].root().label())
                sentencedoc1 = self.convert_mytree(documents1parsed[d1][i],
                                                   tempnode)
                temp_costMatrix = []
                # Value of the counter after converting the d1 sentence.
                sen1nodes = numnodes
                for j in range(len(documents1parsed[d2])):
                    numnodes = .0
                    tempnode = Node(documents1parsed[d2][j].root().label())
                    sentencedoc2 = self.convert_mytree(documents1parsed[d2][j],
                                                       tempnode)
                    ED = simple_distance(sentencedoc1, sentencedoc2)
                    # Normalise the distance by the sum of both counters.
                    ED /= (numnodes + sen1nodes)
                    temp_costMatrix.append(ED)
                costMatrix.append(temp_costMatrix)
            costMatrix = np.array(costMatrix)

            # Turn the mean normalised distance into a similarity.
            results.append(1 - np.mean(costMatrix))

        return np.array(results)
예제 #32
0
    def json_to_tree(self, toplevel):
        """Convert a list of nested-list programs into one Node tree.

        Every element of ``toplevel`` becomes a child of a "toplevel" root.
        A list ``[label, kid, ...]`` becomes a node labelled with its first
        element; any other value becomes a leaf labelled with the value.
        """
        def helper(obj):
            # Leaves are anything that is not a list.
            if not isinstance(obj, list):
                return Node(obj)
            node = Node(obj[0])
            for kid in obj[1:]:
                node.addkid(helper(kid))
            return node

        prog = Node("toplevel")
        for fun in toplevel:
            prog.addkid(helper(fun))
        return prog
예제 #33
0
def randtree(depth=2, alpha='abcdefghijklmnopqrstuvwxyz', repeat=2, width=2):
    """Generate a random tree with unique labels.

    Labels are all strings of length ``repeat`` over ``alpha``, shuffled and
    handed out one per node. Starting from a "root" node, ``depth - 1``
    further levels are added; every node on the current level receives
    between 1 and ``1 + width`` children.

    Uses ``range`` and the ``next()`` builtin so it runs on Python 3 as well
    as Python 2 (the original relied on ``xrange`` and ``.next()``).

    :raises StopIteration: if more nodes are requested than labels exist
    :return: the root Node
    """
    labels = [''.join(x) for x in itertools.product(alpha, repeat=repeat)]
    shuffle(labels)
    labels = iter(labels)
    root = Node("root")
    p = [root]  # nodes on the level currently being expanded
    for _ in range(depth - 1):
        c = []  # nodes created for the next level
        for y in p:
            for _ in range(randint(1, 1 + width)):
                n = Node(next(labels))
                y.addkid(n)
                c.append(n)
        p = c
    return root
예제 #34
0
def parseTreeFromStrings(tree, debug=False):
    """
    Create a Node tree from a list of strings, one tree element per string.

    The part of each line before the first ": " is the header. A header
    containing "|-" is split on "- " into an indentation prefix and a tag,
    and the nesting level is derived from the prefix length; any other
    header is a level-0 tag. The integers found in the line supply the two
    differences shown in the node label.

    @param tree:       {List} string format of the tree, one element per string
    @param debug:      {Boolean} accepted but not used by this function
    @return:           the Node built for the last level-0 line, or None when
                       ``tree`` is None or contains no level-0 line
    """
    if tree is None:
        return None

    root = None
    rightMosts = []  # most recent node seen at each level
    for line in tree:
        header = line.split(": ")[0]
        if "|-" in header:
            prefix, tag = header.split("- ")
            level = 1 + len(prefix) // 2
        else:
            level, tag = 0, header
        tag = tag.strip()

        numbers = [int(tok) for tok in re.split("\\D+", line) if tok != ""]
        dx = numbers[2] - numbers[0]
        dy = numbers[3] - numbers[1]
        cur = Node("%s[x=%04d,y=%04d]" % (tag, dx, dy))

        if level != len(rightMosts):
            # Revisiting a level: replace its most recent node.
            assert level < len(rightMosts)
            rightMosts[level] = cur
        else:
            rightMosts.append(cur)

        if level != 0:
            rightMosts[level - 1].addkid(cur)
        else:
            root = cur

    return root
예제 #35
0
 def build_tree(self, event):
     """Build a tree for ``event`` from its lower-cased fields, dates included.

     The root is labelled with the event type; country, state, city, name,
     day, month and year are added as its children, in that order.
     """
     A = Node(event.event_type.lower())
     fields = (
         event.country,
         event.state,
         event.city,
         event.name,
         event.day,
         event.month,
         event.year,
     )
     for field in fields:
         # Keep whatever addkid() returns, exactly like the chained form.
         A = A.addkid(Node(field.lower()))
     return A
예제 #36
0
파일: feedback.py 프로젝트: iradicek/clara
    def treesize(self, t):
        '''
        Calculates the total number of nodes over all trees in t.

        t maps a location to a mapping from variable to tree. A tree whose
        root label is exactly ('V', var) for its own variable is skipped.
        '''

        def count_nodes(node):
            # The node itself plus everything beneath it.
            total = 1
            for child in Node.get_children(node):
                total += count_nodes(child)
            return total

        size = 0
        for assignments in (t[loc] for loc in t):
            for var, tree in assignments.items():
                if Node.get_label(tree) != ('V', var):
                    size += count_nodes(tree)

        return size
예제 #37
0
파일: test.py 프로젝트: Zhengzi/maple
def tree_edit_distance(s1,s2):
    """Return ``simple_distance`` between two comma-separated strings.

    Each string is turned into a root Node with an empty label and one child
    per comma-separated item.
    """
    def as_tree(text):
        # Root with an empty label, one leaf per item.
        root = Node("")
        for item in text.split(','):
            root.addkid(Node(item))
        return root

    n1 = as_tree(s1)
    n2 = as_tree(s2)
    return simple_distance(n1, n2)
예제 #38
0
파일: feedback.py 프로젝트: iradicek/clara
 def ts(node):
     """Count the nodes in the subtree rooted at ``node``, itself included."""
     total = 1
     for child in Node.get_children(node):
         total += ts(child)
     return total
예제 #39
0
def get_children(node):
    """Return whatever ``Node.get_children`` yields for ``node``."""
    children = Node.get_children(node)
    return children