Пример #1
0
    def rightbranch(tree):
        """
        Transform a subtree lying on a right branch.

        A preterminal is flagged and returned as a one-element list.  For any
        other node the chain of left children is followed down to its
        preterminal; the result is the two-element list
        [preterminal, transformed right material].
        """
        if tb.is_preterminal(tree):
            return [flag(tree)]

        anc = tb.tree_label(tree)
        left, right = tb.tree_children(tree)
        # Transformed right material gathered so far between the current
        # left-spine node and the node labelled anc.
        transformed = tb.make_nonterminal(
            make_pair(anc, tb.tree_label(left)),
            rightbranch(right))

        spine = left
        while not tb.is_preterminal(spine):
            left, right = tb.tree_children(spine)
            pair_label = make_pair(anc, tb.tree_label(left))
            # The newly transformed right child comes first, followed by
            # everything that was already collected higher up the spine.
            transformed = tb.make_nonterminal(
                pair_label, rightbranch(right) + [transformed])
            spine = left
        return [spine, transformed]
Пример #2
0
 def leftbranch(subtree, continuation, X1):
     """
     Transform a subtree lying on a left branch.

     Follows left children until a preterminal is reached.  At every step
     the right child is transformed with rightbranch(), the continuation
     gathered so far is appended to it, and the result is wrapped in a
     single nonterminal that becomes the new continuation.  The preterminal
     is relabelled with X1 and placed in front of the final continuation.
     Anc, relabel and rightbranch come from the enclosing scope.
     """
     node = subtree
     pending = continuation
     while not tb.is_preterminal(node):
         left, right = tb.tree_children(node)
         # Label handed down to the transform of the right sibling.
         right_context = tb.tree_label(left) + '>'
         wrapped = tb.make_nonterminal(
             make_pair(Anc, tb.tree_label(left)),
             rightbranch(right, right_context) + pending)
         pending = [wrapped]
         node = left
     return [relabel(node, X1)] + pending
Пример #3
0
 def leftbranch(subtree, continuation):
     """
     Transform a subtree lying on a left branch.

     Follows left children until a preterminal is reached.  At every step
     the transformed right child, followed by the continuation gathered so
     far, is wrapped in one nonterminal labelled
     make_pair(Anc, <label of the left child>); that nonterminal becomes the
     new one-element continuation.  Returns the preterminal followed by the
     final continuation.  Anc and rightbranch come from the enclosing scope.
     """
     node = subtree
     pending = continuation
     while not tb.is_preterminal(node):
         left, right = tb.tree_children(node)
         pair_label = make_pair(Anc, tb.tree_label(left))
         pending = [tb.make_nonterminal(pair_label,
                                        rightbranch(right) + pending)]
         node = left
     return [node] + pending
Пример #4
0
def searchTree(labels, regex, tree):
    """
    Record the words spanned by every node whose label matches regex.

    labels is updated in place: it maps a node label (the part before the
    first '#') to a dict that is passed to incr() together with the word.
    The word is the concatenation of the node's terminals with backslashes
    removed.  All subtrees are searched recursively; terminals are skipped.
    Returns None.
    """
    if tb.is_terminal(tree):
        return

    category = tb.tree_label(tree).split('#')[0]
    if regex.match(category):
        if category not in labels:
            labels[category] = {}
        pieces = tb.terminals(tree)
        word = ''.join(pieces).replace("\\", "")
        # incr() is defined elsewhere; presumably it bumps the count of word.
        incr(word, labels[category])

    for child in tb.tree_subtrees(tree):
        searchTree(labels, regex, child)
Пример #5
0
 def visit(node, sofar):
     """
     Append the label derived from node to sofar, then visit its later children.

     node must be phrasal with a pair label, and its first child must be a
     preterminal.  Which cat_* constructor is used depends on the number of
     children and on whether the first child is flagged.  Every child after
     the first is visited recursively.  Returns sofar, which is also
     extended in place.  A node with more than three children terminates
     the program through sys.exit().
     """
     assert tb.is_phrasal(node)
     label = tb.tree_label(node)
     assert is_pair(label)
     anc, cat = pair_categories(label)
     children = tb.tree_children(node)
     assert len(children) > 0
     assert tb.is_preterminal(children[0])

     arity = len(children)
     if arity == 1:
         # A lone preterminal child has to be a flagged one.
         assert is_flagged(children[0])
         xf = cat_c(anc, cat, unflag(children[0]))
     elif arity == 2 and is_flagged(children[0]):
         anc_again, B = pair_categories(tb.tree_label(children[1]))
         assert anc_again == anc
         xf = cat_b(anc, cat, B, unflag(children[0]))
     elif arity == 2:
         # Unflagged first child: the second child's pair must name it.
         C, a = pair_categories(tb.tree_label(children[1]))
         assert a == tb.tree_label(children[0]), \
                "error in label of node.children[1] a = {}, node = {}".format(a, node)
         xf = cat_e(anc, cat, C, children[0])
     elif arity == 3:
         assert not is_flagged(children[0])
         C, _second = pair_categories(tb.tree_label(children[1]))
         anc_again, B = pair_categories(tb.tree_label(children[2]))
         assert anc == anc_again
         xf = cat_d(anc, cat, B, C, children[0])
     else:
         # xtree is the whole tree, taken from the enclosing scope.
         sys.exit("error: ill-formed subtree {}\n in tree {}".format(
             node, xtree))

     sofar.append(xf)
     for later_child in children[1:]:
         sofar = visit(later_child, sofar)
     return sofar
Пример #6
0
 def visit(node, wordssofar, segssofar):
     """
     Walk the tree under node, grouping terminal segments into words.

     Children are visited before the node's own label is examined.  A
     terminal that does not match ignore_terminal_rex is simplified and
     appended to segssofar.  When a node's label matches word_rex and
     segments are pending, they are joined into one word appended to
     wordssofar, and the segment list starts afresh.
     Returns the pair (wordssofar, segssofar).
     """
     if not tb.is_terminal(node):
         for child in tb.tree_children(node):
             wordssofar, segssofar = visit(child, wordssofar, segssofar)
         if word_rex.match(tb.tree_label(node)) and segssofar != []:
             wordssofar.append(''.join(segssofar))
             segssofar = []
     elif not ignore_terminal_rex.match(node):
         segssofar.append(simplify_terminal(node))
     return wordssofar, segssofar
Пример #7
0
 def visit(node, wordssofar, segssofar):
     """
     Collect words from the tree under node.

     Terminals not matched by ignore_terminal_rex contribute a simplified
     segment to segssofar.  For any other node the children are processed
     first; afterwards, if the node's label matches word_rex and at least
     one segment is pending, the pending segments are concatenated into a
     word, appended to wordssofar, and the pending list is reset.
     Returns (wordssofar, segssofar).
     """
     if tb.is_terminal(node):
         keep = not ignore_terminal_rex.match(node)
         if keep:
             segssofar.append(simplify_terminal(node))
         return wordssofar, segssofar

     for child in tb.tree_children(node):
         wordssofar, segssofar = visit(child, wordssofar, segssofar)

     closes_word = word_rex.match(tb.tree_label(node))
     if closes_word and segssofar != []:
         word = ''.join(segssofar)
         wordssofar.append(word)
         segssofar = []
     return wordssofar, segssofar
Пример #8
0
def lcx2tree_labels0(xtree):
    """
    Map an lcx2 tree to the corresponding sequence of labels.

    Per the original author, the labels are those of the author's 1996
    paper.  xtree must have exactly two children: the first contributes the
    initial label via cat_a(), the second is walked by the nested visit().
    Returns the list of labels.
    """
    def visit(node, sofar):
        """
        Append the label for node to sofar and recurse into children[1:].

        The cat_* constructor is chosen from the number of children and
        from whether the first (preterminal) child is flagged.  Nodes with
        more than three children abort the program via sys.exit().
        """
        assert tb.is_phrasal(node)
        label = tb.tree_label(node)
        assert is_pair(label)
        anc, cat = pair_categories(label)
        children = tb.tree_children(node)
        assert len(children) > 0
        assert tb.is_preterminal(children[0])

        arity = len(children)
        if arity == 1:
            # A lone preterminal child has to be a flagged one.
            assert is_flagged(children[0])
            xf = cat_c(anc, cat, unflag(children[0]))
        elif arity == 2 and is_flagged(children[0]):
            anc_again, B = pair_categories(tb.tree_label(children[1]))
            assert anc_again == anc
            xf = cat_b(anc, cat, B, unflag(children[0]))
        elif arity == 2:
            # Unflagged first child: the second child's pair must name it.
            C, a = pair_categories(tb.tree_label(children[1]))
            assert a == tb.tree_label(children[0]), \
                   "error in label of node.children[1] a = {}, node = {}".format(a, node)
            xf = cat_e(anc, cat, C, children[0])
        elif arity == 3:
            assert not is_flagged(children[0])
            C, _second = pair_categories(tb.tree_label(children[1]))
            anc_again, B = pair_categories(tb.tree_label(children[2]))
            assert anc == anc_again
            xf = cat_d(anc, cat, B, C, children[0])
        else:
            sys.exit("error: ill-formed subtree {}\n in tree {}".format(
                node, xtree))

        sofar.append(xf)
        for later_child in children[1:]:
            sofar = visit(later_child, sofar)
        return sofar

    root_label = tb.tree_label(xtree)
    top_children = tb.tree_children(xtree)
    assert len(top_children) == 2, "nonbinary xtree = {}".format(xtree)
    labels = [cat_a(root_label, top_children[0])]
    return visit(top_children[1], labels)
Пример #9
0
def lcx(root):
    """
    Map a binary tree into its left-corner transform.

    Per the original author, this is the transform of the author's 1996
    paper.  A preterminal root is returned unchanged; otherwise a new
    nonterminal with the root's label is built over the transformed
    children.
    """
    def rightbranch(tree):
        """
        Return the transformed children for tree as a list.

        The left spine of tree is followed down to its preterminal.  At
        each step the transformed right child, followed by what has been
        collected so far, is wrapped in one nonterminal labelled
        make_pair(<label of tree>, <label of the left child>).
        """
        anc = tb.tree_label(tree)
        continuation = []
        node = tree
        while not tb.is_preterminal(node):
            left, right = tb.tree_children(node)
            pair_label = make_pair(anc, tb.tree_label(left))
            continuation = [
                tb.make_nonterminal(pair_label,
                                    rightbranch(right) + continuation)
            ]
            node = left
        return [node] + continuation

    if not tb.is_preterminal(root):
        return tb.make_nonterminal(tb.tree_label(root), rightbranch(root))
    return root
Пример #10
0
def lct(root):
    """
    Apply the same transform as lcx() while also relabelling preterminals.

    Per the original author, the relabelling is meant to implement the
    transduction of the author's 1996 paper, but it is not complete.
    A preterminal root is returned unchanged.
    """
    def relabel(tree, label):
        """Return a copy of tree whose label is '<old label> <label>'."""
        new_label = tree[0] + ' ' + label
        return tb.make_nonterminal(new_label, tb.tree_children(tree))

    def rightbranch(tree, X0):
        """
        Return the transformed children for tree as a list.

        X0 is the string used to relabel the preterminal at the bottom of
        tree's left spine (or, combined with its own label, tree itself
        when tree is a preterminal).
        """
        if tb.is_preterminal(tree):
            return [relabel(tree, X0 + '<' + tb.tree_label(tree) + ']')]

        anc = tb.tree_label(tree)
        left, right = tb.tree_children(tree)
        X2 = tb.tree_label(left) + '>'
        continuation = [
            tb.make_nonterminal(make_pair(anc, tb.tree_label(left)),
                                rightbranch(right, X2))
        ]

        # Walk down the left spine, wrapping each transformed right child
        # together with everything collected above it.
        node = left
        while not tb.is_preterminal(node):
            left, right = tb.tree_children(node)
            X2 = tb.tree_label(left) + '>'
            continuation = [
                tb.make_nonterminal(make_pair(anc, tb.tree_label(left)),
                                    rightbranch(right, X2) + continuation)
            ]
            node = left
        return [relabel(node, X0)] + continuation

    if not tb.is_preterminal(root):
        return tb.make_nonterminal(tb.tree_label(root), rightbranch(root, ''))
    return root
Пример #11
0
def lcx2(root):
    """
    Map a binary tree into its left-corner transform, flagging some preterminals.

    Per the original author, this is the transform of the author's 1996
    paper; preterminals that are right children, i.e. generated under
    schema (11b) and (11c), are flagged so that schema (11b) and (11e) can
    be told apart.  A preterminal root is returned unchanged.
    """
    def rightbranch(tree):
        """
        Transform a subtree lying on a right branch.

        Returns a list of transformed children; a preterminal yields a
        one-element list holding its flagged version.
        """
        def leftbranch(subtree, transformed_right):
            """
            Transform a subtree lying on a left branch.

            transformed_right is the transformed right material between
            this node and the node labelled ancestor.
            """
            while not tb.is_preterminal(subtree):
                left, right = tb.tree_children(subtree)
                transformed_right = tb.make_nonterminal(
                    make_pair(ancestor, tb.tree_label(left)),
                    rightbranch(right) + [transformed_right])
                subtree = left
            return [subtree, transformed_right]

        if tb.is_preterminal(tree):
            return [flag(tree)]

        ancestor = tb.tree_label(tree)
        left, right = tb.tree_children(tree)
        initial = tb.make_nonterminal(
            make_pair(ancestor, tb.tree_label(left)),
            rightbranch(right))
        return leftbranch(left, initial)

    if not tb.is_preterminal(root):
        return tb.make_nonterminal(tb.tree_label(root), rightbranch(root))
    return root
 def visit(node, wordssofar, segssofar):
     """
     Walk the tree under node, grouping terminal segments into words.

     Children are visited before the node's own label is examined.  A
     terminal not matched by ignore_terminal_rex is appended to segssofar.
     When a node's label matches score_cat_rex and segments are pending,
     they are joined into a word.  The word is appended to wordssofar as
     (word, topic) if the match has a non-None 'topic' group, and as the
     bare word otherwise.  Returns (wordssofar, segssofar).
     """
     if tb.is_terminal(node):
         if not ignore_terminal_rex.match(node):
             segssofar.append(node)
         return wordssofar, segssofar

     for child in tb.tree_children(node):
         wordssofar, segssofar = visit(child, wordssofar, segssofar)

     mo = score_cat_rex.match(tb.tree_label(node))
     if not mo or segssofar == []:
         return wordssofar, segssofar

     word = ''.join(segssofar)
     try:
         topic = mo.group('topic')
     except IndexError:
         # The pattern defines no 'topic' group at all.
         topic = None

     if topic is None:
         wordssofar.append(word)
     else:
         wordssofar.append((word, topic))
     # The pending segments have been consumed; start a fresh list.
     return wordssofar, []