Пример #1
0
 def buildSyllable(sTree, segments, noLabel):
     """
     Collect the segments found under sTree into the list segments.

     A node whose category is one of C0-C5, "Consonant" or "Stress"
     contributes its first child with "_C" appended; a "Vowel" node
     contributes its first child with "_V" appended.  When noLabel is
     true the first child is appended as is, without a suffix.  Any
     other node is searched recursively, child by child.

     Nothing is returned; segments is extended in place.
     """
     category = tb.tree_category(sTree)
     if category in ("C0", "C1", "C2", "C3", "C4", "C5", "Consonant",
                     "Stress"):
         suffix = "_C"
     elif category == "Vowel":
         suffix = "_V"
     else:
         # Not a segment-bearing node: keep descending.
         for subtree in tb.tree_children(sTree):
             buildSyllable(subtree, segments, noLabel)
         return

     segment = tb.tree_children(sTree)[0]
     segments.append(segment if noLabel else segment + suffix)
Пример #2
0
 def buildWord(wTree, sCat, syllables, segSep, sylSep, noLabel):
     """
     Collect the syllables found under wTree into the list syllables.

     A node whose category is matched by sCat.search() is treated as a
     syllable: its segments are gathered with buildSyllable() and joined
     with segSep into one string, which is appended to syllables.  (The
     pattern replaces an earlier hard-coded test for the categories
     "Syllable", "SyllableIF", "SyllableI" and "SyllableF".)  Any other
     node is searched recursively through its children.

     sylSep is not used here; it is only handed on to the recursive
     calls.  noLabel is handed on to buildSyllable().

     If processing a node raises an error, the offending subtree is
     printed and the program is terminated with sys.exit().
     """
     try:
         if sCat.search(tb.tree_category(wTree)):
             segments = []
             buildSyllable(wTree, segments, noLabel)
             syllables.append(segSep.join(segments))
         else:
             for child in tb.tree_children(wTree):
                 buildWord(child, sCat, syllables, segSep, sylSep, noLabel)
     except Exception:
         # Catch Exception rather than everything: a bare except would
         # also trap the SystemExit raised below by a deeper call (making
         # every ancestor print its own subtree on the way out) as well
         # as KeyboardInterrupt.
         # The parenthesised form prints the same text under Python 2
         # and Python 3.
         print("%s\n" % wTree)
         sys.exit()
Пример #3
0
    def rightbranch(tree):
        """
        Transform a subtree lying on a right branch.

        A preterminal is flagged and returned as a one-element list.
        Otherwise the node's label becomes Anc, its right child is
        transformed recursively and wrapped in a nonterminal labelled
        with the pair (Anc, label of the left child), and the left child
        is then processed by leftbranch().  tree is expected to have
        exactly two children when it is not a preterminal.
        """
        def leftbranch(subtree, transformed_right):
            """
            Transform a subtree lying on a left branch.

            transformed_right is the transformed right material between
            this node and Anc.  Returns a two-element list: the
            preterminal reached at the bottom of the left spine followed
            by the accumulated transformed material.
            """
            # Walk down the left spine, wrapping what has been built so
            # far one level deeper at every step.
            while not tb.is_preterminal(subtree):
                left, right = tb.tree_children(subtree)
                pair_label = make_pair(Anc, tb.tree_label(left))
                transformed_right = tb.make_nonterminal(
                    pair_label, rightbranch(right) + [transformed_right])
                subtree = left
            return [subtree, transformed_right]

        if tb.is_preterminal(tree):
            return [flag(tree)]

        Anc = tb.tree_label(tree)
        left, right = tb.tree_children(tree)
        pair_label = make_pair(Anc, tb.tree_label(left))
        return leftbranch(
            left, tb.make_nonterminal(pair_label, rightbranch(right)))
Пример #4
0
 def visit(node, sofar):
     """
     Append the label for node to sofar, then visit every child of node
     except the first, in order.

     node must be phrasal, carry a pair label (A, X) and have a
     preterminal as its first child.  Which of cat_c, cat_b, cat_e or
     cat_d builds the label depends on the number of children (1, 2 or
     3) and, for two children, on whether the first child is flagged.
     Any other number of children terminates the program with an error
     message.

     Returns sofar.
     """
     assert tb.is_phrasal(node)
     label = tb.tree_label(node)
     assert is_pair(label)
     A, X = pair_categories(label)
     children = tb.tree_children(node)
     assert len(children) > 0
     assert tb.is_preterminal(children[0])

     arity = len(children)
     if arity == 1:
         first = children[0]
         assert is_flagged(first)
         xf = cat_c(A, X, unflag(first))
     elif arity == 2:
         first, second = children
         if is_flagged(first):
             A1, B = pair_categories(tb.tree_label(second))
             assert A1 == A
             xf = cat_b(A, X, B, unflag(first))
         else:
             C, a = pair_categories(tb.tree_label(second))
             assert a == tb.tree_label(first), \
                    "error in label of node.children[1] a = {}, node = {}".format(a, node)
             xf = cat_e(A, X, C, first)
     elif arity == 3:
         first, second, third = children
         assert not is_flagged(first)
         # Only the first component of the second child's pair is used.
         C, _ = pair_categories(tb.tree_label(second))
         A1, B = pair_categories(tb.tree_label(third))
         assert A == A1
         xf = cat_d(A, X, B, C, first)
     else:
         sys.exit("error: ill-formed subtree {}\n in tree {}".format(
             node, xtree))

     sofar.append(xf)
     for child in children[1:]:
         sofar = visit(child, sofar)
     return sofar
Пример #5
0
 def leftbranch(subtree, continuation):
     """
     Transform a subtree lying on a left branch.

     continuation is the list of already transformed material that
     follows this subtree.  While subtree is not a preterminal, its
     right child is transformed with rightbranch(), concatenated with
     the continuation and wrapped in a single nonterminal labelled with
     the pair (Anc, label of the left child); the walk then continues
     with the left child.

     Returns the preterminal finally reached, followed by the
     continuation built up along the way, as one list.
     """
     while not tb.is_preterminal(subtree):
         left, right = tb.tree_children(subtree)
         pair_label = make_pair(Anc, tb.tree_label(left))
         wrapped = tb.make_nonterminal(pair_label,
                                       rightbranch(right) + continuation)
         continuation = [wrapped]
         subtree = left
     return [subtree] + continuation
Пример #6
0
 def leftbranch(subtree, continuation, X1):
     """
     Transform a subtree lying on a left branch, relabelling the
     preterminal at the bottom of the left spine with X1.

     continuation is the list of already transformed material that
     follows this subtree.  While subtree is not a preterminal, its
     right child is transformed with rightbranch(), which is also given
     the left child's label followed by '>'; the result is concatenated
     with the continuation and wrapped in a single nonterminal labelled
     with the pair (Anc, label of the left child).  The walk then
     continues with the left child; X1 stays the same throughout.

     Returns the relabelled preterminal followed by the continuation
     built up along the way, as one list.
     """
     while not tb.is_preterminal(subtree):
         left, right = tb.tree_children(subtree)
         X2 = tb.tree_label(left) + '>'
         pair_label = make_pair(Anc, tb.tree_label(left))
         wrapped = tb.make_nonterminal(pair_label,
                                       rightbranch(right, X2) + continuation)
         continuation = [wrapped]
         subtree = left
     return [relabel(subtree, X1)] + continuation
Пример #7
0
 def visit(node, wordssofar, segssofar):
     """
     Gather the words found in the subtree rooted at node.

     A terminal that ignore_terminal_rex does not match is passed
     through simplify_terminal() and appended to segssofar.  For any
     other node the children are visited first; afterwards, if the
     node's label matches word_rex and segments are pending, they are
     concatenated into one word that is appended to wordssofar, and a
     fresh empty segment list takes their place.

     Returns the pair (wordssofar, segssofar).
     """
     if not tb.is_terminal(node):
         for child in tb.tree_children(node):
             wordssofar, segssofar = visit(child, wordssofar, segssofar)
         if word_rex.match(tb.tree_label(node)) and segssofar != []:
             wordssofar.append(''.join(segssofar))
             # Start a new list rather than emptying the old one.
             segssofar = []
     elif not ignore_terminal_rex.match(node):
         segssofar.append(simplify_terminal(node))
     return wordssofar, segssofar
Пример #8
0
 def visit(node, wordssofar, segssofar):
     """
     Gather the words found in the subtree rooted at node.

     Terminals not matched by ignore_terminal_rex are simplified with
     simplify_terminal() and queued on segssofar.  A non-terminal is
     handled after all of its children: when its label matches word_rex
     and the queue is not empty, the queued segments are joined into a
     word, the word is appended to wordssofar, and a new empty queue is
     returned in place of the old one.

     Returns the pair (wordssofar, segssofar).
     """
     if tb.is_terminal(node):
         ignored = ignore_terminal_rex.match(node)
         if not ignored:
             segssofar.append(simplify_terminal(node))
         return wordssofar, segssofar

     for subtree in tb.tree_children(node):
         wordssofar, segssofar = visit(subtree, wordssofar, segssofar)

     is_word = word_rex.match(tb.tree_label(node))
     if is_word and segssofar != []:
         wordssofar.append(''.join(segssofar))
         return wordssofar, []
     return wordssofar, segssofar
Пример #9
0
def visitTree(tree, words, wCat, sCat, segSep, sylSep, noLabel):
    """
    Collect one string per word found in tree and append it to words.

    The tree is searched from the root downwards.  A node whose category
    is matched by wCat.search() is treated as a word: the syllables
    below it are gathered, joined with sylSep and appended to words.
    Any other node is searched recursively through its children.

    Within a word, a node whose category is matched by sCat.search() is
    treated as a syllable; its segments are joined with segSep.  Unless
    noLabel is true, consonant-like segments get "_C" appended and vowel
    segments get "_V" appended.

    Nothing is returned; words is extended in place.  If building a word
    raises an error, the offending subtree is printed and the program is
    terminated with sys.exit().
    """
    # Categories whose first child is emitted as a consonant-like segment.
    consonant_cats = ("C0", "C1", "C2", "C3", "C4", "C5", "Consonant",
                      "Stress")

    def buildSyllable(sTree, segments, noLabel):
        """Append the segments found under sTree to segments."""
        c = tb.tree_category(sTree)
        if c in consonant_cats:
            suffix = "_C"
        elif c == "Vowel":
            suffix = "_V"
        else:
            # Not a segment-bearing node: keep descending.
            for child in tb.tree_children(sTree):
                buildSyllable(child, segments, noLabel)
            return
        segment = tb.tree_children(sTree)[0]
        segments.append(segment if noLabel else segment + suffix)

    def buildWord(wTree, sCat, syllables, segSep, sylSep, noLabel):
        """Append one joined string per syllable under wTree to syllables."""
        try:
            if sCat.search(tb.tree_category(wTree)):
                segments = []
                buildSyllable(wTree, segments, noLabel)
                syllables.append(segSep.join(segments))
            else:
                for child in tb.tree_children(wTree):
                    buildWord(child, sCat, syllables, segSep, sylSep, noLabel)
        except Exception:
            # Catch Exception rather than everything: a bare except would
            # also trap the SystemExit raised below by a deeper call
            # (making every ancestor print its own subtree on the way
            # out) as well as KeyboardInterrupt.
            # The parenthesised form prints the same text under Python 2
            # and Python 3.
            print("%s\n" % wTree)
            sys.exit()

    if wCat.search(tb.tree_category(tree)):
        syllables = []
        buildWord(tree, sCat, syllables, segSep, sylSep, noLabel)
        words.append(sylSep.join(syllables))
    else:
        for child in tb.tree_children(tree):
            visitTree(child, words, wCat, sCat, segSep, sylSep, noLabel)
Пример #10
0
def lcx2tree_labels0(xtree):
    """
    Map an lcx2 tree to the corresponding list of labels (the original
    author refers to their 1996 paper for the scheme).

    xtree must have exactly two children.  The first label is built by
    cat_a from the root label and the first child; the remaining labels
    come from walking the second child with the nested visitor.
    """
    def visit(node, labels):
        """
        Append the label for node to labels, then visit every child of
        node except the first, in order.  Returns labels.

        node must be phrasal, carry a pair label (A, X) and have a
        preterminal as its first child.  cat_c, cat_b, cat_e or cat_d
        builds the label depending on the number of children (1, 2 or 3)
        and, for two children, on whether the first child is flagged.
        Any other number of children terminates the program.
        """
        assert tb.is_phrasal(node)
        node_label = tb.tree_label(node)
        assert is_pair(node_label)
        A, X = pair_categories(node_label)
        kids = tb.tree_children(node)
        assert len(kids) > 0
        assert tb.is_preterminal(kids[0])

        arity = len(kids)
        if arity == 1:
            first = kids[0]
            assert is_flagged(first)
            xf = cat_c(A, X, unflag(first))
        elif arity == 2:
            first, second = kids
            if is_flagged(first):
                A1, B = pair_categories(tb.tree_label(second))
                assert A1 == A
                xf = cat_b(A, X, B, unflag(first))
            else:
                C, a = pair_categories(tb.tree_label(second))
                assert a == tb.tree_label(first), \
                       "error in label of node.children[1] a = {}, node = {}".format(a, node)
                xf = cat_e(A, X, C, first)
        elif arity == 3:
            first, second, third = kids
            assert not is_flagged(first)
            # Only the first component of the second child's pair is used.
            C, _ = pair_categories(tb.tree_label(second))
            A1, B = pair_categories(tb.tree_label(third))
            assert A == A1
            xf = cat_d(A, X, B, C, first)
        else:
            sys.exit("error: ill-formed subtree {}\n in tree {}".format(
                node, xtree))

        labels.append(xf)
        # labels is extended in place, so the recursive results need not
        # be rebound.
        for kid in kids[1:]:
            visit(kid, labels)
        return labels

    root_label = tb.tree_label(xtree)
    top_children = tb.tree_children(xtree)
    assert len(top_children) == 2, "nonbinary xtree = {}".format(xtree)
    collected = [cat_a(root_label, top_children[0])]
    return visit(top_children[1], collected)
 def visit(node, wordssofar, segssofar):
     """
     Gather the words found in the subtree rooted at node.

     A terminal that ignore_terminal_rex does not match is appended to
     segssofar unchanged.  For any other node the children are visited
     first; afterwards, if the node's label matches score_cat_rex and
     segments are pending, they are concatenated into one word and a
     fresh empty segment list takes their place.  The word is appended
     to wordssofar as a (word, topic) tuple when the match has a 'topic'
     group with a value, and as the bare word otherwise.

     Returns the pair (wordssofar, segssofar).
     """
     if tb.is_terminal(node):
         if not ignore_terminal_rex.match(node):
             segssofar.append(node)
         return wordssofar, segssofar

     for child in tb.tree_children(node):
         wordssofar, segssofar = visit(child, wordssofar, segssofar)

     mo = score_cat_rex.match(tb.tree_label(node))
     if not mo or segssofar == []:
         return wordssofar, segssofar

     word = ''.join(segssofar)
     try:
         topic = mo.group('topic')
     except IndexError:
         # The pattern defines no 'topic' group at all.
         topic = None
     wordssofar.append(word if topic is None else (word, topic))
     return wordssofar, []
Пример #12
0
 def relabel(tree, label):
     """
     Return a new nonterminal with the same children as tree whose label
     is tree's first element, a space, and label.
     """
     combined_label = tree[0] + ' ' + label
     children = tb.tree_children(tree)
     return tb.make_nonterminal(combined_label, children)