Exemplo n.º 1
0
def hybrid_tree_1():
    """
    Build a fixed four-node example HybridTree and return it reordered.

    Node 'v' ('helpen') is added to the root and receives the children 'v2'
    ('lezen') and 'v1' ('Piet'), in that order; 'v21' ('Marie') becomes the
    child of 'v2'. reorder() is called on the tree before it is returned.
    """
    # (node id, word, tag, label). The tag fills both the third and the
    # fourth CoNLLToken argument; the two remaining slots are '_'.
    node_specs = (
        ('v1', 'Piet', 'NP', 'SBJ'),
        ('v21', 'Marie', 'N', 'OBJ'),
        ('v', 'helpen', 'V', 'ROOT'),
        ('v2', 'lezen', 'V', 'VBI'),
    )
    # (parent id, child id), attached in exactly this sequence.
    edges = (('v', 'v2'), ('v', 'v1'), ('v2', 'v21'))

    result = HybridTree()
    for node_id, word, tag, label in node_specs:
        result.add_node(node_id,
                        CoNLLToken(word, '_', tag, tag, '_', label), True)
    for parent, child in edges:
        result.add_child(parent, child)
    result.add_to_root('v')
    result.reorder()
    return result
Exemplo n.º 2
0
def multi_dep_tree():
    """
    Build a fixed five-node example HybridTree with two root nodes.

    The tree is created as HybridTree('multi'). Nodes '2' and '1' are both
    added to the root (in that order); '2' receives the children '21' and
    '211', and '1' receives the child '11'. reorder() is called on the tree
    before it is returned.
    """
    # (node id, letter). Each token is built from its letter X as
    # CoNLLToken(X, '_', 'pX', 'pX', '_', 'dX').
    node_specs = (
        ('1', 'A'),
        ('211', 'B'),
        ('11', 'C'),
        ('2', 'D'),
        ('21', 'E'),
    )
    # (parent id, child id), attached in exactly this sequence.
    edges = (('2', '21'), ('2', '211'), ('1', '11'))

    result = HybridTree('multi')
    for node_id, letter in node_specs:
        tag = 'p' + letter
        result.add_node(
            node_id,
            CoNLLToken(letter, '_', tag, tag, '_', 'd' + letter), True)
    for root_id in ('2', '1'):
        result.add_to_root(root_id)
    for parent, child in edges:
        result.add_child(parent, child)
    result.reorder()
    return result
Exemplo n.º 3
0
def derivation_to_hybrid_tree(der,
                              poss,
                              ordered_labels,
                              construct_token,
                              disconnected=None):
    """
    Turn a derivation tree into a hybrid tree.

    poss and ordered_labels are assumed to have equal length; poss is
    indexed with every position of ordered_labels.

    :param der: derivation whose ids become the inner nodes of the tree
    :type der: LCFRSDerivation
    :param poss: list of POS-tags
    :type poss: list[str]
    :param ordered_labels: list of words
    :type ordered_labels: list[str]
    :param construct_token: callable that builds a token; it is called as
        construct_token(word, pos, True) for every entry of ordered_labels
        and as construct_token(nonterminal, '_', False) for every id of der
    :param disconnected: 0-based positions in ordered_labels that are
        disconnected; a falsy value is treated as "none"
    :type disconnected: list[object]
    :rtype: HybridTree
    """
    disconnected = disconnected or []
    tree = HybridTree()

    # Leaves: a disconnected position i gets the id "d<i>". All other
    # positions are numbered consecutively from 1 and get the id "c<k>".
    connected_count = 0
    for index, label in enumerate(ordered_labels):
        leaf_token = construct_token(label, poss[index], True)
        if index in disconnected:
            tree.add_node("d" + str(index), leaf_token, True, False)
            continue
        connected_count += 1
        tree.add_node("c" + str(connected_count), leaf_token, True, True)

    # Inner nodes: one per derivation id, labelled with the nonterminal of
    # the left-hand side of the rule at that id.
    for node_id in der.ids():
        nonterminal = der.getRule(node_id).lhs().nont()
        tree.add_node(node_id, construct_token(nonterminal, '_', False))
        for child_id in der.child_ids(node_id):
            tree.add_child(node_id, child_id)
        # NOTE(review): terminal positions are mapped to "c<position>", so
        # they presumably count connected leaves from 1 -- confirm against
        # LCFRSDerivation.terminal_positions.
        for position in der.terminal_positions(node_id):
            tree.add_child(node_id, "c" + str(position))

    tree.add_to_root(der.root_id())
    tree.reorder()
    return tree
Exemplo n.º 4
0
    def test_fst_compilation_right(self):
        """
        Run the right-branching FST pipeline on two example trees.

        Returns immediately when test_pynini is falsy. Otherwise a grammar
        is induced from hybrid_tree_1() and hybrid_tree_2(), compiled with
        compile_wfst_from_right_branching_grammar and composed with the
        automaton built from the tag sequence 'NP N V V V'.

        Three things are asserted: the text form of that automaton, the
        value pow(e, -d) where d is the last entry of shortestdistance on
        the shortest path, and the rule indices retrieved from the shortest
        path. Everything else is only printed for inspection.
        """
        if not test_pynini:
            return

        first_tree = hybrid_tree_1()
        second_tree = hybrid_tree_2()
        terminal_labeling = the_terminal_labeling_factory().get_strategy('pos')
        nonterminal_labeling = (
            the_labeling_factory().create_simple_labeling_strategy(
                'empty', 'pos'))

        _, grammar = induce_grammar(
            [first_tree, second_tree], nonterminal_labeling,
            terminal_labeling.token_label, [right_branching], 'START')

        wfst, rules = compile_wfst_from_right_branching_grammar(grammar)
        print(repr(wfst))
        symbols = wfst.input_symbols()

        pos_tags = 'NP N V V V'.split(' ')
        forms = 'Piet Marie helpen leren lezen'.split(' ')
        token_sequence = [
            construct_conll_token(form, pos)
            for form, pos in zip(forms, pos_tags)
        ]

        fsa = fsa_from_list_of_symbols(pos_tags, symbols)
        expected_fsa_text = ('0\t1\tNP\tNP\n'
                             '1\t2\tN\tN\n'
                             '2\t3\tV\tV\n'
                             '3\t4\tV\tV\n'
                             '4\t5\tV\tV\n'
                             '5\n')
        self.assertEqual(expected_fsa_text, fsa.text().decode('utf-8'))

        composed = compose(fsa, wfst)

        print(composed.input_symbols())
        for symbol in composed.input_symbols():
            print(symbol)

        print("Input Composition")
        print(composed.text(symbols, symbols).decode('utf-8'))

        # Rebuild a hybrid tree from every path and report (print only, no
        # assertion) whether it equals the second example tree.
        for index, path in enumerate(paths(composed)):
            print(index, "th path:", path, end=' ')
            path_rules = [rules.index_object(entry) for entry in path]
            # The first entry is left out when building the derivation.
            derivation = PolishDerivation(path_rules[1:])
            evaluation = DCP_evaluator(derivation).getEvaluation()
            candidate = HybridTree()
            dcp_to_hybridtree(candidate, evaluation, token_sequence, False,
                              construct_conll_token)
            candidate.reorder()
            print("correct" if candidate == second_tree else "incorrect")

        stats = defaultdict(lambda: 0)
        local_rule_stats(composed, stats, 15)
        print(stats)

        print("Shortest path probability")
        best = shortestpath(composed)
        best.topsort()
        best_distance = float(shortestdistance(best)[-1])
        self.assertAlmostEqual(1.80844898756e-05, pow(e, -best_distance))
        print(best.text())

        rule_indices = retrieve_rules(best)
        self.assertSequenceEqual(rule_indices,
                                 [8, 7, 1, 6, 2, 5, 3, 10, 3, 3])

        best_rules = [rules.index_object(index) for index in rule_indices]
        print(best_rules)

        # As above, the first entry is left out.
        der = PolishDerivation(best_rules[1:])
        print(der)

        # NOTE(review): the word order here ("lezen leren") differs from the
        # one used for token_sequence ("leren lezen") -- confirm whether
        # that is intended.
        printed_words = "Piet Marie helpen lezen leren".split()
        print(
            derivation_to_hybrid_tree(der, pos_tags, printed_words,
                                      construct_conll_token))

        best_evaluation = DCP_evaluator(der).getEvaluation()
        h_tree_2 = HybridTree()
        dcp_to_hybridtree(h_tree_2, best_evaluation, token_sequence, False,
                          construct_conll_token)
        print(h_tree_2)
class GeneralHybridTreeTestCase(unittest.TestCase):
    """
    Checks of HybridTree queries against one small hand-built tree.

    setUp creates a fresh tree for every test: node 'v' is added to the
    root and gets the children 'v2' and 'v1' (added in that order), 'v21'
    is the child of 'v2', and 'v3' is added with an extra False argument
    and is never attached to a parent.
    """
    tree = None

    def setUp(self):
        """Build the fixture tree and store it in self.tree."""
        self.tree = HybridTree()
        fixture = self.tree
        # (node id, form, pos)
        attached_nodes = (
            ("v1", "Piet", "NP"),
            ("v21", "Marie", "N"),
            ("v", "helpen", "VP"),
            ("v2", "lezen", "V"),
        )
        for node_id, form, pos in attached_nodes:
            fixture.add_node(node_id, construct_conll_token(form, pos), True)
        for parent, child in (("v", "v2"), ("v", "v1"), ("v2", "v21")):
            fixture.add_child(parent, child)
        # The punctuation node is the only one added with a fourth argument.
        fixture.add_node("v3", construct_conll_token(".", "Punc"), True,
                         False)
        fixture.add_to_root("v")

    def _reordered_tree(self):
        """Call reorder() on the fixture and hand the fixture back."""
        self.tree.reorder()
        return self.tree

    def test_children(self):
        """children('v') is in insertion order before reorder() and swapped after."""
        fixture = self.tree
        self.assertListEqual(fixture.children('v'), ['v2', 'v1'])
        fixture.reorder()
        self.assertListEqual(fixture.children('v'), ['v1', 'v2'])

    def test_fringe(self):
        """fringe() of 'v' and of 'v2' after reordering."""
        fixture = self._reordered_tree()
        for node_id, expected in (('v', [2, 0, 3, 1]), ('v2', [3, 1])):
            self.assertListEqual(fixture.fringe(node_id), expected)

    def test_n_spans(self):
        """n_spans() is 1 for 'v' and 2 for 'v2' after reordering."""
        fixture = self._reordered_tree()
        for node_id, expected in (('v', 1), ('v2', 2)):
            self.assertEqual(fixture.n_spans(node_id), expected)

    def test_n_gaps(self):
        """n_gaps() is 1 after reordering."""
        fixture = self._reordered_tree()
        self.assertEqual(fixture.n_gaps(), 1)

    def test_node_ids(self):
        """nodes() contains all five ids, including the unattached 'v3'."""
        fixture = self._reordered_tree()
        expected_ids = sorted(['v', 'v1', 'v2', 'v21', 'v3'])
        self.assertListEqual(sorted(fixture.nodes()), expected_ids)

    def test_complete(self):
        """complete() equals True after reordering."""
        fixture = self._reordered_tree()
        self.assertEqual(fixture.complete(), True)

    def test_unlabelled_structure(self):
        """unlabelled_structure() returns the expected nested tuple."""
        fixture = self._reordered_tree()
        expected = ({0, 1, 2, 3}, [
            ({0}, []),
            ({1, 3}, [({1}, [])]),
        ])
        self.assertTupleEqual(fixture.unlabelled_structure(), expected)

    def test_max_n_spans(self):
        """max_n_spans() is 2 after reordering."""
        fixture = self._reordered_tree()
        self.assertEqual(fixture.max_n_spans(), 2)

    def test_labelled_yield(self):
        """token_yield() gives the four attached forms; '.' is not among them."""
        fixture = self._reordered_tree()
        forms = [token.form() for token in fixture.token_yield()]
        self.assertListEqual(forms, ["Piet", "Marie", "helpen", "lezen"])

    def test_full_labelled_yield(self):
        """full_token_yield() additionally ends with the '.' token."""
        fixture = self._reordered_tree()
        forms = [token.form() for token in fixture.full_token_yield()]
        self.assertListEqual(forms,
                             ["Piet", "Marie", "helpen", "lezen", "."])

    def test_full_yield(self):
        """full_yield() lists all five node ids, with 'v3' last."""
        fixture = self._reordered_tree()
        self.assertListEqual(fixture.full_yield(),
                             ['v1', 'v21', 'v', 'v2', 'v3'])

    # Disabled test, kept as found:
    # def test_labelled_spans(self):
    # self.tree.reorder()
    # self.assertListEqual(self.tree.labelled_spans(), [])

    def test_pos_yield(self):
        """pos() of the tokens in token_yield() matches the tags from setUp."""
        fixture = self._reordered_tree()
        tags = [token.pos() for token in fixture.token_yield()]
        self.assertListEqual(tags, ["NP", "N", "VP", "V"])

    def test_recursive_partitioning(self):
        """recursive_partitioning() returns the expected nested tuple."""
        fixture = self._reordered_tree()
        expected = ({0, 1, 2, 3}, [
            ({0}, []),
            ({1, 3}, [({1}, []), ({3}, [])]),
            ({2}, []),
        ])
        self.assertEqual(fixture.recursive_partitioning(), expected)