def test_recursive_partitioning_transformation(self):
    # Build a flat tree: 'a' is the root and every other node is attached
    # directly below it. Each node is added right before it is attached.
    tree = HybridTree("mytree")
    ids = ['a', 'b', 'c', 'd']
    root_id = ids[0]
    for node_id in ids:
        token = CoNLLToken(node_id, '_', '_', '_', '_', '_')
        tree.add_node(node_id, token, True, True)
        if node_id == root_id:
            continue
        tree.add_child(root_id, node_id)
    tree.add_to_root(root_id)
    print(tree)

    # The yield lists the tokens in the order in which they were added.
    yielded_forms = [token.form() for token in tree.token_yield()]
    self.assertEqual(yielded_forms, ids)

    # Expected: one partition holding all four positions, split into one
    # childless singleton partition per position.
    expected = ({0, 1, 2, 3}, [({position}, []) for position in range(4)])
    self.assertEqual(tree.recursive_partitioning(), expected)
    print(tree.recursive_partitioning())

    # The factory is expected to return exactly one transformation for
    # 'fanout-1'; the unpacking fails otherwise.
    (fanout_1,) = the_recursive_partitioning_factory().get_partitioning('fanout-1')
    print(fanout_1(tree))
def parse_with_pgf(grammar, forms, poss, bin):
    """Parse a POS sequence with a PGF grammar and return a dependency tree.

    The POS tags are escaped and joined into one sentence, which is parsed
    with the concrete grammar ``grammar.languages[bin + 'grammargfconcrete']``.
    The first parse is rendered with ``graphvizParseTree``; the node and edge
    lines of that rendering are read back into a constituent ``HybridTree``,
    which is then converted into a dependency ``HybridTree`` whose node ids
    are the 1-based token positions.

    :param grammar: grammar object exposing a ``languages`` mapping
    :type grammar: PGF
    :param forms: word forms, aligned index by index with ``poss``
    :param poss: POS tags of the sentence; these are what is actually parsed
    :param bin: prefix of the concrete grammar name (shadows the builtin
        ``bin``; the name is kept because it is part of the signature)
    :return: the dependency tree, or ``None`` if no parse is found
    :rtype: HybridTree | None
    """
    lcfrs = grammar.languages[bin + 'grammargfconcrete']

    # The grammar is defined over escaped POS tags, e.g.
    # "ADJD ADV _COMMA_ KOUS ADV PIS PROAV VVINF VMFIN _PUNCT_".
    sentence = ' '.join(map(escape, poss))

    try:
        # Only the expression of the first parse is used.
        _, expr = next(lcfrs.parse(sentence, n=1))
    except (StopIteration, pgf.ParseError):
        return None

    dot = lcfrs.graphvizParseTree(expr)
    assert isinstance(dot, str)

    # Patterns are compiled once instead of being looked up per line.
    # Node lines may carry any indentation; edge lines are anchored on
    # exactly the leading whitespace spelled out in the pattern.
    node_line = re.compile(r'^\s*(n\d+)\[label="([^\s]+)"\]\s*$')
    edge_line = re.compile(r'^ (n\d+) -- (n\d+)\s*$')
    # Labels matching this pattern are skipped when searching for the node a
    # token attaches to.
    # NOTE(review): presumably auxiliary (binarization) nodes - confirm.
    skipped_label = re.compile(r'\d+X\d+$')

    # --- Step 1: rebuild the constituent tree from the graphviz text. ---
    tree = HybridTree()
    terminal_index = 0  # index into forms/poss of the next terminal node
    for line in dot.splitlines():
        match = node_line.search(line)
        if match:
            node_id, label = match.groups()
            # Node ids numbered 100000 and above are treated as terminals.
            if int(node_id[1:]) >= 100000:
                assert escape(poss[terminal_index]) == label
                tree.add_node(
                    node_id,
                    construct_constituent_token(
                        form=forms[terminal_index],
                        pos=poss[terminal_index],
                        terminal=True),
                    True)
                terminal_index += 1
            else:
                tree.add_node(
                    node_id,
                    construct_constituent_token(
                        form=label, pos='_', terminal=False),
                    False)
            if label == 'VROOT1':
                tree.add_to_root(node_id)
            continue

        match = edge_line.search(line)
        if match:
            parent, child = match.groups()
            tree.add_child(parent, child)

    assert poss == [token.pos() for token in tree.token_yield()]

    # --- Step 2: create one dependency node per token and record, for each
    # token, the constituent node it attaches to. ---
    dep_tree = HybridTree()
    head_table = {}  # constituent node -> 1-based position of its head token
    attachment_point = {}  # terminal node -> constituent node it heads
    for position, node in enumerate(tree.id_yield(), start=1):
        token = tree.node_token(node)
        dep_token = construct_conll_token(token.form(), un_escape(token.pos()))

        # The search starts at the grandparent: the direct parent of a
        # terminal is never considered.
        current = tree.parent(tree.parent(node))
        while current:
            current_label = tree.node_token(current).category()
            if not skipped_label.search(current_label):
                edge_label = un_escape(current_label)
                if edge_label == 'TOP1':
                    edge_label = 'ROOT1'
                # The last character of the category is dropped.
                # NOTE(review): presumably the fan-out digit - confirm.
                dep_token.set_edge_label(edge_label[:-1])
                head_table[current] = position
                attachment_point[node] = current
                break
            current = tree.parent(current)
        dep_tree.add_node(position, dep_token, order=True)

    # --- Step 3: attach every dependency node to the head of the closest
    # ancestor (above its own attachment point) that has a head. ---
    for node, dep_node in zip(tree.id_yield(), dep_tree.id_yield()):
        # NOTE(review): if no attachment point was recorded for this token,
        # tree.parent(None) is called - confirm HybridTree.parent accepts it.
        ancestor = tree.parent(attachment_point.get(node))
        while ancestor:
            head = head_table.get(ancestor)
            if head:
                dep_tree.add_child(head, dep_node)
                break
            ancestor = tree.parent(ancestor)
        else:
            # No headed ancestor exists: the token becomes a root.
            dep_tree.add_to_root(dep_node)

    return dep_tree
class GeneralHybridTreeTestCase(unittest.TestCase):
    """Checks basic HybridTree queries on a small example tree.

    The fixture has root "v" (helpen) with children "v2" (lezen) and
    "v1" (Piet); "v2" has the single child "v21" (Marie). A fifth node
    "v3" (".") is added with an extra ``False`` argument and is never
    attached to a parent.
    """

    tree = None

    def setUp(self):
        # Nodes and edges are added in a fixed order; several expectations
        # below depend on it.
        hybrid = HybridTree()
        hybrid.add_node("v1", construct_conll_token("Piet", "NP"), True)
        hybrid.add_node("v21", construct_conll_token("Marie", "N"), True)
        hybrid.add_node("v", construct_conll_token("helpen", "VP"), True)
        hybrid.add_node("v2", construct_conll_token("lezen", "V"), True)
        hybrid.add_child("v", "v2")
        hybrid.add_child("v", "v1")
        hybrid.add_child("v2", "v21")
        hybrid.add_node("v3", construct_conll_token(".", "Punc"), True, False)
        hybrid.add_to_root("v")
        self.tree = hybrid

    def test_children(self):
        # Before reordering, children appear in insertion order.
        before = self.tree.children('v')
        self.assertListEqual(before, ['v2', 'v1'])
        self.tree.reorder()
        after = self.tree.children('v')
        self.assertListEqual(after, ['v1', 'v2'])

    def test_fringe(self):
        self.tree.reorder()
        fringe_of_root = self.tree.fringe('v')
        self.assertListEqual(fringe_of_root, [2, 0, 3, 1])
        fringe_of_v2 = self.tree.fringe('v2')
        self.assertListEqual(fringe_of_v2, [3, 1])

    def test_n_spans(self):
        self.tree.reorder()
        spans_of_root = self.tree.n_spans('v')
        self.assertEqual(spans_of_root, 1)
        spans_of_v2 = self.tree.n_spans('v2')
        self.assertEqual(spans_of_v2, 2)

    def test_n_gaps(self):
        self.tree.reorder()
        gaps = self.tree.n_gaps()
        self.assertEqual(gaps, 1)

    def test_node_ids(self):
        self.tree.reorder()
        expected = sorted(['v', 'v1', 'v2', 'v21', 'v3'])
        self.assertListEqual(sorted(self.tree.nodes()), expected)

    def test_complete(self):
        self.tree.reorder()
        is_complete = self.tree.complete()
        self.assertEqual(is_complete, True)

    def test_unlabelled_structure(self):
        self.tree.reorder()
        expected = (
            {0, 1, 2, 3},
            [
                ({0}, []),
                ({1, 3}, [({1}, [])]),
            ],
        )
        self.assertTupleEqual(self.tree.unlabelled_structure(), expected)

    def test_max_n_spans(self):
        self.tree.reorder()
        widest = self.tree.max_n_spans()
        self.assertEqual(widest, 2)

    def test_labelled_yield(self):
        self.tree.reorder()
        forms = [token.form() for token in self.tree.token_yield()]
        self.assertListEqual(forms, "Piet Marie helpen lezen".split())

    def test_full_labelled_yield(self):
        # Unlike token_yield, the full yield also includes the "." of "v3".
        self.tree.reorder()
        forms = [token.form() for token in self.tree.full_token_yield()]
        self.assertListEqual(forms, "Piet Marie helpen lezen .".split())

    def test_full_yield(self):
        self.tree.reorder()
        node_ids = self.tree.full_yield()
        self.assertListEqual(node_ids, 'v1 v21 v v2 v3'.split())

    # def test_labelled_spans(self):
    #     self.tree.reorder()
    #     self.assertListEqual(self.tree.labelled_spans(), [])

    def test_pos_yield(self):
        self.tree.reorder()
        tags = [token.pos() for token in self.tree.token_yield()]
        self.assertListEqual(tags, "NP N VP V".split())

    def test_recursive_partitioning(self):
        self.tree.reorder()
        expected = (
            {0, 1, 2, 3},
            [
                ({0}, []),
                ({1, 3}, [({1}, []), ({3}, [])]),
                ({2}, []),
            ],
        )
        self.assertEqual(self.tree.recursive_partitioning(), expected)