def _create_tree_helper(builder: TreeBuilder, sentence: 'Sentence',
                        root: Token,
                        children_tokens: Dict[str, List[Token]]) -> None:
    """
    Recursively attach the dependents of a token to the tree being built.

    The builder is expected to be positioned on the node that holds `root`.
    Each dependent of `root` is added as a child, its own dependents are
    attached beneath it, and the builder is then moved back up so that it
    ends on the same node it started on.

    Args:
        builder: The TreeBuilder currently being used to create the Tree.
        sentence: The sentence the tree is constructed from. It is only
            forwarded to the recursive calls.
        root: The token whose dependents are attached at the builder's
            current position.
        children_tokens: A dictionary from token id to the tokens that have
            that id as their head.

    Returns:
        None. The tree is built as a side effect on `builder`.
    """
    # A token with no entry in the mapping has no dependents.
    dependents = children_tokens.get(root.id, [])

    for dependent in dependents:
        # Descend into the new child so its own dependents land under it.
        builder.add_child(data=dependent, move=True)
        Sentence._create_tree_helper(builder, sentence, dependent,
                                     children_tokens)
        builder.move_to_parent()
def to_tree(self):
    """
    Creates a Tree data structure from the current sentence.

    Tokens are grouped by their head value. The token whose head is '0' is
    used as the root; if several tokens have head '0', the last one in the
    sentence is used. When no such token exists (for example, in an empty
    sentence), the root is created with None as its data and nothing is
    attached beneath it.

    Returns:
        A constructed Tree that represents the dependency graph of the
        sentence.
    """
    dependents_by_head = {}
    root_token = None

    for token in self:
        # Group every token under the id of the token it depends on.
        dependents_by_head.setdefault(token.head, []).append(token)

        if token.head == '0':
            root_token = token

    builder = TreeBuilder()
    builder.create_root(root_token)

    if root_token:
        Sentence._create_tree_helper(builder, self, root_token,
                                     dependents_by_head)

    return builder.build()
def test_cannot_remove_out_of_range():
    """
    Removing a child the current node does not have must raise IndexError.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)
    tree_builder.add_child(5)

    # Only a single child was added, so removing at 5 is expected to fail.
    with pytest.raises(IndexError):
        tree_builder.remove_child(5)
def test_cannot_move_out_of_range():
    """
    Moving to a child index the current node does not have must raise
    IndexError.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)
    tree_builder.add_child(5)

    # Only a single child was added, so moving to index 3 is expected to fail.
    with pytest.raises(IndexError):
        tree_builder.move_to_child(3)
def test_cannot_operate_on_rootless():
    """
    Verify that navigation and build operations raise ValueError on a
    TreeBuilder for which no root has been created.
    """
    tree_builder = TreeBuilder()

    # Each operation is checked independently, in the same order as before.
    rootless_operations = (
        tree_builder.move_to_root,
        tree_builder.move_to_parent,
        tree_builder.build,
    )
    for operation in rootless_operations:
        with pytest.raises(ValueError):
            operation()
def to_tree(self):
    """
    Creates a Tree data structure from the current sentence.

    Tokens are grouped by their head value and the token whose head is '0'
    becomes the root; if several tokens have head '0', the last one in the
    sentence is used. When no such token exists (for example, in an empty
    sentence), the root is created with None as its data and nothing is
    attached beneath it.

    The children of a node are ordered as they appear in the sentence, so
    the earliest dependent of a token is the first child of that token's
    node. Each Tree node has a data member that references the actual Token
    represented by the node.

    Returns:
        A constructed Tree that represents the dependency graph of the
        sentence.
    """
    dependents_by_head = {}
    root_token = None

    for token in self:
        # Appending in iteration order keeps children in sentence order.
        dependents_by_head.setdefault(token.head, []).append(token)

        if token.head == '0':
            root_token = token

    builder = TreeBuilder()
    builder.create_root(root_token)

    if root_token:
        Sentence._create_tree_helper(builder, self, root_token,
                                     dependents_by_head)

    return builder.build()
def to_tree(self) -> Tree[Token]:
    """
    Creates a Tree data structure from the current sentence.

    An empty sentence cannot be converted into a Tree and will raise an
    exception. The children of a node in the tree are ordered as they appear
    in the sentence, so the earliest dependent of a token is the first child
    of that token's node. Each Tree node has a data member that references
    the actual Token represented by the node.

    Tokens without a head are left out of the tree when they are multiword
    tokens or empty nodes; any other token without a head is an error.

    Returns:
        A constructed Tree that represents the dependency graph of the
        sentence.

    Raises:
        ValueError: If a token that is neither a multiword token nor an
            empty node has no head, if no token has head '0', or if more
            than one token has head '0'.
    """
    dependents_by_head: Dict[str, List[Token]] = {}

    for token in self:
        if token.head is not None:
            # Appending in iteration order keeps children in sentence order.
            dependents_by_head.setdefault(token.head, []).append(token)
        elif not (token.is_multiword() or token.is_empty_node()):
            raise ValueError(
                'The current sentence is not fully defined as a tree and '
                'has a token with an empty head at {}'.format(token.id))

    builder: TreeBuilder[Token] = TreeBuilder()

    if '0' not in dependents_by_head:
        raise ValueError('The current sentence has no root token.')

    root_candidates = dependents_by_head['0']
    if len(root_candidates) != 1:
        raise ValueError(
            'There should be exactly one root token in a sentence.')

    root_token = root_candidates[0]
    builder.create_root(root_token)
    Sentence._create_tree_helper(builder, self, root_token,
                                 dependents_by_head)

    return builder.build()
def test_cannot_move_up_on_root():
    """
    Test that moving to the parent while positioned on the root node raises
    ValueError.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)

    # The builder is still on the root, which has no parent to move to.
    with pytest.raises(ValueError):
        tree_builder.move_to_parent()
def test_iter_children():
    """
    Test that iterating over a tree yields its children in insertion order.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)

    child_values = list(range(2, 15, 3))
    for value in child_values:
        tree_builder.add_child(value)

    tree = tree_builder.build()

    for index, child in enumerate(tree):
        expected = child_values[index]
        assert child.data == expected
def test_parent_assigment():
    """
    Test that every node in a built tree refers back to the correct parent.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)

    # First child of the root, with one child of its own.
    tree_builder.add_child(2, move=True)
    tree_builder.add_child(13)
    tree_builder.move_to_parent()

    # Second child of the root.
    tree_builder.add_child(7)

    tree = tree_builder.build()

    assert tree.parent is None
    assert tree[0].parent == tree
    assert tree[1].parent == tree
    assert tree[0][0].parent == tree[0]
def test_len_children():
    """
    Test that len() on a tree node reports its number of direct children.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)

    child_values = list(range(2, 15, 3))
    grandchild_values = list(range(5))

    for child_value in child_values:
        # Step into each new child, fill it, then step back to the root.
        tree_builder.add_child(child_value, move=True)
        for grandchild_value in grandchild_values:
            tree_builder.add_child(grandchild_value)
        tree_builder.move_to_parent()

    tree = tree_builder.build()

    assert len(tree) == len(child_values)
    for child in tree:
        assert len(child) == len(grandchild_values)
def test_get_children():
    """
    Test that we can properly retrieve children from a tree by index.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(1)
    tree_builder.add_child(7)

    # The second child receives a child of its own.
    tree_builder.add_child(2, move=True)
    tree_builder.add_child(13)

    tree = tree_builder.build()
    second_child = tree[1]

    assert tree[0].data == 7
    assert second_child.data == 2
    assert second_child[0].data == 13
def test_on_copy_not_on_root():
    """
    Test that the builder keeps its current position after a build, so a
    child added afterwards lands under the node it was positioned on.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)
    tree_builder.add_child(5)
    tree_builder.add_child(6, move=True)

    # Build once while positioned on the second child; the result is unused.
    tree_builder.build()

    tree_builder.add_child(7)
    tree = tree_builder.build()

    expected_structure = {(): 0, (0, ): 5, (1, ): 6, (1, 0): 7}
    assert_tree_structure(tree, expected_structure)
def test_after_creation_copy():
    """
    Test that a single TreeBuilder can be used for several builds, and that
    a later build shares no node objects with an earlier one.
    """
    tree_builder = TreeBuilder()
    tree_builder.create_root(0)
    tree_builder.add_child(2, move=True)
    tree_builder.add_child(13)
    tree_builder.move_to_parent()
    tree_builder.add_child(7)

    first_tree = tree_builder.build()

    # Keep modifying the same builder after the first build.
    tree_builder.move_to_root()
    tree_builder.set_data(4)
    tree_builder.add_child(3, move=True)
    tree_builder.add_child(15)

    second_tree = tree_builder.build()

    # Nodes at matching positions must be distinct objects.
    assert second_tree is not first_tree
    assert second_tree[0] is not first_tree[0]
    assert second_tree[0][0] is not first_tree[0][0]
    assert second_tree[1] is not first_tree[1]

    # The second tree holds the original data plus the later modifications.
    assert second_tree.data == 4
    assert second_tree[0].data == 2
    assert second_tree[0][0].data == 13
    assert second_tree[1].data == 7
    assert second_tree[2].data == 3
    assert second_tree[2][0].data == 15

    # The child counts match the expected shape.
    assert len(second_tree) == 3
    assert len(second_tree[0]) == 1
    assert len(second_tree[1]) == 0
    assert len(second_tree[2]) == 1