def test_handle_match_next_in_the_hierarchy_requiring_padding(self):
    """
    A match that skips levels must be padded when padding is enabled.

    When traversing the input, a newly encountered match has to be
    accommodated on the graph. If its level does not immediately follow
    the current one on the hierarchy and `padding` is True, intermediate
    nodes are expected to be created to fill the gap.
    """
    pattern = re.compile(r'Test Match')
    match = pattern.search('This is a Test Match sentence')
    descriptor = compile_patterns({
        'components': ['A', 'B', 'C', 'D'],
        'patterns': [r'A', r'B', r'C', r'D'],
        'padding': True,
    })
    graph = initialize_backbone(NetworkxImplementation())

    # Level 3 is requested on a freshly initialized graph, so levels 1
    # and 2 are expected to show up as padding nodes.
    inserted = handle_match(graph, match, 3, descriptor)

    self.assertEqual(inserted, "Test Match [3]")
    expected_keys = ["ROOT [0]", "Test Match [3]", "A [1]", "B [2]"]
    self.assertListEqual(sorted(graph.nodes()), sorted(expected_keys))

    # Padding nodes carry pad=1; the real match carries pad=0.
    expected_attributes = (
        ("A [1]", {'pad': 1, 'meta': 'A', 'level': 1, 'content': []}),
        ("B [2]", {'pad': 1, 'meta': 'B', 'level': 2, 'content': []}),
        ("Test Match [3]",
         {'pad': 0, 'meta': 'Test Match', 'level': 3, 'content': []}),
    )
    for node_key, attributes in expected_attributes:
        self.assertDictEqual(graph[node_key], attributes)
def test_handle_match_higher_on_the_hierarchy(self):
    """
    A match higher on the hierarchy must be attached to the right parent.

    When traversing the input, a newly encountered match has to be
    accommodated on the graph. If its level precedes the current one,
    the appropriate parent must be located and the new node inserted
    underneath it.
    """
    pattern = re.compile(r'Test Match')
    match = pattern.search('This is a Test Match sentence')
    descriptor = compile_patterns({
        'components': ['A', 'B', 'C', 'D'],
        'patterns': [r'A', r'B', r'C', r'D'],
        'padding': False,
    })
    graph = initialize_backbone(NetworkxImplementation())

    # Descend to level 2 first, then come back up with a level 1 match.
    handle_match(graph, match, 1, descriptor)
    handle_match(graph, match, 2, descriptor)
    inserted = handle_match(graph, match, 1, descriptor)

    self.assertEqual(inserted, "Test Match [3]")

    expected_keys = [
        "ROOT [0]", "Test Match [1]", "Test Match [2]", "Test Match [3]"
    ]
    self.assertListEqual(sorted(graph.nodes()), sorted(expected_keys))

    self.assertDictEqual(
        graph["Test Match [3]"],
        {'pad': 0, 'meta': 'Test Match', 'level': 1, 'content': []})

    # Both level 1 nodes are expected to hang directly off the root.
    root_children = sorted(
        [target for _source, target in graph.edges("ROOT [0]")])
    self.assertEqual(root_children,
                     sorted(['Test Match [3]', 'Test Match [1]']))
def test_search_bfs_parents(self):
    """
    Searching must also work in the opposite direction.

    Ancestors are visited from bottom to top, assuming an acyclic
    directed graph, and filtered with a predicate on the node data.
    """
    graph = initialize_backbone(NetworkxImplementation())

    # (parent key, tag) for every node, in insertion order.
    layout = (
        ("ROOT [0]", 10),   # NODE[1]
        ("NODE [1]", 10),   # NODE[2]
        ("NODE [1]", 10),   # NODE[3]
        ("NODE [1]", 1),    # NODE[4]
        ("NODE [2]", 100),  # NODE[5]
        ("NODE [2]", 1),    # NODE[6]
        ("NODE [5]", 1),    # NODE[7]
    )
    for parent_key, tag in layout:
        _add_node(graph, "NODE", parent_key, tag=tag)

    def tagged_ancestors(start, wanted):
        """Sorted ancestors of `start` whose 'tag' equals `wanted`."""
        def has_tag(attributes):
            return attributes.get('tag', -1) == wanted

        return sorted(
            filter_bfs_ancestors(graph, source=start, predicate=has_tag))

    found = tagged_ancestors("NODE [5]", 10)
    self.assertListEqual(found, ["NODE [1]", "NODE [2]"])

    found = tagged_ancestors("NODE [7]", 100)
    self.assertListEqual(found, ["NODE [5]"])

    # No node carries this tag, so nothing may be returned.
    found = tagged_ancestors("NODE [5]", 999)
    self.assertListEqual(found, [])
def test_search_dfs_bfs(self):
    """
    The full result set must not depend on the traversal algorithm.

    Only the first match is allowed to differ between a depth-first
    and a breadth-first traversal.
    """
    graph = initialize_backbone(NetworkxImplementation())
    root = graph.root_key
    label = "NODE"

    def is_target(attributes):
        """True for nodes tagged with 100."""
        return attributes.get('tag', -1) == 100

    # Two branches under the root; tag 100 sits at different depths.
    left = _add_node(graph, label, root, tag=1)             # NODE [1]
    right = _add_node(graph, label, root, tag=2)            # NODE [2]
    _add_node(graph, label, right, tag=100)                 # NODE [3]
    left_child = _add_node(graph, label, left, tag=4)       # NODE [4]
    _add_node(graph, label, right, tag=5)                   # NODE [5]
    _add_node(graph, label, left, tag=6)                    # NODE [6]
    _add_node(graph, label, left_child, tag=100)            # NODE [7]
    _add_node(graph, label, left_child, tag=8)              # NODE [8]

    result_dfs = sorted(filter_dfs(graph, predicate=is_target))
    result_bfs = sorted(filter_bfs(graph, predicate=is_target))

    self.assertEqual(result_bfs, result_dfs)
    self.assertEqual(len(result_dfs), 2)
    self.assertListEqual(result_dfs, sorted(['NODE [3]', 'NODE [7]']))

    first_bfs = next(filter_bfs(graph, predicate=is_target))
    first_dfs = next(filter_dfs(graph, predicate=is_target))

    # Breadth-first must hit the shallower node first; depth-first may
    # reach either one depending on the branch it explores first.
    self.assertEqual(first_bfs, "NODE [3]")
    self.assertIn(first_dfs, ["NODE [3]", "NODE [7]"])
def from_dict(d):
    """
    Rebuild a `Document` from its dictionary representation.

    `d["nodes"]` is read as a sequence of node entries, each providing
    the keys "key", "content", "predecessors" and "successors". The
    first entry is taken as the root node.

    Returns a `Document` wrapping the rebuilt graph and the root key.
    """
    entries = d["nodes"]
    root_key = entries[0]['key']

    # The graph is named after the root key minus its last four
    # characters (presumably the " [0]" index suffix -- TODO confirm).
    graph = NetworkxImplementation(root_key[:-4])
    graph.initialize()

    # First pass: create every node before any edge refers to it.
    for entry in entries:
        graph.add_node(entry["key"], attr_dict=entry["content"])

    # Second pass: wire incoming and outgoing edges of every node.
    for entry in entries:
        key = entry["key"]
        incoming = [(source, key) for source in entry["predecessors"]]
        graph.add_edges_from(incoming)
        outgoing = [(key, target) for target in entry["successors"]]
        graph.add_edges_from(outgoing)

    return Document(graph, root_key)
def test_search(self):
    """
    Nodes reachable from a given source can be filtered by a predicate.
    """
    graph = initialize_backbone(NetworkxImplementation())

    def tagged_five(attributes):
        """True for nodes whose 'tag' is 5."""
        return attributes.get('tag', -1) == 5

    # Build a single chain: every new node is the child of the last one.
    tail = graph.root_key
    for tag in range(1, 10):
        tail = _add_node(graph, "NODE", tail, tag=tag)

    # Searching with the default source finds the single tagged node.
    matches = list(filter_dfs(graph, predicate=tagged_five))
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], 'NODE [5]')

    # Starting further down the chain, the tagged node is not reachable.
    matches = list(
        filter_dfs(graph, predicate=tagged_five, source="NODE [7]"))
    self.assertEqual(len(matches), 0)
def test_node_id(self):
    """
    `_unique_path_identifier` should generate a unique id for each node.

    The ID should follow a namespace convention like a filesystem and
    take the shape of the document hierarchy.
    """
    graph = initialize_backbone(NetworkxImplementation())

    data = {'a': 1, 'b': 2}
    key = 'NEW NODE'
    parent = "ROOT [0]"

    # Insert one child under the root before requesting an identifier;
    # only the effect on the graph matters, not the returned key.
    _add_node(graph, key, parent, **data)

    # Named `node_index` rather than `id` to avoid shadowing the builtin.
    node_index = 10
    identifier = _unique_path_identifier(graph, key, parent, node_index)

    # Use the TestCase assertion (as the sibling tests do) so the check
    # is not stripped when Python runs with -O.
    self.assertEqual(identifier, '/root/')
def test_handle_match_next_in_the_hierarchy(self):
    """
    A match for the very next level is inserted without any padding.

    When traversing the input, a newly encountered match has to be
    accommodated on the graph. This is the simplest case: the detected
    node follows next on the hierarchy.
    """
    pattern = re.compile(r'Test Match')
    match = pattern.search('This is a Test Match sentence')
    descriptor = compile_patterns({
        'components': ['A', 'B', 'C', 'D'],
        'patterns': [r'A', r'B', r'C', r'D'],
        'padding': True,
    })
    graph = initialize_backbone(NetworkxImplementation())

    inserted = handle_match(graph, match, 1, descriptor)

    self.assertEqual(inserted, "Test Match [1]")

    expected_keys = ["ROOT [0]", "Test Match [1]"]
    self.assertListEqual(sorted(graph.nodes()), sorted(expected_keys))

    expected_attributes = {
        'pad': 0, 'meta': 'Test Match', 'level': 1, 'content': []
    }
    self.assertDictEqual(graph["Test Match [1]"], expected_attributes)
def test_padding_01(self):
    """
    Padding fills the gap between a node and a deeper target level.

    When requested, the insertion of a new node must be preceded by a
    padding process which ensures a uniform and predictable hierarchy.
    """
    descriptor = compile_patterns({
        'components': ['A', 'B', 'C', 'D'],
        'patterns': [r'A', r'B', r'C', r'D'],
    })
    graph = initialize_backbone(NetworkxImplementation())

    level = 1
    node = _add_node(graph, 'NEW NODE', "ROOT [0]", level=level, meta='ART')

    # Pad from the level right below the new node up to level 4.
    last_node = _pad(graph, node, level + 1, 4, descriptor)

    expected_keys = ['ROOT [0]', 'NEW NODE [1]', 'B [2]', 'C [3]']
    self.assertListEqual(sorted(graph.nodes()), sorted(expected_keys))
    self.assertEqual(last_node, 'C [3]')
def test_append_content(self):
    """
    Every line must be stored as content of the node currently in focus.
    """
    graph = initialize_backbone(NetworkxImplementation())

    # On a freshly initialized graph the line is read back via the cursor.
    first_line = "this is a sample line"
    graph = append_content(graph, first_line)
    self.assertEqual(graph.cursor_data('content'), [first_line])

    # After adding a node, later lines are read back from the cursor
    # without the first one.
    _add_node(graph, 'NEW NODE', "ROOT [0]",
              level=1, meta='ART', content=[])

    later_lines = ["yet another sample", "yet another sample 2"]
    for line in later_lines:
        graph = append_content(graph, line)

    self.assertEqual(graph.cursor_data('content'), later_lines)
def test_padding_no_effect(self):
    """
    Padding must leave the graph untouched when there is no gap to fill.

    If the requested level is just one below the hierarchy, the padding
    process should not create any node and should hand back the node it
    was given.
    """
    descriptor = compile_patterns({
        'components': ['A', 'B', 'C', 'D'],
        'patterns': [r'A', r'B', r'C', r'D'],
    })
    graph = initialize_backbone(NetworkxImplementation())

    node = _add_node(graph, 'NEW NODE', "ROOT [0]", level=1, meta='ART')

    snapshot_before = sorted(graph.nodes())
    # Both level arguments are 4, so there is no level left to pad.
    last_node = _pad(graph, node, 3 + 1, 4, descriptor)
    snapshot_after = sorted(graph.nodes())

    self.assertEqual(last_node, node)
    self.assertListEqual(snapshot_before, snapshot_after)
def test_add_node(self):
    """
    A node can be added to the graph together with a dictionary of data
    to be held by the newly created node.
    """
    graph = initialize_backbone(NetworkxImplementation())
    root = "ROOT [0]"
    label = 'NEW NODE'

    # First child: carries a payload.
    payload = {'a': 1, 'b': 2}
    first = _add_node(graph, label, root, **payload)

    self.assertEqual(first, "NEW NODE [1]")
    self.assertDictEqual(graph[first], payload)
    self.assertEqual(graph.edges(root), [('ROOT [0]', 'NEW NODE [1]')])

    # Second child: same label, no payload; it gets the next index.
    empty_payload = {}
    second = _add_node(graph, label, root, **empty_payload)

    self.assertEqual(second, "NEW NODE [2]")
    self.assertDictEqual(graph[second], empty_payload)

    expected_edges = [
        ('ROOT [0]', 'NEW NODE [1]'),
        ('ROOT [0]', 'NEW NODE [2]'),
    ]
    self.assertEqual(sorted(graph.edges(root)), sorted(expected_edges))
def initialize_graph(name='ROOT'):
    """
    Create and initialize a raw graph whose root is called `name`.

    The graph is built on the Networkx-backed implementation and then
    passed through `initialize_backbone`, whose result is returned.

    # TODO: check some global flag indicating which backbone to use
    """
    backbone = NetworkxImplementation(root=name)
    return initialize_backbone(backbone)