def testGetSimilarNodesToQueryNode(self):
    # Look up the nodes similar to "n_7" through a sketch matrix built from the
    # dummy graph and compare them with the known answer.
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    ch_matrix = CharacteristicMatrix(
        rballs_database, hypergraph.number_of_nodes(), wl_iterations=0)
    sketch_matrix = SketchMatrix(25, 265, ch_matrix)

    expected = np.array([0, 5, 7])
    found, _ = similar_nodes_mining.get_similar_nodes(
        "n_7", hypergraph, sketch_matrix, 0, [], r_in=3, r_out=2, r_all=0)

    matches = expected == found
    # `==` may give a plain bool instead of an element-wise array; only the
    # array form still has to be reduced.
    if not isinstance(matches, bool):
        matches = matches.all()
    self.assertTrue(
        matches,
        "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
    )
def testRBallHyper_CenterDefaultColor(self):
    # r-balls of radius 2 around "n_10", extracted with the center recolored
    # to the default label "0", must match the fixtures after the same
    # recoloring is applied to them.
    center = "n_10"
    hypergraph = Hypergraph(example_graphs.gt_dummy_graph)

    extracted = {}
    for dir_code in (-1, 1, 0):
        extracted[dir_code] = algorithms.r_ball_hyper(
            hypergraph, center, 2, dir_code, center_default_color=True)

    expected = {}
    expected[0] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_all)
    expected[1] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_out)
    expected[-1] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_in)

    check_order = (0, 1, -1)
    for dir_code in check_order:
        expected[dir_code].node[center]["labels"] = ["0"]

    verdicts = [algorithms.isomorphic(expected[dir_code], extracted[dir_code])
                for dir_code in check_order]
    messages = (
        "Problem extracting r-ball with edge_dir=0.",
        "Problem extracting r-ball with edge_dir=1.",
        "Problem extracting r-ball with edge_dir=-1.",
    )
    for verdict, message in zip(verdicts, messages):
        self.assertTrue(verdict, message)
def testHypergraph_edges_iter(self):
    # edges_iter() with zero, one and two node arguments on the dummy graph.
    hypergraph = Hypergraph(example_graphs.gt_dummy_graph)

    all_edges = list(hypergraph.edges_iter())
    self.assertEqual(len(all_edges), 32)

    edges_at_n6 = set(hypergraph.edges_iter("n_6"))
    self.assertEqual(edges_at_n6, {"e_5", "e_9", "e_13", "e_28"})

    edges_n5_n1 = set(hypergraph.edges_iter("n_5", "n_1"))
    self.assertEqual(edges_n5_n1, {"e_15"})
def testHypergraph_subgraph_with_labels(self):
    # The subgraph induced by four nodes must be isomorphic to the fixture.
    hypergraph = Hypergraph(example_graphs.gt_dummy_graph)
    kept_nodes = {"n_1", "n_6", "n_9", "n_10"}
    extracted = hypergraph.subgraph_with_labels(kept_nodes)
    self.assertTrue(
        algorithms.isomorphic(example_graphs.gt_dummy_subgraph, extracted),
        "Incorrect subgraph extraction from hypergraph.")
def testCharacteristicMatrix_JaccardSimMatrix(self):
    # The Jaccard similarity matrix computed from the dummy graph's
    # characteristic matrix must equal the fixture stored on the test case.
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    ch_matrix = CharacteristicMatrix(
        rballs_database, hypergraph.number_of_nodes(), wl_iterations=0)
    computed = ch_matrix.compute_jaccard_similarity_matrix()
    elementwise = self.ch_matrix_jaccard_sim_exp == computed
    self.assertTrue(elementwise.all(),
                    "The computed Jaccard similarity matrix is wrong.")
def testHypergraph_ReadWrite(self):
    # Round-trip the dummy hypergraph through a file and compare the result.
    tmp_path = "test_files/dummy_hypergraph.tmp"
    original = Hypergraph(example_graphs.gt_dummy_graph)
    original.save_to_file(tmp_path)
    restored = Hypergraph.load_from_file(tmp_path)
    self.assertEqual(original, restored,
                     "The read hypergraph is different from the saved one.")
def testCharacteristicMatrix_ReadWrite(self):
    # Round-trip a characteristic matrix through a file and compare the result.
    tmp_path = "test_files/characteristic_matrix.tmp"
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=2, r_out=2, r_all=0)
    original = CharacteristicMatrix(
        rballs_database, hypergraph.number_of_nodes(), wl_iterations=4)
    original.save_to_file(tmp_path)
    restored = CharacteristicMatrix.load_from_file(tmp_path)
    self.assertEqual(
        restored, original,
        "The read characteristic matrix is different from the saved one.")
def calculate_ch_matrix(): in_files = helpers.datasets[dataset]["files"] print "Converting RDF to NetworkX graph started at", time.strftime( time_format) start = time.time() graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False) print "Converting RDF to NetworkX graph took", time.time() - start, "s" print "-----------------------------------------" print "Saving NodeID map started at", time.strftime(time_format) start = time.time() inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset)) print "Saving NodeID map took", time.time() - start, "s" print "-----------------------------------------" print "Building hypergraph started at", time.strftime(time_format) start = time.time() hypergraph = Hypergraph(graph) print "Building hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Saving hypergraph started at", time.strftime(time_format) start = time.time() hypergraph.save_to_file(path + "{0}_hgraph".format(dataset)) print "Saving hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Building characteristic matrix started at", time.strftime( time_format) start = time.time() rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database( hypergraph, r_in=r_in, r_out=r_out, r_all=r_all) ch_matrix = CharacteristicMatrix(rballs_database, hypergraph.number_of_nodes(), wl_iterations=wl_iterations, print_progress=True) print "Building characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" print "Saving Column index to Node map started at", time.strftime( time_format) start = time.time() inout.save_to_file(index_node_map, path + "{0}_index_node_map".format(dataset)) print "Saving Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" print "Saving characteristic matrix started at", time.strftime(time_format) start = time.time() 
ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset)) print "Saving characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" return ch_matrix, hypergraph, index_node_map, node_id_map
def testCharacteristicMatrix(self):
    # The sparse characteristic matrix of the dummy graph must equal the
    # fixture stored on the test case.
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    ch_matrix = CharacteristicMatrix(
        rballs_database, hypergraph.number_of_nodes(), wl_iterations=0)
    computed = ch_matrix.sparse_matrix
    self.assertEqual(self.raw_ch_matrix_exp, computed,
                     "The computed characteristic matrix is wrong.")
def testSimilarNodesMining(self):
    # The similarity matrix derived from the sketch matrix must agree with the
    # exact Jaccard similarities thresholded at 0.8.
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    ch_matrix = CharacteristicMatrix(
        rballs_database, hypergraph.number_of_nodes(), wl_iterations=0)

    jaccard = ch_matrix.compute_jaccard_similarity_matrix()
    expected = np.array(jaccard >= 0.8, dtype=np.float32)

    sketch_matrix = SketchMatrix(25, 265, ch_matrix)
    computed = similar_nodes_mining.get_node_similarity_matrix(sketch_matrix)

    elementwise = expected == computed
    self.assertTrue(
        elementwise.all(),
        "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def testSketchMatrix_ReadWrite(self):
    # Round-trip a sketch matrix through a file and compare the raw matrices.
    tmp_path = "test_files/sketch_matrix.tmp"
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=2, r_out=2, r_all=0)
    ch_matrix = CharacteristicMatrix(
        rballs_database, hypergraph.number_of_nodes(), wl_iterations=4)
    original = SketchMatrix(5, 20, ch_matrix)
    original.save_to_file(tmp_path)
    restored = SketchMatrix.load_from_file(tmp_path)
    elementwise = restored.matrix == original.matrix
    self.assertTrue(elementwise.all(),
                    "The read sketch matrix is different from the saved one.")
def testDropEdgesByProbability(self):
    # Dropping every edge independently with probability p should leave about
    # (1 - p) of the edges, both for a Hypergraph and for the plain graph it
    # was built from. The check is randomized, hence the generous tolerance.
    dummy_hypergraph = Hypergraph(example_graphs.gt_dummy_graph)
    edges_count = float(dummy_hypergraph.number_of_edges())
    msg = "The proportion of edges remaining after being dropping deviate too much from the expected."
    for p in [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]:
        new_hypergraph = algorithms.drop_edges_by_probability(dummy_hypergraph, p)
        new_graph = algorithms.drop_edges_by_probability(example_graphs.gt_dummy_graph, p)
        # Expected surviving proportion; the original multiplied and divided
        # this by the edge count, which cancels out.
        edges_prop_exp = 1. - p
        edges_prop_hyper = new_hypergraph.number_of_edges() / edges_count
        edges_prop = new_graph.number_of_edges() / edges_count
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
        self.assertAlmostEqual(edges_prop_exp, edges_prop_hyper, delta=0.2, msg=msg)
        self.assertAlmostEqual(edges_prop_exp, edges_prop, delta=0.2, msg=msg)
def testGetSimilarNodesToQueryNode(self):
    # Nodes similar to the query node "n_7", found through the sketch matrix,
    # must be the columns 0, 5 and 7.
    graph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_database, _ = similar_nodes_mining.extract_rballs_database(
        graph, r_in=3, r_out=2, r_all=0)
    node_total = graph.number_of_nodes()
    sketch_matrix = SketchMatrix(
        25, 265,
        CharacteristicMatrix(rballs_database, node_total, wl_iterations=0))

    wanted = np.array([0, 5, 7])
    result = similar_nodes_mining.get_similar_nodes(
        "n_7", graph, sketch_matrix, 0, [], r_in=3, r_out=2, r_all=0)
    got, _ = result

    same = wanted == got
    # An element-wise comparison result has to be reduced to a single truth
    # value; a plain bool is used as it is.
    same = same if isinstance(same, bool) else same.all()
    self.assertTrue(same, "Wrong similar nodes were extracted (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def testCharacteristicMatrix_JaccardSimMatrix(self):
    # Compare the computed Jaccard similarity matrix with the stored fixture,
    # element by element.
    graph = Hypergraph(example_graphs.snm_dummy_graph)
    extraction = similar_nodes_mining.extract_rballs_database(
        graph, r_in=3, r_out=2, r_all=0)
    rballs_database, _ = extraction
    node_total = graph.number_of_nodes()
    matrix = CharacteristicMatrix(rballs_database, node_total, wl_iterations=0)
    jaccard = matrix.compute_jaccard_similarity_matrix()
    all_equal = (self.ch_matrix_jaccard_sim_exp == jaccard).all()
    self.assertTrue(all_equal,
                    "The computed Jaccard similarity matrix is wrong.")
def testWShinglesExtraction(self):
    # w-shingles are extracted from two graphs that share one Weisfeiler-Lehman
    # state; both shingle sets and their intersection are checked.
    expected_1 = {
        u'_1.0,', u'2;(wl', u',1)))', u'1.2),', u',a),b', u'))),w',
        u',(1.2', u'2),(1', u'(x,((', u'(0,1)', u';(x,(', u',((0,',
        u'2;(x,', u'))),a', u'(1.2;', u'0,((0', u'),(1.', u'.2;(x',
        u';(wl_', u'.2;(w', u',a),(', u'l_1.2', u'l_1.1', u'l_1.0',
        u')),a)', u'),a),', u'1;(1.', u'((0,1', u'_1.1)', u')),wl',
        u'1.0,(', u'(0.1;', u'_1.2)', u'wl_1.', u'1))),', u'0,1))',
        u'.2),(', u'0.1;(', u'2),wl', u'),wl_', u'.0,((', u'a),b)',
        u'x,((0', u'a),(1', u'.1;(1', u'(wl_1', u'1.2;(', u',wl_1',
        u';(1.2', u'.2),w'
    }
    expected_2 = {
        u'y,((1', u'2;(wl', u'1.4),', u',1)))', u'(1,0)', u'1.2),',
        u'_1.0,', u'))),w', u',(1.2', u'2),(1', u'(x,((', u'(0,1)',
        u'_1.4)', u';(x,(', u',((0,', u'2;(x,', u'))),a', u'(1.2;',
        u'))),c', u'0,((0', u'1,0))', u'),(1.', u'_1.5)', u',0)))',
        u'.2;(y', u';(wl_', u'.2;(w', u',a),(', u'l_1.5', u'l_1.4',
        u'l_1.3', u'l_1.2', u'c),b)', u'l_1.0', u')),a)', u'),a),',
        u'1;(1.', u'((0,1', u'3,((1', u',((1,', u'(y,((', u';(y,(',
        u'1.0,(', u'(0.1;', u'_1.2)', u'wl_1.', u'1))),', u'0,1))',
        u'.2),(', u'0.1;(', u'),wl_', u'.0,((', u'),c),', u'x,((0',
        u'1.3,(', u'a),(1', u',c),b', u'.4),w', u'4),wl', u'.2;(x',
        u'.1;(1', u'_1.3,', u'0))),', u'.3,((', u')),wl', u'(wl_1',
        u')),c)', u'1.2;(', u',wl_1', u'2;(y,', u';(1.2', u'((1,0'
    }
    expected_common = {
        u')),wl', u'_1.0,', u'.1;(1', u'1.0,(', u'2;(wl', u'_1.2)',
        u',1)))', u',wl_1', u'1.2),', u'wl_1.', u'1))),', u'0,1))',
        u'.2),(', u'))),w', u'0.1;(', u',(1.2', u'2),(1', u'.0,((',
        u'(x,((', u'(0,1)', u';(x,(', u'(0.1;', u',((0,', u'2;(x,',
        u'))),a', u'(1.2;', u'0,((0', u'),(1.', u'.2;(x', u';(wl_',
        u'a),(1', u'.2;(w', u',a),(', u'x,((0', u'l_1.2', u'l_1.0',
        u'(wl_1', u')),a)', u'),a),', u'1;(1.', u'((0,1', u'1.2;(',
        u'),wl_', u';(1.2'
    }

    graph_1 = Hypergraph(example_graphs.w_shingles_graph_1)
    graph_2 = Hypergraph(example_graphs.w_shingles_graph_2)

    # The state returned by the first extraction is fed into the second one.
    shingles_1, state = shingle_extraction.extract_w_shingles(
        graph_1, wl_iterations=1, wl_state=None)
    shingles_2, state = shingle_extraction.extract_w_shingles(
        graph_2, wl_iterations=1, wl_state=state)

    wrong_shingles = "Wrong w-shingles were extracted from hypergraph."
    self.assertEqual(expected_1, shingles_1, wrong_shingles)
    self.assertEqual(expected_2, shingles_2, wrong_shingles)
    common = shingles_1 & shingles_2
    self.assertEqual(
        expected_common, common,
        "The intersection of the two sets of w-shingles is incorrect.")
def testCharacteristicMatrix_ReadWrite(self):
    # Save a characteristic matrix, load it back and expect an equal object.
    target = "test_files/characteristic_matrix.tmp"
    graph = Hypergraph(example_graphs.snm_dummy_graph)
    extraction = similar_nodes_mining.extract_rballs_database(
        graph, r_in=2, r_out=2, r_all=0)
    rballs_database, _ = extraction
    node_total = graph.number_of_nodes()
    saved = CharacteristicMatrix(rballs_database, node_total, wl_iterations=4)
    saved.save_to_file(target)
    loaded = CharacteristicMatrix.load_from_file(target)
    self.assertEqual(loaded, saved, "The read characteristic matrix is different from the saved one.")
def load_ch_matrix(): print "Reading NodeID map started at", time.strftime(time_format) start = time.time() node_id_map = inout.load_from_file(path + "{0}_node_id_map".format(dataset)) print "Reading NodeID map took", time.time() - start, "s" print "-----------------------------------------" print "Reading hypergraph started at", time.strftime(time_format) start = time.time() hypergraph = Hypergraph.load_from_file(path + "{0}_hgraph".format(dataset)) print "Reading hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Reading characteristic matrix started at", time.strftime(time_format) start = time.time() ch_matrix = CharacteristicMatrix.load_from_file(path + "{0}_ch_matrix".format(dataset)) print "Reading characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" print "Reading Column index to Node map started at", time.strftime(time_format) start = time.time() index_node_map = inout.load_from_file(path + "{0}_index_node_map".format(dataset)) print "Reading Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" return ch_matrix, hypergraph, index_node_map, node_id_map
def testSketchMatrix_ReadWrite(self):
    # Save a sketch matrix, load it back and expect identical matrix contents.
    target = "test_files/sketch_matrix.tmp"
    graph = Hypergraph(example_graphs.snm_dummy_graph)
    extraction = similar_nodes_mining.extract_rballs_database(
        graph, r_in=2, r_out=2, r_all=0)
    rballs_database, _ = extraction
    node_total = graph.number_of_nodes()
    ch_matrix = CharacteristicMatrix(rballs_database, node_total, wl_iterations=4)
    saved = SketchMatrix(5, 20, ch_matrix)
    saved.save_to_file(target)
    loaded = SketchMatrix.load_from_file(target)
    identical = (loaded.matrix == saved.matrix).all()
    self.assertTrue(identical, "The read sketch matrix is different from the saved one.")
def rule_3(hypergraph):
    '''Replace every group of parallel hyperedges with a single hyperedge.

    For each group, the hyperedges are turned into strings under every
    permutation of the endpoints; the sorted, comma-joined strings form a
    candidate label. The smallest candidate becomes the label of the new
    edge (wrapped as "(3;<label>)") and all permutations that produce it
    become its direction.

    :param hypergraph: the Hypergraph to reduce in place.
    :return: True if there was at least one group of parallel hyperedges.
    '''
    group_keys = list(hypergraph.parallel_hedges_groups.keys())

    for group_key in group_keys:
        group = hypergraph.parallel_hedges_groups[group_key]
        endpoints = hypergraph.endpoints(group[0])

        # One (label, permutation) candidate per ordering of the endpoints.
        candidates = []
        for perm in permutations(endpoints):
            hedge_strings = sorted(
                Hypergraph.hedge_to_string(hypergraph, hedge, perm)
                for hedge in group)
            candidates.append((u",".join(hedge_strings), perm))

        minimal_label = min(label for label, _ in candidates)
        direction = set(perm for label, perm in candidates
                        if label == minimal_label)

        hypergraph.remove_edges_from(group, unsafe=True)
        hypergraph.add_edge(endpoints, direction, u"(3;{0})".format(minimal_label))

    hypergraph.reset_parallel_hedges_groups()
    return len(group_keys) > 0
def load_ch_matrix(): print "Reading NodeID map started at", time.strftime(time_format) start = time.time() node_id_map = inout.load_from_file(path + "{0}_node_id_map".format(dataset)) print "Reading NodeID map took", time.time() - start, "s" print "-----------------------------------------" print "Reading hypergraph started at", time.strftime(time_format) start = time.time() hypergraph = Hypergraph.load_from_file(path + "{0}_hgraph".format(dataset)) print "Reading hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Reading characteristic matrix started at", time.strftime( time_format) start = time.time() ch_matrix = CharacteristicMatrix.load_from_file( path + "{0}_ch_matrix".format(dataset)) print "Reading characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" print "Reading Column index to Node map started at", time.strftime( time_format) start = time.time() index_node_map = inout.load_from_file(path + "{0}_index_node_map".format(dataset)) print "Reading Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" return ch_matrix, hypergraph, index_node_map, node_id_map
def calculate_ch_matrix(): in_files = helpers.datasets[dataset]["files"] print "Converting RDF to NetworkX graph started at", time.strftime(time_format) start = time.time() graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False) print "Converting RDF to NetworkX graph took", time.time() - start, "s" print "-----------------------------------------" print "Saving NodeID map started at", time.strftime(time_format) start = time.time() inout.save_to_file(node_id_map, path + "{0}_node_id_map".format(dataset)) print "Saving NodeID map took", time.time() - start, "s" print "-----------------------------------------" print "Building hypergraph started at", time.strftime(time_format) start = time.time() hypergraph = Hypergraph(graph) print "Building hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Saving hypergraph started at", time.strftime(time_format) start = time.time() hypergraph.save_to_file(path + "{0}_hgraph".format(dataset)) print "Saving hypergraph took", time.time() - start, "s" print "-----------------------------------------" print "Building characteristic matrix started at", time.strftime(time_format) start = time.time() rballs_database, index_node_map = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=r_in, r_out=r_out, r_all=r_all) ch_matrix = CharacteristicMatrix(rballs_database, hypergraph.number_of_nodes(), wl_iterations=wl_iterations, print_progress=True) print "Building characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" print "Saving Column index to Node map started at", time.strftime(time_format) start = time.time() inout.save_to_file(index_node_map, path + "{0}_index_node_map".format(dataset)) print "Saving Column index to Node map took", time.time() - start, "s" print "-----------------------------------------" print "Saving characteristic matrix started at", time.strftime(time_format) start = time.time() 
ch_matrix.save_to_file(path + "{0}_ch_matrix".format(dataset)) print "Saving characteristic matrix took", time.time() - start, "s" print "-----------------------------------------" return ch_matrix, hypergraph, index_node_map, node_id_map
def r_ball_hyper(hypergraph, center, r, edge_dir=0, center_default_color=False):
    '''The same as r_ball but for Hypergraph.

    Builds a new Hypergraph that contains `center` and everything reached by
    following, at most `r` levels deep, the edges returned by
    hypergraph.edges_iter_dir(node, dir_code=edge_dir).

    :param hypergraph: the source graph; must be exactly of type Hypergraph.
    :param center: the node the r-ball is built around.
    :param r: the maximal recursion depth; with r <= 0 only the center is returned.
    :param edge_dir: direction code forwarded to edges_iter_dir and has_edge
    (presumably 0 = any direction, 1 = outgoing, -1 = incoming -- confirm
    against Hypergraph).
    :param center_default_color: if True, the center node gets the attributes
    {"labels": ["0"]} instead of a copy of its original attributes.
    :return: the r-ball as a new Hypergraph.
    '''
    assert type(hypergraph) is Hypergraph
    # nodes on which recurse() has already been started
    visited_nodes = set()

    def recurse(u, i):
        # u is the node being expanded and i its depth (the center has depth 1)
        visited_nodes.add(u)
        edges = hypergraph.edges_iter_dir(u, dir_code=edge_dir)
        # edges already copied as "parallel" to an edge handled earlier in this call
        skip_edges = set()
        for edge in edges:
            if edge in skip_edges:
                continue
            endpoints = hypergraph.endpoints(edge)
            new_endpoints = set(endpoints) - set([u])
            for v in new_endpoints:
                if not rball.has_node(v):
                    rball.add_node(v, attr_dict=copy.deepcopy(hypergraph.node[v]))
            # NOTE(review): raises StopIteration when u is the only endpoint of the edge
            first_new_endpoint = next(iter(new_endpoints))
            # TODO: this condition may be tricky if the graph has hyperedges
            if not rball.has_edge(u, first_new_endpoint, edge_dir):
                parallel_edges = hypergraph.edges_iter_dir(u, first_new_endpoint, dir_code=edge_dir)
                # add all parallel edges in the same direction to the r-ball
                for parallel_edge in parallel_edges:
                    skip_edges.add(parallel_edge)
                    p_edge_attr = hypergraph.edge(parallel_edge)
                    direction = p_edge_attr["direction"]
                    # TODO: not safe if we have hyperedges
                    rball.add_edge(endpoints, direction=copy.deepcopy(direction), label=u",".join(copy.deepcopy(p_edge_attr["labels"])))
            # go one level deeper only while the radius is not exhausted
            if i < r:
                for v in new_endpoints:
                    if v not in visited_nodes:
                        recurse(v, i + 1)

    # recurse() refers to rball as a closure variable; it is created before the first call
    rball = Hypergraph()
    if center_default_color:
        # the center node's default color is 0 ("owl:Thing")
        rball.add_node(center, attr_dict={"labels": ["0"]})
    else:
        rball.add_node(center, attr_dict=copy.deepcopy(hypergraph.node[center]))
    if r > 0:
        recurse(center, 1)
    # re-initialize the helper structures of the new hypergraph after all insertions
    rball.init_parallel_edges_groups()
    rball.init_nodes_with_n_neighbors()
    return rball
def testWeisfeilerLehman(self):
    # Run Weisfeiler-Lehman iterations on the dummy graph until the labeling
    # is stable and compare the accumulated state with the expected one.
    expected_state = {
        "labels": {
            "0": "wl_0.0",
            "1": "wl_0.1",
            "a": "wl_0.2",
            "b": "wl_0.3",
            "wl_0.0;in(wl_0.3)": "wl_1.0",
            "wl_0.0;any(wl_0.2),in(wl_0.2)": "wl_1.1",
            "wl_0.1;any(wl_0.2),out(wl_0.2,wl_0.3)": "wl_1.2",
            "wl_0.1;any(wl_0.2),out(wl_0.2)": "wl_1.3",
            "wl_0.2;in(wl_0.1),out(wl_0.0)": "wl_1.4",
            "wl_0.2;any(wl_0.0,wl_0.1)": "wl_1.5",
            "wl_0.3;in(wl_0.1),out(wl_0.0)": "wl_1.6",
            "wl_1.0;in(wl_1.6)": "wl_2.0",
            "wl_1.1;any(wl_1.5),in(wl_1.4)": "wl_2.1",
            "wl_1.2;any(wl_1.5),out(wl_1.4,wl_1.6)": "wl_2.2",
            "wl_1.3;any(wl_1.5),out(wl_1.4)": "wl_2.3",
            "wl_1.4;in(wl_1.2),out(wl_1.1)": "wl_2.4",
            "wl_1.4;in(wl_1.3),out(wl_1.1)": "wl_2.5",
            "wl_1.5;any(wl_1.1,wl_1.2)": "wl_2.6",
            "wl_1.5;any(wl_1.1,wl_1.3)": "wl_2.7",
            "wl_1.6;in(wl_1.2),out(wl_1.0)": "wl_2.8",
            "wl_2.0;in(wl_2.8)": "wl_3.0",
            "wl_2.1;any(wl_2.7),in(wl_2.4)": "wl_3.1",
            "wl_2.1;any(wl_2.6),in(wl_2.5)": "wl_3.2",
            "wl_2.2;any(wl_2.6),out(wl_2.4,wl_2.8)": "wl_3.3",
            "wl_2.3;any(wl_2.7),out(wl_2.5)": "wl_3.4",
            "wl_2.4;in(wl_2.2),out(wl_2.1)": "wl_3.5",
            "wl_2.5;in(wl_2.3),out(wl_2.1)": "wl_3.6",
            "wl_2.6;any(wl_2.1,wl_2.2)": "wl_3.7",
            "wl_2.7;any(wl_2.1,wl_2.3)": "wl_3.8",
            "wl_2.8;in(wl_2.2),out(wl_2.0)": "wl_3.9"
        },
        "next_labels": {
            0: 4,
            1: 7,
            2: 9,
            3: 10
        }
    }

    current = Hypergraph(example_graphs.gt_dummy_wl)
    current, state = weisfeiler_lehman.init(current, test_mode=True)

    iteration = 1
    stable = False
    while not stable:
        refined, state = weisfeiler_lehman.iterate(
            current, state, iteration, test_mode=True)
        stable = weisfeiler_lehman.is_stable(current, refined, iteration)
        if not stable:
            # Continue from the refined graph in the next round.
            current = refined
            iteration += 1

    self.assertEqual(
        expected_state, state,
        "The multi-sets of labels computed by Weisfeiler-Lehman are not correct.")
def testSimilarNodesMining(self):
    # Exact Jaccard similarities >= 0.8 (as float32 0/1 flags) are the
    # reference for the similarity matrix obtained from the sketch matrix.
    graph = Hypergraph(example_graphs.snm_dummy_graph)
    extraction = similar_nodes_mining.extract_rballs_database(
        graph, r_in=3, r_out=2, r_all=0)
    rballs_database, _ = extraction
    node_total = graph.number_of_nodes()
    ch_matrix = CharacteristicMatrix(rballs_database, node_total, wl_iterations=0)

    exact_similarities = ch_matrix.compute_jaccard_similarity_matrix()
    above_threshold = exact_similarities >= 0.8
    reference = np.array(above_threshold, dtype=np.float32)

    sketch_matrix = SketchMatrix(25, 265, ch_matrix)
    approximated = similar_nodes_mining.get_node_similarity_matrix(sketch_matrix)

    all_equal = (reference == approximated).all()
    self.assertTrue(all_equal, "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
def testRBallHyper(self):
    # r-balls of radius 2 around "n_10", one per direction code, must be
    # isomorphic to the corresponding fixtures.
    hypergraph = Hypergraph(example_graphs.gt_dummy_graph)

    extracted = {}
    for dir_code in (-1, 1, 0):
        extracted[dir_code] = algorithms.r_ball_hyper(hypergraph, "n_10", 2, dir_code)

    expected = {}
    expected[0] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_all)
    expected[1] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_out)
    expected[-1] = Hypergraph(example_graphs.gt_dummy_rball_10_r2_in)

    check_order = (0, 1, -1)
    verdicts = [algorithms.isomorphic(expected[dir_code], extracted[dir_code])
                for dir_code in check_order]
    messages = (
        "Problem extracting r-ball with edge_dir=0.",
        "Problem extracting r-ball with edge_dir=1.",
        "Problem extracting r-ball with edge_dir=-1.",
    )
    for verdict, message in zip(verdicts, messages):
        self.assertTrue(verdict, message)
def testDropEdgesByProbability(self):
    # After dropping each edge with probability p, roughly (1 - p) of the
    # edges should remain. This is checked for a Hypergraph and for the plain
    # graph it was built from; the outcome is random, so a wide tolerance
    # (delta=0.2) is used.
    dummy_hypergraph = Hypergraph(example_graphs.gt_dummy_graph)
    edges_count = float(dummy_hypergraph.number_of_edges())
    msg = "The proportion of edges remaining after being dropping deviate too much from the expected."
    for p in [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]:
        new_hypergraph = algorithms.drop_edges_by_probability(
            dummy_hypergraph, p)
        new_graph = algorithms.drop_edges_by_probability(
            example_graphs.gt_dummy_graph, p)
        # Expected surviving proportion; multiplying and dividing it by the
        # edge count, as was done before, cancels out.
        edges_prop_exp = 1. - p
        edges_prop_hyper = new_hypergraph.number_of_edges() / edges_count
        edges_prop = new_graph.number_of_edges() / edges_count
        # assertAlmostEquals is a deprecated alias of assertAlmostEqual.
        self.assertAlmostEqual(edges_prop_exp, edges_prop_hyper,
                               delta=0.2, msg=msg)
        self.assertAlmostEqual(edges_prop_exp, edges_prop,
                               delta=0.2, msg=msg)
def testFeatureTypes(self):
    # Every feature produced from the reduced raw features of the second
    # dummy graph must be isomorphic to the fixture at the same position.
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph_2)

    extracted = []
    for raw_feature in arnborg_proskurowski.get_reduced_features(hypergraph):
        extracted.extend(
            feature_extraction.process_raw_feature(raw_feature, hypergraph))

    verdicts = [
        algorithms.isomorphic(feature, example_graphs.snm_dummy_graph_features_2[position])
        for position, feature in enumerate(extracted)
    ]
    self.assertTrue(all(verdicts), "Wrong features extracted.")
def read_chemical_compounts(in_file, process_compound_function=None):
    '''Lazily read a dataset of chemical compound graphs (e.g. Mutagenicity).

    The file is consumed in groups of three lines:
      1. a header starting with "#", followed by space-separated integers
         (index 2 is the number of nodes, index 3 the number of edges);
      2. the space-separated node labels (nodes are numbered from 1);
      3. the edges as a flat, space-separated sequence of
         "<node> <node> <label>" triples.
    On the node and edge lines the last space-separated token (normally the
    line ending) is discarded. A line starting with "$" in header position
    ends the dataset.

    :param in_file: Input text file (read as UTF-8).
    :param process_compound_function: Optional callback invoked with every
    record right before it is yielded.
    :return: a generator of tuples (p0, [h], p1) where h is the Hypergraph of
    one compound and p0, p1 are the first two integers of its header
    (presumably the compound id and its target class -- confirm against the
    dataset description).
    :raise AssertionError: if a header does not start with "#" or the node or
    edge count does not match the header.
    '''
    current_graph = None
    current_properties = None

    with codecs.open(in_file, "r", "utf8") as fp:
        for line_number, line in enumerate(fp):
            position = line_number % 3
            if position == 0:
                if line.startswith("$"):
                    # EOF
                    break
                assert line.startswith("#")
                # A real list (not a lazy map object) so it can be indexed
                # on Python 2 and Python 3 alike.
                current_properties = [int(x) for x in line.split(" ")[1:]]
                current_graph = nx.Graph()
            elif position == 1:
                nodes = line.split(" ")[:-1]
                assert len(nodes) == current_properties[2]
                for node_index, node_label in enumerate(nodes):
                    current_graph.add_node(node_index + 1, labels=[node_label])
            else:
                tokens = line.split(" ")[:-1]
                # Group the flat token list into complete triples; a trailing
                # incomplete group is ignored.
                edges = [tokens[e:e + 3]
                         for e in range(0, 3 * (len(tokens) // 3), 3)]
                assert len(edges) == current_properties[3]
                for edge in edges:
                    current_graph.add_edge(int(edge[0]), int(edge[1]), label=edge[2])
                # Records are only yielded, never accumulated, so memory use
                # stays independent of the dataset size.
                ch_db_record = (current_properties[0],
                                [Hypergraph(current_graph)],
                                current_properties[1])
                if process_compound_function:
                    process_compound_function(ch_db_record)
                yield ch_db_record
def testFeatureExtraction(self):
    # Extract features from the outgoing and incoming 1-balls of "n_2" with a
    # shared Weisfeiler-Lehman state; check the state and every feature.
    expected_state = {
        "labels": {
            "g": "wl_0.0",
            "n": "wl_0.1",
            "r": "wl_0.2",
            "b": "wl_0.3",
            "wl_0.0;in(wl_0.2),out(wl_0.2)": "wl_1.0",
            "wl_0.1;in(wl_0.2),out(wl_0.2)": "wl_1.1",
            "wl_0.2;in(wl_0.1)": "wl_1.2",
            "wl_0.2;out(wl_0.0,wl_0.1)": "wl_1.3",
            "wl_0.2;in(wl_0.0)": "wl_1.4",
            "wl_0.1;in(wl_0.3),out(wl_0.2)": "wl_1.5",
            "wl_0.3;out(wl_0.1)": "wl_1.6",
            "wl_0.2;in(wl_0.1,wl_0.1)": "wl_1.7"
        },
        "next_labels": {
            0: 4,
            1: 8
        }
    }

    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs = [r_ball_hyper(hypergraph, "n_2", 1, edge_dir=direction)
              for direction in (1, -1)]

    extracted = []
    state = None
    for rball in rballs:
        # The state returned for one r-ball is passed on to the next one.
        batch, state = feature_extraction.extract_features(
            rball, wl_iterations=1, wl_state=state)
        extracted.extend(batch)

    self.assertEqual(expected_state, state,
                     "The wrong wl_state was computed by Weisfeiler-Lehman.")

    verdicts = [
        algorithms.isomorphic(feature, example_graphs.snm_dummy_graph_features[position])
        for position, feature in enumerate(extracted)
    ]
    self.assertTrue(all(verdicts), "Wrong features extracted.")
def get_shingle_fingerprints():
    '''Collect the fingerprints of all shingles of all features of the query graphs.

    Reads `query_graph_list`, `wl_iterations` and `wl_labels_list` from the
    surrounding scope. Query graphs that are not exactly of type Hypergraph
    are wrapped in one first.

    :return: the tuple (f, w) where f is the set of fingerprints and w is the
    value returned by the last feature extraction (or `wl_labels_list`
    itself when there are no query graphs).
    '''
    labels_state = wl_labels_list

    # First extract the features of every query graph...
    features_per_graph = []
    for graph in query_graph_list:
        as_hypergraph = graph if type(graph) is Hypergraph else Hypergraph(graph)
        graph_features, labels_state = feature_extraction.extract_features(
            as_hypergraph, wl_iterations, labels_state)
        features_per_graph.append(graph_features)

    # ...then fingerprint the shingles of each feature.
    collected = set()
    for graph_features in features_per_graph:
        for feature in graph_features:
            shingles = shingle_extraction.extract_shingles(feature)
            collected.update(fingerprint.get_fingerprints(shingles))

    return collected, labels_state
def testHypergraph_ReadWrite(self):
    # Save the dummy hypergraph, load it back and expect an equal object.
    target = "test_files/dummy_hypergraph.tmp"
    saved = Hypergraph(example_graphs.gt_dummy_graph)
    saved.save_to_file(target)
    loaded = Hypergraph.load_from_file(target)
    self.assertEqual(saved, loaded, "The read hypergraph is different from the saved one.")
def r_ball_hyper(hypergraph,
                 center,
                 r,
                 edge_dir=0,
                 center_default_color=False):
    '''The same as r_ball but for Hypergraph.

    Starting at `center`, the edges returned by
    hypergraph.edges_iter_dir(node, dir_code=edge_dir) are followed at most
    `r` levels deep; the visited nodes and edges are copied into a new
    Hypergraph.

    :param hypergraph: the source graph; must be exactly of type Hypergraph.
    :param center: the node the r-ball is built around.
    :param r: the maximal recursion depth; with r <= 0 only the center is returned.
    :param edge_dir: direction code forwarded to edges_iter_dir and has_edge
    (presumably 0 = any direction, 1 = outgoing, -1 = incoming -- confirm
    against Hypergraph).
    :param center_default_color: if True, the center node gets the attributes
    {"labels": ["0"]} instead of a copy of its original attributes.
    :return: the r-ball as a new Hypergraph.
    '''
    assert type(hypergraph) is Hypergraph
    # nodes that recurse() has already been called on
    visited_nodes = set()

    def recurse(u, i):
        # expand node u, which lies at depth i (the center has depth 1)
        visited_nodes.add(u)
        edges = hypergraph.edges_iter_dir(u, dir_code=edge_dir)
        # parallel edges that were already copied together with an earlier edge of u
        skip_edges = set()
        for edge in edges:
            if edge in skip_edges:
                continue
            endpoints = hypergraph.endpoints(edge)
            new_endpoints = set(endpoints) - set([u])
            for v in new_endpoints:
                if not rball.has_node(v):
                    rball.add_node(v,
                                   attr_dict=copy.deepcopy(hypergraph.node[v]))
            # NOTE(review): raises StopIteration when u is the only endpoint of the edge
            first_new_endpoint = next(iter(new_endpoints))
            # TODO: this condition may be tricky if the graph has hyperedges
            if not rball.has_edge(u, first_new_endpoint, edge_dir):
                parallel_edges = hypergraph.edges_iter_dir(u,
                                                           first_new_endpoint,
                                                           dir_code=edge_dir)
                # add all parallel edges in the same direction to the r-ball
                for parallel_edge in parallel_edges:
                    skip_edges.add(parallel_edge)
                    p_edge_attr = hypergraph.edge(parallel_edge)
                    direction = p_edge_attr["direction"]
                    # TODO: not safe if we have hyperedges
                    rball.add_edge(endpoints,
                                   direction=copy.deepcopy(direction),
                                   label=u",".join(
                                       copy.deepcopy(p_edge_attr["labels"])))
            # descend further only while the radius is not exhausted
            if i < r:
                for v in new_endpoints:
                    if v not in visited_nodes:
                        recurse(v, i + 1)

    # rball is a closure variable of recurse(); it is bound before the first call
    rball = Hypergraph()
    if center_default_color:
        # the center node's default color is 0 ("owl:Thing")
        rball.add_node(center, attr_dict={"labels": ["0"]})
    else:
        rball.add_node(center,
                       attr_dict=copy.deepcopy(hypergraph.node[center]))
    if r > 0:
        recurse(center, 1)
    # re-initialize the helper structures of the new hypergraph after all insertions
    rball.init_parallel_edges_groups()
    rball.init_nodes_with_n_neighbors()
    return rball
def chem_database_generator(full_graph, uri_node_map, type_color_map, compounds_and_targets):
    '''Generate one graph database record per compound.

    The full graph is cleaned first (in place): every node carrying a non-boolean
    XML Schema color is removed and the owl:NamedIndividual color is stripped from
    the node labels. Then, for each compound, the 2-ball around the compound node is
    extracted from the resulting hypergraph and yielded as a record.

    The names uri_prefix, process_compound_function and read_compounds_and_targets
    are not parameters; they are resolved from the enclosing (or module) scope.

    :param full_graph: the graph to process. It is modified in place. It has to
        provide nodes(), nodes_iter(), node and remove_node (presumably a NetworkX 1.x
        graph - TODO confirm).
    :param uri_node_map: a mapping from a URI (uri_prefix + compound id) to the value
        used to build the node identifier u"n_<value>".
    :param type_color_map: a mapping from an RDF type URI to its color (node label).
    :param compounds_and_targets: an iterable of (comp_id, target_label) pairs. If it
        is empty or None, read_compounds_and_targets() is used instead.
    :return: a generator of records of the form
        (comp_id, [compound neighborhood hypergraph], target_label).
    '''
    # Collect the colors of all types from the XML Schema namespace.
    literal_colors = set()
    for rdf_type in type_color_map:
        # TODO: this condition is unsafe because it may remove not only literal colors
        if rdf_type.startswith(u"http://www.w3.org/2001/XMLSchema#"):
            literal_colors.add(type_color_map[rdf_type])

    # Boolean literals are kept, therefore their colors are taken out of literal_colors.
    bool_colors = filter(lambda x: x.startswith(u"http://www.w3.org/2001/XMLSchema#boolean"), type_color_map)
    bool_colors = set(map(lambda x: type_color_map[x], bool_colors))
    literal_colors -= bool_colors

    for node in full_graph.nodes():
        node_labels_set = set(full_graph.node[node]["labels"])
        # remove all literals (except booleans)
        if literal_colors & node_labels_set:
            full_graph.remove_node(node)

    # remove the color of named individual type from all nodes where it occurs
    named_indiv_uri = u"http://www.w3.org/2002/07/owl#NamedIndividual"
    if named_indiv_uri in type_color_map:
        named_indiv_color = type_color_map[named_indiv_uri]
        for node in full_graph.nodes_iter():
            if named_indiv_color in full_graph.node[node]["labels"]:
                full_graph.node[node]["labels"].remove(named_indiv_color)

    full_hypergraph = Hypergraph(full_graph)

#     ################
#     # INFO: use this to remove the isMutagenic property when predicting mutagenicity
#     is_mutag_color = type_color_map[u"http://dl-learner.org/carcinogenesis#isMutagenic"]
#     edges_to_remove = []
#     for edge in full_hypergraph.edges_iter():
#         if is_mutag_color in full_hypergraph.edge(edge)['labels']:
#             edges_to_remove.append(edge)
#     for edge in edges_to_remove:
#         full_hypergraph.safe_remove_edge(edge)
#     ################

    if not compounds_and_targets:
        compounds_and_targets = read_compounds_and_targets()

    def remove_other_neighbors_of_bool_literals(hypergraph, center_node):
        # For every boolean literal adjacent to center_node, remove all other
        # neighbors of that literal from the hypergraph.
        center_neighbors = hypergraph.neighbors(center_node)
        bool_literals = filter(lambda n: set(hypergraph.node[n]['labels']) & bool_colors, center_neighbors)
        for bool_literal in bool_literals:
            bool_literal_neigbors = set(hypergraph.neighbors(bool_literal))
            # exclude the center node from the removable nodes
            # (set.remove raises KeyError if center_node is not among the neighbors)
            bool_literal_neigbors.remove(center_node)
            for neigh in bool_literal_neigbors:
                hypergraph.safe_remove_node(neigh)

    for comp_id, target_label in compounds_and_targets:
        node_id = u"n_{0}".format(uri_node_map[uri_prefix + comp_id])
        # 2-ball around the compound node, following the edges in all directions (edge_dir=0)
        comp_neighborhood_hypergraph = algorithms.r_ball_hyper(full_hypergraph, node_id, 2, 0)
        remove_other_neighbors_of_bool_literals(comp_neighborhood_hypergraph, node_id)
        ch_db_record = (comp_id, [comp_neighborhood_hypergraph], target_label)
        # Optional hook, called with every record before it is yielded.
        if process_compound_function:
            process_compound_function(ch_db_record)
#         ############
#         def get_key(value, dictionary):
#             for key in dictionary:
#                 if dictionary[key] == value:
#                     return key
#             return None
#         g = ch_db_record[1][0].copy()
#         for n in g.node:
#             n_new_labels = []
#             for n_color in g.node[n]['labels']:
#                 n_rdf_type = get_key(n_color, type_color_map)
#                 n_rdf_type = n_rdf_type[n_rdf_type.find(u"#") + 1:]
#                 n_new_labels.append(n_rdf_type)
#             g.node[n]['labels'] = n_new_labels
#         g.visualize()
#         ############
        yield ch_db_record
def testHypergraph_Copy(self):
    # A copy of a hypergraph must compare equal to the hypergraph it was made from.
    original = Hypergraph(example_graphs.gt_dummy_graph)
    duplicate = original.copy()
    self.assertEqual(original, duplicate, "The copy was not correct.")
def extract_rballs_from_rdf_server(entries, output_dir, r, edge_dir, sparql_endpoint="http://localhost:3030/ds/query", entries_count_expected=-1, sort_rdf_nodes_before_processing=True):
    '''Extract r-balls around the given entry nodes from the graph on the server using SPARQL queries.

    For every entry, the r-ball is queried from the server, converted to a graph,
    wrapped in a Hypergraph and saved as the record
    (entry_uri, [r-ball hypergraph], target_labels) in the file
    output_dir + "r_ball_<index>".

    :param entries: the entry nodes (resources, URI/IRIs) which will serve as center nodes of the r-balls
    :param output_dir: the directory for writing the output files. The file name is appended
        by plain string concatenation, so the value has to end with a path separator.
    :param r: radius of the r-balls
    :param edge_dir: the direction of edges to be considered (0 - all edges, 1 - only outgoing, -1 - only incoming)
    :param sparql_endpoint: URL of the SPARQL end-point. Default is http://localhost:3030/ds/query (for Apache Jena Fuseki)
    :param entries_count_expected: Expected number of entries to process. Used only to estimate
        the remaining time in the progress output; -1 means unknown.
    :param sort_rdf_nodes_before_processing: Used to yield the same colors in multiple runs.
        Passed as test_mode to rdf.convert_rdf_graph_to_nx_graph.
    :return: a tuple (nodes_count_distribution, type_distribution). The first is a dict mapping
        a number of nodes to the number of r-balls of that size; the second is a dict mapping
        a type URI to the number of r-balls having the color of that URI among their target labels.
    :raise AssertionError: if a query returns a truthy status.
    '''
    # The color assignment is carried over from one conversion to the next,
    # so that all r-balls share the same colors.
    colors = None
    next_color_id = None

    nodes_count_distribution = {}
    type_distribution = {}
    def update_stats(nodes_count, target_labels, colors):
        # Update both distributions with the statistics of a single r-ball.
        def get_target_uri_map():
            # Map every target color to a URI having that color in colors;
            # stop as soon as as many entries as target labels have been collected.
            target_uri_map = {}
            for uri in colors:
                if colors[uri] in target_labels:
                    target_uri_map[colors[uri]] = uri
                    if len(target_uri_map) == len(target_labels):
                        break
            return target_uri_map

        if nodes_count not in nodes_count_distribution:
            nodes_count_distribution[nodes_count] = 0
        nodes_count_distribution[nodes_count] += 1

        target_uri_map = get_target_uri_map()
        for target in target_uri_map:
            type_uri = target_uri_map[target]
            if type_uri not in type_distribution:
                type_distribution[type_uri] = 0
            type_distribution[type_uri] += 1

    start_time = time.time()

    for i, entry_uri in enumerate(entries):
#         # TODO: specific case of 2-in-balls
#         query_status, rdf_r_ball = rdf.quary_2_in_ball(entry_uri, sparql_endpoint)
        query_status, rdf_r_ball = rdf.quary_r_ball(entry_uri, r, edge_dir, sparql_endpoint, ignore_type_paths=True, include_types=True)
        # A truthy status is treated as a failed query.
        assert not query_status
        r_ball, uri_nodes_map, colors, next_color_id = rdf.convert_rdf_graph_to_nx_graph(rdf_r_ball, test_mode=sort_rdf_nodes_before_processing, return_colors=True, base_colors=colors, next_color_id=next_color_id)
        if entry_uri not in uri_nodes_map:
            # in case the r-ball is empty
            node_id = 0
            r_ball.add_node(node_id, labels=["0"])
            uri_nodes_map[entry_uri] = node_id

        center_node = uri_nodes_map[entry_uri]
        target_labels = list(r_ball.node[center_node]["labels"])
        # Make the center node of color 0 (owl:Thing)
        # The original colors of the center node serve as target labels of the r-ball
        r_ball.node[center_node]["labels"] = ["0"]
        hyper_r_ball = Hypergraph(r_ball)
        nodes_count = r_ball.number_of_nodes()
        if i % 10 == 0: # print every 10 records
            elapsed_time = time.time() - start_time
            if entries_count_expected == -1 or i == 0:
                # no estimate is possible (unknown total or nothing processed yet)
                time_est = "Elapsed time: {0:.2f}s".format(elapsed_time)
            else:
                # average time per processed entry multiplied by the number of remaining entries
                time_left = (elapsed_time / i) * (entries_count_expected - i)
                time_est = "Time left: {0:.2f}s".format(time_left)
            print i, time_est, nodes_count, entry_uri, target_labels
        update_stats(nodes_count, target_labels, colors)
        graph_database_record = (entry_uri, [hyper_r_ball], target_labels)
        inout.save_to_file(graph_database_record, output_dir + "r_ball_{0}".format(i))

    return nodes_count_distribution, type_distribution
def testHypergraph_subgraph_with_labels(self):
    # The subgraph induced by the selected nodes must be isomorphic
    # to the expected example subgraph.
    selected_nodes = {"n_1", "n_6", "n_9", "n_10"}
    source = Hypergraph(example_graphs.gt_dummy_graph)
    extracted = source.subgraph_with_labels(selected_nodes)
    self.assertTrue(
        algorithms.isomorphic(example_graphs.gt_dummy_subgraph, extracted),
        "Incorrect subgraph extraction from hypergraph.")
def testCharacteristicMatrix(self):
    # The characteristic matrix built from the r-balls of the dummy graph
    # must match the expected raw matrix stored on the test case.
    hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
    rballs_db, _ = similar_nodes_mining.extract_rballs_database(
        hypergraph, r_in=3, r_out=2, r_all=0)
    computed = CharacteristicMatrix(rballs_db, hypergraph.number_of_nodes(), wl_iterations=0)
    self.assertEqual(self.raw_ch_matrix_exp, computed.sparse_matrix,
                     "The computed characteristic matrix is wrong.")
def run_algorithm(graph, return_features=False, compute_string=True):
    '''Performs the algorithm proposed by Arnborg & Proskurowski on a graph with tree-width at most 3.

    The reduction rules are applied repeatedly to a working hypergraph until no rule
    applies any more. If no edges are left at that point, the graph has been reduced
    completely; otherwise its tree-width is reported as greater than 3.

    :param graph: A NetworkX graph or a Hypergraph. A Hypergraph is copied before processing;
        any other input is passed to the Hypergraph constructor.
    :param return_features: (default False) If true, returns the features, which were reduced by the algorithm.
    :param compute_string: (default True) If True returns the canonical string representation of the graph.
        False means to perform the reduction rules without computing the canonical string.
    :return: A tuple of the form (tree_width, canonical_string[, reduced_features]).
        For a graph without nodes it is (0, ""[, []]). If the graph cannot be reduced, it is
        (-1, u"Tree-width > 3"[, reduced_features]). If compute_string is False, the canonical
        string is u"".
    '''
    def is_done(hypergraph):
        # The reduction is complete when no edges are left.
        if hypergraph.number_of_edges() == 0:
            return True
        else:
            return False

    def collect_labels(hypergraph):
        # The canonical string: the sorted first labels of the remaining nodes, comma-separated.
        labels = []
        for node in hypergraph.nodes_iter():
            labels.append(hypergraph.node[node]["labels"][0])
        labels.sort()
        return u",".join(labels)

    def rule_0(hypergraph, compute_string):
        # Normalization: removes self-loops, merges multiple node labels into one (rule 0.1)
        # and merges parallel edges into one (rule 0.2). Returns True if anything was changed.
        modified = False
        # (originally 1.3) - remove self-loops
        self_loops = list(hypergraph.self_loops)
        if len(self_loops) > 0:
            modified = True
            if compute_string:
                # the first label of the self-loop becomes an additional label of its node
                for self_loop in self_loops:
                    node = hypergraph.endpoints(self_loop)[0]
                    hypergraph.add_node_label(node, hypergraph.edge(self_loop)["labels"][0])
                    hypergraph.remove_edge(self_loop)
            else:
                hypergraph.remove_edges_from(self_loops, unsafe=True)

        # rule 0.1
        if compute_string:
            nodes_with_more_labels = list(hypergraph.nodes_with_more_labels)
            if len(nodes_with_more_labels) > 0:
                modified = True
                for node in nodes_with_more_labels:
                    # NOTE: the label list of the node is sorted in place
                    labels = hypergraph.node[node]["labels"]
                    labels.sort()
                    new_label = u"(0.1;{0})".format(u",".join(labels))
                    hypergraph.set_node_labels(node, [new_label])
                hypergraph.reset_nodes_with_more_labels()

        # rule 0.2
        parallel_edges_groups_keys = list(hypergraph.parallel_edges_groups.keys())
        if len(parallel_edges_groups_keys) > 0:
            modified = True
            for key in parallel_edges_groups_keys:
                edges_group = list(hypergraph.parallel_edges_groups[key])
                endpoints = hypergraph.endpoints(edges_group[0])
                if compute_string:
                    # Build a candidate label for every permutation of the endpoints and keep
                    # the smallest one; all permutations yielding it form the new direction.
                    perms = permutations(endpoints)
                    possible_labels = []
                    for perm in perms:
                        possible_label = {}
                        possible_label["perm"] = perm
                        possible_label["label"] = []
                        for edge in edges_group:
                            possible_label["label"].append(Hypergraph.edge_to_string(hypergraph, edge, perm))
                        possible_label["label"].sort()
                        possible_label["label"] = u"(0.2;{0})".format(u",".join(possible_label["label"]))
                        possible_labels.append(possible_label)
                    possible_labels = sorted(possible_labels, key=lambda element: element["label"])
                    minimal_label = possible_labels[0]["label"]
                    minimal_perm_indices = filter(lambda i: possible_labels[i]["label"] == minimal_label, range(len(possible_labels)))
                    direction = set([possible_labels[i]["perm"] for i in minimal_perm_indices])
                    hypergraph.remove_edges_from(edges_group, unsafe=True)
                    hypergraph.add_edge(endpoints, direction, minimal_label)
                else:
                    # without a canonical string, the group is replaced by an edge
                    # with an empty direction set and an empty label
                    hypergraph.remove_edges_from(edges_group, unsafe=True)
                    hypergraph.add_edge(endpoints, set(), "")
            hypergraph.reset_parallel_edges_groups()

        return modified

    def rule_1(hypergraph, return_features=False, compute_string=True):
        # Reduces the features extracted by ReducibleFeature.extract_rule_1_features.
        # Returns (modified, features), where features is None unless return_features is set.
        modified = False
        pendant_features = ReducibleFeature.extract_rule_1_features(hypergraph)
        if return_features:
            # materialized, because the features are both iterated and returned
            pendant_features = list(pendant_features)
        affected_nodes = set()
        for feature in pendant_features:
            if not modified:
                modified = True
            feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        # the conditional expression applies to the second tuple element only
        return modified, pendant_features if return_features else None

    def rule_2(hypergraph, return_features=False, compute_string=True):
        # Reduces the features extracted by ReducibleFeature.extract_rule_2_features.
        # The edges returned by the reductions are registered as candidate parallel edges.
        modified = False
        series_features = ReducibleFeature.extract_rule_2_features(hypergraph)
        if return_features:
            series_features = list(series_features)
        affected_nodes = set()
        new_edges = set()
        for feature in series_features:
            if not modified:
                modified = True
            _new_edges = feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
            new_edges |= _new_edges
        hypergraph.update_parallel_edges_groups(new_edges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        return modified, series_features if return_features else None

    def rule_3(hypergraph):
        # Merges every group of parallel hyperedges into a single edge labeled u"(3;...)".
        # Returns True if at least one group was merged.
        modified = False
        parallel_hedges_groups_keys = list(hypergraph.parallel_hedges_groups.keys())
        if len(parallel_hedges_groups_keys) > 0:
            modified = True
            for key in parallel_hedges_groups_keys:
                # NOTE(review): the group is indexed directly (unlike rule 0.2, which copies
                # the group to a list first) - this assumes the groups are sequences.
                hedges_group = hypergraph.parallel_hedges_groups[key]
                endpoints = hypergraph.endpoints(hedges_group[0])
                # the same minimal-label selection over the endpoint permutations as in rule 0.2
                perms = permutations(endpoints)
                possible_labels = []
                for perm in perms:
                    possible_label = {}
                    possible_label["perm"] = perm
                    possible_label["label"] = []
                    for hedge in hedges_group:
                        possible_label["label"].append(Hypergraph.hedge_to_string(hypergraph, hedge, perm))
                    possible_label["label"].sort()
                    possible_label["label"] = u",".join(possible_label["label"])
                    possible_labels.append(possible_label)
                possible_labels = sorted(possible_labels, key=lambda element: element["label"])
                minimal_label = possible_labels[0]["label"]
                minimal_perm_indices = filter(lambda i: possible_labels[i]["label"] == minimal_label, range(len(possible_labels)))
                direction = set([possible_labels[i]["perm"] for i in minimal_perm_indices])
                hypergraph.remove_edges_from(hedges_group, unsafe=True)
                hypergraph.add_edge(endpoints, direction, u"(3;{0})".format(minimal_label))
            hypergraph.reset_parallel_hedges_groups()
        return modified

    def rules_4_5_6_7(hypergraph, return_features=False, compute_string=True):
        # Reduces the features extracted by ReducibleFeature.extract_degree_3_features.
        # New edges whose identifier starts with u"he_" are registered as hyperedges,
        # all other new edges as ordinary edges.
        modified = False
        degree_3_features = ReducibleFeature.extract_degree_3_features(hypergraph)
        if return_features:
            degree_3_features = list(degree_3_features)
        affected_nodes = set()
        new_edges = set()
        for feature in degree_3_features:
            if not modified:
                modified = True
            _new_edges = feature.reduce(hypergraph, compute_string)
            affected_nodes |= set(feature.reducible_nodes) | set(feature.peripheral_nodes)
            new_edges |= _new_edges
        new_hedges = set(filter(lambda edge_id: edge_id.startswith(u"he_"), new_edges))
        hypergraph.update_parallel_edges_groups(new_edges - new_hedges)
        hypergraph.update_parallel_hedges_groups(new_hedges)
        hypergraph.update_nodes_with_n_neighbors(affected_nodes)
        return modified, degree_3_features if return_features else None

    # Work on a separate Hypergraph object, because the rules modify it.
    if type(graph) is not Hypergraph:
        hypergraph = Hypergraph(graph)
    else:
        hypergraph = graph.copy()

    features = []
    treewidth = 0

    if hypergraph.number_of_nodes() == 0:
        if return_features:
            return treewidth, "", features
        else:
            return treewidth, ""

    # Features reduced by the last applied rule; they are added to features
    # at the beginning of the next iteration.
    new_features = []
    while True:
        modified = False
        if return_features:
            features += new_features
#         hypergraph.visualize()
        # no need to check if modified here to continue, just go to the next rule after
        rule_0(hypergraph, compute_string)
        # The rules are tried in order; after a successful rule the loop starts over.
        # The tree-width is the highest level of the rules that were applied.
        modified, new_features = rule_1(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 1:
                treewidth = 1
            continue
        modified, new_features = rule_2(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 2:
                treewidth = 2
            continue
        # rule 3 is applied only when the canonical string is computed
        if compute_string:
            modified = rule_3(hypergraph)
            if modified:
                # rule 3 produces no features
                new_features = []
                continue
        modified, new_features = rules_4_5_6_7(hypergraph, return_features, compute_string)
        if modified:
            if treewidth < 3:
                treewidth = 3
            continue
        else:
            # no rule was applicable in this iteration
            if is_done(hypergraph):
                if hypergraph.number_of_nodes() == 0:
                    sys.stderr.write("\n[ArnborgProskurowski] Error: empty graph produced.")
                    if return_features:
                        return treewidth, u"", features
                    else:
                        return treewidth, u""
                else:
                    canon_str = collect_labels(hypergraph) if compute_string else u""
                    if return_features:
                        features += new_features
                        return treewidth, canon_str, features
                    else:
                        return treewidth, canon_str
            else:
                # there are still edges, but no rule applies
                if return_features:
                    features += new_features
                    return -1, u"Tree-width > 3", features
                else:
                    return -1, u"Tree-width > 3"
def rule_0(hypergraph, compute_string):
    '''Normalize the hypergraph: remove self-loops, merge multiple node labels
    (rule 0.1) and merge parallel edges (rule 0.2).

    :param hypergraph: the Hypergraph to normalize. It is modified in place.
    :param compute_string: if True, the labels needed for the canonical string are
        computed; if False, the edges are only removed/replaced and no labels are built.
    :return: True if the hypergraph was modified, False otherwise.
    '''
    changed = False

    # (originally 1.3) - remove self-loops
    loops = list(hypergraph.self_loops)
    if loops:
        changed = True
        if not compute_string:
            hypergraph.remove_edges_from(loops, unsafe=True)
        else:
            # the first label of each self-loop is moved to the node of the loop
            for loop in loops:
                loop_node = hypergraph.endpoints(loop)[0]
                hypergraph.add_node_label(loop_node, hypergraph.edge(loop)["labels"][0])
                hypergraph.remove_edge(loop)

    # rule 0.1
    if compute_string:
        multi_label_nodes = list(hypergraph.nodes_with_more_labels)
        if multi_label_nodes:
            changed = True
            for node in multi_label_nodes:
                # the label list of the node is sorted in place before merging
                node_labels = hypergraph.node[node]["labels"]
                node_labels.sort()
                merged_label = u"(0.1;{0})".format(u",".join(node_labels))
                hypergraph.set_node_labels(node, [merged_label])
            hypergraph.reset_nodes_with_more_labels()

    # rule 0.2
    group_keys = list(hypergraph.parallel_edges_groups.keys())
    if group_keys:
        changed = True
        for key in group_keys:
            group = list(hypergraph.parallel_edges_groups[key])
            endpoints = hypergraph.endpoints(group[0])
            if compute_string:
                # one (label, permutation) candidate per permutation of the endpoints
                candidates = []
                for perm in permutations(endpoints):
                    edge_strings = sorted(
                        Hypergraph.edge_to_string(hypergraph, edge, perm) for edge in group)
                    candidates.append((u"(0.2;{0})".format(u",".join(edge_strings)), perm))
                # the smallest label wins; every permutation producing it is a valid direction
                minimal_label = min(label for label, _ in candidates)
                direction = set(perm for label, perm in candidates if label == minimal_label)
                hypergraph.remove_edges_from(group, unsafe=True)
                hypergraph.add_edge(endpoints, direction, minimal_label)
            else:
                hypergraph.remove_edges_from(group, unsafe=True)
                hypergraph.add_edge(endpoints, set(), "")
        hypergraph.reset_parallel_edges_groups()

    return changed
# Configuration of a cross-validation experiment on a single RDF dataset.

# Key into helpers.datasets; selects the input files.
dataset = "drugadmin"

# Values passed to crossval.loo_crossval as the Weisfeiler-Lehman iteration range.
wl_iter_range = [3] # range(0, 10)

# Pairs annotated with the inflection point they correspond to.
# Used only by the alternative (commented-out) loo_crossval call below.
k_L_range = [
    (20, 1),    # inflection point ~0.
    (15, 5),    # inflection point 0.1
    (10, 9),    # inflection point 0.2
    (7, 12),    # inflection point 0.3
    (5, 13),    # inflection point 0.4
    (4, 16),    # inflection point 0.5
    (3, 16),    # inflection point 0.6
    (2, 11),    # inflection point 0.7
    (2, 25),    # inflection point 0.8
    (1, 10),    # inflection point 0.9
    (1, 20),    # inflection point ~1.
]

# Inflection points passed to crossval.loo_crossval.
infl_point_range = [0., 0.0000001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]

# NOTE(review): p_range is not used anywhere in this block.
p_range = [1]

# Radii of the incoming, outgoing and undirected r-balls passed to crossval.loo_crossval.
r_in_range = [3] # range(0, 4)
r_out_range = [2] # range(0, 4)
r_all_range = [0]

output_dir = "../output_rdf/crossval_test/"

if __name__ == '__main__':
    # Load the RDF files of the dataset into a graph and wrap it in a Hypergraph.
    in_files = helpers.datasets[dataset]["files"]
    graph, node_id_map = rdf.convert_rdf_to_nx_graph(in_files, discard_classes=False)
    hypergraph = Hypergraph(graph)
    # presumably leave-one-out cross-validation (judging by the name) - TODO confirm in crossval
    best_model = crossval.loo_crossval(hypergraph, wl_iter_range, r_in_range, r_out_range, r_all_range, output_dir, infl_point_range=infl_point_range)
#     best_model = crossval.loo_crossval(hypergraph, wl_iter_range, r_in_range, r_out_range, r_all_range, output_dir, k_L_range=k_L_range)
    print "Best model:", best_model