def test_core_framework():
    """Verify that a fitted Core kernel Framework can be pickled.

    Both the dedicated ``CoreFramework`` class and the equivalent
    ``GraphKernel`` generic-wrapper configuration are fitted on the same
    generated dataset and then checked with ``is_picklable``.
    """
    dataset, _ = generate_dataset(
        n_graphs=100,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        n_graphs_test=40,
        random_state=rs,
        features=('nl', 4),
    )

    # Core framework on top of Weisfeiler-Lehman with a vertex histogram base.
    inner_kernel = (WeisfeilerLehman, {"base_graph_kernel": VertexHistogram})
    direct = CoreFramework(
        verbose=verbose,
        normalize=normalize,
        base_graph_kernel=inner_kernel,
    )
    wrapper_spec = [
        {"name": "core_framework"},
        {"name": "weisfeiler_lehman"},
        {"name": "vertex_histogram"},
    ]
    wrapped = GraphKernel(kernel=wrapper_spec, verbose=verbose, normalize=normalize)

    # Fit and check the dedicated class first, then the generic wrapper.
    for estimator in (direct, wrapped):
        estimator.fit(dataset)
        assert is_picklable(estimator)
def test_graphlet_sampling():
    """Verify that a fitted Graphlet Sampling kernel can be pickled.

    Both the dedicated ``GraphletSampling`` class and the equivalent
    ``GraphKernel`` generic-wrapper configuration are fitted on the same
    generated dataset and then checked with ``is_picklable``.
    """
    dataset, _ = generate_dataset(
        n_graphs=100,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        n_graphs_test=40,
        random_state=rs,
        features=('nl', 3),
    )

    direct = GraphletSampling(
        verbose=verbose,
        normalize=normalize,
        sampling={"n_samples": 50},
    )
    wrapper_spec = {"name": "graphlet_sampling", "sampling": {"n_samples": 50}}
    wrapped = GraphKernel(kernel=wrapper_spec, verbose=verbose, normalize=normalize)

    # Fit and check the dedicated class first, then the generic wrapper.
    for estimator in (direct, wrapped):
        estimator.fit(dataset)
        assert is_picklable(estimator)
def test_pyramid_match():
    """Verify that a fitted Pyramid Match kernel can be pickled.

    Both the dedicated ``PyramidMatch`` class and the equivalent
    ``GraphKernel`` generic-wrapper configuration are fitted on the same
    generated dataset and then checked with ``is_picklable``.
    """
    dataset, _ = generate_dataset(
        n_graphs=100,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        n_graphs_test=40,
        random_state=rs,
        features=('nl', 3),
    )

    direct = PyramidMatch(verbose=verbose, normalize=normalize)
    wrapped = GraphKernel(
        kernel={"name": "pyramid_match"},
        verbose=verbose,
        normalize=normalize,
    )

    # Fit and check the dedicated class first, then the generic wrapper.
    for estimator in (direct, wrapped):
        estimator.fit(dataset)
        assert is_picklable(estimator)
def test_edge_histogram():
    """Verify that a fitted Edge Histogram kernel can be pickled.

    Both the dedicated ``EdgeHistogram`` class and the equivalent
    ``GraphKernel`` generic-wrapper configuration are fitted on the same
    generated dataset (edge-labelled) and then checked with ``is_picklable``.
    """
    dataset, _ = generate_dataset(
        n_graphs=100,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        n_graphs_test=40,
        random_state=rs,
        features=('el', 4),
    )

    direct = EdgeHistogram(verbose=verbose, normalize=normalize)
    wrapped = GraphKernel(
        kernel={"name": "edge_histogram"},
        verbose=verbose,
        normalize=normalize,
    )

    # Fit and check the dedicated class first, then the generic wrapper.
    for estimator in (direct, wrapped):
        estimator.fit(dataset)
        assert is_picklable(estimator)
def test_odd_sth():
    """Verify that a fitted ODD-STh kernel can be pickled.

    Both the dedicated ``OddSth`` class and the equivalent ``GraphKernel``
    generic-wrapper configuration are fitted on the same generated dataset
    and then checked with ``is_picklable``.
    """
    dataset, _ = generate_dataset(
        n_graphs=100,
        r_vertices=(10, 20),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        n_graphs_test=40,
        random_state=rs,
        features=('nl', 4),
    )

    direct = OddSth(verbose=verbose, normalize=normalize)
    wrapped = GraphKernel(
        kernel={"name": "odd_sth"},
        verbose=verbose,
        normalize=normalize,
    )

    # Fit and check the dedicated class first, then the generic wrapper.
    for estimator in (direct, wrapped):
        estimator.fit(dataset)
        assert is_picklable(estimator)
def test_neighborhood_subgraph_pairwise_distance():
    """Verify that a fitted NSPD kernel can be pickled.

    Both the dedicated ``NeighborhoodSubgraphPairwiseDistance`` class and the
    equivalent ``GraphKernel`` generic-wrapper configuration are fitted on the
    same generated dataset (node- and edge-labelled) and then checked with
    ``is_picklable``.
    """
    dataset, _ = generate_dataset(
        n_graphs=100,
        r_vertices=(5, 10),
        r_connectivity=(0.4, 0.8),
        r_weight_edges=(1, 1),
        n_graphs_test=40,
        random_state=rs,
        features=('nl', 5, 'el', 4),
    )

    direct = NeighborhoodSubgraphPairwiseDistance(
        verbose=verbose,
        normalize=normalize,
    )
    wrapped = GraphKernel(
        kernel={"name": "neighborhood_subgraph_pairwise_distance"},
        verbose=verbose,
        normalize=normalize,
    )

    # Fit and check the dedicated class first, then the generic wrapper.
    for estimator in (direct, wrapped):
        estimator.fit(dataset)
        assert is_picklable(estimator)
def untangle(graph, k_hop, with_data: bool = True, with_call: bool = True, with_name: bool = True):
    """Cluster the seed sub-graphs of ``graph`` and prefix node labels with the cluster id.

    The graph is split by ``deltaPDG_to_list_of_Graphs`` into a list of seeds
    and one sub-graph per seed.  Every pair of sub-graphs is compared with a
    normalised Weisfeiler-Lehman subtree kernel; ``1 - similarity`` is used as
    the pairwise distance, and the sub-graphs are grouped with complete-linkage
    agglomerative clustering cut at distance 0.5.

    Parameters
    ----------
    graph
        Graph object exposing ``nodes(data=True)`` and ``add_node``; it is
        modified in place.  Every node carrying a ``'color'`` attribute must
        also carry a ``'label'`` attribute.
    k_hop
        Forwarded to ``deltaPDG_to_list_of_Graphs`` as ``khop_k``.
    with_data, with_call, with_name
        Forwarded unchanged to ``graph_to_grakel``.

    Returns
    -------
    The same ``graph`` object.  Each coloured node's ``'label'`` is prefixed
    with ``"<cluster>: "`` when the node is a seed and clustering ran, and with
    ``"-1: "`` otherwise.
    """
    seeds, list_of_graphs = deltaPDG_to_list_of_Graphs(graph, khop_k=k_hop)
    wl_subtree = GraphKernel(
        kernel=[
            {"name": "weisfeiler_lehman", "n_iter": 10},
            {"name": "subtree_wl"},
        ],
        normalize=True,
    )

    if len(list_of_graphs) > 0:
        n = len(list_of_graphs)
        # Condensed (upper-triangular) distance vector.  itertools.combinations
        # yields pairs in the order (0,1), (0,2), ..., (1,2), ... which is the
        # layout scipy's squareform expects, so entry k can be written directly
        # without any index lookups or a worker pool.
        affinity = np.zeros(shape=(scipy.special.comb(n, 2, exact=True), ))
        for k, (g1, g2) in enumerate(itertools.combinations(list_of_graphs, 2)):
            # The graph has to be converted to {Graph, Node_Labels, Edge_Labels}
            wl_subtree.fit(
                [graph_to_grakel(g1, with_data, with_call, with_name)])
            similarity = wl_subtree.transform(
                [graph_to_grakel(g2, with_data, with_call, with_name)])[0][0]
            affinity[k] = 1 - similarity  # affinity is distance! so (1 - sim)

        # NOTE(review): scikit-learn 1.2 renamed `affinity` to `metric` and 1.4
        # removed `affinity`; kept unchanged to match the version this file
        # targets -- confirm against the pinned dependency.
        cluster = AgglomerativeClustering(n_clusters=None,
                                          distance_threshold=0.5,
                                          affinity='precomputed',
                                          linkage='complete')
        if len(affinity) < 2:
            # Zero or one pair: too few samples to run the clusterer, so apply
            # the 0.5 threshold by hand.
            if len(affinity) == 1:
                labels = np.asarray(
                    [0, 0]) if affinity[0] <= 0.5 else np.asarray([0, 1])
            else:
                labels = np.asarray([0])
        else:
            labels = cluster.fit_predict(
                scipy.spatial.distance.squareform(affinity))
    else:
        labels = None

    for node, data in graph.nodes(data=True):
        if 'color' not in data.keys():
            continue
        i = seeds.index(node) if node in seeds else -1
        if labels is not None and i != -1:
            prefix = '%d: ' % labels[i]
        else:
            prefix = '-1: '
        data['label'] = prefix + data['label']
        graph.add_node(node, **data)

    return graph
def worker(work):
    """Cluster, label and score every delta-PDG listed in ``work``.

    For each ``.dot`` path the graph is loaded, filtered with
    ``remove_all_except(graph, edges_kept)``, and skipped if no nodes remain.
    The clustering step (WL-subtree kernel followed by complete-linkage
    agglomerative clustering at distance 0.5) is repeated ``times`` times and
    its mean wall-clock duration recorded.  Coloured nodes then get their
    ``'label'`` prefixed with the cluster id (or ``-1``), the annotated graph
    is written next to the input as ``*_output_wl_<k_hop>.dot``, and one CSV
    row ``chain,q,acc,overlap,time`` is appended to the results file.

    ``chain`` and ``q`` are taken from the names of the grandparent and parent
    directories of each path.  ``edges_kept``, ``times``, ``k_hop``,
    ``with_data``, ``with_call``, ``with_name``, ``repository_name`` and
    ``suffix`` are read from the enclosing scope.

    Parameters
    ----------
    work
        Iterable of paths to ``.dot`` files.
    """
    for graph_location in tqdm(work, leave=False):
        parent_dir = os.path.dirname(graph_location)
        chain = os.path.basename(os.path.dirname(parent_dir))
        q = int(os.path.basename(parent_dir))

        graph = obj_dict_to_networkx(read_graph_from_dot(graph_location))
        graph = remove_all_except(graph, edges_kept)
        if len(graph.nodes) == 0:
            continue

        t0 = time.perf_counter()
        for _ in range(times):
            seeds, list_of_graphs = deltaPDG_to_list_of_Graphs(
                graph, khop_k=k_hop)
            wl_subtree = GraphKernel(
                kernel=[
                    {"name": "weisfeiler_lehman", "n_iter": 10},
                    {"name": "subtree_wl"},
                ],
                normalize=True,
            )

            if len(list_of_graphs) > 0:
                n = len(list_of_graphs)
                # Condensed (upper-triangular) distance vector.
                # itertools.combinations yields pairs in the order (0,1),
                # (0,2), ..., (1,2), ... which is the layout scipy's squareform
                # expects, so entry k can be written directly without any
                # index lookups or a worker pool.
                affinity = np.zeros(
                    shape=(scipy.special.comb(n, 2, exact=True), ))
                for k, (g1, g2) in enumerate(
                        itertools.combinations(list_of_graphs, 2)):
                    # The graph has to be converted to {Graph, Node_Labels, Edge_Labels}
                    wl_subtree.fit([
                        graph_to_grakel(g1, with_data, with_call, with_name)
                    ])
                    similarity = wl_subtree.transform([
                        graph_to_grakel(g2, with_data, with_call, with_name)
                    ])[0][0]
                    # affinity is distance! so (1 - sim)
                    affinity[k] = 1 - similarity

                # NOTE(review): scikit-learn 1.2 renamed `affinity` to `metric`
                # and 1.4 removed `affinity`; kept unchanged to match the
                # version this file targets -- confirm against the pinned
                # dependency.
                cluster = AgglomerativeClustering(n_clusters=None,
                                                  distance_threshold=0.5,
                                                  affinity='precomputed',
                                                  linkage='complete')
                if len(affinity) < 2:
                    # Zero or one pair: too few samples to run the clusterer,
                    # so apply the 0.5 threshold by hand.
                    if len(affinity) == 1:
                        labels = np.asarray([
                            0, 0
                        ]) if affinity[0] <= 0.5 else np.asarray([0, 1])
                    else:
                        labels = np.asarray([0])
                else:
                    labels = cluster.fit_predict(
                        scipy.spatial.distance.squareform(affinity))
            else:
                labels = None
        t1 = time.perf_counter()
        time_ = (t1 - t0) / times  # mean duration of one clustering pass

        truth = list()
        label = list()
        for node, data in graph.nodes(data=True):
            if 'color' not in data.keys():
                continue
            # NOTE(review): `truth` only grows for nodes that carry
            # 'community' while `label` grows for every coloured node; the
            # masks below assume both end up the same length -- confirm.
            if 'community' in data.keys():
                truth.append(int(data['community']))
            i = seeds.index(node) if node in seeds else -1
            if labels is not None and i != -1:
                data['label'] = '%d: ' % labels[i] + data['label']
                label.append(labels[i])
            else:
                data['label'] = '-1: ' + data['label']
                label.append(-1)
            graph.add_node(node, **data)

        # graph_location[:-4] drops the 4-character extension (e.g. ".dot").
        nx.drawing.nx_pydot.write_dot(
            graph, graph_location[:-4] + '_output_wl_%d.dot' % k_hop)

        truth = np.asarray(truth)
        label = np.asarray(label)
        clustered = label > -1  # only score nodes that received a cluster id
        acc, overlap = evaluate(truth[clustered],
                                label[clustered],
                                q=1 if len(label) == 0 else np.max(label) + 1)

        with open(
                './out/%s/wl_%s_%d_results_%s.csv' %
            (repository_name, edges_kept, k_hop, suffix), 'a') as f:
            f.write(','.join(map(str, (chain, q, acc, overlap, time_))) + '\n')