# ----------------------------------------------------------------------
# Step 1
# Decompose the MUTAG graphs into anonymous walk patterns.
# ----------------------------------------------------------------------
graph_files = sorted(
    utils.get_files(corpus_data_dir, ".gexf", max_files=0)
)
corpus, vocabulary, prob_map, num_graphs, graph_map = awe_corpus(
    corpus_data_dir,
    awe_length=10,
    label_setting='nodes',
    neighborhood_size=10,
)

# ----------------------------------------------------------------------
# Step 2
# Build the kernel and classify the graphs with a kernel method.
# ----------------------------------------------------------------------
# Plain MLE kernel: each graph is described directly by its pattern
# probabilities, without any learned substructure embeddings.
vocab_size = len(vocabulary)
vocabulary = sorted(vocabulary)  # fixed column order for the matrix below

# P[g, p] holds the probability of pattern p in graph g (0 when absent).
P = np.zeros((num_graphs, vocab_size))
for graph_row in range(num_graphs):
    for pattern_col, pattern in enumerate(vocabulary):
        # prob_map is looked up with graph_row + 1, i.e. its keys are
        # offset by one relative to the matrix rows.
        P[graph_row, pattern_col] = prob_map[graph_row + 1].get(pattern, 0)

# Linear (dot-product) kernel between every pair of graphs.
K = np.dot(P, P.T)

# Load the (x, y) label tuples and order them by their first element
# before splitting them into two parallel tuples.
class_labels_fname = "data/" + dataset + ".Labels"
xylabels = utils.get_class_labels_tuples(graph_files, class_labels_fname)
xylabels.sort(key=lambda pair: pair[0])
kernel_row_x_id, kernel_row_y_id = zip(*xylabels)

# Cross-validated classification on the probability matrix itself.
acc, std = cross_val_accuracy_rbf_bag_of_words(P, kernel_row_y_id)
print('#... Accuracy score: %0.4f, Standard deviation: %0.4f' % (acc, std))
# NOTE(review): the two indented statements below are the tail of a function
# whose `def` line is outside this view (presumably `load_graph`, which is
# called further down and unpacked into the same two values — confirm).
    # NOTE(review): `nx.to_numpy_matrix` was removed in NetworkX 3.0
    # (`nx.to_numpy_array` is the replacement) — confirm the pinned version.
    adj_matrix = nx.to_numpy_matrix(graph)
    return graph, adj_matrix


# Dataset name and the paths derived from it: the class-label file is
# "<path_to_gexf_data><dataset>.Labels" and the graphs live under
# "<path_to_gexf_data><dataset>".
dataset = "MUTAG"
path_to_gexf_data = "data/"
graph_class_labels_fh = path_to_gexf_data + dataset + ".Labels"
dataset_path = path_to_gexf_data + dataset

# Yanardag style dataset
data = {}
labels = []  # not used in the visible part of the script
graph_files = {}  # placeholder; rebound on the very next line
graph_files = utils.get_files(dataset_path, extension=".gexf", max_files=0)
label_tuples = utils.get_class_labels_tuples(graph_files, graph_class_labels_fh)
# Keep only the second element of each tuple, ordered by the first element
# (presumably (graph identifier, class label) pairs — verify against utils).
graph_classes = np.array(
    [y for z, y in sorted(label_tuples, key=lambda x: x[0])])
data['labels'] = graph_classes

# NOTE(review): this assignment is immediately shadowed by the loop variable
# below; it only matters in that it fails if `graph_files` is empty.
gf = graph_files[0]
graph_data = {}
for gf in graph_files:
    # The part of the file name before the first "." is parsed as an integer
    # and shifted down by one (assumes files are named "<int>.gexf" and
    # numbered from 1, giving zero-based indices — TODO confirm).
    gindex = int(os.path.basename(gf).split(".")[0]) - 1
    nx_graph, adj_matrix = load_graph(gf)
    graph_data[gindex] = {}
    for node_string in nx_graph.nodes():
        # Node keys and the 'Label' attribute are both cast to int.
        # NOTE(review): the loop body may continue beyond this view.
        node_label = int(nx_graph.nodes[node_string]['Label'])
        node_id = int(node_string)