def convert_edge_to_directed(self):
    """
    Replace this graph's edges with their directed form, in place.

    Each column (u, v) of edge_index represents a directed edge from u to v
    and does not cover the edge from v to u; (v, u) has to be listed
    separately. Because that is inconvenient, edge_index may be supplied in
    undirected form, listing only (u, v), and this method converts it to both
    (u, v) and (v, u). edge_weight and edge_graph_index are passed through the
    same conversion and replaced with the returned values.

    :return: self, so that calls can be chained.
    """
    current_index = self.edge_index
    edge_props = [self.edge_weight, self.edge_graph_index]

    directed_index, directed_props = convert_edge_to_directed(current_index, edge_props)

    self.edge_index = directed_index
    # The second returned item is unpacked into exactly two values: weight and graph index.
    self.edge_weight, self.edge_graph_index = directed_props
    return self
def process(self):
    """
    Build the cora graph and its train/valid/test node index split.

    Reads the pickled ``ind.cora.*`` parts and ``ind.cora.test.index`` from
    ``self.raw_root_path``, restores the test rows to the order given by the
    index file, row-normalizes the features, and builds the edge index from
    the adjacency dict.

    :return: ``(graph, (train_index, valid_index, test_index))`` where the
        three index collections are lists of ints.
    """
    dataset_str = "cora"
    part_names = ('x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph')
    # Python 3 needs the latin1 encoding to read these pickles; Python 2 takes no such argument.
    pickle_kwargs = {'encoding': 'latin1'} if sys.version_info > (3, 0) else {}

    parts = {}
    for part_name in part_names:
        part_path = os.path.join(self.raw_root_path, "ind.{}.{}".format(dataset_str, part_name))
        # NOTE: pickle can run arbitrary code while loading; the raw files must be trusted.
        with open(part_path, 'rb') as part_file:
            parts[part_name] = pickle.load(part_file, **pickle_kwargs)

    index_path = os.path.join(self.raw_root_path, "ind.{}.test.index".format(dataset_str))
    with open(index_path, "r", encoding="utf-8") as index_file:
        test_idx_reorder = [int(row.strip()) for row in index_file]
    test_idx_range = np.sort(test_idx_reorder)

    # Stack the test rows after the "all" rows, then permute them into index-file order.
    features = sp.vstack((parts['allx'], parts['tx'])).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    labels = np.vstack((parts['ally'], parts['ty']))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    # Training nodes are the first len(y) rows; the next 500 are used for validation.
    num_train = len(parts['y'])
    train_index = list(range(num_train))
    valid_index = list(range(num_train, num_train + 500))
    test_index = test_idx_range.tolist()

    # Row-normalize the features; rows whose inverse sum is NaN or infinite keep a scale of 1.
    x = np.array(features.todense()).astype(np.float32)
    row_scale = 1.0 / np.sum(x, axis=-1, keepdims=True)
    row_scale[~np.isfinite(row_scale)] = 1.0
    x *= row_scale

    nx_graph = nx.from_dict_of_lists(parts['graph'])
    edge_index = np.array(nx_graph.edges).T
    edge_index, _ = remove_self_loop_edge(edge_index)
    edge_index, _ = convert_edge_to_directed(edge_index)

    # Labels are stored as one row per node; the class id is the position of the row maximum.
    y = np.argmax(labels, axis=-1).astype(np.int32)

    graph = Graph(x=x, edge_index=edge_index, y=y)
    return graph, (train_index, valid_index, test_index)
def process(self):
    """
    Build the graph and index split from a ``NodePropPredDataset``.

    The dataset named ``self.dataset_name`` is opened under
    ``self.download_root_path``; its first item supplies the node features,
    the edge index and the labels. The edge index is passed through
    ``convert_edge_to_directed`` and the labels are flattened to int32.

    :return: ``(graph, (train_index, valid_index, test_index))`` with the
        indices taken from the dataset's own split.
    """
    dataset = NodePropPredDataset(name=self.dataset_name, root=self.download_root_path)

    # The first item is a (graph dict, label array) pair.
    raw_graph, raw_label = dataset[0]
    node_features = raw_graph["node_feat"]
    directed_edge_index, _ = convert_edge_to_directed(raw_graph["edge_index"], None)
    node_labels = raw_label.flatten().astype(np.int32)

    graph = Graph(x=node_features, edge_index=directed_edge_index, y=node_labels)

    split_index = dataset.get_idx_split()
    index_splits = tuple(split_index[part] for part in ("train", "valid", "test"))
    return graph, index_splits
def process(self):
    """
    Build one list of graphs per split from the raw per-split files.

    For each of "train", "valid" and "test" this reads
    ``<split>_graph_id.npy``, ``<split>_feats.npy``, ``<split>_labels.npy`` and
    ``<split>_graph.json`` from ``self.raw_root_path`` and creates one
    ``Graph`` per distinct graph id, with node indices shifted so that each
    graph's smallest node index becomes 0.

    :return: ``[train_graphs, valid_graphs, test_graphs]``.
    """
    processed_data = []

    for split in ["train", "valid", "test"]:
        def raw_path(suffix, split=split):
            # Path of the raw file "<split>_<suffix>" inside the raw data directory.
            return os.path.join(self.raw_root_path, "{}_{}".format(split, suffix))

        graph_ids = np.load(raw_path("graph_id.npy"))
        features = np.load(raw_path("feats.npy")).astype(np.float32)
        labels = np.load(raw_path("labels.npy")).astype(np.int32)

        with open(raw_path("graph.json"), "r", encoding="utf-8") as json_file:
            node_link_data = json.load(json_file)
        nx_graph = nx.DiGraph(nx.json_graph.node_link_graph(node_link_data))

        graphs = []
        for graph_id in sorted(set(graph_ids)):
            # Indices of the nodes that belong to the current graph.
            node_indices = np.nonzero(graph_ids == graph_id)[0]
            offset = node_indices.min()

            # Shift node indices so that this graph's nodes start at 0.
            sub_edges = nx_graph.subgraph(node_indices).edges
            edge_index = np.array(sub_edges).T - offset
            edge_index, _ = convert_edge_to_directed(edge_index, edge_weight=None)

            graphs.append(
                Graph(x=features[node_indices], edge_index=edge_index, y=labels[node_indices])
            )

        processed_data.append(graphs)

    return processed_data
# Node features => (num_nodes, num_features)
num_nodes, num_features = 5, 20
x = np.random.randn(num_nodes, num_features).astype(np.float32)

# Edge index => (2, num_edges)
# Each column (u, v) of edge_index represents a directed edge from u to v.
# It does not cover the edge from v to u; (v, u) must be provided to cover it.
# Since this is inconvenient, edge_index may be provided in undirected form,
# listing only (u, v), and converted afterwards to (u, v) and (v, u) with
# the `convert_edge_to_directed` method.
edge_index = np.array([
    [0, 0, 1, 3],
    [1, 2, 2, 1],
])

# Edge weight => (num_edges)
edge_weight = np.array([0.9, 0.8, 0.1, 0.2]).astype(np.float32)

# Make the edge_index directed so that it can be used as the input of GCN.
# Edge properties are passed, and returned, as a list.
edge_index, [edge_weight] = convert_edge_to_directed(edge_index, [edge_weight])

# Convert the numpy arrays to TensorFlow tensors and pass them to the gnn function.
gcn_inputs = (
    tf.Variable(x),
    tf.constant(edge_index),
    tf.constant(edge_weight),
    tf.Variable(tf.random.truncated_normal([20, 2])),  # GCN weight
)
outputs = tfg.nn.gcn(*gcn_inputs)
print(outputs)

# Usually a graph object is used to manage this information.
# edge_weight is optional; it can be set to None when it is not needed.
graph = tfg.Graph(x=x, edge_index=edge_index, edge_weight=edge_weight)

# These numpy arrays can easily be converted to Tensors with the Graph Object API
# Edge index => (2, num_edges)
# Each column (u, v) of edge_index represents a directed edge from u to v.
# It does not cover the edge from v to u; (v, u) must be provided to cover it.
# Since this is inconvenient, edge_index may be provided in undirected form,
# listing only (u, v), and converted afterwards to (u, v) and (v, u) with
# the `convert_edge_to_directed` method.
edge_sources = [0, 0, 1, 3]
edge_targets = [1, 2, 2, 1]
edge_index = np.array([edge_sources, edge_targets])

# Edge weight => (num_edges)
edge_weight = np.array([0.9, 0.8, 0.1, 0.2]).astype(np.float32)

# Make the edge_index directed so that it can be used as the input of GCN.
edge_index, edge_weight = convert_edge_to_directed(edge_index, edge_weight=edge_weight)

# Convert the numpy arrays to TensorFlow tensors and pass them to the gnn function.
node_feature_tensor = tf.Variable(x)
edge_index_tensor = tf.constant(edge_index)
edge_weight_tensor = tf.constant(edge_weight)
gcn_kernel = tf.Variable(tf.random.truncated_normal([20, 2]))  # GCN weight
outputs = tfg.nn.gcn(
    node_feature_tensor,
    edge_index_tensor,
    edge_weight_tensor,
    gcn_kernel,
)
print(outputs)

# Usually a graph object is used to manage this information.
# edge_weight is optional; it can be set to None when it is not needed.
graph = tfg.Graph(x=x, edge_index=edge_index, edge_weight=edge_weight)
def process(self):
    """
    Build the graph and the train/valid/test node index split.

    Reads the pickled ``ind.<dataset_name>.*`` parts and
    ``ind.<dataset_name>.test.index`` from ``self.raw_root_path``, restores
    the test rows to the order given by the index file, row-normalizes the
    features, and builds the edge index from the adjacency dict.

    The split depends on ``self.task``:

    * ``"semi_supervised"``: the first ``len(y)`` nodes are used for training
      and the following 500 for validation.
    * anything else: the last 500 of the ``len(ally)`` nodes are used for
      validation and all nodes before them for training.

    :return: ``(graph, (train_index, valid_index, test_index))``. All three
        index collections are lists of ints, whichever task is selected.
    """
    dataset_str = self.dataset_name
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']

    objects = []
    for name in names:
        data_path = os.path.join(self.raw_root_path, "ind.{}.{}".format(dataset_str, name))
        # NOTE(security): pickle can run arbitrary code while loading, so the raw
        # files must come from a trusted source.
        with open(data_path, 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pickle.load(f, encoding='latin1'))
            else:
                objects.append(pickle.load(f))
    x, y, tx, ty, allx, ally, graph = objects

    index_path = os.path.join(self.raw_root_path, "ind.{}.test.index".format(dataset_str))
    with open(index_path, "r", encoding="utf-8") as f:
        test_idx_reorder = [int(line.strip()) for line in f]
    test_idx_range = np.sort(test_idx_reorder)

    if self.dataset_name == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph):
        # find isolated nodes and add them as zero-vecs into the right position.
        min_test_idx = min(test_idx_range)
        num_test_rows = max(test_idx_reorder) - min(test_idx_reorder) + 1

        tx_extended = sp.lil_matrix((num_test_rows, x.shape[1]))
        tx_extended[test_idx_range - min_test_idx, :] = tx
        tx = tx_extended

        ty_extended = np.zeros((num_test_rows, y.shape[1]))
        ty_extended[test_idx_range - min_test_idx, :] = ty
        ty = ty_extended

    # Stack the test rows after the "all" rows, then permute them into index-file order.
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    test_index = test_idx_range.tolist()
    if self.task == "semi_supervised":
        train_index = list(range(len(y)))
        valid_index = list(range(len(y), len(y) + 500))
    else:
        # Materialized as lists so that both branches return the same type.
        train_index = list(range(len(ally) - 500))
        valid_index = list(range(len(ally) - 500, len(ally)))

    # Row-normalize the features. All-zero rows produce an infinite inverse sum,
    # which is replaced by 1 below, so the floating-point warning is expected.
    x = np.array(features.todense()).astype(np.float32)
    with np.errstate(divide='ignore', invalid='ignore'):
        inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True)
    inv_sum_x[np.isnan(inv_sum_x)] = 1.0
    inv_sum_x[np.isinf(inv_sum_x)] = 1.0
    x *= inv_sum_x

    edge_index = np.array(nx.from_dict_of_lists(graph).edges).T
    edge_index, _ = remove_self_loop_edge(edge_index)
    edge_index, _ = convert_edge_to_directed(edge_index)

    # Labels are stored as one row per node; the class id is the position of the row maximum.
    y = np.argmax(labels, axis=-1).astype(np.int32)

    graph = Graph(x=x, edge_index=edge_index, y=y)
    return graph, (train_index, valid_index, test_index)
def convert_edge_to_directed(self):
    """
    Replace edge_index and edge_weight with their directed form, in place.

    Both attributes are passed to the module-level
    ``convert_edge_to_directed`` function and overwritten with the pair it
    returns.

    :return: self, so that calls can be chained.
    """
    directed_pair = convert_edge_to_directed(self.edge_index, self.edge_weight)
    self.edge_index, self.edge_weight = directed_pair
    return self