def __init__(self, edge_index, edge_weight=None):
    """
    Build per-source neighbor tables from an edge list.

    :param edge_index: Edge index; converted with ``convert_union_to_numpy`` to int32.
        Each column of the converted array is read as one ``(source, target)`` pair.
    :param edge_weight: Optional per-edge weights. When None, every edge gets weight 1.0 (float32).
    """
    self.edge_index = convert_union_to_numpy(edge_index, np.int32)

    if edge_weight is None:
        num_edges = self.edge_index.shape[1]
        self.edge_weight = np.ones([num_edges], dtype=np.float32)
    else:
        self.edge_weight = convert_union_to_numpy(edge_weight)

    # source node -> list of (target node, weight), in edge order
    self.neighbor_dict = {}
    for (source, target), weight in zip(self.edge_index.T, self.edge_weight):
        self.neighbor_dict.setdefault(source, []).append((target, weight))

    self.num_sources = len(self.neighbor_dict)
    self.source_index = sorted(self.neighbor_dict)

    # Everything below is aligned with the sorted source_index.
    self.neighbors_list = [self.neighbor_dict[source] for source in self.source_index]
    self.num_neighbors_list = np.array([len(entries) for entries in self.neighbors_list])
    self.neighbor_index_list = [np.arange(count) for count in self.num_neighbors_list]
def extract_unique_edge(edge_index, edge_weight=None, mode="undirected"):
    """
    Drop duplicated edges, keeping the first occurrence of each one.

    :param edge_index: Edge index (Tensor or array-like); one edge per column.
    :param edge_weight: Optional edge weights aligned with the columns of edge_index.
    :param mode: With "undirected", an edge and its reverse count as the same edge;
        with any other value, edges are compared as ordered pairs.
    :return: (edge_index, edge_weight) restricted to the kept edges. Each output is
        converted back to a Tensor if the corresponding input was a Tensor.
    """
    index_was_tensor = tf.is_tensor(edge_index)
    weight_was_tensor = tf.is_tensor(edge_weight)

    # NOTE(review): presumably convert_union_to_numpy passes None through - confirm.
    edge_index = convert_union_to_numpy(edge_index, dtype=np.int32)
    edge_weight = convert_union_to_numpy(edge_weight, dtype=np.float32)

    undirected = mode == "undirected"
    seen_edges = set()
    kept_columns = []
    for column in range(edge_index.shape[1]):
        endpoints = edge_index[:, column]
        # Sorting the endpoints makes (a, b) and (b, a) hash to the same key.
        key = tuple(sorted(endpoints)) if undirected else tuple(endpoints)
        if key not in seen_edges:
            seen_edges.add(key)
            kept_columns.append(column)

    edge_index = edge_index[:, kept_columns]
    if index_was_tensor:
        edge_index = tf.convert_to_tensor(edge_index)

    if edge_weight is not None:
        edge_weight = edge_weight[kept_columns]
        if weight_was_tensor:
            edge_weight = tf.convert_to_tensor(edge_weight)

    return edge_index, edge_weight
def edge_train_test_split(edge_index, test_size, edge_weight=None, mode="undirected", **kwargs):
    """
    Randomly split the edges of a graph into a training part and a test part.

    :param edge_index: Edge index (Tensor or array-like); converted to an int32 NumPy array internally.
    :param test_size: Forwarded unchanged to ``train_test_split``.
    :param edge_weight: Optional edge weights; converted to a float32 NumPy array internally.
    :param mode: Only "undirected" is implemented.
    :param kwargs: Only inspected for the deprecated "num_nodes" key, which triggers a warning.
    :return: (train_edge_index, test_edge_index, train_edge_weight, test_edge_weight).
        Both weights are None when edge_weight is None. Outputs are converted back to
        Tensors when the corresponding input was a Tensor.
    :raises NotImplementedError: if mode is not "undirected".
    """
    if "num_nodes" in kwargs:
        warnings.warn(
            "argument \"num_nodes\" is deprecated for the method \"edge_train_test_split\", you can remove it"
        )

    if mode != "undirected":
        raise NotImplementedError()

    index_was_tensor = tf.is_tensor(edge_index)
    weight_was_tensor = tf.is_tensor(edge_weight)

    edge_index = convert_union_to_numpy(edge_index, dtype=np.int32)
    edge_weight = convert_union_to_numpy(edge_weight, dtype=np.float32)

    # The split is done on the output of convert_edge_to_upper
    # (presumably one entry per undirected edge - confirm against that helper).
    upper_edge_index, [upper_edge_weight] = convert_edge_to_upper(edge_index, [edge_weight])

    candidate_ids = list(range(upper_edge_index.shape[1]))
    train_indices, test_indices = train_test_split(candidate_ids, test_size=test_size)

    train_edge_index = upper_edge_index[:, train_indices]
    test_edge_index = upper_edge_index[:, test_indices]
    if index_was_tensor:
        train_edge_index = tf.convert_to_tensor(train_edge_index)
        test_edge_index = tf.convert_to_tensor(test_edge_index)

    if edge_weight is None:
        train_edge_weight = None
        test_edge_weight = None
    else:
        train_edge_weight = upper_edge_weight[train_indices]
        test_edge_weight = upper_edge_weight[test_indices]
        if weight_was_tensor:
            train_edge_weight = tf.convert_to_tensor(train_edge_weight)
            test_edge_weight = tf.convert_to_tensor(test_edge_weight)

    return train_edge_index, test_edge_index, train_edge_weight, test_edge_weight
def __init__(self, x, edge_index, edge_weight, is_undirected=True):
    """
    Store the node count, edge index and edge weights of a graph.

    :param x: Object whose ``shape[0]`` is taken as the number of nodes.
    :param edge_index: Edge index; converted with ``convert_union_to_numpy`` to int32.
    :param edge_weight: Per-edge weights, or None to give every edge weight 1.0 (float32).
    :param is_undirected: Stored as-is on the instance.
    """
    self.num_nodes = x.shape[0]
    self.edge_index = convert_union_to_numpy(edge_index, np.int32)

    if edge_weight is None:
        # One unit weight per column of the converted edge index.
        num_edges = self.edge_index.shape[1]
        self.edge_weight = np.ones([num_edges], dtype=np.float32)
    else:
        self.edge_weight = convert_union_to_numpy(edge_weight)

    self.is_undirected = is_undirected
def edge_train_test_split(edge_index, num_nodes, test_size, edge_weight=None, mode="undirected"):
    """
    Randomly split the edges of a graph into a training part and a test part.

    :param edge_index: Edge index (Tensor or array-like); converted to an int32 NumPy array internally.
    :param num_nodes: Not used by this function.
    :param test_size: Forwarded unchanged to ``train_test_split``.
    :param edge_weight: Optional edge weights; converted to a float32 NumPy array internally.
    :param mode: Only "undirected" is implemented.
    :return: (train_edge_index, test_edge_index, train_edge_weight, test_edge_weight).
        Both weights are None when edge_weight is None. Outputs are converted back to
        Tensors when the corresponding input was a Tensor.
    :raises NotImplementedError: if mode is not "undirected".
    """
    if mode != "undirected":
        raise NotImplementedError()

    index_was_tensor = tf.is_tensor(edge_index)
    weight_was_tensor = tf.is_tensor(edge_weight)

    edge_index = convert_union_to_numpy(edge_index, dtype=np.int32)
    edge_weight = convert_union_to_numpy(edge_weight, dtype=np.float32)

    # The split is done on the output of convert_edge_to_upper
    # (presumably one entry per undirected edge - confirm against that helper).
    upper_edge_index, upper_edge_weight = convert_edge_to_upper(edge_index, edge_weight)

    candidate_ids = list(range(upper_edge_index.shape[1]))
    train_indices, test_indices = train_test_split(candidate_ids, test_size=test_size)

    train_edge_index = upper_edge_index[:, train_indices]
    test_edge_index = upper_edge_index[:, test_indices]
    if index_was_tensor:
        train_edge_index = tf.convert_to_tensor(train_edge_index)
        test_edge_index = tf.convert_to_tensor(test_edge_index)

    if edge_weight is None:
        train_edge_weight = None
        test_edge_weight = None
    else:
        train_edge_weight = upper_edge_weight[train_indices]
        test_edge_weight = upper_edge_weight[test_indices]
        if weight_was_tensor:
            train_edge_weight = tf.convert_to_tensor(train_edge_weight)
            test_edge_weight = tf.convert_to_tensor(test_edge_weight)

    return train_edge_index, test_edge_index, train_edge_weight, test_edge_weight
def convert_edge_to_nx_graph(edge_index, edge_weight=None, convert_to_directed=False):
    """
    Build a networkx graph from an edge index.

    :param edge_index: Edge index; row 0 and row 1 hold the two endpoints of each edge.
    :param edge_weight: Optional edge weights, stored on each edge under the attribute "w".
        When None, the "w" attribute is set to None.
    :param convert_to_directed: If True, return ``to_directed()`` of the undirected graph.
    :return: ``nx.Graph``, or its directed counterpart when convert_to_directed is True.
    """
    edge_index = convert_union_to_numpy(edge_index, dtype=np.int32)
    edge_weight = convert_union_to_numpy(edge_weight, dtype=np.float32)
    has_weight = edge_weight is not None

    graph = nx.Graph()
    for column in range(edge_index.shape[1]):
        source = edge_index[0, column]
        target = edge_index[1, column]
        weight = edge_weight[column] if has_weight else None
        graph.add_edge(source, target, w=weight)

    return graph.to_directed() if convert_to_directed else graph
def sample_by_edge_mask(data):
    """
    Select the entries of ``data`` indexed by ``edge_mask``.

    ``edge_mask`` is not a parameter; it is looked up in the enclosing scope.

    :param data: Tensor, array-like, or None.
    :return: None if data is None; otherwise the selected entries, converted
        back to a Tensor when data was a Tensor.
    """
    if data is None:
        return data

    was_tensor = tf.is_tensor(data)
    selected = convert_union_to_numpy(data)[edge_mask]
    return tf.convert_to_tensor(selected) if was_tensor else selected
def negative_sampling(num_samples, num_nodes, edge_index=None, replace=True, mode="undirected", batch_size=None):
    """
    Sample random node pairs to be used as negative edges.

    :param num_samples: Number of edges to sample (per batch element).
    :param num_nodes: Number of nodes; sampled node indices lie in [0, num_nodes).
    :param edge_index: If provided, these (positive) edges are excluded from the candidates.
        If None, both endpoints are drawn uniformly at random with no filtering.
    :param replace: Only used when edge_index is provided; passed to ``np.random.choice``
        to decide whether the same candidate edge may be drawn more than once.
    :param mode: Only "undirected" is implemented for the case where edge_index is provided.
    :param batch_size: If None, return a single edge_index; otherwise return a list of
        batch_size independently sampled edge_index arrays.
    :return: One int32 edge index of shape [2, num_samples], or a list of them. The result is
        converted to Tensor(s) when the given edge_index was a Tensor.
    :raises NotImplementedError: if edge_index is provided and mode is not "undirected".
    """
    # Record tensor-ness BEFORE the conversion below; afterwards edge_index is always
    # NumPy (or None), so checking it later could never select the Tensor output.
    edge_index_is_tensor = tf.is_tensor(edge_index)
    edge_index = convert_union_to_numpy(edge_index, np.int32)

    num_batches = 1 if batch_size is None else batch_size

    if edge_index is None:
        sampled_edge_index_list = [
            np.random.randint(0, num_nodes, [2, num_samples]).astype(np.int32)
            for _ in range(num_batches)
        ]
    elif mode == "undirected":
        edge_index, _ = convert_edge_to_upper(edge_index)

        # Dense candidate matrix: 1 strictly above the diagonal (no self loops,
        # each unordered pair once), then zero out the given edges.
        adj = np.triu(np.ones([num_nodes, num_nodes]), k=1)
        adj[edge_index[0], edge_index[1]] = 0
        neg_edge_index = np.stack(np.nonzero(adj), axis=0)
        num_candidates = neg_edge_index.shape[1]

        sampled_edge_index_list = []
        for _ in range(num_batches):
            random_indices = np.random.choice(num_candidates, num_samples, replace=replace)
            sampled_edge_index_list.append(neg_edge_index[:, random_indices].astype(np.int32))
    else:
        raise NotImplementedError()

    if edge_index_is_tensor:
        sampled_edge_index_list = [
            tf.convert_to_tensor(sampled_edge_index)
            for sampled_edge_index in sampled_edge_index_list
        ]

    if batch_size is None:
        return sampled_edge_index_list[0]
    return sampled_edge_index_list
def convert_edge_to_nx_graph(edge_index, edge_properties=None, convert_to_directed=False):
    """
    Build a networkx graph from an edge index and optional per-edge properties.

    :param edge_index: Edge index; row 0 and row 1 hold the two endpoints of each edge.
    :param edge_properties: Optional list of per-edge property arrays (entries may be None).
        The j-th non-None property is stored on each edge under the attribute "p_j",
        where j is its position in the list. None (the default) means no properties.
    :param convert_to_directed: If True, return ``to_directed()`` of the undirected graph.
    :return: ``nx.Graph``, or its directed counterpart when convert_to_directed is True.
    """
    edge_index = convert_union_to_numpy(edge_index, dtype=np.int32)

    # None sentinel instead of a mutable default list shared across calls.
    if edge_properties is None:
        edge_properties = []
    edge_properties = [convert_union_to_numpy(edge_property) for edge_property in edge_properties]

    # Attribute names and None-filtering do not depend on the edge, so resolve
    # them once; positions of None entries still count toward the "p_j" numbering.
    named_properties = [
        ("p_{}".format(j), edge_property)
        for j, edge_property in enumerate(edge_properties)
        if edge_property is not None
    ]

    g = nx.Graph()
    for i in range(edge_index.shape[1]):
        property_dict = {name: edge_property[i] for name, edge_property in named_properties}
        g.add_edge(edge_index[0, i], edge_index[1, i], **property_dict)

    if convert_to_directed:
        g = g.to_directed()
    return g
def compute_edge_mask_by_node_index(edge_index, node_index):
    """
    Compute a boolean mask over edges whose two endpoints are both in node_index.

    :param edge_index: Edge index (Tensor or array-like) that unpacks into (row, col).
    :param node_index: Indices of the nodes to keep (Tensor or array-like).
    :return: Boolean mask with one entry per edge; a ``tf.bool`` Tensor when
        edge_index was a Tensor, otherwise a NumPy array.
    """
    edge_index_is_tensor = tf.is_tensor(edge_index)

    node_index = convert_union_to_numpy(node_index)
    edge_index = convert_union_to_numpy(edge_index)

    # The lookup table must cover every index that appears in either input.
    max_node_index = np.maximum(np.max(edge_index), np.max(node_index))

    # Builtin bool dtype: the np.bool alias was removed in NumPy 1.24.
    node_mask = np.zeros([max_node_index + 1], dtype=bool)
    node_mask[node_index] = True

    row, col = edge_index
    edge_mask = np.logical_and(node_mask[row], node_mask[col])

    if edge_index_is_tensor:
        edge_mask = tf.convert_to_tensor(edge_mask, dtype=tf.bool)

    return edge_mask
def negative_sampling_with_start_node(start_node_index, num_nodes, edge_index=None):
    """
    Sample one random end node for every given start node.

    :param start_node_index: Tensor or ndarray of start nodes.
    :param num_nodes: Number of nodes; end nodes are drawn from [0, num_nodes).
    :param edge_index: If provided, sampled pairs that appear in edge_index, and pairs
        whose two endpoints are equal, are rejected and redrawn. If None, end nodes are
        drawn with no filtering.
    :return: int32 edge index with start nodes in row 0 and sampled end nodes in row 1;
        a Tensor when start_node_index was a Tensor.
    """
    return_tensor = tf.is_tensor(start_node_index)

    start_node_index = convert_union_to_numpy(start_node_index, dtype=np.int32)
    edge_index = convert_union_to_numpy(edge_index, np.int32)
    num_samples = len(start_node_index)

    if edge_index is None:
        end_node_index = np.random.randint(0, num_nodes, [num_samples]).astype(np.int32)
        sampled_edge_index = np.stack([start_node_index, end_node_index], axis=0)
    else:
        known_edges = {tuple(edge) for edge in edge_index.T}
        accepted = []
        for source in start_node_index:
            # Rejection sampling: redraw until the pair is neither a self loop
            # nor one of the known edges.
            target = np.random.randint(0, num_nodes, dtype=np.int32)
            while source == target or (source, target) in known_edges:
                target = np.random.randint(0, num_nodes, dtype=np.int32)
            accepted.append((source, target))
        sampled_edge_index = np.array(accepted, dtype=np.int32).T

    if return_tensor:
        sampled_edge_index = tf.convert_to_tensor(sampled_edge_index)
    return sampled_edge_index
def sample_common_data(data):
    """
    Select the entries of ``data`` indexed by ``sampled_node_index``.

    ``sampled_node_index`` is not a parameter; it is looked up in the enclosing scope.

    :param data: Tensor, array-like, or None.
    :return: None if data is None; a Tensor (via ``tf.gather``) if data was a Tensor;
        otherwise the NumPy-indexed result.
    """
    if data is None:
        return data

    if tf.is_tensor(data):
        gathered = tf.gather(data, sampled_node_index)
        return tf.convert_to_tensor(gathered)

    return convert_union_to_numpy(data)[sampled_node_index]
def sample_new_graph_by_node_index(self, sampled_node_index):
    """
    Create a new graph that contains only the sampled nodes.

    :param sampled_node_index: Tensor/NDArray, shape: [num_sampled_nodes]
    :return: A new graph (BatchGraph if self is a BatchGraph, otherwise Graph) in which
        node data is restricted to sampled_node_index, only edges with both endpoints
        sampled are kept, and edge endpoints are renumbered to positions within
        sampled_node_index.
    """
    is_batch_graph = isinstance(self, BatchGraph)

    def sample_common_data(data):
        # Node-aligned data: pick the rows of the sampled nodes.
        if data is None:
            return data
        if tf.is_tensor(data):
            return tf.convert_to_tensor(tf.gather(data, sampled_node_index))
        return convert_union_to_numpy(data)[sampled_node_index]

    edge_index = self.edge_index
    edge_weight = self.edge_weight

    x = sample_common_data(self.x)
    y = sample_common_data(self.y)
    if is_batch_graph:
        node_graph_index = sample_common_data(self.node_graph_index)
        edge_graph_index = self.edge_graph_index

    if edge_index is not None:
        sampled_node_index = convert_union_to_numpy(sampled_node_index)

        edge_index_is_tensor = tf.is_tensor(edge_index)
        edge_index = convert_union_to_numpy(edge_index)

        # Keep only edges whose two endpoints were both sampled.
        edge_mask = compute_edge_mask_by_node_index(edge_index, sampled_node_index)
        row, col = edge_index[:, edge_mask]

        # Lookup table old node id -> position in sampled_node_index (-1 = not sampled).
        table_size = np.max(sampled_node_index) + 2
        reverse_index = np.full([table_size], -1, dtype=np.int32)
        reverse_index[sampled_node_index] = np.arange(len(sampled_node_index))

        edge_index = np.stack([reverse_index[row], reverse_index[col]], axis=0)
        if edge_index_is_tensor:
            edge_index = tf.convert_to_tensor(edge_index)

        def sample_by_edge_mask(data):
            # Edge-aligned data: keep the entries of the surviving edges.
            if data is None:
                return data
            was_tensor = tf.is_tensor(data)
            selected = convert_union_to_numpy(data)[edge_mask]
            return tf.convert_to_tensor(selected) if was_tensor else selected

        edge_weight = sample_by_edge_mask(edge_weight)
        if is_batch_graph:
            edge_graph_index = sample_by_edge_mask(edge_graph_index)

    if not is_batch_graph:
        return Graph(x=x, edge_index=edge_index, y=y, edge_weight=edge_weight)

    return BatchGraph(x=x, edge_index=edge_index, node_graph_index=node_graph_index,
                      edge_graph_index=edge_graph_index, y=y, edge_weight=edge_weight)
def sample_new_graph_by_node_index(self, sampled_node_index): is_batch_graph = isinstance(self, BatchGraph) x = self.x edge_index = self.edge_index y = self.y edge_weight = self.edge_weight if is_batch_graph: node_graph_index = self.node_graph_index edge_graph_index = self.edge_graph_index def sample_common_data(data): if data is not None: data_is_tensor = tf.is_tensor(data) if data_is_tensor: data = tf.gather(data, sampled_node_index) else: data = convert_union_to_numpy(data) data = data[sampled_node_index] if data_is_tensor: data = tf.convert_to_tensor(data) return data x = sample_common_data(x) y = sample_common_data(y) if is_batch_graph: node_graph_index = sample_common_data(node_graph_index) if edge_index is not None: sampled_node_index = convert_union_to_numpy(sampled_node_index) edge_index_is_tensor = tf.is_tensor(edge_index) edge_index = convert_union_to_numpy(edge_index) edge_mask = compute_edge_mask_by_node_index(edge_index, sampled_node_index) edge_index = edge_index[:, edge_mask] row, col = edge_index max_sampled_node_index = np.max(sampled_node_index) + 1 new_node_range = list(range(len(sampled_node_index))) reverse_index = np.full([max_sampled_node_index + 1], -1, dtype=np.int32) reverse_index[sampled_node_index] = new_node_range row = reverse_index[row] col = reverse_index[col] edge_index = np.stack([row, col], axis=0) if edge_index_is_tensor: edge_index = tf.convert_to_tensor(edge_index) def sample_by_edge_mask(data): if data is not None: data_is_tensor = tf.is_tensor(data) data = convert_union_to_numpy(data) data = data[edge_mask] if data_is_tensor: data = tf.convert_to_tensor(data) return data edge_weight = sample_by_edge_mask(edge_weight) if is_batch_graph: edge_graph_index = sample_by_edge_mask(edge_graph_index) if is_batch_graph: return BatchGraph(x=x, edge_index=edge_index, node_graph_index=node_graph_index, edge_graph_index=edge_graph_index, y=y, edge_weight=edge_weight)