def prepare_test_adj(self, input_graph, ground_truth_adj):
    """Build the held-out test adjacency: ground-truth edges minus training edges.

    Crops ``ground_truth_adj`` to ``input_graph``'s shape, removes every edge
    already present in ``input_graph`` (in both directions, since the graph is
    treated as undirected), and returns the upper triangle.

    Args:
        input_graph: scipy sparse matrix of edges seen during training.
        ground_truth_adj: scipy sparse ground-truth adjacency, at least as
            large as ``input_graph``.

    Returns:
        Upper-triangular scipy sparse matrix of test-only edge weights
        (``sps.triu`` of a CSR matrix, as in the original implementation).
    """
    n_rows, n_cols = input_graph.shape
    # LIL format supports efficient element-wise assignment, so we can zero
    # out training edges without densifying the (potentially large) matrix.
    gt = sps.lil_matrix(ground_truth_adj[:n_rows, :n_cols], dtype=float)
    seen = input_graph.tocoo()
    for r, c in zip(seen.row, seen.col):
        # Remove the training edge symmetrically.
        gt[r, c] = 0.
        gt[c, r] = 0.
    return sps.triu(sps.csr_matrix(gt, dtype=float))
def construct_dataset(self, graph, window_size, negative_sample):
    """Build one TensorFlow training window of graph snapshots plus a test set.

    Loads the ``window_size`` snapshots ending at index ``graph``
    (NOTE(review): the torch sibling below uses ``graph - window_size``, one
    snapshot more — confirm which window length is intended), drops weak
    edges, and for every snapshot produces a normalized adjacency, identity
    features, and a label/index pair mixing all positive edges with
    ``negative_sample`` random non-edges per positive edge.  Snapshot
    ``graph + 1`` minus all training edges becomes the test set.

    Args:
        graph: index of the last training snapshot.
        window_size: number of snapshots in the training window.
        negative_sample: negative edges drawn per positive edge.

    Returns:
        (train_adj_norm, train_adj_labels, train_adj_inds, features,
         test_adj, test_adj_ind) — per-snapshot tf.SparseTensor / tf.Tensor
        lists, plus test edge values and their [row, col] indices.
    """
    start_graph = max(0, graph - window_size + 1)
    # First pass: discover the largest node id across the window so every
    # snapshot can be read with a common padded shape.
    max_id = 0
    for i in range(start_graph, graph + 1):
        adj = self.graph_loader.read_adjacency(i, max_id)
        max_id = adj.shape[0] - 1
    train_adj_sps = []
    # Union (0/1 flags) of all edges seen anywhere in the window; used to
    # exclude training edges from negative sampling and from the test set.
    total_train_edges = np.zeros((max_id + 1, max_id + 1))
    for i in range(start_graph, graph + 1):
        adj = self.graph_loader.read_adjacency(i, max_id)
        tmp_train_adj_dense = adj.todense()
        # Threshold: edge weights <= 0.2 are treated as non-edges.
        tmp_train_adj_dense = np.where(tmp_train_adj_dense > 0.2,
                                       tmp_train_adj_dense, 0)
        tmp_train_adj_sparse = sps.csr_matrix(tmp_train_adj_dense)
        coords, values, shape = sparse_to_tuple(tmp_train_adj_sparse)
        for coord in coords:
            total_train_edges[coord[0], coord[1]] = 1
        train_adj_sps.append(tmp_train_adj_sparse)
    # Construct a full matrix with ones to generate negative sample tuples
    # (candidate non-edges = everything except training edges and self-loops).
    # NOTE(review): dense ndarray minus sparse identity yields np.matrix —
    # assumes sparse_to_tuple accepts a dense matrix; confirm.
    train_ns = np.ones_like(
        total_train_edges) - total_train_edges - sps.identity(
            total_train_edges.shape[0])
    ns_coord, ns_values, ns_shape = sparse_to_tuple(train_ns)
    train_adj_norm = []
    features = []
    train_adj_labels = []
    train_adj_inds = []
    # Identity (one-hot) node features, shared by all snapshots.
    features_tuples = sparse_to_tuple(
        sps.identity(adj.shape[0], dtype=np.float32, format='coo'))
    for i, adj in enumerate(train_adj_sps):
        adj_norm_coord, adj_norm_values, adj_norm_shape = sparse_to_tuple(
            self.normalize(adj))
        train_adj_norm.append(
            tf.SparseTensor(
                indices=adj_norm_coord,
                values=np.array(adj_norm_values, dtype='float32'),
                dense_shape=[adj_norm_shape[0], adj_norm_shape[1]]))
        features.append(
            tf.SparseTensor(
                indices=features_tuples[0],
                values=features_tuples[1],
                dense_shape=[features_tuples[2][0], features_tuples[2][1]]))
        tmp_train_adj_dense = adj.todense()
        train_coord, train_values, train_shape = sparse_to_tuple(adj)
        # 0/1 mask of the entries (positives + sampled negatives) whose
        # values become this snapshot's training labels.
        tmp_train_adj_ind = np.zeros_like(tmp_train_adj_dense)
        sequence = [i for i in range(len(ns_coord))]
        # negative_sample negatives per positive edge, without replacement.
        random_coords = set(
            random.sample(sequence, negative_sample * len(train_coord)))
        for coord in train_coord:
            tmp_train_adj_ind[coord[0], coord[1]] = 1
        for coord in random_coords:
            tmp_train_adj_ind[ns_coord[coord][0], ns_coord[coord][1]] = 1
        nnz_ind = np.nonzero(tmp_train_adj_ind)
        tmp_train_label_val = tmp_train_adj_dense[nnz_ind]
        train_adj_label_tensor = tf.convert_to_tensor(tmp_train_label_val,
                                                      dtype=tf.float32)
        train_adj_labels.append(train_adj_label_tensor)
        ind_list = []
        for i in range(len(nnz_ind[0])):
            ind_list.append([nnz_ind[0][i], nnz_ind[1][i]])
        train_adj_inds.append(
            tf.convert_to_tensor(ind_list, dtype=tf.int32))
    # Test set: snapshot graph+1 with every edge already seen during training
    # removed (upper triangle only — see prepare_test_adj).
    test_adj_dense = self.prepare_test_adj(
        sps.csr_matrix(total_train_edges),
        self.graph_loader.read_adjacency(graph + 1, max_id)).todense()
    test_adj_high = np.where(test_adj_dense > 0.2, test_adj_dense, 0)
    test_adj_ind = np.where(test_adj_high > 0., 1, 0)
    nnz_ind = np.nonzero(test_adj_ind)
    ind_list = []
    for i in range(len(nnz_ind[0])):
        ind_list.append([nnz_ind[0][i], nnz_ind[1][i]])
    test_adj = tf.convert_to_tensor(test_adj_high[nnz_ind], dtype=tf.float32)
    test_adj_ind = tf.convert_to_tensor(ind_list, dtype=tf.int32)
    return train_adj_norm, train_adj_labels, train_adj_inds, features, test_adj, test_adj_ind
def construct_dataset(self, graph, window_size, negative_sample):
    """Build one PyTorch training window of graph snapshots plus a test set.

    Loads snapshots ``max(0, graph - window_size) .. graph`` (NOTE(review):
    the TF variant uses ``graph - window_size + 1``, one snapshot fewer —
    confirm which window length is intended), drops weak edges, and produces
    per-snapshot normalized adjacencies and identity features.  Only the
    LAST snapshot gets a label/index pair mixing its positive edges with
    ``negative_sample`` random non-edges per positive edge.  Snapshot
    ``graph + 1`` minus all training edges becomes the test set.

    Args:
        graph: index of the last training snapshot.
        window_size: controls how many earlier snapshots are included.
        negative_sample: negative edges drawn per positive edge.

    Returns:
        (train_adj_norm, train_adj_label, train_adj_ind, features,
         test_adj, test_adj_ind) — lists of sparse torch tensors, the last
        snapshot's label vector and [row, col] index tensors, and the test
        edge values / indices.
    """
    start_graph = max(0, graph - window_size)
    # First pass: find the largest node id so every snapshot can be read
    # with a common padded shape.
    max_id = 0
    for i in range(start_graph, graph + 1):
        adj = self.graph_loader.read_adjacency(i, max_id)
        max_id = adj.shape[0] - 1
    train_adj = []
    # Union (0/1 flags) of all edges seen in the window; excluded from
    # negative sampling and from the test set.
    total_train_edges = np.zeros((max_id + 1, max_id + 1))
    for i in range(start_graph, graph + 1):
        adj = self.graph_loader.read_adjacency(i, max_id)
        train_adj_dense = adj.todense()
        # Threshold: edge weights <= 0.2 count as non-edges.
        train_adj_dense = np.where(train_adj_dense > 0.2, train_adj_dense, 0)
        train_adj_sparse = sps.csr_matrix(train_adj_dense)
        coords, values, shape = sparse_to_tuple(train_adj_sparse)
        for coord in coords:
            total_train_edges[coord[0], coord[1]] = 1
        train_adj.append(train_adj_sparse)
    train_adj_norm = []
    features = []
    for i, adj in enumerate(train_adj):
        train_adj_norm.append(
            sparse_mx_to_torch_sparse_tensor(
                self.normalize(adj)).to(device=self.device))
        # Identity (one-hot) node features.
        features.append(
            torch.tensor(
                sps.identity(adj.shape[0], dtype=np.float32,
                             format='coo').todense(),
                dtype=torch.float32).to_sparse().to(device=self.device))
        # Generate the train_adj_label with negative sampling — only for the
        # final snapshot of the window.
        if i == len(train_adj) - 1:
            # Construct a full matrix with ones to generate negative sample
            # tuples (candidate non-edges = everything except training edges
            # and self-loops).  NOTE(review): dense minus sparse identity
            # yields np.matrix — assumes sparse_to_tuple handles it; confirm.
            train_ns = np.ones_like(
                adj.todense()) - total_train_edges - sps.identity(
                    total_train_edges.shape[0])
            ns_coord, ns_values, ns_shape = sparse_to_tuple(train_ns)
            train_coord, train_values, train_shape = sparse_to_tuple(adj)
            # 0/1 mask of the label entries (positives + sampled negatives).
            train_label_ind = np.zeros_like(adj.todense())
            for coord in train_coord:
                train_label_ind[coord[0], coord[1]] = 1
            # negative_sample negatives per positive, without replacement.
            # (range() is a sequence, so no intermediate list is needed.)
            random_coords = set(
                random.sample(range(len(ns_coord)),
                              negative_sample * len(train_coord)))
            for coord in random_coords:
                train_label_ind[ns_coord[coord][0], ns_coord[coord][1]] = 1
            train_adj_dense = adj.todense()
            nnz_ind = np.nonzero(train_label_ind)
            train_label_val = train_adj_dense[nnz_ind][0]
            train_adj_label = torch.reshape(
                torch.tensor(train_label_val,
                             dtype=torch.float32).to(device=self.device),
                (-1, ))
            train_adj_ind = [
                torch.tensor(nnz_ind[0],
                             requires_grad=False).to(device=self.device),
                torch.tensor(nnz_ind[1],
                             requires_grad=False).to(device=self.device)
            ]
    # FIX: prepare_test_adj operates on a sparse matrix (the TF variant also
    # wraps total_train_edges in csr before the call); passing the raw
    # ndarray breaks the sparse-only operations inside prepare_test_adj.
    test_adj_dense = self.prepare_test_adj(
        sps.csr_matrix(total_train_edges),
        self.graph_loader.read_adjacency(graph + 1, max_id)).todense()
    test_adj_high = np.where(test_adj_dense > 0.2, test_adj_dense, 0)
    test_adj_ind = np.where(test_adj_high > 0., 1, 0)
    nnz_ind = np.nonzero(test_adj_ind)
    test_adj = torch.tensor(test_adj_high[nnz_ind],
                            dtype=torch.float32,
                            requires_grad=False).to(device=self.device)
    test_adj_ind = [
        torch.tensor(nnz_ind[0],
                     requires_grad=False).to(device=self.device),
        torch.tensor(nnz_ind[1],
                     requires_grad=False).to(device=self.device)
    ]
    return train_adj_norm, train_adj_label, train_adj_ind, features, test_adj, test_adj_ind