def load_node_pair(adj, file_path, n_hop=10):
    from os.path import join as pjoin
    if file_path and os.path.exists(pjoin(file_path, 'pos_node_edge.pt')):
        pos_neighbor = torch.load(pjoin(file_path, 'pos_node_edge.pt'))
        neg_neighbor = torch.load(pjoin(file_path, 'neg_node_edge.pt'))
    else:
        adj = adj.to('cuda:8')
        adj = adj > 0
        # nodes sharing at least one neighbor are co-occurrence candidates
        neighbor = torch.mm(adj.float(), adj.float().t()) > 0
        size = neighbor.size(1)
        pos_neighbor = torch.zeros_like(neighbor)
        # keep positives whose degree falls between the 10th and 90th percentiles
        sort_weight, indices = torch.sum(neighbor.float(), dim=0).sort()
        min_count, max_count = int(size * 0.1), int(size * 0.9)
        min_pos, max_pos = sort_weight[min_count], sort_weight[max_count]
        min_count = (sort_weight < max(min_pos, 2)).sum()
        max_count = (sort_weight <= max_pos).sum()
        pos_select = indices[min_count:max_count]
        pos_neighbor[:, pos_select] = neighbor[:, pos_select]
        # negatives: node pairs still unreachable after n_hop expansion steps
        neg_neighbor = neighbor
        for i in range(1, n_hop):
            neg_neighbor = torch.mm(neg_neighbor.float(), neighbor.float()) > 0
        neg_neighbor = ~neg_neighbor
        pos_neighbor, _ = dense_to_sparse(pos_neighbor)
        pos_neighbor = remove_self_loops(pos_neighbor)[0]
        neg_neighbor, _ = dense_to_sparse(neg_neighbor)
        pos_neighbor = pos_neighbor.cpu()
        neg_neighbor = neg_neighbor.cpu()
        if file_path:
            torch.save(pos_neighbor, pjoin(file_path, 'pos_node_edge.pt'))
            torch.save(neg_neighbor, pjoin(file_path, 'neg_node_edge.pt'))
    return pos_neighbor, neg_neighbor
def load_pattern_pair(adj, file_path, n_hop=10):
    if file_path and os.path.exists(file_path + '/neg_edge.pt'):
        pos_mask = torch.load(file_path + '/pos_edge.pt')
        neg_mask = torch.load(file_path + '/neg_edge.pt')
    else:
        adj = adj.to('cuda:8')
        adj = adj > 0
        size = adj.size(1)
        pos_mask = torch.zeros_like(adj)
        sort_weight, indices = torch.sum(adj.float(), dim=0).sort()
        min_count, max_count = int(size * 0.1), int(size * 0.9)
        min_pos, max_pos = sort_weight[min_count], sort_weight[max_count]
        min_count = (sort_weight < max(min_pos, 2)).sum()
        max_count = (sort_weight <= max_pos).sum()
        pos_select = indices[min_count:max_count]
        pos_mask[:, pos_select] = adj[:, pos_select]
        neg_mask = adj
        adj = adj.float()
        for i in range(1, n_hop):
            neg_mask = torch.mm(neg_mask.float(), adj.t()) > 0
            neg_mask = torch.mm(neg_mask.float(), adj) > 0
        neg_mask = ~neg_mask
        pos_mask, _ = dense_to_sparse(pos_mask)
        neg_mask, _ = dense_to_sparse(neg_mask)
        pos_mask = pos_mask.cpu()
        neg_mask = neg_mask.cpu()
        if file_path:
            torch.save(pos_mask, file_path + '/pos_edge.pt')
            torch.save(neg_mask, file_path + '/neg_edge.pt')
    return pos_mask, neg_mask
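# Hedged usage sketch for the two pair-mining helpers above: positive pairs are
# co-neighbors whose degree sits in the middle of the distribution, negatives
# are node pairs not reachable within n_hop hops. Pass file_path=None to skip
# the on-disk cache; note both helpers move `adj` to the hard-coded 'cuda:8',
# which must exist (or be edited) for this toy example to run.
toy_adj = torch.tensor([[1., 1., 0.],
                        [0., 1., 1.],
                        [1., 0., 0.]])
pos_edge, neg_edge = load_node_pair(toy_adj, file_path=None, n_hop=3)
print(pos_edge.size(), neg_edge.size())  # both are [2, num_pairs] edge indices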
def forward(self, data, mask):
    # x0, edge_index0, edge_weight0 = data.x, data.edge_index, data.edge_attr
    edge_index0, _ = dropout_adj(
        data.edge_index, p=self.initial_dropout_adj,
        force_undirected=True, num_nodes=data.num_nodes,
        training=self.training)
    x0 = F.dropout(data.x, p=self.initial_dropout_nodes, training=self.training)

    # level 0 conv
    x0_ = self.gcn0_in(x0, edge_index0)

    # pooled 1
    s1 = F.relu(self.conv_pool1(x0_, edge_index0))
    x1, adj1, l1, e1 = dense_diff_pool(x0_, data.adj, s1, mask)
    x1 = torch.squeeze(x1)

    # get edge index level 1
    adj1_sparse_tuple = dense_to_sparse(torch.squeeze(adj1))
    edge_index1 = adj1_sparse_tuple[0]
    edge_weight1 = adj1_sparse_tuple[1]

    # level 1 conv
    x1_ = self.gcn1_in(x1, edge_index1, edge_weight1)

    # pooled 2
    s2 = self.conv_pool2(x1_, edge_index1, edge_weight1)
    s2 = F.relu(s2)
    x2, adj2, l2, e2 = dense_diff_pool(x1_, adj1, s2)
    x2 = torch.squeeze(x2)

    # get edge index level 2
    adj2_sparse_tuple = dense_to_sparse(torch.squeeze(adj2))
    edge_index2 = adj2_sparse_tuple[0]
    edge_weight2 = adj2_sparse_tuple[1]

    # level 2 conv
    x2_out = self.gcn2_in(x2, edge_index2, edge_weight2)
    x2_out_up = torch.matmul(s2, x2_out)  # unpool level 2

    # output level 1
    x1_out = self.gcn1_out(torch.cat((x1_, x2_out_up), 1), edge_index1, edge_weight1)
    x1_out_up = torch.matmul(s1, x1_out)  # unpool level 1

    # output level 0
    x0_out = self.gcn0_out(torch.cat((x0_, x1_out_up), 1), edge_index0)

    edge_loss = l1 + e1 + l2 + e2
    edges = {'e1': {'e': edge_index1, 'w': edge_weight1},
             'e2': {'e': edge_index2, 'w': edge_weight2}}
    output_dict = {'prediction': F.log_softmax(x0_out, dim=1),
                   's01': s1,
                   'edge_loss': edge_loss,
                   'adj1': adj1,
                   'edges': edges}
    return output_dict
def get_k_hop_adjacency(adj, k, file_path, bi_graph=False):
    '''Compute the adjacency of all neighbors within k hops (k included).'''
    file_name = os.path.join(file_path, '%d_hop_neighbor.pt' % k)
    if file_path and os.path.exists(file_name):
        output, depth = torch.load(file_name)
    else:
        if k < 2:
            output, depth = dense_to_sparse(adj.long().cpu())
            return output, depth
        adj = adj.bool()
        neighbor = adj.float()
        output = adj.long()
        k_neighbor = neighbor
        for i in range(2, k + 1):
            # find the long-tail nodes
            '''
            degrees, indices = k_neighbor.sum(dim=1).sort()
            long_tail_degree = min(2, degrees[int(0.9 * degrees.size(0))])
            long_tail_indices = indices[degrees <= long_tail_degree]
            '''
            if bi_graph:
                k_neighbor = torch.mm(k_neighbor, neighbor.t()).bool()
                k_neighbor = torch.mm(k_neighbor.float(), neighbor)
            else:
                k_neighbor = torch.mm(k_neighbor, neighbor.t())
            # only retain the k-hop neighborhood for long-tail nodes
            '''
            long_tail = torch.zeros_like(k_neighbor)
            long_tail[long_tail_indices, :] = k_neighbor[long_tail_indices, :]
            '''
            # keep the number of augmented links below the number of existing links
            '''
            new_mask = long_tail.bool() & ~output.bool()
            counts = long_tail[new_mask]
            counts = counts.sort()[0]
            existing_count = output.bool().sum().long()
            add_count = min(counts.size(0), int(0.5 * existing_count.item()))
            min_count = max(1, counts[-add_count])
            '''
            # add augmented links with their depth
            k_adj = (k_neighbor > 1) & ~output.bool()
            output.masked_fill_(k_adj, i)
            k_neighbor = k_neighbor.bool().float()
        output = output.cpu()
        output, depth = dense_to_sparse(output)
        torch.save((output, depth), file_name)
    return output, depth
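# Hedged usage sketch for get_k_hop_adjacency on a 4-cycle: the 2-hop pass
# links node pairs connected by more than one 2-step path (the two cycle
# diagonals, and each node to itself) and records depth 2 for them, while the
# original edges keep depth 1. `file_path` must be an existing directory since
# the result is cached there.
import tempfile

cycle = torch.tensor([[0, 1, 0, 1],
                      [1, 0, 1, 0],
                      [0, 1, 0, 1],
                      [1, 0, 1, 0]], dtype=torch.float)
with tempfile.TemporaryDirectory() as tmp:
    edge_index, depth = get_k_hop_adjacency(cycle, k=2, file_path=tmp)
print(depth.tolist())  # mixture of 1s (original edges) and 2s (augmented links)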
def forward(self, x, edge_index):
    total_loss1 = 0
    total_loss2 = 0
    edge_attr = None
    for i in range(len(self.graph_convs)):
        if i < len(self.graph_convs) - 1:
            if self.mode == 'mincut':
                s = self.assignment_ws[2 * i + 1](F.relu(
                    self.assignment_ws[2 * i](x)))
            else:
                s = self.pool_convs[i](x, edge_index, edge_attr)
        x = F.relu(self.graph_convs[i](x, edge_index, edge_attr)
                   + self.graph_skips[i](x))
        if i < len(self.graph_convs) - 1:
            # to_dense_adj takes max_num_nodes as a keyword argument; the
            # original passed x.size(0) positionally into the batch slot
            x, adj, loss1, loss2 = self.pooling_fn(
                x,
                to_dense_adj(edge_index, max_num_nodes=x.size(0), edge_attr=edge_attr),
                s)
            edge_index, edge_attr = dense_to_sparse(adj.squeeze(0))
            x = x.squeeze(0)
            total_loss1 += loss1
            total_loss2 += loss2
    x_avg = torch.mean(x, dim=0).unsqueeze(0)
    out = self.classifier(x_avg)
    return out, total_loss1, total_loss2
def process(self):
    with open(os.path.join(self.raw_dir, 'abide_raw.pkl'), 'rb') as f:
        dataset = pickle.load(f)
    dataset_list = []
    sub_list = np.loadtxt(self.sub_list, dtype=str, delimiter='\n')
    for subj in sub_list:
        data = dataset[subj]
        if self.target_name is not None:
            data.y = data.y[self.target_name]
        if self.feature_mask is not None:
            data.features = [data.features[i] for i in self.feature_mask]
        edge_index, _ = dense_to_sparse(
            torch.ones(data.features[0].shape, dtype=torch.float32))
        edge_attr = []
        for feature in data.features:
            edge_attr.append(feature[edge_index[0], edge_index[1]])
        data.edge_index = edge_index
        data.edge_attr = torch.stack(edge_attr, dim=-1)
        data.features = torch.stack(data.features, dim=-1)
        data.features = torch.unsqueeze(data.features, dim=0)
        dataset_list.append(data)
    self.data, self.slices = self.collate(dataset_list)
    torch.save((self.data, self.slices), self.processed_paths[0])
    print('Processed dataset saved as', self.processed_paths[0])
def get_torch_data(df, threshold=3):
    atoms = df['atom'].values
    energy = np.array([-1 * df['Energy(Ry)'].values[0]])
    atoms = np.expand_dims(atoms, axis=1)
    one_hot_encoding = OneHotEncoder(sparse=False).fit_transform(atoms)
    coords = df[['x(angstrom)', 'y(angstrom)', 'z(angstrom)']].values
    edge_index = None
    edge_attr = None
    while True:
        dist = distance.cdist(coords, coords)
        dist[dist > threshold] = 0
        dist = torch.from_numpy(dist)
        edge_index, edge_attr = data_utils.dense_to_sparse(dist)
        edge_attr = edge_attr.unsqueeze(dim=1).type(torch.FloatTensor)
        # dense_to_sparse already returns a long edge_index; keep the dtype explicit
        edge_index = edge_index.long()
        # grow the distance cutoff until no atom is isolated
        # (num_nodes=13 is hard-coded in the original)
        if data_utils.contains_isolated_nodes(edge_index, num_nodes=13):
            threshold += 0.5
        else:
            break
    x = torch.from_numpy(one_hot_encoding).type(torch.FloatTensor)
    y = torch.from_numpy(energy).type(torch.FloatTensor)
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    return data
def do_trans(data):
    node_num, _ = data.x.size()
    if add_self_loop:
        sl = torch.tensor([[n, n] for n in range(node_num)]).t()
        edge_index = torch.cat((data.edge_index, sl), dim=1)
    else:
        edge_index = data.edge_index.detach().clone()

    orig_adj = to_dense_adj(edge_index)[0]
    orig_adj = torch.where(orig_adj > 1, torch.ones_like(orig_adj), orig_adj)
    d = torch.diag(torch.sum(orig_adj, 1))

    if mode == 'ppr':
        # Personalized PageRank diffusion: alpha * (I - (1 - alpha) * D^-1/2 A D^-1/2)^-1
        dinv = torch.inverse(torch.sqrt(d))
        at = torch.matmul(torch.matmul(dinv, orig_adj), dinv)
        diff_adj = alpha * torch.inverse(torch.eye(orig_adj.shape[0]) - (1 - alpha) * at)
    elif mode == 'heat':
        # heat kernel diffusion: exp(t * (A D^-1 - I)), applied element-wise
        diff_adj = torch.exp(t * (torch.matmul(orig_adj, torch.inverse(d)) - 1))
    else:
        raise Exception("Must choose one diffusion instantiation mode from 'ppr' and 'heat'!")

    edge_ind, edge_attr = dense_to_sparse(diff_adj)
    return Data(x=data.x, edge_index=edge_ind, edge_attr=edge_attr)
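# Hedged illustration of the 'ppr' branch above on a toy triangle graph.
# `mode`, `alpha`, `t`, and `add_self_loop` are closure variables in the
# original; here the PPR formula is spelled out directly. The result is dense
# (every pair gets a diffusion score), which is why downstream code typically
# sparsifies it by top-k or a threshold before calling dense_to_sparse.
import torch

A = torch.tensor([[0., 1., 1.],
                  [1., 0., 1.],
                  [1., 1., 0.]])
alpha = 0.2
dinv = torch.inverse(torch.sqrt(torch.diag(A.sum(1))))
at = dinv @ A @ dinv
ppr = alpha * torch.inverse(torch.eye(3) - (1 - alpha) * at)
print(ppr)  # dense diffusion matrix over all node pairs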
def load_vgrnn(dataset):
    datasets = ['fb', 'dblp', 'enron10']
    assert dataset in datasets, \
        "Dataset %s not in allowed list: %s" % (dataset, str(datasets))

    adj = os.path.join('/mnt/raid0_24TB/isaiah/code/TGCN/src/data',
                       dataset, 'adj_orig_dense_list.pickle')
    with open(adj, 'rb') as f:
        fbytes = f.read()
    dense_adj_list = pickle.loads(fbytes, encoding='bytes')

    num_nodes = dense_adj_list[0].size(0)
    eis = []
    splits = []
    for adj in dense_adj_list:
        # Remove self loops
        for i in range(adj.size(0)):
            adj[i, i] = 0
        ei = dense_to_sparse(adj)[0]
        ei = to_undirected(ei)
        eis.append(ei)
        splits.append(edge_tvt_split(ei))

    data = TData(x=torch.eye(num_nodes), eis=eis, masks=splits,
                 num_nodes=num_nodes, dynamic_feats=False, T=len(eis))
    return data
def process(self):
    with open(os.path.join(self.raw_dir, 'pnc_features_raw.pkl'), 'rb') as f:
        labels, path_data, filelist, _, min_ts_length = pickle.load(f)
    if self.feature_mask is not None:
        if np.isscalar(self.feature_mask):
            self.feature_mask = [
                i for i in range(len(filelist))
                if self.feature_mask == int(filelist[i].split('_')[1])
            ]
        filelist = [filelist[i] for i in self.feature_mask]
    ts_index = [i for i in range(len(filelist)) if 'timeseries' in filelist[i]]
    sc_index = [i for i in range(len(filelist)) if 'connmat' in filelist[i]]
    dataset_list = []
    sub_list = np.loadtxt(self.sub_list, dtype=str, delimiter='\n')
    epsilon = 1e-5
    for subj in sub_list:
        print('processing', subj, '...')
        features = []
        for filename in filelist:
            filepath = os.path.join(path_data, subj, filename)
            if not os.path.exists(filepath):
                raise ValueError('invalid path ' + filepath)
            matrix = np.loadtxt(filepath)
            features.append(matrix)
        data = Data(x=None, y=None)
        data.y = {'ScanAgeYears': labels[0][subj], 'Sex': labels[1][subj]}
        data.subj = int(subj.split('_')[0])
        if self.target_name is not None:
            data.y = data.y[self.target_name]
        ts = []
        for i in ts_index:
            ts.append(features[i][:min_ts_length, :])
        data.fconn = torch.tensor(
            ConnectivityMeasure(kind='correlation').fit_transform(ts),
            dtype=torch.float32)
        sc = []
        for i in sc_index:
            sc_matrix = features[i] + epsilon  # avoid division by zero
            sc.append(sc_matrix / np.sum(sc_matrix, axis=0))  # column-normalize
        data.sconn = torch.tensor(sc, dtype=torch.float32)
        data.x = data.fconn[0]
        # fully connected edge_index; edge attributes come from the
        # normalized structural connectivity matrix
        data.edge_index, _ = dense_to_sparse(
            torch.ones(data.sconn[0].shape, dtype=torch.float32))
        data.edge_attr = data.sconn[0].clone().detach()[data.edge_index[0],
                                                        data.edge_index[1]]
        dataset_list.append(data)
    self.data, self.slices = self.collate(dataset_list)
    torch.save((self.data, self.slices), self.processed_paths[0])
    print('Processed dataset saved as', self.processed_paths[0])
def get_sample_adj(self, extend_adj, sub_node_num, sample_id):
    sample1 = extend_adj[sample_id, :]
    # print(sub_node_num)
    sample_adj_mid = torch.cat((extend_adj[:sub_node_num, :], sample1), 0)
    sample2 = sample_adj_mid[:, sample_id]
    sample_adj = torch.cat((sample_adj_mid[:, :sub_node_num], sample2), 1)
    sample_adj, sample_adj_weight = dense_to_sparse(sample_adj)
    return sample_adj, sample_adj_weight
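# Hedged usage sketch for get_sample_adj: extend_adj lists the current
# sub-graph nodes first, followed by candidate nodes; the method appends the
# rows/columns of the sampled candidates to the sub-graph block before
# converting to sparse form. The values below are a hypothetical toy example
# (self is unused, so None suffices when calling the method unbound).
extend_adj = torch.tensor([[0., 1., 0., 1.],
                           [1., 0., 1., 0.],
                           [0., 1., 0., 1.],
                           [1., 0., 1., 0.]])
sub_node_num = 2
sample_id = torch.tensor([3])  # sample candidate node 3
edge_index, edge_weight = get_sample_adj(None, extend_adj, sub_node_num, sample_id)
print(edge_index)  # 3-node graph: the 2 sub-graph nodes plus the sampled one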
def decode(self, z, edge_index=None):
    if edge_index is None:
        # inner product decoder
        adj = torch.relu(torch.matmul(z, z.t()))
        # take nonzero elements
        edge_index, _ = dense_to_sparse(adj)
    x, edge_index = self.decoder(z, edge_index)
    return x, edge_index
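# Hedged sketch of what the inner-product branch above produces, assuming
# torch and dense_to_sparse are imported as elsewhere in this file:
# relu(z z^T) zeroes out negatively correlated embedding pairs, so
# dense_to_sparse keeps an edge only where the dot product is positive.
z = torch.tensor([[1., 0.], [1., 0.1], [-1., 0.]])
adj = torch.relu(z @ z.t())
edge_index, _ = dense_to_sparse(adj)
print(edge_index)  # edges (self-loops included) only among positively aligned nodes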
def read_Genetic(self, root):
    BioGrid = pd.read_csv(
        root + '/BIOGRID-ORGANISM-Escherichia_coli_K12_W3110-3.5.180.tab2.txt',
        delimiter='\t')
    BioGrid['Official Symbol Interactor A'] = BioGrid[
        'Official Symbol Interactor A'].str.lower()
    BioGrid['Official Symbol Interactor B'] = BioGrid[
        'Official Symbol Interactor B'].str.lower()
    BioGrid = BioGrid.rename(
        columns={
            "Official Symbol Interactor A": "Gene_A",
            "Official Symbol Interactor B": "Gene_B"
        })

    Ecoli = pd.read_table(root + '/avg_E_coli_v4_Build_6_exps466probes4297.tab')
    Ecoli['E_coli_v4_Build_6:genes'] = Ecoli[
        'E_coli_v4_Build_6:genes'].str.split('_').str[0]
    Ecoli = Ecoli.apply(lambda x: x.astype(str).str.lower())
    Ecoli = Ecoli.rename(columns={"E_coli_v4_Build_6:genes": "Genes"})

    Filt_BioGrid_indices = BioGrid.Gene_A.isin(Ecoli.Genes) & BioGrid.Gene_B.isin(
        Ecoli.Genes)  # & BioGrid['Experimental System Name'] != 'Biochemical Activity'
    Filt_BioGrid = BioGrid[Filt_BioGrid_indices]
    Filt_BioGrid_Genetic = Filt_BioGrid[
        Filt_BioGrid['Experimental System Type'] == 'genetic']
    Filt_BioGrid_Genetic_Genes = np.union1d(
        Filt_BioGrid_Genetic.Gene_A.unique(),
        Filt_BioGrid_Genetic.Gene_B.unique())
    Ecoli_Filt_Genetic = Ecoli[Ecoli.Genes.isin(Filt_BioGrid_Genetic_Genes)]

    Adj = np.zeros(
        [len(Filt_BioGrid_Genetic_Genes), len(Filt_BioGrid_Genetic_Genes)])
    features = np.zeros(
        [len(Filt_BioGrid_Genetic_Genes), Ecoli_Filt_Genetic.shape[1] - 1])
    for i in range(len(Filt_BioGrid_Genetic)):
        # columns 7 and 8 hold the Gene_A / Gene_B symbols
        row = np.where(Filt_BioGrid_Genetic_Genes ==
                       Filt_BioGrid_Genetic.iloc[i][7])[0][0]
        col = np.where(Filt_BioGrid_Genetic_Genes ==
                       Filt_BioGrid_Genetic.iloc[i][8])[0][0]
        Adj[row][col] = 1
        Adj[col][row] = 1
    for i in range(len(Filt_BioGrid_Genetic_Genes)):
        features[i] = Ecoli[Ecoli.Genes ==
                            Filt_BioGrid_Genetic_Genes[i]].iloc[:, 1:]
    return dense_to_sparse(torch.tensor(Adj))[0], torch.tensor(
        features, dtype=torch.float32)
def forward(self, x, powers_adj):
    # powers_adj: dense powers of the adjacency matrix
    # returns: a list of (edge_index, edge_weight) tuples, one per power
    edge_index_powers = [dense_to_sparse(adj)[0] for adj in powers_adj]
    edge_weight_powers = [
        self._learn_adjacencies(x, edge_index, i)
        for i, edge_index in enumerate(edge_index_powers)
    ]
    return [(ei, w) for ei, w in zip(edge_index_powers, edge_weight_powers)]
def test_dense_to_sparse():
    adj = torch.Tensor([
        [3, 1],
        [2, 0],
    ])
    edge_index, edge_attr = dense_to_sparse(adj)
    assert edge_index.tolist() == [[0, 0, 1], [0, 1, 0]]
    assert edge_attr.tolist() == [3, 1, 2]

    adj = torch.Tensor([[
        [3, 1],
        [2, 0],
    ], [
        [0, 1],
        [0, 2],
    ]])
    edge_index, edge_attr = dense_to_sparse(adj)
    assert edge_index.tolist() == [[0, 0, 1, 2, 3], [0, 1, 0, 3, 3]]
    assert edge_attr.tolist() == [3, 1, 2, 1, 2]
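# Complementary hedged check: for a matrix whose nonzero entries are the edge
# weights, dense_to_sparse composes with to_dense_adj back to the original.
from torch_geometric.utils import to_dense_adj

adj = torch.Tensor([[3, 1], [2, 0]])
ei, ea = dense_to_sparse(adj)
assert to_dense_adj(ei, edge_attr=ea).squeeze(0).tolist() == adj.tolist()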
def read_ba2motif_data(folder: str, prefix):
    with open(os.path.join(folder, f"{prefix}.pkl"), 'rb') as f:
        dense_edges, node_features, graph_labels = pickle.load(f)

    data_list = []
    for graph_idx in range(dense_edges.shape[0]):
        data_list.append(Data(
            x=torch.from_numpy(node_features[graph_idx]).float(),
            edge_index=dense_to_sparse(torch.from_numpy(dense_edges[graph_idx]))[0],
            y=torch.from_numpy(np.where(graph_labels[graph_idx])[0])))
    return data_list
def forward(self, x, A_q, A_h, A):
    """
    :param x: Input data of shape (batch_size, num_timesteps, num_nodes)
    :param A_q: The forward random walk matrix (num_nodes, num_nodes)
    :param A_h: The backward random walk matrix (num_nodes, num_nodes)
    :param A: Dense adjacency matrix (num_nodes, num_nodes)
    :return: Reconstructed x of shape (batch_size, num_timesteps, num_nodes)
    """
    # x = (4, 24, 62)
    # TODO: add x_dim with weather
    x = x.permute(0, 2, 1)  # -> (batch_size, num_nodes, num_timesteps)
    batch_size = x.size(0)
    num_nodes = x.size(1)
    num_features = x.size(2)

    # Replicate the single-graph edge_index batch_size times, offsetting each
    # copy by num_nodes, so the batch forms one block-diagonal graph.
    edge_index, edge_weight = dense_to_sparse(A.to(torch.device("cuda:0")))
    edge_index = edge_index.view(2, 1, -1).repeat(
        1, batch_size, 1) + torch.arange(batch_size).view(1, -1, 1).to(
            torch.device("cuda:0")) * num_nodes
    edge_index = edge_index.view(2, -1)

    # flatten nodes across the batch (the original hard-coded view(4, -1),
    # i.e. it assumed batch_size == 4)
    x = x.contiguous().view(batch_size * num_nodes, -1)

    ### For t
    ####################################
    x = ...  # placeholder left in the original: use any PyG operator now
    ####################################
    x = x.view(batch_size, num_nodes, num_features)
    ### For t

    # TODO: batch_size = 1
    X_S = x[0]  # NOTE: the original referenced an undefined X_S; x[0] assumes batch_size == 1
    h, c = None, None
    for i in range(self.time_dimension):
        y_hat, h, c = self.recurrent(X_S[:, i, None], edge_index, edge_weight, h, c)
    self.recurrent(X_S.view(X_S.shape[0], 1, X_S.shape[1]), edge_index, edge_weight)
    h = self.tgnn(x, edge_index)
    # X_s1 = self.GNN1(X_S, A_q, A_h)
    # X_s2 = self.GNN2(X_s1, A_q, A_h) + X_s1  # num_nodes, rank
    # X_s3 = self.GNN3(X_s2, A_q, A_h)
    # NOTE: the original returned X_s3.permute(0, 2, 1), but X_s3 only exists in
    # the commented-out GNN stack above; returning h is an assumption here.
    X_res = h.permute(0, 2, 1)
    return X_res
def _get_buffer(self, x, graph, bsz, len_):
    # the trailing 'or True' disables the cache, so the block-diagonal
    # batch graph is rebuilt on every call
    if not hasattr(self, 'buffer_edge_index') or True:
        adj_mat = graph.new_zeros(x.size(0), x.size(0))
        for i in range(bsz):
            adj_mat[i * len_:(i + 1) * len_, i * len_:(i + 1) * len_] = graph
        edge_index, edge_attr = dense_to_sparse(adj_mat)
        assert edge_index.size(1) % bsz == 0
        setattr(self, 'num_edges_per_graph', edge_index.size(1) // bsz)
        setattr(self, 'buffer_edge_index', edge_index)
    total_edges = getattr(self, 'num_edges_per_graph') * bsz
    return getattr(self, 'buffer_edge_index')[:, :total_edges]
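# Minimal self-contained sketch of the block-diagonal batching trick used in
# the two snippets above: tiling one graph bsz times along the diagonal lets a
# single PyG operator process the whole batch at once. All names here are
# illustrative.
import torch
from torch_geometric.utils import dense_to_sparse

graph = torch.tensor([[0., 1.], [1., 0.]])  # one 2-node graph
bsz, len_ = 3, 2
adj_mat = torch.zeros(bsz * len_, bsz * len_)
for i in range(bsz):
    adj_mat[i * len_:(i + 1) * len_, i * len_:(i + 1) * len_] = graph
edge_index, _ = dense_to_sparse(adj_mat)
print(edge_index)  # each copy's node indices are offset by len_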
def mock_batch(batch_size):
    """construct pyG batch"""
    graphs = []
    while len(graphs) < batch_size:
        G = nx.erdos_renyi_graph(np.random.choice([300, 500]), 0.5)
        if G.number_of_edges() > 1:
            graphs.append(G)

    adjs = [torch.from_numpy(nx.to_numpy_array(G)) for G in graphs]
    graph_data = [dense_to_sparse(A) for A in adjs]
    # dense_to_sparse returns (edge_index, edge_attr); the all-ones edge
    # weights are reused here as placeholder node features for the mock batch
    data_list = [Data(x=x, edge_index=e) for (e, x) in graph_data]
    return Batch.from_data_list(data_list)
def do_trans(data):
    node_num, _ = data.x.size()
    _, edge_num = data.edge_index.size()
    drop_num = int(node_num * ratio)

    idx_drop = np.random.choice(node_num, drop_num, replace=False)
    idx_nondrop = [n for n in range(node_num) if n not in idx_drop]

    adj = to_dense_adj(data.edge_index)[0]
    adj = adj[idx_nondrop, :][:, idx_nondrop]

    return Data(x=data.x[idx_nondrop], edge_index=dense_to_sparse(adj)[0])
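# Hedged usage sketch for the node-dropping do_trans above: `ratio` is a
# closure variable in the original, so we set it globally here. Slicing the
# dense adjacency renumbers the surviving nodes, and dense_to_sparse then
# yields a compact edge_index over the smaller graph.
ratio = 0.2
data = Data(x=torch.randn(5, 8),
            edge_index=torch.tensor([[0, 1, 2, 3], [1, 2, 3, 4]]))
aug = do_trans(data)
print(aug.x.size(0), aug.edge_index.max().item())  # 4 nodes, indices < 4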
def read_syn_data(folder: str, prefix):
    with open(os.path.join(folder, f"{prefix}.pkl"), 'rb') as f:
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_label_matrix = pickle.load(f)

    x = torch.from_numpy(features).float()
    y = train_mask.reshape(-1, 1) * y_train + val_mask.reshape(-1, 1) * y_val \
        + test_mask.reshape(-1, 1) * y_test
    y = torch.from_numpy(np.where(y)[1])
    edge_index = dense_to_sparse(torch.from_numpy(adj))[0]
    data = Data(x=x, y=y, edge_index=edge_index)
    data.train_mask = torch.from_numpy(train_mask)
    data.val_mask = torch.from_numpy(val_mask)
    data.test_mask = torch.from_numpy(test_mask)
    return data
def main():
    x_dim = 512
    x_len = 10000
    # np.float was removed in NumPy 1.24; use np.float64 instead
    x = sparse.rand(x_len, x_dim, density=10 / x_dim, format='csr', dtype=np.float64)
    adj = sparse.rand(x_len, x_len, density=10 / x_len, format='csr', dtype=np.float64)
    w = sparse.rand(x_dim, x_dim, density=10 / x_dim, format='csr', dtype=np.float64)
    start = time.time()
    adj.dot(x.dot(w))
    print(time.time() - start)

    x1 = x.todense().astype(np.float64)
    adj1 = adj.todense().astype(np.float64)
    w1 = w.todense().astype(np.float64)
    start = time.time()
    adj1.dot(x1.dot(w1))
    print(time.time() - start)

    x2 = torch.tensor(x1, dtype=torch.float)
    adj2 = torch.tensor(adj1, dtype=torch.float)
    w2 = torch.tensor(w1, dtype=torch.float)
    start = time.time()
    adj2.matmul(x2.matmul(w2))
    print(time.time() - start)

    adj2alt = torch.rand((x_len, x_len), dtype=torch.float)
    start = time.time()
    adj2alt.matmul(x2.matmul(w2))
    print(time.time() - start)

    conv = GCNConv(x_dim, x_dim)
    edge_index, _ = dense_to_sparse(adj2)
    start = time.time()
    x3 = conv(x2, edge_index)
    print(time.time() - start)
def mol_to_pyg_graph(mol, idm=False, ratio=2.):
    nodes = []
    for atom in mol.GetAtoms():
        nodes.append(atom_to_node(atom))
    idx = [n[0] for n in nodes]
    assert is_sorted(idx)
    nodes = np.array(nodes, dtype=float)[:, 1:]
    edges = []
    for bond in mol.GetBonds():
        edges.append(bond_to_edge(bond))

    g_adj = construct_graph(nodes, edges)

    if idm:
        # inverse distance weighting matrix
        try:
            # optional random seed for reproducibility
            if AllChem.EmbedMolecule(mol, randomSeed=0xf00d) == -1:
                AllChem.Compute2DCoords(mol)
            with np.errstate(divide='ignore'):
                W = 1. / Chem.rdmolops.Get3DDistanceMatrix(mol)
            W[np.isinf(W)] = 0
        except Exception as e:
            try:
                # retry with explicit hydrogens if embedding fails
                mol = Chem.AddHs(mol)
                if AllChem.EmbedMolecule(mol, randomSeed=0xf00d) == -1:
                    AllChem.Compute2DCoords(mol)
                mol = Chem.RemoveHs(mol)
                with np.errstate(divide='ignore'):
                    W = 1. / Chem.rdmolops.Get3DDistanceMatrix(mol)
                W[np.isinf(W)] = 0
            except Exception:
                num_atoms = mol.GetNumAtoms()
                W = np.zeros((num_atoms, num_atoms))

        # preserve top ratio * n entries
        threshold = np.sort(W, axis=None)[::-1][min(int(ratio * len(W)) + 1,
                                                    len(W) ** 2) - 1]
        W[W < threshold] = 0

        # convert to sparse representation
        W_spr = dense_to_sparse(torch.FloatTensor(W))
        g_idm = Data(x=g_adj.x, edge_index=W_spr[0], edge_attr=W_spr[1])
        return [g_adj, g_idm]
    return [g_adj, None]
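# Hedged illustration of the sparsification step above: keep roughly the top
# ratio * n entries of a dense weight matrix by zeroing everything below the
# corresponding order statistic (toy numpy-only example, no RDKit needed).
import numpy as np

W = np.array([[0.0, 0.9, 0.1],
              [0.9, 0.0, 0.5],
              [0.1, 0.5, 0.0]])
ratio = 1.
threshold = np.sort(W, axis=None)[::-1][min(int(ratio * len(W)) + 1, len(W) ** 2) - 1]
W[W < threshold] = 0
print(W)  # only the four largest off-diagonal entries survive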
def load_node_neighbor(adj, file_path):
    from os.path import join as pjoin
    if file_path and os.path.exists(pjoin(file_path, 'node_neighbor.pt')):
        neighbor = torch.load(pjoin(file_path, 'node_neighbor.pt'))
    else:
        adj = adj.to('cuda:8')
        adj = adj > 0
        neighbor = torch.mm(adj.float(), adj.float().t()) > 0
        neighbor, _ = dense_to_sparse(neighbor)
        neighbor = remove_self_loops(neighbor)[0]
        neighbor = neighbor.cpu()
        if file_path:
            torch.save(neighbor, pjoin(file_path, 'node_neighbor.pt'))
    return neighbor
def read_syn_data(self):
    with open(self.raw_paths[0], 'rb') as f:
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_label_matrix = pickle.load(f)

    x = torch.from_numpy(features).float()
    y = train_mask.reshape(-1, 1) * y_train + val_mask.reshape(-1, 1) * y_val \
        + test_mask.reshape(-1, 1) * y_test
    y = torch.from_numpy(np.where(y)[1])
    edge_index = dense_to_sparse(torch.from_numpy(adj))[0]
    data = Data(x=x, y=y, edge_index=edge_index)
    data.train_mask = torch.from_numpy(train_mask)
    data.val_mask = torch.from_numpy(val_mask)
    data.test_mask = torch.from_numpy(test_mask)
    return data
def __call__(self, data: Data):
    N = data.num_nodes
    adj = to_dense_adj(data.edge_index).squeeze(0)
    adj_order = get_higher_order_adj_matrix(adj, self.order)  # (N, N)

    type_mat = to_dense_adj(data.edge_index, edge_attr=data.edge_type).squeeze(0)  # (N, N)
    type_highorder = torch.where(adj_order > 1,
                                 self.num_types + adj_order - 1,
                                 torch.zeros_like(adj_order))
    assert (type_mat * type_highorder == 0).all()
    type_new = type_mat + type_highorder

    new_edge_index, new_edge_type = dense_to_sparse(type_new)
    _, edge_order = dense_to_sparse(adj_order)

    data.bond_edge_index = data.edge_index  # Save original edges
    data.edge_index, data.edge_type = coalesce(new_edge_index, new_edge_type.long(), N, N)
    edge_index_1, data.edge_order = coalesce(new_edge_index, edge_order.long(), N, N)
    assert (data.edge_index == edge_index_1).all()

    return data
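# get_higher_order_adj_matrix is referenced above but not defined in this
# snippet. A minimal sketch consistent with the usage (each off-diagonal entry
# holds the hop distance, up to `order`, at which two nodes first become
# connected; 0 otherwise) could look like the following. This is an assumption
# about the helper, not the original implementation.
def get_higher_order_adj_matrix(adj: torch.Tensor, order: int) -> torch.Tensor:
    # adj_mats[i][u, v] > 0 iff there is a walk of length exactly i from u to v
    adj_mats = [torch.eye(adj.size(0), dtype=torch.long), (adj > 0).long()]
    for _ in range(2, order + 1):
        adj_mats.append((adj_mats[-1] @ adj_mats[1] > 0).long())
    order_mat = torch.zeros(adj.size(0), adj.size(0), dtype=torch.long)
    for i in range(1, order + 1):
        # assign each off-diagonal pair the first hop count at which it is reached
        new_hop = (adj_mats[i] > 0) & (order_mat == 0) & (adj_mats[0] == 0)
        order_mat[new_hop] = i
    return order_mat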
def reward(self):
    # Get black-box labels for all nodes
    edge_indices, _ = dense_to_sparse(self.adj)
    features = torch.ones((self.num_current_nodes, self.num_features))
    logits = self.blackbox_model(features, edge_indices)
    probs = F.softmax(logits, dim=1)
    # Reward is probability of node 0 being predicted as class c
    # reward = probs[0, self.c].detach().item()
    reward = probs[:, self.c].detach().sum().item()  # * 10 / self.num_current_nodes
    return reward
def forward(self, x, edge_index, pool=True):
    for i in range(len(self.hidden_dims)):
        x = self.embed_nets[i](x, edge_index)
        if pool:
            # to_dense_adj takes max_num_nodes as a keyword argument; the
            # original passed x.size(0) positionally into the batch slot
            x, adj = mpr_pool(x,
                              to_dense_adj(edge_index,
                                           max_num_nodes=x.size(0),
                                           edge_attr=None)[0],
                              clusters=self.cluster_dims[i],
                              overlap=self.overlap)
            edge_index, edge_attr = dense_to_sparse(adj.squeeze(0))
    y_pred = self.g_classifier(x, edge_index, None)
    return y_pred
def get_ecc_conv_parameters(self, data, layer_no):
    v_plus_list, laplacians = data.v_plus, data.laplacians
    # print([v_plus[layer_no] for v_plus in v_plus_list])
    v_plus_batch = torch.cat([v_plus[layer_no] for v_plus in v_plus_list], dim=0)

    laplacian_layer_list = [laplacians[i][layer_no] for i in range(len(laplacians))]
    laplacian_block_diagonal = self.make_block_diag(laplacian_layer_list)

    if self.config["dataset_name"] == 'DD':
        laplacian_block_diagonal[laplacian_block_diagonal < 1e-4] = 0

    # First layer
    lap_edge_idx, lap_edge_weights = dense_to_sparse(laplacian_block_diagonal)

    # Convert v_plus_batch to boolean
    return lap_edge_idx, lap_edge_weights, (v_plus_batch == 1)
def process(self): r"""Processes the dataset to the :obj:`self.processed_dir` folder.""" with open(os.path.join(self.raw_dir, 'MUTAG_node_labels.txt'), 'r') as f: nodes_all_temp = f.read().splitlines() nodes_all = [int(i) for i in nodes_all_temp] adj_all = np.zeros((len(nodes_all), len(nodes_all))) with open(os.path.join(self.raw_dir, 'MUTAG_A.txt'), 'r') as f: adj_list = f.read().splitlines() for item in adj_list: lr = item.split(', ') l = int(lr[0]) r = int(lr[1]) adj_all[l - 1, r - 1] = 1 with open(os.path.join(self.raw_dir, 'MUTAG_graph_indicator.txt'), 'r') as f: graph_indicator_temp = f.read().splitlines() graph_indicator = [int(i) for i in graph_indicator_temp] graph_indicator = np.array(graph_indicator) with open(os.path.join(self.raw_dir, 'MUTAG_graph_labels.txt'), 'r') as f: graph_labels_temp = f.read().splitlines() graph_labels = [int(i) for i in graph_labels_temp] data_list = [] for i in range(1, 189): idx = np.where(graph_indicator == i) graph_len = len(idx[0]) adj = adj_all[idx[0][0]:idx[0][0] + graph_len, idx[0][0]:idx[0][0] + graph_len] label = int(graph_labels[i - 1] == 1) feature = nodes_all[idx[0][0]:idx[0][0] + graph_len] nb_clss = 7 targets = np.array(feature).reshape(-1) one_hot_feature = np.eye(nb_clss)[targets] data_example = Data(x=torch.from_numpy(one_hot_feature).float(), edge_index=dense_to_sparse( torch.from_numpy(adj))[0], y=label) data_list.append(data_example) torch.save(self.collate(data_list), self.processed_paths[0])