import numpy as np
import scipy.sparse as sp
from scipy.linalg import toeplitz, circulant


def SD1Matrix(N, h, sparse=False):
    """
    Compute the first derivative using spectral (trigonometric) interpolation,
    assuming periodic boundary conditions.

    Parameters
    ----------
    N : int
        Number of nodes.
    h : float
        Step size.
    sparse : boolean
        If true, return sparse matrix.

    Returns
    -------
    D : (N, N) ndarray
        Spectral differentiation matrix; or
    sD : sparse-like
        Spectral differentiation sparse matrix.
    """
    c = np.zeros(N)
    j = np.arange(1, N)
    # Off-diagonal entries of the periodic spectral differentiation matrix:
    # c_j = 0.5 * (-1)**j * cot(j * h / 2)
    c[1:] = 0.5 * ((-1) ** j) * (np.tan(j * h / 2.) ** (-1))
    r = np.zeros(N)
    r[0] = c[0]
    r[1:] = c[-1:0:-1]
    D = toeplitz(c, r=r)
    if sparse:
        sD = sp.csr_matrix(D)
        return sD
    else:
        return D
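# A minimal usage sketch (assumes the imports above): differentiate sin(x) on a
# periodic grid with the standard spacing h = 2*pi/N. The spectral matrix is
# exact for band-limited functions, so the error is near machine precision.
def _demo_sd1matrix():
    N = 16
    h = 2 * np.pi / N
    x = h * np.arange(N)
    D = SD1Matrix(N, h)
    print(np.max(np.abs(D @ np.sin(x) - np.cos(x))))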
def P4D1Matrix(N, h, sparse=False):
    """
    Compute first derivative using interpolant of degree 4 with central
    difference, assuming periodic boundary conditions.

    Parameters
    ----------
    N : int
        Number of nodes.
    h : float
        Step size.
    sparse : boolean
        If true, return sparse matrix.

    Returns
    -------
    D1 : (N, N) ndarray
        4th degree interpolant difference matrix; or
    sD1 : sparse-like
        4th degree interpolant difference sparse matrix.
    """
    d1 = np.zeros(N)
    # First column of the circulant matrix: the 4th-order central-difference
    # stencil (1, -8, 0, 8, -1) / 12 wrapped around the periodic boundary.
    d1[1] = -2 / 3
    d1[2] = 1 / 12
    d1[-2] = -1 / 12
    d1[-1] = 2 / 3
    D1 = h ** -1 * circulant(d1)
    if sparse:
        sD1 = sp.csr_matrix(D1)
        return sD1
    else:
        return D1
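# A minimal convergence sketch (assumes the imports above): doubling N should
# shrink the error by roughly 2**4 = 16, confirming 4th-order accuracy.
def _demo_p4d1matrix():
    for N in (32, 64, 128):
        h = 2 * np.pi / N
        x = h * np.arange(N)
        err = np.max(np.abs(P4D1Matrix(N, h) @ np.sin(x) - np.cos(x)))
        print(N, err)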
def _to_sparse_matrix(data, n_items):
    # Infer the number of users from the largest user id (assumed 0-based).
    n_users = data['user_id'].max() + 1
    # Implicit feedback: every observed (user, item) pair gets a score of 1.
    score = np.ones(data.shape[0])
    interactions = sp.csr_matrix((score, (data['user_id'], data['pid'])),
                                 dtype=np.double,
                                 shape=(n_users, n_items))
    return interactions
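# A minimal usage sketch with hypothetical data (assumes pandas is available):
# three interactions from two users become a 2 x 4 binary interaction matrix.
def _demo_to_sparse_matrix():
    import pandas as pd
    data = pd.DataFrame({'user_id': [0, 0, 1], 'pid': [2, 3, 2]})
    print(_to_sparse_matrix(data, n_items=4).toarray())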
def prepare_coding(self, Z_quant):
    # L-dimensional array; each entry is the hash code (row idx) of label l.
    self.hash_code_arr = Z_quant.dot(self.hash_func)
    rows, cols, vals = [], [], []
    for l in range(self.L):
        rows.append(self.hash_code_arr[l])
        cols.append(l)
        vals.append(1)
    # One row per possible hash code (K**D buckets), one column per label.
    m = self.K ** self.D
    n = self.L
    M = sp.csr_matrix((vals, (rows, cols)), shape=(m, n))
    self.code2label_mat = M
    # Map each non-empty hash code to the set of labels that fall into it.
    # In CSR format, row `code` spans indices[indptr[code]:indptr[code + 1]],
    # so a positive indptr difference marks a non-empty row.
    code2label_set = {}
    for code in np.nonzero(M.indptr[1:] - M.indptr[:-1])[0]:
        code2label_set[code] = set(M.indices[M.indptr[code]:M.indptr[code + 1]])
    self.code2label_set = code2label_set
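# A small, self-contained illustration of the CSR indptr/indices trick used
# above (independent of the class): recover the column set of each non-empty row.
def _demo_csr_row_sets():
    M = sp.csr_matrix(([1, 1, 1], ([0, 0, 2], [1, 3, 0])), shape=(4, 4))
    row_sets = {r: set(M.indices[M.indptr[r]:M.indptr[r + 1]])
                for r in np.nonzero(np.diff(M.indptr))[0]}
    print(row_sets)  # {0: {1, 3}, 2: {0}}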
import torch


def load_data(path="../data/cora/", dataset="cora"):
    """Load citation network dataset (cora only for now)"""
    print('Loading {} dataset...'.format(dataset))

    idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset),
                                        dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),
                                    dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = normalize(features)
    adj = normalize(adj + sp.eye(adj.shape[0]))

    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    return adj, features, labels, idx_train, idx_val, idx_test
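# The loader above assumes three helpers. Minimal sketches of the usual
# implementations follow (assumptions, not necessarily the originals):
def encode_onehot(labels):
    # Map each distinct class string to a one-hot row.
    classes = sorted(set(labels))
    class_map = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)}
    return np.array(list(map(class_map.get, labels)), dtype=np.int32)


def normalize(mx):
    # Row-normalize a sparse matrix: D^-1 @ mx, leaving zero-degree rows as zero.
    rowsum = np.array(mx.sum(1)).flatten()
    r_inv = np.divide(1.0, rowsum, out=np.zeros_like(rowsum), where=rowsum != 0)
    return sp.diags(r_inv).dot(mx)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    # Convert a scipy sparse matrix to a torch sparse COO tensor.
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    return torch.sparse_coo_tensor(indices, values, torch.Size(sparse_mx.shape))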
def _adj_to_sparse_matrix(adj_ent, adj_rel, n_ent, type):
    row = adj_ent[0]
    col = adj_ent[1]
    value = adj_rel
    # Drop duplicate (head, tail) pairs, keeping the first relation seen.
    remove_dup = []
    tuple_dup = set()
    for cnt, (i, j) in enumerate(zip(row, col)):
        if (i, j) in tuple_dup:
            remove_dup.append(cnt)
        else:
            tuple_dup.add((i, j))
    value = np.delete(value, remove_dup)
    row = np.delete(row, remove_dup)
    col = np.delete(col, remove_dup)

    if type == 'torch':
        indices = np.vstack((row, col))
        i = torch.LongTensor(indices)
        v = torch.FloatTensor(value)  # values must be float for a sparse FloatTensor
        return torch.sparse.FloatTensor(i, v, torch.Size((n_ent, n_ent)))
    elif type == 'csr':
        return sp.csr_matrix((value, (row, col)), shape=(n_ent, n_ent))
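# A minimal usage sketch with toy triples: the edge (0, 1) appears twice, so the
# duplicate is dropped and only the first relation id (1) is kept.
def _demo_adj_to_sparse_matrix():
    adj_ent = (np.array([0, 0, 1]), np.array([1, 1, 2]))
    adj_rel = np.array([1, 2, 3])
    print(_adj_to_sparse_matrix(adj_ent, adj_rel, n_ent=3, type='csr').toarray())
    # [[0 1 0]
    #  [0 0 3]
    #  [0 0 0]]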
def sparse_pagerank(A, beta=0.85, one=None, niter=1000, rel_eps=1e-6):
    ## Initialize the iterations; `one` is kept dense so that the in-place
    ## updates on the dense score matrix below remain plain ndarray operations.
    one = one if one is not None else np.ones((1, A.shape[0]), dtype=float)
    one = one / one.sum(axis=1, keepdims=True)
    ## Get the out-degree
    out = np.asarray(A.sum(axis=1)).ravel().astype(float)
    ## Obtain the mask of dangling vertices
    dangling = np.where(out == 0.0)[0]
    ## Correct the out-degree for sink nodes
    out[dangling] = 1.0
    ## Just one iteration: all dangling nodes add to the importance of all vertices.
    pi = np.full((one.shape[0], A.shape[0]), 1.0 / A.shape[0], dtype=float)
    ## If there are no dangling vertices then use simple iterations
    kiter, status = 0, -1
    ## Make a stochastic matrix
    P = sp.diags(1.0 / out, 0, dtype=float).dot(A).tocsc()
    while kiter < niter:
        ## Make a copy of the current ranking estimates
        pi_last = pi.copy()
        ## First the random-walk part (dense rows times sparse matrix) ...
        pi = beta * np.asarray(pi * P)
        ## ... then the teleportation ...
        pi += (1 - beta) * one
        ## ... and the dangling-vertices part
        if len(dangling) > 0:
            pi += beta * one * pi_last[:, dangling].sum(axis=1, keepdims=True)
        ## Normalize
        pi /= pi.sum(axis=1, keepdims=True)
        if np.sum(np.abs(pi - pi_last)) <= one.shape[0] * rel_eps * np.sum(np.abs(pi_last)):
            status = 0
            break
        ## Next iteration
        kiter += 1
        if kiter % 10 == 0:
            print(kiter)
    return pi, status, kiter
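# A minimal usage sketch: PageRank on a 4-node toy graph with one dangling
# (sink) vertex; status 0 indicates convergence and the scores sum to one.
def _demo_sparse_pagerank():
    A = sp.csr_matrix(np.array([[0, 1, 0, 0],
                                [0, 0, 1, 0],
                                [1, 0, 0, 1],
                                [0, 0, 0, 0]], dtype=float))
    pi, status, kiter = sparse_pagerank(A, beta=0.85)
    print(pi, status, kiter)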
from tqdm import tqdm


def drop_dissimilar_edges(features, adj, threshold=0.01, binary_fea=True):
    if not sp.issparse(adj):
        adj = sp.csr_matrix(adj)
    modified_adj = adj.copy().tolil()

    print('=== GCN-Jaccard ===')
    # isSparse = sp.issparse(features)
    edges = np.array(modified_adj.nonzero()).T
    removed_cnt = 0
    for edge in tqdm(edges, disable=True):  # disable=True to turn off the progress bar
        n1 = edge[0]
        n2 = edge[1]
        # Each undirected edge appears twice in nonzero(); process it once.
        if n1 > n2:
            continue

        if binary_fea:  # if the features are binary
            J = _jaccard_similarity(features[n1], features[n2])
            if J < threshold:
                modified_adj[n1, n2] = 0
                modified_adj[n2, n1] = 0
                removed_cnt += 1
    print('removed', removed_cnt, 'edges in the original graph')
    return modified_adj
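# The function above assumes a _jaccard_similarity helper. A minimal sketch
# for binary sparse feature rows (an assumption, not necessarily the original):
def _jaccard_similarity(a, b):
    # |intersection| / |union| of the nonzero supports of two binary CSR rows.
    intersection = a.multiply(b).count_nonzero()
    union = a.count_nonzero() + b.count_nonzero() - intersection
    return intersection / union if union > 0 else 0.0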
def to_csr_matrix(data, n_users, n_items):
    interactions = sp.csr_matrix(
        (data['score'], (data['clientid'], data['pid'])),
        dtype=np.double, shape=(n_users, n_items))
    return interactions
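# A minimal usage sketch with hypothetical columns. Note that csr_matrix sums
# the scores of duplicate (clientid, pid) pairs, so repeated interactions accumulate.
def _demo_to_csr_matrix():
    import pandas as pd
    data = pd.DataFrame({'clientid': [0, 0, 1], 'pid': [2, 2, 0],
                         'score': [1.0, 2.0, 5.0]})
    print(to_csr_matrix(data, n_users=2, n_items=3).toarray())
    # [[0. 0. 3.]
    #  [5. 0. 0.]]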
def one_batch(self, mode='train'):
    """
    Generate one minibatch for the trainer. In 'train' mode, one minibatch
    corresponds to one subgraph of the training graph. In 'val' or 'test'
    mode, one batch corresponds to the full graph (i.e., full-batch rather
    than minibatch evaluation for the validation / test sets).

    Inputs:
        mode            str, can be 'train', 'val', 'test' or 'valtest'
    Outputs:
        node_subgraph   np array, IDs of the subgraph / full graph nodes
        adj             scipy CSR, adj matrix of the subgraph / full graph
        norm_loss       np array, loss normalization coefficients. In 'val'
                        or 'test' modes, we don't need to normalize, and so
                        the values in this array are all 1.
    """
    if mode in ['val', 'test', 'valtest']:
        self.node_subgraph = np.arange(self.adj_full_norm.shape[0])
        adj = self.adj_full_norm
    else:
        assert mode == 'train'
        if len(self.subgraphs_remaining_nodes) == 0:
            self.par_graph_sample('train')
            print()
        self.node_subgraph = self.subgraphs_remaining_nodes.pop()
        self.size_subgraph = len(self.node_subgraph)
        self.subgraphs_remaining_data.pop()
        col = self.subgraphs_remaining_indices.pop()
        indptr = self.subgraphs_remaining_indptr.pop()
        # Expand the CSR indptr into explicit row indices for the subgraph.
        row = []
        for i in range(len(indptr) - 1):
            row.extend([i] * len(col[indptr[i]:indptr[i + 1]]))
        # Map subgraph-local indices back to full-graph IDs to look up edge values.
        t_row = [self.node_subgraph[i] for i in row]
        t_col = [self.node_subgraph[i] for i in col]
        data = [self.adj_full[i, j] for i, j in zip(t_row, t_col)]
        adj = sp.csr_matrix((data, (row, col)),
                            shape=(self.size_subgraph, self.size_subgraph))
        adj_edge_index = self.subgraphs_remaining_edge_index.pop()
        print("{} nodes, {} edges, {} degree".format(
            self.node_subgraph.size, adj.size, adj.size / self.node_subgraph.size))
        # norm_aggr(adj.data, adj_edge_index, self.norm_aggr_train, num_proc=2)
        # adj.data[:] = self.norm_aggr_train[adj_edge_index][:]  # this line is interchangeable with the above line
        # adj = adj_norm(adj, deg=self.deg_train[self.node_subgraph])
        # tmp = adj.tocoo()
        # adj = _adj_to_sparse_matrix((tmp.row, tmp.col), tmp.data, self.node_subgraph.size, type='torch')
        # if self.use_cuda:
        #     adj = adj.cuda()
        self.batch_num += 1
    # norm_loss = self.norm_loss_test if mode in ['val', 'test', 'valtest'] else self.norm_loss_train
    # norm_loss = norm_loss[self.node_subgraph]
    # return self.node_subgraph, adj, norm_loss
    # t1 = time.time()
    adj_matrix = build_adj_matrix(self.node_subgraph, adj)
    # t2 = time.time()
    # print(f'elapsed {t2 - t1}')
    rel_matrix = build_rel_matrix(self.node_subgraph, adj, adj_matrix)
    # print(f'elapsed {time.time() - t2}')
    return self.node_subgraph, adj_matrix, rel_matrix
import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([
    [0, 1, 0, 0, 1, 0],
    [0, 0, 1, 0, 0, 1],
    [0, 0, 0, 1, 1, 0],
    [0, 0, 0, 0, 1, 0],
    [0, 1, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0],
], dtype=float))

## Out-degree
out_deg = np.asarray(A.sum(axis=1)).ravel().astype(float)
dangling = np.where(out_deg == 0)[0]
out_deg[dangling] = 1.0

beta = 0.85
E = np.full(A.shape[0], 1.0 / A.shape[0], dtype=float)

## Since the matrix is a linear operator and the eigenvalue we seek is one,
## the requirement that the score vector sums to one is automatically
## satisfied once it has been imposed.
x_0 = E.copy()
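## A hedged completion of this demo (an assumption about where it was headed):
## plain power iteration with the teleportation and dangling-mass corrections.
x = x_0.copy()
for _ in range(100):
    ## x P = x D^-1 A, computed as A.T @ (x / out_deg) to stay in 1-d ndarrays
    x_next = beta * A.T.dot(x / out_deg) \
        + beta * x[dangling].sum() * E + (1 - beta) * E
    if np.abs(x_next - x).sum() < 1e-10:
        x = x_next
        break
    x = x_next
print(x)  ## stationary PageRank scores; they sum to one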
# We need to set the random seed to be the same as the one used when the
# perturbed graph was generated.
# data = Dataset(root='/tmp/', name=args.dataset, setting='nettack', seed=15)
# Or we can just use setting='prognn' to get the splits
# data = Dataset(root='./tmp/', name=args.dataset, setting='prognn')
# adj, features, labels_1 = data.adj, data.features, data.labels
# idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

# load the pre-attacked graph
# perturbed_data = PrePtbDataset(root='./tmp/',
#                                name=args.dataset,
#                                attack_method='meta',
#                                ptb_rate=args.ptb_rate)

# use the data splits provided by prognn
data = Dataset(root='./tmp/', name=args.dataset, setting='prognn')
data.adj = sp.csr_matrix(adj_per)
data.features = sp.csr_matrix(base_feat)
data.labels = labels
data.idx_train = idx_train
data.idx_val = idx_val
data.idx_test = idx_test

perturbed_adj = sp.csr_matrix(adj_per, dtype=float)
features = sp.csr_matrix(base_feat, dtype=float)

# Setup Defense Model
gat = GAT(nfeat=features.shape[1],
          nhid=8, heads=8,
          nclass=labels.max().item() + 1,
          dropout=0.5,
def multi_test_poison():
    # test nodes under the poisoning attack
    cnt = 0
    data_load = dataset.c_dataset_loader(opt.dataset, ".{}".format(opt.data_path))
    adj, features, label, _, _, _ = data_load.process_data()
    labels = F_Info.F_one_hot_to_label(label)
    degrees = adj.sum(0)
    adj = sp.csr_matrix(adj)
    features = sp.csr_matrix(features)

    total_att_node = 2000  # number of nodes to attack
    # node_list = np.random.choice(np.arange(adj.shape[0]), 10, False)
    node_list = np.random.choice(np.arange(adj.shape[0]), total_att_node, replace=False)

    # record the average ASR once every 100 nodes
    adj_pert_record = {}
    acc_pert_record = {}

    num = len(node_list)
    print('=== [Poisoning] Attacking %s nodes respectively ===' % num)
    acc = 0
    target_node_id = 1
    for target_node_idx in tqdm(node_list):
        model = Nettack(surrogate, nnodes=adj.shape[0], attack_structure=True,
                        attack_features=False, device='cuda:0')
        model = model.to('cuda:0')
        model.attack(features, adj, labels, target_node_idx, n_perturbations, verbose=False)
        modified_adj = model.modified_adj

        if target_node_id % 50 == 0:
            print("\ntarget_node_idx : {}".format(target_node_idx))
            acc = test_acc(modified_adj, features, target_node_idx)
        if target_node_id % 100 == 0 or target_node_id == 1:
            print("-------------Recording---------\n")
            # integer division so the record keys stay integers
            adj_pert_record[target_node_id // 100] = sp.csr_matrix(modified_adj)
            acc_pert_record[target_node_id // 100] = acc

        adj = modified_adj
        target_node_id = target_node_id + 1

    info_collect = {}
    info_collect['adj_per'] = adj_pert_record
    info_collect['surrogate'] = surrogate
    info_collect['idx_train'] = idx_train
    info_collect['idx_val'] = idx_val
    info_collect['idx_test'] = idx_test
    info_collect['acc_after_att'] = acc_pert_record
    info_collect['total_att_node'] = total_att_node
    info_collect['random_seed'] = rand_seed
    info_collect['pert_node_list'] = node_list

    return info_collect