def __init__(self, adj, X_obs, z_obs, W1, W2, u, verbose=False):
    # Adjacency matrix
    self.adj = adj.copy().tolil()
    self.adj_no_selfloops = self.adj.copy()
    self.adj_no_selfloops.setdiag(0)
    self.adj_orig = self.adj.copy().tolil()
    self.u = u  # the node being attacked
    self.adj_preprocessed = utils.preprocess_graph(self.adj).tolil()
    # Number of nodes
    self.N = adj.shape[0]
    # Node attributes
    self.X_obs = X_obs.copy().tolil()
    self.X_obs_orig = self.X_obs.copy().tolil()
    # Node labels
    self.z_obs = z_obs.copy()
    self.label_u = self.z_obs[self.u]
    self.K = np.max(self.z_obs) + 1
    # GCN weight matrices
    self.W1 = W1
    self.W2 = W2
    self.W = sp.csr_matrix(self.W1.dot(self.W2))
    self.cooc_matrix = self.X_obs.T.dot(self.X_obs).tolil()
    self.cooc_constraint = None
    self.structure_perturbations = []
    self.feature_perturbations = []
    self.influencer_nodes = []
    self.potential_edges = []
    self.verbose = verbose
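# Usage sketch for the constructor above. The class name `Nettack` and the
# surrogate weights W1/W2 coming from a pre-trained two-layer linear GCN are
# assumptions for illustration; only the argument shapes follow from __init__.
import numpy as np
import scipy.sparse as sp

adj_demo = sp.csr_matrix(np.array([[0, 1, 1, 0],
                                   [1, 0, 1, 0],
                                   [1, 1, 0, 1],
                                   [0, 0, 1, 0]]))   # N x N adjacency
X_demo = sp.csr_matrix(np.array([[1., 0., 1.],
                                 [0., 1., 0.],
                                 [1., 1., 0.],
                                 [0., 0., 1.]]))     # N x D binary features
z_demo = np.array([0, 0, 1, 1])                      # N integer labels
W1_demo = np.random.randn(3, 8)                      # D x H surrogate weights
W2_demo = np.random.randn(8, 2)                      # H x K surrogate weights

attack = Nettack(adj_demo, X_demo, z_demo, W1_demo, W2_demo, u=3, verbose=True)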
def main(argv):
    # Configure the CPU/GPU in TF; assume only one GPU is in use.
    # For a multi-GPU setting, please refer to
    # https://www.tensorflow.org/guide/gpu#using_multiple_gpus
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if len(gpus) == 0 or FLAGS.gpu_id is None:
        device_id = "/device:CPU:0"
    else:
        tf.config.experimental.set_visible_devices(gpus[FLAGS.gpu_id], 'GPU')
        device_id = '/device:GPU:0'

    A_mat, X_mat, z_vec, train_idx, val_idx, test_idx = load_data_planetoid(
        FLAGS.dataset)
    An_mat = preprocess_graph(A_mat)
    # N = A_mat.shape[0]
    K = z_vec.max() + 1

    with tf.device(device_id):
        gcn = GCN(An_mat, X_mat, [FLAGS.hidden1, K])
        gcn.train(train_idx, z_vec[train_idx], val_idx, z_vec[val_idx])
        test_res = gcn.evaluate(test_idx, z_vec[test_idx], training=False)
        # gcn = GCN(An_mat_diag, X_mat_stack, [FLAGS.hidden1, K])
        # gcn.train(train_idx_recal, z_vec[train_idx], val_idx_recal, z_vec[val_idx])
        # test_res = gcn.evaluate(test_idx_recal, z_vec[test_idx], training=False)

    print("Dataset {}".format(FLAGS.dataset),
          "Test loss {:.4f}".format(test_res[0]),
          "test acc {:.4f}".format(test_res[1]))
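# `preprocess_graph` is imported from elsewhere in the repo. A minimal sketch
# of what it presumably computes -- the renormalized adjacency
# A_hat = D~^{-1/2} (A + I) D~^{-1/2} from Kipf & Welling (2017) -- assuming
# the input is a scipy sparse matrix:
import numpy as np
import scipy.sparse as sp

def preprocess_graph_sketch(adj):
    adj_ = adj + sp.eye(adj.shape[0])            # add self-loops
    deg = np.asarray(adj_.sum(1)).flatten()      # degrees of A + I
    d_inv_sqrt = sp.diags(np.power(deg, -0.5))   # D~^{-1/2}
    return (d_inv_sqrt @ adj_ @ d_inv_sqrt).tocsr()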
def attack_surrogate(self, n_perturbations, perturb_structure=True,
                     perturb_features=True, direct=True, n_influencers=0,
                     delta_cutoff=0.004):
    """
    Perform an attack on the surrogate model.

    Parameters
    ----------
    n_perturbations: int
        The number of perturbations (structure or feature) to perform.
    perturb_structure: bool, default: True
        Indicates whether the structure can be changed.
    perturb_features: bool, default: True
        Indicates whether the features can be changed.
    direct: bool, default: True
        Indicates whether to directly modify edges/features of the attacked
        node, or only those of influencer nodes.
    n_influencers: int, default: 0
        Number of influencing nodes -- will be ignored if direct is True.
    delta_cutoff: float
        The critical value for the likelihood ratio test of the power law
        distributions. See the Chi-square distribution with one degree of
        freedom. The default value 0.004 corresponds to a p-value of
        roughly 0.95.

    Returns
    -------
    None.
    """
    assert not (direct is False and n_influencers == 0), \
        "indirect mode requires at least one influencer node"
    assert n_perturbations > 0, "need at least one perturbation"
    assert perturb_features or perturb_structure, \
        "either perturb_features or perturb_structure must be true"

    logits_start = self.compute_logits()
    best_wrong_class = self.strongest_wrong_class(logits_start)
    surrogate_losses = [
        logits_start[self.label_u] - logits_start[best_wrong_class]
    ]

    if self.verbose:
        print("##### Starting attack #####")
        if perturb_structure and perturb_features:
            print("##### Attack node with ID {} using structure and feature "
                  "perturbations #####".format(self.u))
        elif perturb_features:
            print("##### Attack only using feature perturbations #####")
        elif perturb_structure:
            print("##### Attack only using structure perturbations #####")
        if direct:
            print("##### Attacking the node directly #####")
        else:
            print("##### Attacking the node indirectly via {} influencer "
                  "nodes #####".format(n_influencers))
        print("##### Performing {} perturbations #####".format(n_perturbations))

    if perturb_structure:
        # Set up the starting values of the likelihood ratio test.
        degree_sequence_start = self.adj_orig.sum(0).A1
        current_degree_sequence = self.adj.sum(0).A1
        d_min = 2
        S_d_start = np.sum(
            np.log(degree_sequence_start[degree_sequence_start >= d_min]))
        current_S_d = np.sum(
            np.log(current_degree_sequence[current_degree_sequence >= d_min]))
        n_start = np.sum(degree_sequence_start >= d_min)
        current_n = np.sum(current_degree_sequence >= d_min)
        alpha_start = compute_alpha(n_start, S_d_start, d_min)
        log_likelihood_orig = compute_log_likelihood(
            n_start, alpha_start, S_d_start, d_min)

    if len(self.influencer_nodes) == 0:
        if not direct:
            # Choose influencer nodes.
            infls, add_infls = self.get_attacker_nodes(
                n_influencers, add_additional_nodes=True)
            self.influencer_nodes = np.concatenate(
                (infls, add_infls)).astype("int")
            # Potential edges are all edges from any attacker to any other
            # node, except the respective attacker itself or the node being
            # attacked.
            self.potential_edges = np.row_stack([
                np.column_stack((np.tile(infl, self.N - 2),
                                 np.setdiff1d(np.arange(self.N),
                                              np.array([self.u, infl]))))
                for infl in self.influencer_nodes
            ])
            if self.verbose:
                print("Influencer nodes: {}".format(self.influencer_nodes))
        else:
            # Direct attack: the only "influencer" is the attacked node itself.
            influencers = [self.u]
            self.potential_edges = np.column_stack(
                (np.tile(self.u, self.N - 1),
                 np.setdiff1d(np.arange(self.N), self.u)))
            self.influencer_nodes = np.array(influencers)
    self.potential_edges = self.potential_edges.astype("int32")

    for _ in range(n_perturbations):
        if self.verbose:
            print("##### ...{}/{} perturbations ... #####".format(
                _ + 1, n_perturbations))
        if perturb_structure:
            # Do not consider edges that, if removed, would leave singleton
            # nodes in the graph.
            singleton_filter = filter_singletons(self.potential_edges, self.adj)
            filtered_edges = self.potential_edges[singleton_filter]

            # Update the values for the power law likelihood ratio test.
            deltas = 2 * (1 - self.adj[tuple(filtered_edges.T)].toarray()[0]) - 1
            d_edges_old = current_degree_sequence[filtered_edges]
            d_edges_new = current_degree_sequence[filtered_edges] + deltas[:, None]
            new_S_d, new_n = update_Sx(current_S_d, current_n,
                                       d_edges_old, d_edges_new, d_min)
            new_alphas = compute_alpha(new_n, new_S_d, d_min)
            new_ll = compute_log_likelihood(new_n, new_alphas, new_S_d, d_min)
            alphas_combined = compute_alpha(new_n + n_start,
                                            new_S_d + S_d_start, d_min)
            new_ll_combined = compute_log_likelihood(
                new_n + n_start, alphas_combined, new_S_d + S_d_start, d_min)
            new_ratios = -2 * new_ll_combined + 2 * (new_ll + log_likelihood_orig)

            # Do not consider edges that, if added/removed, would violate the
            # likelihood ratio Chi-square cutoff value.
            powerlaw_filter = filter_chisquare(new_ratios, delta_cutoff)
            filtered_edges_final = filtered_edges[powerlaw_filter]

            # Compute new entries in A_hat_square_uv.
            a_hat_uv_new = self.compute_new_a_hat_uv(filtered_edges_final)
            # Compute the struct scores for each potential edge.
            struct_scores = self.struct_score(a_hat_uv_new, self.compute_XW())
            best_edge_ix = struct_scores.argmin()
            best_edge_score = struct_scores.min()
            best_edge = filtered_edges_final[best_edge_ix]

        if perturb_features:
            # Compute the feature scores for each potential feature perturbation.
            feature_ixs, feature_scores = self.feature_scores()
            best_feature_ix = feature_ixs[0]
            best_feature_score = feature_scores[0]

        if perturb_structure and perturb_features:
            # Decide whether to change an edge or a feature.
            if best_edge_score < best_feature_score:
                if self.verbose:
                    print("Edge perturbation: {}".format(best_edge))
                change_structure = True
            else:
                if self.verbose:
                    print("Feature perturbation: {}".format(best_feature_ix))
                change_structure = False
        elif perturb_structure:
            change_structure = True
        elif perturb_features:
            change_structure = False

        if change_structure:
            # Perform the edge perturbation (flip the edge in both directions).
            self.adj[tuple(best_edge)] = self.adj[tuple(best_edge[::-1])] = \
                1 - self.adj[tuple(best_edge)]
            self.adj_preprocessed = utils.preprocess_graph(self.adj)
            self.structure_perturbations.append(tuple(best_edge))
            self.feature_perturbations.append(())
            surrogate_losses.append(best_edge_score)
            # Update likelihood ratio test values.
            current_S_d = new_S_d[powerlaw_filter][best_edge_ix]
            current_n = new_n[powerlaw_filter][best_edge_ix]
            current_degree_sequence[best_edge] += deltas[powerlaw_filter][best_edge_ix]
        else:
            self.X_obs[tuple(best_feature_ix)] = 1 - self.X_obs[tuple(best_feature_ix)]
            self.feature_perturbations.append(tuple(best_feature_ix))
            self.structure_perturbations.append(())
            surrogate_losses.append(best_feature_score)
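# The helpers compute_alpha / compute_log_likelihood / filter_chisquare used
# above are defined elsewhere. A sketch under the assumption that they
# implement the approximate MLE and log-likelihood of a power law
# p(d) ~ d^(-alpha) over degrees d >= d_min (Clauset et al., 2009), and a
# simple threshold on the likelihood-ratio statistic:
import numpy as np

def compute_alpha_sketch(n, S_d, d_min):
    # alpha_hat = 1 + n / (S_d - n * log(d_min - 1/2)), with S_d = sum(log d_i)
    return n / (S_d - n * np.log(d_min - 0.5)) + 1

def compute_log_likelihood_sketch(n, alpha, S_d, d_min):
    # log L = n log(alpha) + n * alpha * log(d_min) - (alpha + 1) * S_d
    return n * np.log(alpha) + n * alpha * np.log(d_min) - (alpha + 1) * S_d

def filter_chisquare_sketch(ll_ratios, cutoff):
    # Keep candidates whose likelihood-ratio statistic stays below the cutoff.
    return ll_ratios < cutoff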
flags.DEFINE_integer('gpu_id', None, 'Specify the GPU id')

# Configure the CPU/GPU in TF; assume only one GPU is in use.
# For a multi-GPU setting, please refer to
# https://www.tensorflow.org/guide/gpu#using_multiple_gpus
gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) == 0 or FLAGS.gpu_id is None:
    device_id = "/device:CPU:0"
else:
    tf.config.experimental.set_visible_devices(gpus[FLAGS.gpu_id], 'GPU')
    device_id = '/device:GPU:0'

A_mat, X_mat, z_vec, train_idx, val_idx, test_idx = load_data_planetoid(
    FLAGS.dataset)
An_mat = preprocess_graph(A_mat)
# N = A_mat.shape[0]
K = z_vec.max() + 1

with tf.device(device_id):
    gcn = GCN(An_mat, X_mat, [FLAGS.hidden1, K])
    train_stats = gcn.train(train_idx, z_vec[train_idx],
                            val_idx, z_vec[val_idx])
    train_losses = train_stats[0]
    val_losses = train_stats[1]
    train_accuracies = train_stats[2]
    val_accuracies = train_stats[3]

with open("learned_lapl.pkl", "rb") as pkl:
    lrnd = pickle.load(pkl)
A_mat = A_mat + A_mat.T
A_mat[A_mat > 1] = 1
lcc = utils.largest_connected_components(A_mat)
A_mat = A_mat[lcc][:, lcc]

assert np.abs(A_mat - A_mat.T).sum() == 0, "Input graph is not symmetric"
assert A_mat.max() == 1 and len(np.unique(A_mat[A_mat.nonzero()].A1)) == 1, \
    "Graph must be unweighted"
assert A_mat.sum(0).A1.min() > 0, "Graph contains singleton nodes"

X_mat = X_mat[lcc].astype('float32')
z_vec = z_vec[lcc]
_N = A_mat.shape[0]
K = z_vec.max() + 1
Z_mat = np.eye(K)[z_vec]
An_mat = utils.preprocess_graph(A_mat)
sizes = [16, K]
degrees = A_mat.sum(0).A1

val_size = int(_N * FLAGS.train_share)
train_size = int(_N * 0.1)
# train_size = _N - unlabeled_size - val_size
unlabeled_size = _N - train_size - val_size
train_idx, val_idx, test_idx = utils.train_val_test_split_tabular(
    np.arange(_N),
    train_size=train_size,
    val_size=val_size,
    test_size=unlabeled_size,
    stratify=z_vec)

main()
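# `utils.largest_connected_components` is defined elsewhere; a sketch of the
# presumed behavior -- returning the node indices of the largest connected
# component(s) -- built on scipy.sparse.csgraph:
import numpy as np
from scipy.sparse.csgraph import connected_components

def largest_connected_components_sketch(adj, n_components=1):
    _, labels = connected_components(adj, directed=False)
    sizes = np.bincount(labels)                      # size of each component
    keep = np.argsort(sizes)[::-1][:n_components]    # largest component ids
    return np.where(np.isin(labels, keep))[0]        # node indices to keep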