def test_random_sphere(self):
    """
    Sample 10 points in 10 dimensions with radius 1 for the L1, L2 and L-infinity norms and check that every
    sampled point lies inside the corresponding unit ball. The output shape is verified for the L1 case only.
    """
    # L1 ball: shape check plus per-point sum of absolute values
    points_l1 = random_sphere(10, 10, 1, 1)
    self.assertEqual(points_l1.shape, (10, 10))
    l1_norms = np.sum(np.abs(points_l1), axis=1)
    self.assertTrue(np.all(l1_norms <= 1.0))

    # L2 ball: per-point Euclidean norm, strictly inside the ball
    points_l2 = random_sphere(10, 10, 1, 2)
    l2_norms = np.linalg.norm(points_l2, axis=1)
    self.assertTrue(np.all(l2_norms < 1.0))

    # L-infinity ball: every coordinate strictly below the radius in magnitude
    points_linf = random_sphere(10, 10, 1, np.inf)
    max_abs_coords = np.abs(points_linf)
    self.assertTrue(np.all(max_abs_coords < 1.0))
def _compute(self, x, x_init, y, eps, eps_step, project, random_init):
    """
    Perform one attack step on `x`, batch by batch.

    :param x: Current samples; they are copied (cast to `NUMPY_DTYPE`) before being modified.
    :param x_init: Reference samples used as the centre of the projection when `project` is set.
    :param y: Labels, sliced per batch and passed to `_compute_perturbation`.
    :param eps: Radius used for the random start and for the projection.
    :param eps_step: Step size passed to `_apply_perturbation`.
    :param project: If true, project `x_adv - x_init` with `projection(..., eps, self.norm)` after each batch.
    :param random_init: If true, start from `x` plus a random point drawn by `random_sphere`.
    :return: The updated samples.
    """
    x_adv = x.astype(NUMPY_DTYPE)

    if random_init:
        nb_samples = x.shape[0]
        nb_features = np.prod(x.shape[1:])
        noise = random_sphere(nb_samples, nb_features, eps, self.norm)
        x_adv = x_adv + noise.reshape(x.shape).astype(NUMPY_DTYPE)

        # Only clip when the classifier actually declares a valid input range
        clip_values = getattr(self.classifier, 'clip_values', None)
        if clip_values is not None:
            clip_min, clip_max = clip_values
            x_adv = np.clip(x_adv, clip_min, clip_max)

    # Compute perturbation with implicit batching
    nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
    for batch_id in range(nb_batches):
        rows = slice(batch_id * self.batch_size, (batch_id + 1) * self.batch_size)
        batch = x_adv[rows]
        batch_labels = y[rows]

        # Get perturbation, then apply it and clip
        step = self._compute_perturbation(batch, batch_labels)
        x_adv[rows] = self._apply_perturbation(batch, step, eps_step)

        if project:
            delta = projection(x_adv[rows] - x_init[rows], eps, self.norm)
            x_adv[rows] = x_init[rows] + delta

    return x_adv
def _compute(self, x, y, eps, eps_step, random_init):
    """
    Perform one attack step on `x`, batch by batch.

    :param x: Input samples; they are copied (cast to `NUMPY_DTYPE`) before being modified.
    :param y: Labels, sliced per batch and passed to `_compute_perturbation`.
    :param eps: Radius of the random start drawn by `random_sphere`.
    :param eps_step: Step size passed to `_apply_perturbation`.
    :param random_init: If true, start from `x` plus a random point drawn by `random_sphere`.
    :return: The updated samples, always of dtype `NUMPY_DTYPE`.
    """
    if random_init:
        n = x.shape[0]
        m = np.prod(x.shape[1:])
        # Cast the random start to NUMPY_DTYPE before adding it. Without the cast, a wider dtype returned by
        # `random_sphere` would promote the sum, and the result dtype would differ from the non-random branch.
        random_perturbation = random_sphere(n, m, eps, self.norm).reshape(x.shape).astype(NUMPY_DTYPE)
        adv_x = x.astype(NUMPY_DTYPE) + random_perturbation

        if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
            clip_min, clip_max = self.classifier.clip_values
            adv_x = np.clip(adv_x, clip_min, clip_max)
    else:
        adv_x = x.astype(NUMPY_DTYPE)

    # Compute perturbation with implicit batching
    for batch_id in range(int(np.ceil(adv_x.shape[0] / float(self.batch_size)))):
        batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
        batch = adv_x[batch_index_1:batch_index_2]
        batch_labels = y[batch_index_1:batch_index_2]

        # Get perturbation
        perturbation = self._compute_perturbation(batch, batch_labels)

        # Apply perturbation and clip
        adv_x[batch_index_1:batch_index_2] = self._apply_perturbation(batch, perturbation, eps_step)

    return adv_x
def _compute(self, x, y, eps, random_init):
    """
    Perform one attack step on a copy of `x`, batch by batch.

    :param x: Input samples; a copy is modified, `x` itself is left untouched.
    :param y: Labels, sliced per batch and passed to `_compute_perturbation`.
    :param eps: Radius of the random start and step size passed to `_apply_perturbation`.
    :param random_init: If true, start from `x` plus a random point drawn by `random_sphere`.
    :return: The updated samples.
    """
    adv_x = x.copy()

    if random_init:
        nb_samples = x.shape[0]
        nb_features = np.prod(x.shape[1:])
        noise = random_sphere(nb_samples, nb_features, eps, self.norm)
        adv_x = adv_x + noise.reshape(x.shape)

    # Compute perturbation with implicit batching
    nb_batches = int(np.ceil(adv_x.shape[0] / float(self.batch_size)))
    for batch_id in range(nb_batches):
        rows = slice(batch_id * self.batch_size, (batch_id + 1) * self.batch_size)
        batch = adv_x[rows]
        batch_labels = y[rows]

        # Get perturbation, then apply it and clip
        step = self._compute_perturbation(batch, batch_labels)
        adv_x[rows] = self._apply_perturbation(batch, step, eps)

    return adv_x
def _compute(
    self,
    x: np.ndarray,
    x_init: np.ndarray,
    y: np.ndarray,
    mask: Optional[np.ndarray],
    eps: float,
    eps_step: float,
    project: bool,
    random_init: bool,
) -> np.ndarray:
    """
    Perform one attack step on `x`, batch by batch, optionally restricted by a mask.

    :param x: Current samples; they are copied (cast to `ART_NUMPY_DTYPE`) before being modified.
    :param x_init: Reference samples used as the centre of the projection when `project` is set.
    :param y: Labels, sliced per batch and passed to `_compute_perturbation`.
    :param mask: Optional mask multiplied into the random start and forwarded to `_compute_perturbation`. A mask
                 with as many dimensions as `x` is sliced per batch; any other mask is passed unchanged.
    :param eps: Radius used for the random start and for the projection.
    :param eps_step: Step size passed to `_apply_perturbation`.
    :param project: If true, project `x_adv - x_init` with `projection(..., eps, self.norm)` after each batch.
    :param random_init: If true, start from `x` plus a random point drawn by `random_sphere`.
    :return: The updated samples.
    """
    x_adv = x.astype(ART_NUMPY_DTYPE)

    if random_init:
        nb_samples = x.shape[0]
        nb_features = np.prod(x.shape[1:]).item()
        noise = random_sphere(nb_samples, nb_features, eps, self.norm)
        noise = noise.reshape(x.shape).astype(ART_NUMPY_DTYPE)
        if mask is not None:
            noise = noise * (mask.astype(ART_NUMPY_DTYPE))
        x_adv = x_adv + noise

        clip_values = self.estimator.clip_values
        if clip_values is not None:
            clip_min, clip_max = clip_values
            x_adv = np.clip(x_adv, clip_min, clip_max)

    # A mask with one entry per input must be indexed per batch; a broadcastable mask is kept as it is.
    mask_is_per_sample = mask is not None and len(mask.shape) == len(x.shape)

    # Compute perturbation with implicit batching
    nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
    for batch_id in range(nb_batches):
        rows = slice(batch_id * self.batch_size, (batch_id + 1) * self.batch_size)
        batch = x_adv[rows]
        batch_labels = y[rows]
        mask_batch = mask[rows] if mask_is_per_sample else mask

        # Get perturbation, then apply it and clip
        step = self._compute_perturbation(batch, batch_labels, mask_batch)
        x_adv[rows] = self._apply_perturbation(batch, step, eps_step)

        if project:
            delta = projection(x_adv[rows] - x_init[rows], eps, self.norm)
            x_adv[rows] = x_init[rows] + delta

    return x_adv
def clever_t(
    classifier: "CLASSIFIER_CLASS_LOSS_GRADIENTS_TYPE",
    x: np.ndarray,
    target_class: int,
    nb_batches: int,
    batch_size: int,
    radius: float,
    norm: int,
    c_init: float = 1.0,
    pool_factor: int = 10,
) -> float:
    """
    Compute CLEVER score for a targeted attack.

    | Paper link: https://arxiv.org/abs/1801.10578

    :param classifier: A trained model.
    :param x: One input sample.
    :param target_class: Targeted class.
    :param nb_batches: Number of repetitions of the estimate.
    :param batch_size: Number of random examples to sample per batch.
    :param radius: Radius of the maximum perturbation.
    :param norm: Current support: 1, 2, np.inf.
    :param c_init: Initialization of Weibull distribution.
    :param pool_factor: The factor to create a pool of random samples with size pool_factor x n_s. Must be at
                        least 1.
    :return: CLEVER score.
    :raises ValueError: If `target_class` is the predicted class, if `pool_factor` is smaller than 1, or if
                        `norm` is not one of 1, 2, np.inf.
    :raises Exception: If the classifier returns NaN gradients.
    """
    # Check if the targeted class is different from the predicted class
    y_pred = classifier.predict(np.array([x]))
    pred_class = np.argmax(y_pred, axis=1)[0]
    if target_class == pred_class:
        raise ValueError("The targeted class is the predicted class.")

    # Check if pool_factor is smaller than 1 (a value of exactly 1 is accepted)
    if pool_factor < 1:
        raise ValueError("The `pool_factor` must be at least 1.")

    # Some auxiliary vars
    rand_pool_grad_set = []
    grad_norm_set = []
    dim = reduce(lambda x_, y: x_ * y, x.shape, 1)
    shape = [pool_factor * batch_size]
    shape.extend(x.shape)

    # Generate a pool of samples around x
    rand_pool = np.reshape(
        random_sphere(nb_points=pool_factor * batch_size, nb_dims=dim, radius=radius, norm=norm),
        shape,
    )
    rand_pool += np.repeat(np.array([x]), pool_factor * batch_size, 0)
    rand_pool = rand_pool.astype(ART_NUMPY_DTYPE)
    if hasattr(classifier, "clip_values") and classifier.clip_values is not None:
        np.clip(rand_pool, classifier.clip_values[0], classifier.clip_values[1], out=rand_pool)

    # Change norm since q = p / (p-1)
    if norm == 1:
        norm = np.inf
    elif norm == np.inf:
        norm = 1
    elif norm != 2:
        raise ValueError("Norm {} not supported".format(norm))

    # Compute gradients for all samples in rand_pool, `pool_factor` samples at a time
    for i in range(batch_size):
        rand_pool_batch = rand_pool[i * pool_factor:(i + 1) * pool_factor]

        # Compute gradients
        grad_pred_class = classifier.class_gradient(rand_pool_batch, label=pred_class)
        grad_target_class = classifier.class_gradient(rand_pool_batch, label=target_class)

        if np.isnan(grad_pred_class).any() or np.isnan(grad_target_class).any():
            raise Exception("The classifier results NaN gradients.")

        grad = grad_pred_class - grad_target_class
        grad = np.reshape(grad, (pool_factor, -1))
        grad = np.linalg.norm(grad, ord=norm, axis=1)
        rand_pool_grad_set.extend(grad)

    rand_pool_grads = np.array(rand_pool_grad_set)

    # Loop over the batches
    for _ in range(nb_batches):
        # Random selection of gradients; keep the largest norm of each draw
        grad_norm = rand_pool_grads[np.random.choice(pool_factor * batch_size, batch_size)]
        grad_norm = np.max(grad_norm)
        grad_norm_set.append(grad_norm)

    # Maximum likelihood estimation for max gradient norms
    [_, loc, _] = weibull_min.fit(-np.array(grad_norm_set), c_init, optimizer=scipy_optimizer)

    # Compute function value
    values = classifier.predict(np.array([x]))
    value = values[:, pred_class] - values[:, target_class]

    # Compute scores; the score never exceeds the sampling radius
    score = np.min([-value[0] / loc, radius])

    return score
def _batch_process(self, x_batch: np.ndarray, y_batch: np.ndarray, l_r: float) -> Tuple[float, float, float]:
    """
    Perform the operations of FBF for a batch of data: perturb the batch with a random L-infinity start followed
    by one signed-gradient step, then run a single optimiser step of the wrapped classifier on the perturbed
    batch. See class documentation for more information on the exact procedure.

    :param x_batch: batch of x.
    :param y_batch: batch of y.
    :param l_r: learning rate for the optimisation step.
    :return: Tuple of the batch loss multiplied by the batch size, the number of correctly predicted samples and
             the batch size.
    :raises ValueError: If the classifier has no optimizer.
    """
    import torch

    if self._classifier._optimizer is None:  # pylint: disable=W0212
        raise ValueError("Optimizer of classifier is currently None, but is required for adversarial training.")

    # Random start inside the L-infinity ball of radius eps
    nb_samples = x_batch.shape[0]
    nb_features = np.prod(x_batch.shape[1:]).item()
    noise = random_sphere(nb_samples, nb_features, self._eps, np.inf)
    delta = noise.reshape(x_batch.shape).astype(ART_NUMPY_DTYPE)

    # One signed-gradient step of size 1.25 * eps, kept inside [-eps, +eps]
    grad_sign = np.sign(self._classifier.loss_gradient(x_batch + delta, y_batch))
    delta = np.clip(delta + 1.25 * self._eps * grad_sign, -self._eps, +self._eps)

    x_batch_pert = x_batch + delta
    clip_values = self._classifier.clip_values
    if clip_values is not None:
        x_batch_pert = np.clip(x_batch_pert, clip_values[0], clip_values[1])

    # Apply preprocessing
    x_preprocessed, y_preprocessed = self._classifier._apply_preprocessing(  # pylint: disable=W0212
        x_batch_pert, y_batch, fit=True
    )

    # Check label shape
    if self._classifier._reduce_labels:  # pylint: disable=W0212
        y_preprocessed = np.argmax(y_preprocessed, axis=1)

    device = self._classifier._device  # pylint: disable=W0212
    i_batch = torch.from_numpy(x_preprocessed).to(device)
    o_batch = torch.from_numpy(y_preprocessed).to(device)

    optimizer = self._classifier._optimizer  # pylint: disable=W0212

    # Zero the parameter gradients
    optimizer.zero_grad()

    # Perform prediction and form the loss on the last model output
    model_outputs = self._classifier._model(i_batch)  # pylint: disable=W0212
    loss = self._classifier._loss(model_outputs[-1], o_batch)  # pylint: disable=W0212

    optimizer.param_groups[0].update(lr=l_r)

    # Actual training
    if not self._use_amp:
        loss.backward()
    else:
        import apex.amp as amp  # pylint: disable=E0611

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

    # clip the gradients
    torch.nn.utils.clip_grad_norm_(self._classifier._model.parameters(), 0.5)  # pylint: disable=W0212
    optimizer.step()

    batch_count = o_batch.size(0)
    train_loss = loss.item() * batch_count
    train_acc = (model_outputs[0].max(1)[1] == o_batch).sum().item()

    return train_loss, train_acc, batch_count
def clever_t(classifier, x, target_class, nb_batches, batch_size, radius, norm, c_init=1, pool_factor=10):
    """
    Compute CLEVER score for a targeted attack. Paper link: https://arxiv.org/abs/1801.10578

    :param classifier: A trained model
    :type classifier: :class:`.Classifier`
    :param x: One input sample
    :type x: `np.ndarray`
    :param target_class: Targeted class
    :type target_class: `int`
    :param nb_batches: Number of repetitions of the estimate
    :type nb_batches: `int`
    :param batch_size: Number of random examples to sample per batch
    :type batch_size: `int`
    :param radius: Radius of the maximum perturbation
    :type radius: `float`
    :param norm: Current support: 1, 2, np.inf
    :type norm: `int`
    :param c_init: Initialization of Weibull distribution
    :type c_init: `float`
    :param pool_factor: The factor to create a pool of random samples with size pool_factor x n_s; must be at
                        least 1
    :type pool_factor: `int`
    :return: CLEVER score
    :rtype: `float`
    :raises ValueError: If `target_class` is the predicted class, if `pool_factor` is smaller than 1, or if
                        `norm` is not one of 1, 2, np.inf.
    :raises Exception: If the classifier returns NaN gradients.
    """
    # Check if the targeted class is different from the predicted class
    y_pred = classifier.predict(np.array([x]), logits=True)
    pred_class = np.argmax(y_pred, axis=1)[0]
    if target_class == pred_class:
        raise ValueError("The targeted class is the predicted class.")

    # Check if pool_factor is smaller than 1 (a value of exactly 1 is accepted)
    if pool_factor < 1:
        raise ValueError("The `pool_factor` must be at least 1.")

    # Some auxiliary vars
    grad_norm_set = []
    dim = reduce(lambda x_, y: x_ * y, x.shape, 1)
    shape = [pool_factor * batch_size]
    shape.extend(x.shape)

    # Generate a pool of samples around x
    rand_pool = np.reshape(
        random_sphere(nb_points=pool_factor * batch_size, nb_dims=dim, radius=radius, norm=norm), shape
    )
    rand_pool += np.repeat(np.array([x]), pool_factor * batch_size, 0)
    rand_pool = rand_pool.astype(NUMPY_DTYPE)
    # Only clip when the classifier declares a valid input range; indexing a missing range would fail
    if hasattr(classifier, "clip_values") and classifier.clip_values is not None:
        np.clip(rand_pool, classifier.clip_values[0], classifier.clip_values[1], out=rand_pool)

    # Change norm since q = p / (p-1)
    if norm == 1:
        norm = np.inf
    elif norm == np.inf:
        norm = 1
    elif norm != 2:
        raise ValueError("Norm {} not supported".format(norm))

    # Loop over the batches
    for _ in range(nb_batches):
        # Random generation of data points
        sample_xs = rand_pool[np.random.choice(pool_factor * batch_size, batch_size)]

        # Compute gradients
        grads = classifier.class_gradient(sample_xs, logits=True)
        if np.isnan(grads).any():
            raise Exception("The classifier results NaN gradients.")

        grad = grads[:, pred_class] - grads[:, target_class]
        grad = np.reshape(grad, (batch_size, -1))
        grad_norm = np.max(np.linalg.norm(grad, ord=norm, axis=1))
        grad_norm_set.append(grad_norm)

    # Maximum likelihood estimation for max gradient norms
    [_, loc, _] = weibull_min.fit(-np.array(grad_norm_set), c_init, optimizer=scipy_optimizer)

    # Compute function value
    values = classifier.predict(np.array([x]), logits=True)
    value = values[:, pred_class] - values[:, target_class]

    # Compute scores; the score never exceeds the sampling radius
    s = np.min([-value[0] / loc, radius])

    return s
def main(args):
    """
    Search for a single perturbation that is added to every training image, using random local modifications
    accepted with a temperature-controlled probability, then report and plot the best perturbation found.

    :param args: Parsed command-line arguments providing `mode`, `eps`, `norm`, `p_mod` and `temperature`.
    """
    mode = args.mode
    eps = args.eps
    norm = norm_list.get(args.norm)
    p_mod = args.p_mod
    temperature = args.temperature

    (x_train, y_train), (x_test, y_test), min_, max_ = load_data()
    x_train = x_train[:500]
    y_train = y_train[:500]

    model = VGG('VGG16')
    model.load_state_dict(torch.load("./logs/pytorch_vgg16.model"))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(min_, max_),
        loss=criterion,
        optimizer=optimizer,
        input_shape=(3, 32, 32),
        nb_classes=10,
    )

    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
    print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

    x = x_train.copy().astype(np.float32)
    nb_instances = len(x)
    pred_y = classifier.predict(x)

    nb_dims = x.shape[1] * x.shape[2] * x.shape[3]
    # Shape of one perturbation, derived from the data instead of being hard-coded
    pert_shape = (1, x.shape[1], x.shape[2], x.shape[3])

    # Number of pixels to modify per proposal
    n_mod = int(round(nb_dims * p_mod))

    v = random_sphere(nb_points=1, nb_dims=nb_dims, radius=eps, norm=norm)
    v = v.reshape(pert_shape)
    v = np.array(v, dtype='float32')
    v_max = v.copy()
    current_y = classifier.predict(x + v)
    fooling_rate = loss_fn(pred_y, current_y, mode=mode)
    fooling_rate_max = fooling_rate

    # Go through the data set and compute the perturbation increments sequentially
    for j in range(nb_instances * 1000):
        v_ = v.reshape(nb_dims).copy()

        # Indices of the pixels to modify
        idx = random.sample(range(len(v_)), n_mod)

        # Proposal: add random numbers drawn from a normal distribution.
        # Alternatives that were tried and left disabled:
        #   replace with normal noise:
        #     v_[idx] = np.random.normal(loc=0.0, scale=np.std(v_), size=n_mod)
        #   add uniform noise:
        #     v_[idx] = v_[idx] + np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)
        #   replace with uniform noise:
        #     v_[idx] = np.random.uniform(low=v_.min(), high=v_.max(), size=n_mod)
        v_[idx] = v_[idx] + np.random.normal(loc=0.0, scale=np.std(v_), size=n_mod)

        # Project so that the perturbation stays within the norm bound
        v_ = projection(v_.reshape(pert_shape), eps, norm)

        # fooling rate
        current_y = classifier.predict(x + v_)
        fooling_rate_ = loss_fn(pred_y, current_y, mode=mode)

        # Acceptance decision.
        # NOTE(review): the candidate is compared against the best rate so far (`fooling_rate_max`), not the
        # rate of the current state (`fooling_rate`) — confirm this is intended.
        if random.random() < np.exp((fooling_rate_ - fooling_rate_max) / (temperature + 1e-10)):
            print(j, fooling_rate_, fooling_rate_max, temperature, np.linalg.norm(v_))
            v = v_.copy()
            if fooling_rate_max < fooling_rate_:
                fooling_rate_max = fooling_rate_
                v_max = v_.copy()
            fooling_rate = fooling_rate_

        # NOTE(review): cooling is applied on every iteration here; the original layout was lost, so confirm it
        # was not meant to happen only on accepted moves.
        temperature = 0.99 * temperature

    # Compute fooling rate
    adv_x = x + v_max
    plot_image(v_max[0], 'sample1.png')
    plot_image(adv_x[1], 'sample2.png')
    plot_image(x[1], 'sample3.png')
    adv_y = classifier.predict(adv_x)
    fooling_rate = loss_fn(pred_y, adv_y, mode="fool_rate")
    print(fooling_rate)
default='../COVID-Net/test_split_v3.txt', type=str, help='Path to testfile')
parser.add_argument('--norm', type=str, default='inf')
parser.add_argument('--eps', type=float, default=0.02)
args = parser.parse_args()

# Data splits, mean norms of the training set, the norm/eps to use and the wrapped classifier all come from
# `set_up`.
(x_train, y_train), (x_test, y_test), (mean_l2_train, mean_inf_train), norm, eps, classifier = set_up(args)

# # Generate adversarial examples
# Draw one random single-channel 224x224 perturbation of radius `eps`.
noise_rand = random_sphere(nb_points=1, nb_dims=(224 * 224 * 1), radius=eps, norm=norm)
noise_rand = noise_rand.reshape(224, 224, 1).astype('float32')
# Replicate the single channel three times along the last axis -> shape (224, 224, 3).
noise_rand = np.concatenate((noise_rand, ) * 3, axis=-1)
noise_rand = noise_rand.astype(np.float32)
np.save('output/random_uap', noise_rand)

# # Evaluate the ART classifier on adversarial examples
# Predictions on the clean data first, then build the randomly perturbed train/test sets.
acc_train, preds_train = get_preds(classifier, x_train, y_train)
acc_test, preds_test = get_preds(classifier, x_test, y_test)
x_train_adv_rand = x_train + noise_rand
x_test_adv_rand = x_test + noise_rand
def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
    """
    Generate adversarial samples that are classified as the attacker wants and are not flagged by the detector,
    and return them in an array.

    :param x: An array with the original inputs.
    :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
              (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
              samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
              (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
    :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                 Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                 features for which the mask is zero will not be adversarially perturbed.
    :type mask: `np.ndarray`
    :return: An array holding the adversarial examples.
    :raises ValueError: If the attack is targeted and no labels `y` are provided.
    """
    mask = kwargs.get("mask")

    y = check_and_transform_label_format(y, self.estimator.nb_classes)

    if y is None:
        if self.targeted:
            raise ValueError("Target labels `y` need to be provided for a targeted attack.")
        y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size)).astype(np.int32)

    x_adv = x.astype(ART_NUMPY_DTYPE)

    for _ in trange(max(1, self.nb_random_init), desc="AutoPGD - restart", disable=not self.verbose):
        # Get classifier scores
        estimator_y_scores = self.estimator.predict(x_adv)

        # Get the detector prediction (1 means the sample is predicted as malicious, 0 as benign).
        detector_scores = self.detector.predict(x_adv)
        detector_pred = np.argmax(detector_scores, axis=1)

        # sample_is_robust is False if the sample is classified as the attacker wants, True otherwise
        if self.targeted:
            sample_is_robust = np.argmax(estimator_y_scores, axis=1) != np.argmax(y, axis=1)
        else:
            sample_is_robust = np.argmax(estimator_y_scores, axis=1) == np.argmax(y, axis=1)

        # A sample still needs work if it is not yet classified as the attacker wants OR it is detected as an
        # adversarial example by the detector (any non-zero detector class counts as detected).
        sample_is_robust = np.logical_or(sample_is_robust, detector_pred)

        # Stop the attack if all samples are classified as the attacker wants and are predicted as benign by
        # the detector.
        if np.sum(sample_is_robust) == 0:
            break

        x_robust = x_adv[sample_is_robust]
        y_robust = y[sample_is_robust]
        x_init = x[sample_is_robust]

        n = x_robust.shape[0]
        m = np.prod(x_robust.shape[1:]).item()
        random_perturbation = (
            random_sphere(n, m, self.eps, self.norm).reshape(x_robust.shape).astype(ART_NUMPY_DTYPE)
        )

        # Only a tenth of the sampled random start is applied (a divisor of 100.0 was noted as an alternative).
        x_robust = x_robust + random_perturbation / 10

        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
            x_robust = np.clip(x_robust, clip_min, clip_max)

        perturbation = projection(x_robust - x_init, self.eps, self.norm)
        x_robust = x_init + perturbation

        # Compute perturbation with implicit batching
        for batch_id in trange(
            int(np.ceil(x_robust.shape[0] / float(self.batch_size))),
            desc="AutoPGD - batch",
            leave=False,
            disable=not self.verbose,
        ):
            self.eta = 2 * self.eps_step
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            x_k = x_robust[batch_index_1:batch_index_2].astype(ART_NUMPY_DTYPE)
            x_init_batch = x_init[batch_index_1:batch_index_2].astype(ART_NUMPY_DTYPE)
            y_batch = y_robust[batch_index_1:batch_index_2]

            # Build the checkpoint schedule: fractions of `max_iter` at which the step size is re-evaluated.
            p_0 = 0
            p_1 = 0.22
            W = [p_0, p_1]

            while True:
                p_j_p_1 = W[-1] + max(W[-1] - W[-2] - 0.03, 0.06)
                if p_j_p_1 > 1:
                    break
                W.append(p_j_p_1)

            W = [math.ceil(p * self.max_iter) for p in W]

            eta = self.eps_step
            self.count_condition_1 = 0

            for k_iter in trange(self.max_iter, desc="AutoPGD - iteration", leave=False, disable=not self.verbose):
                # Get loss gradient wrt input; invert it if attack is targeted
                grad = self._cmpt_grad(x_k, y_batch)

                assert x_k.shape == grad.shape

                perturbation = grad

                if mask is not None:
                    perturbation = perturbation * (mask.astype(ART_NUMPY_DTYPE))

                # Apply perturbation and clip
                z_k_p_1 = x_k + eta * perturbation

                if self.estimator.clip_values is not None:
                    clip_min, clip_max = self.estimator.clip_values
                    z_k_p_1 = np.clip(z_k_p_1, clip_min, clip_max)

                if k_iter == 0:
                    # First iteration: plain projected step, and initialise the best-so-far bookkeeping.
                    x_1 = z_k_p_1
                    perturbation = projection(x_1 - x_init_batch, self.eps, self.norm)
                    x_1 = x_init_batch + perturbation

                    f_0 = self._cmpt_loss_func(x_k, y_batch)
                    f_1 = self._cmpt_loss_func(x_1, y_batch)

                    self.eta_w_j_m_1 = eta
                    self.f_max_w_j_m_1 = f_0

                    if f_1 >= f_0:
                        self.f_max = f_1
                        self.x_max = x_1
                        self.x_max_m_1 = x_init_batch
                        self.count_condition_1 += 1
                    else:
                        self.f_max = f_0
                        self.x_max = x_k.copy()
                        self.x_max_m_1 = x_init_batch

                    # Settings for next iteration k
                    x_k_m_1 = x_k.copy()
                    x_k = x_1

                else:
                    perturbation = projection(z_k_p_1 - x_init_batch, self.eps, self.norm)
                    z_k_p_1 = x_init_batch + perturbation

                    # Combine the projected step with a term based on the previous iterate
                    alpha = 0.75

                    x_k_p_1 = x_k + alpha * (z_k_p_1 - x_k) + (1 - alpha) * (x_k - x_k_m_1)

                    if self.estimator.clip_values is not None:
                        clip_min, clip_max = self.estimator.clip_values
                        x_k_p_1 = np.clip(x_k_p_1, clip_min, clip_max)

                    perturbation = projection(x_k_p_1 - x_init_batch, self.eps, self.norm)
                    x_k_p_1 = x_init_batch + perturbation

                    f_k_p_1 = self._cmpt_loss_func(x_k_p_1, y_batch)

                    if f_k_p_1 > self.f_max:
                        self.count_condition_1 += 1
                        self.x_max = x_k_p_1
                        self.x_max_m_1 = x_k
                        self.f_max = f_k_p_1

                    if k_iter in W:
                        # Checkpoint: halve the step and restart from the best point if too few steps improved
                        # the objective, or if neither step size nor best value changed since the last
                        # checkpoint.
                        rho = 0.75

                        condition_1 = self.count_condition_1 < rho * (k_iter - W[W.index(k_iter) - 1])
                        condition_2 = self.eta_w_j_m_1 == eta and self.f_max_w_j_m_1 == self.f_max

                        if condition_1 or condition_2:
                            eta = eta / 2
                            x_k_m_1 = self.x_max_m_1
                            x_k = self.x_max
                        else:
                            x_k_m_1 = x_k
                            x_k = x_k_p_1.copy()

                        self.count_condition_1 = 0
                        self.eta_w_j_m_1 = eta
                        self.f_max_w_j_m_1 = self.f_max

                    else:
                        x_k_m_1 = x_k
                        x_k = x_k_p_1.copy()

                # At the end of the iteration apply the detector's clips.
                # NOTE(review): the original layout was lost; this is placed inside the iteration loop based on
                # the wording of the original comment — confirm it was not meant to run once after the loop.
                if self.detector_clip_fun is not None:
                    x_k = self.detector_clip_fun(x_k, self.estimator)

            y_pred_adv_k = self.estimator.predict(x_k)

            # Get the detector prediction.
            detector_scores_k = self.detector.predict(x_k)
            detector_pred_k = np.argmax(detector_scores_k, axis=1)

            # True where the detector predicts class 0 (benign). A bitwise `np.invert` must not be used here:
            # on the integer argmax output it maps 0 -> -1 and 1 -> -2, both truthy, so the detector would
            # never veto a sample.
            not_detected_k = detector_pred_k == 0

            # sample_is_not_robust_k is True if the sample is classified as the attacker wants, False otherwise
            if self.targeted:
                sample_is_not_robust_k = np.argmax(y_pred_adv_k, axis=1) == np.argmax(y_batch, axis=1)
            else:
                sample_is_not_robust_k = np.argmax(y_pred_adv_k, axis=1) != np.argmax(y_batch, axis=1)

            # To count as successful a sample must be classified as the attacker wants AND must not be flagged
            # as malicious by the detector.
            sample_is_not_robust_k = np.logical_and(sample_is_not_robust_k, not_detected_k)

            x_robust[batch_index_1:batch_index_2][sample_is_not_robust_k] = x_k[sample_is_not_robust_k]

        x_adv[sample_is_robust] = x_robust

    # NOTE(review): the final detector clip is applied once, after all restarts; the original layout was lost,
    # so confirm it was not meant to run at the end of every restart.
    if self.detector_clip_fun is not None:
        x_adv = self.detector_clip_fun(x_adv, self.estimator)

    return x_adv
# Apply the crafted perturbation `noise` to every train/test sample and measure how often the classifier
# outputs the class `target`.
x_train_adv = x_train + noise
x_test_adv = x_test + noise
preds_train_adv = np.argmax(classifier.predict(x_train_adv), axis=1)
preds_test_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
targeted_success_rate_train = np.sum(preds_train_adv == target) / len(x_train)
targeted_success_rate_test = np.sum(preds_test_adv == target) / len(x_test)
print('targeted_success_rate_train: {:.1f} %'.format(targeted_success_rate_train*100))
print('targeted_success_rate_test: {:.1f} %'.format(targeted_success_rate_test*100))
np.save('noise.npy', noise)

# random noise result
# Baseline: a random L2 perturbation of radius `norm2_ori`, evaluated the same way as `noise` above.
print('=== Random Noise ===')
rescaled_noise_rand = random_sphere(nb_points=1,nb_dims=(32*32*3),radius=norm2_ori,norm=2)
rescaled_noise_rand = rescaled_noise_rand.reshape(32,32,3)
noise_rand = rescaled_noise_rand.copy()
# Scale each of the three channels by its entry in `channel_std`.
for i in range(3):
    noise_rand[:, :, i] = noise_rand[:, :, i] * channel_std[i]
norm2_rand = np.linalg.norm(noise_rand.flatten(), ord=2)
normInf_rand = np.abs(noise_rand.flatten()).max()
# Report the L2 norm of the random noise as a percentage of `norm2_mean`.
print('norm2_rand: {:.1f} %'.format(norm2_rand/norm2_mean*100))
x_train_adv_rand = x_train + noise_rand
x_test_adv_rand = x_test + noise_rand
preds_train_adv_rand = np.argmax(classifier.predict(x_train_adv_rand), axis=1)
preds_test_adv_rand = np.argmax(classifier.predict(x_test_adv_rand), axis=1)
targeted_success_rate_train_rand = np.sum(preds_train_adv_rand == target) / len(x_train)
targeted_success_rate_test_rand = np.sum(preds_test_adv_rand == target) / len(x_test)
raise ValueError('limit_first_n_inner is greater than number of samples per cycle') print(f"Only takes into account the first {args.limit_first_n_inner} samples per cycle.") for i, cycle_list in enumerate(path_cycles): path_cycles[i] = cycle_list[0:args.limit_first_n_inner] # shuffle cycles if args.shuffle_outer: shuffle(path_cycles) # random init X_adv_tmp = X.copy() if args.n_random_init_outer == 1: n = X_adv_tmp.shape[0] m = np.prod(X_adv_tmp.shape[1:]) X_adv_tmp = X_adv_tmp + ( random_sphere(n, m, radius=args.max_norm, norm=args.norm).reshape(X_adv_tmp.shape).astype(ART_NUMPY_DTYPE) ) X_adv_tmp = np.clip(X_adv_tmp, data.min_pixel_value, data.max_pixel_value) elif args.n_random_init_outer > 1: raise NotImplementedError("Multiple random restarts not implemented yet. Try 0 or 1.") if args.ensemble_outer > 1: raise NotImplementedError('Ensembling cycles not implemented yet.') # create stats df df_metrics = pd.DataFrame(columns=['outer_iter', 'inner_iter', 'acc_ensemble_prob', 'acc_ensemble_logit', 'norm_mean', 'norm_min', 'norm_max', 'time']) # compute benign acc acc_ens_prob, acc_ens_logit = compute_accuracy_ensemble(models_dir=args.dir_models, X=X, y=y, data=data) print(f"Accuracy on ensemble benign test examples: {acc_ens_prob*100:.3f}% (prob ens), {acc_ens_logit*100:.3f}% (logit ens)") lpnorm = compute_norm(X_adv=X_adv_tmp, X=X, norm=args.norm)
def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
    """
    Generate adversarial samples and return them in an array.

    Each restart perturbs only the samples that are not yet classified as the attacker wants: they receive a
    random start inside the `eps` ball and are then optimised batch by batch with a step size that is halved at
    precomputed checkpoint iterations.

    :param x: An array with the original inputs.
    :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of shape
              (nb_samples,). Only provide this parameter if you'd like to use true labels when crafting adversarial
              samples. Otherwise, model predictions are used as labels to avoid the "label leaking" effect
              (explained in this paper: https://arxiv.org/abs/1611.01236). Default is `None`.
    :param mask: An array with a mask broadcastable to input `x` defining where to apply adversarial perturbations.
                 Shape needs to be broadcastable to the shape of x and can also be of the same shape as `x`. Any
                 features for which the mask is zero will not be adversarially perturbed.
    :type mask: `np.ndarray`
    :return: An array holding the adversarial examples.
    :raises ValueError: If the attack is targeted and no labels `y` are provided.
    """
    mask = kwargs.get("mask")

    y = check_and_transform_label_format(y, self.estimator.nb_classes)

    if y is None:
        if self.targeted:
            raise ValueError("Target labels `y` need to be provided for a targeted attack.")
        # No labels given: fall back to the estimator's own predictions on the clean inputs
        y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size)).astype(np.int32)

    x_adv = x.astype(ART_NUMPY_DTYPE)

    for _ in trange(max(1, self.nb_random_init), desc="AutoPGD - restart", disable=not self.verbose):
        # Determine correctly predicted samples
        # (for a targeted attack: samples not yet classified as the target class)
        y_pred = self.estimator.predict(x_adv)
        if self.targeted:
            sample_is_robust = np.argmax(y_pred, axis=1) != np.argmax(y, axis=1)
        elif not self.targeted:
            sample_is_robust = np.argmax(y_pred, axis=1) == np.argmax(y, axis=1)

        # Nothing left to attack: every sample is already classified as the attacker wants
        if np.sum(sample_is_robust) == 0:
            break

        x_robust = x_adv[sample_is_robust]
        y_robust = y[sample_is_robust]
        x_init = x[sample_is_robust]

        # Random start for the remaining samples
        n = x_robust.shape[0]
        m = np.prod(x_robust.shape[1:]).item()
        random_perturbation = (
            random_sphere(n, m, self.eps, self.norm).reshape(x_robust.shape).astype(ART_NUMPY_DTYPE)
        )

        x_robust = x_robust + random_perturbation

        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
            x_robust = np.clip(x_robust, clip_min, clip_max)

        # Project the total perturbation (relative to the original inputs) back with `projection`
        perturbation = projection(x_robust - x_init, self.eps, self.norm)
        x_robust = x_init + perturbation

        # Compute perturbation with implicit batching
        for batch_id in trange(
            int(np.ceil(x_robust.shape[0] / float(self.batch_size))),
            desc="AutoPGD - batch",
            leave=False,
            disable=not self.verbose,
        ):
            self.eta = 2 * self.eps_step
            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
            x_k = x_robust[batch_index_1:batch_index_2].astype(ART_NUMPY_DTYPE)
            x_init_batch = x_init[batch_index_1:batch_index_2].astype(ART_NUMPY_DTYPE)
            y_batch = y_robust[batch_index_1:batch_index_2]

            # Build the checkpoint schedule W: fractions in [0, 1] that are converted below into iteration
            # indices at which the step size is re-evaluated.
            p_0 = 0
            p_1 = 0.22
            W = [p_0, p_1]

            while True:
                p_j_p_1 = W[-1] + max(W[-1] - W[-2] - 0.03, 0.06)
                if p_j_p_1 > 1:
                    break
                W.append(p_j_p_1)

            W = [math.ceil(p * self.max_iter) for p in W]

            eta = self.eps_step
            # Number of iterations since the last checkpoint that improved the best loss
            self.count_condition_1 = 0

            for k_iter in trange(self.max_iter, desc="AutoPGD - iteration", leave=False, disable=not self.verbose):
                # Get perturbation, use small scalar to avoid division by 0
                tol = 10e-8

                # Get gradient wrt loss; invert it if attack is targeted
                grad = self.estimator.loss_gradient(x_k, y_batch) * (1 - 2 * int(self.targeted))

                # Apply norm bound
                if self.norm in [np.inf, "inf"]:
                    grad = np.sign(grad)
                elif self.norm == 1:
                    ind = tuple(range(1, len(x_k.shape)))
                    grad = grad / (np.sum(np.abs(grad), axis=ind, keepdims=True) + tol)
                elif self.norm == 2:
                    ind = tuple(range(1, len(x_k.shape)))
                    grad = grad / (np.sqrt(np.sum(np.square(grad), axis=ind, keepdims=True)) + tol)
                assert x_k.shape == grad.shape

                perturbation = grad

                if mask is not None:
                    perturbation = perturbation * (mask.astype(ART_NUMPY_DTYPE))

                # Apply perturbation and clip
                z_k_p_1 = x_k + eta * perturbation

                if self.estimator.clip_values is not None:
                    clip_min, clip_max = self.estimator.clip_values
                    z_k_p_1 = np.clip(z_k_p_1, clip_min, clip_max)

                if k_iter == 0:
                    # First iteration: plain projected step, and initialise the best-so-far bookkeeping
                    x_1 = z_k_p_1
                    perturbation = projection(x_1 - x_init_batch, self.eps, self.norm)
                    x_1 = x_init_batch + perturbation

                    f_0 = self.estimator.compute_loss(x=x_k, y=y_batch, reduction="mean")
                    f_1 = self.estimator.compute_loss(x=x_1, y=y_batch, reduction="mean")

                    # Step size and best loss remembered from the previous checkpoint
                    self.eta_w_j_m_1 = eta
                    self.f_max_w_j_m_1 = f_0

                    if f_1 >= f_0:
                        self.f_max = f_1
                        self.x_max = x_1
                        self.x_max_m_1 = x_init_batch
                        self.count_condition_1 += 1
                    else:
                        self.f_max = f_0
                        self.x_max = x_k.copy()
                        self.x_max_m_1 = x_init_batch

                    # Settings for next iteration k
                    x_k_m_1 = x_k.copy()
                    x_k = x_1

                else:
                    perturbation = projection(z_k_p_1 - x_init_batch, self.eps, self.norm)
                    z_k_p_1 = x_init_batch + perturbation

                    # Blend the projected step with a term based on the previous iterate x_k_m_1
                    alpha = 0.75

                    x_k_p_1 = x_k + alpha * (z_k_p_1 - x_k) + (1 - alpha) * (x_k - x_k_m_1)

                    if self.estimator.clip_values is not None:
                        clip_min, clip_max = self.estimator.clip_values
                        x_k_p_1 = np.clip(x_k_p_1, clip_min, clip_max)

                    perturbation = projection(x_k_p_1 - x_init_batch, self.eps, self.norm)
                    x_k_p_1 = x_init_batch + perturbation

                    f_k_p_1 = self.estimator.compute_loss(x=x_k_p_1, y=y_batch, reduction="mean")

                    # A loss of exactly zero ends the iterations for this batch early
                    if f_k_p_1 == 0.0:
                        x_k = x_k_p_1.copy()
                        break

                    # Track the best point so far (larger loss when untargeted, smaller loss when targeted)
                    if (not self.targeted and f_k_p_1 > self.f_max) or (self.targeted and f_k_p_1 < self.f_max):
                        self.count_condition_1 += 1
                        self.x_max = x_k_p_1
                        self.x_max_m_1 = x_k
                        self.f_max = f_k_p_1

                    if k_iter in W:
                        # Checkpoint: halve the step size and restart from the best point if fewer than a
                        # fraction rho of the iterations since the previous checkpoint improved the loss, or if
                        # neither step size nor best loss changed since the previous checkpoint.
                        rho = 0.75

                        condition_1 = self.count_condition_1 < rho * (k_iter - W[W.index(k_iter) - 1])
                        condition_2 = self.eta_w_j_m_1 == eta and self.f_max_w_j_m_1 == self.f_max

                        if condition_1 or condition_2:
                            eta = eta / 2
                            x_k_m_1 = self.x_max_m_1
                            x_k = self.x_max
                        else:
                            x_k_m_1 = x_k
                            x_k = x_k_p_1.copy()

                        self.count_condition_1 = 0
                        self.eta_w_j_m_1 = eta
                        self.f_max_w_j_m_1 = self.f_max

                    else:
                        x_k_m_1 = x_k
                        x_k = x_k_p_1.copy()

            # Keep the final iterate only for the samples that are now classified as the attacker wants
            y_pred_adv_k = self.estimator.predict(x_k)
            if self.targeted:
                sample_is_not_robust_k = np.invert(np.argmax(y_pred_adv_k, axis=1) != np.argmax(y_batch, axis=1))
            elif not self.targeted:
                sample_is_not_robust_k = np.invert(np.argmax(y_pred_adv_k, axis=1) == np.argmax(y_batch, axis=1))

            x_robust[batch_index_1:batch_index_2][sample_is_not_robust_k] = x_k[sample_is_not_robust_k]

        # Write the attacked subset back into the full result array
        x_adv[sample_is_robust] = x_robust

    return x_adv
model = load_model( dataset=args.dataset, nb_class=y_train.shape[1], model_type=args.model, mode='inference' ) # # Generate adversarial examples classifier, norm, eps = set_art( model, args.norm, args.eps, mean_l2_train, mean_inf_train) h, w, c = X_train.shape[1], X_train.shape[2], X_train.shape[3] noise = random_sphere(nb_points=1, nb_dims=(h * w * c), radius=eps, norm=norm) noise = noise.reshape(h, w, c).astype('float32') base_f = 'random_{}_{}_eps{:.3f}'.format( args.model, args.norm, args.eps) save_f_noise = 'result/{}/noise/{}'.format(args.dataset, base_f) np.save(save_f_noise, noise) # # Evaluate the ART classifier on adversarial examples preds_train = np.argmax(classifier.predict(X_train), axis=1) preds_test = np.argmax(classifier.predict(X_test), axis=1) X_train_adv = X_train + noise X_test_adv = X_test + noise
def _compute_tf(
    self,
    x: "tf.Tensor",
    x_init: "tf.Tensor",
    y: "tf.Tensor",
    mask: "tf.Tensor",
    eps: float,
    eps_step: float,
    random_init: bool,
) -> "tf.Tensor":
    """
    Compute adversarial examples for one iteration.

    :param x: Current adversarial examples.
    :param x_init: An array with the original inputs; the perturbation that gets projected is measured
                   relative to it.
    :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices of
              shape (nb_samples,).
    :param mask: An array with a mask to be applied to the adversarial perturbations. Shape needs to be
                 broadcastable to the shape of x. Any features for which the mask is zero will not be
                 adversarially perturbed. May be `None`.
    :param eps: Maximum perturbation that the attacker can introduce.
    :param eps_step: Attack step size (input variation) at each iteration.
    :param random_init: Random initialisation within the epsilon ball. For random_init=False starting at the
                        original input.
    :return: Adversarial examples.
    """
    import tensorflow as tf  # lgtm [py/repeated-import]

    # Start from `x` itself unless a random start is requested
    x_adv = x
    if random_init:
        nb_samples = x.shape[0]
        nb_features = np.prod(x.shape[1:])

        # One random point per sample, reshaped to the layout of `x`
        noise = random_sphere(nb_samples, nb_features, eps, self.norm)
        noise = noise.reshape(x.shape).astype(ART_NUMPY_DTYPE)
        noise = tf.convert_to_tensor(noise)
        if mask is not None:
            noise = noise * mask

        x_adv = x + noise

        clip_values = self.estimator.clip_values
        if clip_values is not None:
            clip_min, clip_max = clip_values
            x_adv = tf.clip_by_value(x_adv, clip_min, clip_max)

    # Get the perturbation, then apply it (the helper also clips)
    step = self._compute_perturbation(x_adv, y, mask)
    x_adv = self._apply_perturbation(x_adv, step, eps_step)

    # Project the total perturbation w.r.t. `x_init` and rebuild the samples
    bounded = self._projection(x_adv - x_init, eps, self.norm)
    return tf.add(bounded, x_init)
def simulated_anniling(classifier, X, args):
    """Search for one perturbation shared by all inputs using simulated annealing.

    The search starts from a random perturbation drawn with ``random_sphere``.
    One candidate is evaluated per input sample (``len(X)`` iterations): Gaussian
    noise is added to a random subset of the current perturbation's entries, the
    result is projected with ``projection`` and scored with ``loss_fn`` against
    the classifier's predictions on the unperturbed inputs.  A candidate is
    accepted with probability ``exp((candidate_score - best_score) / temperature)``;
    the temperature starts at 100 and is multiplied by 0.99 after every iteration.

    :param classifier: Object exposing ``predict(x)``.
    :param X: Input samples; must have four dimensions (samples first).
    :param args: Namespace providing ``mode`` (forwarded to ``loss_fn``), ``eps``
        (perturbation radius), ``norm`` (looked up in ``norm_list``) and ``p_mod``
        (fraction of perturbation entries modified per iteration).
    :return: The best-scoring perturbation seen, of shape ``(1,) + X.shape[1:]``.
    """
    mode = args.mode
    eps = args.eps
    norm = norm_list.get(args.norm)
    p_mod = args.p_mod

    x = X.copy().astype(np.float32)
    pred_y = classifier.predict(x)

    # Shape of the perturbation: one sample with a leading batch axis.  It is
    # derived from the data so that any sample layout works, not only 3x32x32.
    pert_shape = (1, x.shape[1], x.shape[2], x.shape[3])
    nb_dims = x.shape[1] * x.shape[2] * x.shape[3]

    # Number of entries to modify per iteration
    n_mod = int(round(nb_dims * p_mod))

    v = random_sphere(nb_points=1, nb_dims=nb_dims, radius=eps, norm=norm)
    v = v.reshape(pert_shape)
    v = np.array(v, dtype='float32')
    v_max = v.copy()

    current_y = classifier.predict(x + v)
    fooling_rate = loss_fn(pred_y, current_y, mode=mode)
    fooling_rate_max = fooling_rate

    # Initial temperature
    temperature = 100

    # Go through the data set and compute the perturbation increments sequentially
    for j in range(len(x)):
        v_ = v.reshape(nb_dims).copy()

        # Indices of the entries to modify
        idx = random.sample(range(len(v_)), n_mod)

        # Add normally distributed noise scaled by the perturbation's own spread
        v_[idx] = v_[idx] + np.random.normal(
            loc=0.0, scale=np.std(v_), size=n_mod)

        # Project so that the perturbation stays within the requested radius
        v_ = projection(v_.reshape(pert_shape), eps, norm)

        # Fooling rate of the candidate
        current_y = classifier.predict(x + v_)
        fooling_rate_ = loss_fn(pred_y, current_y, mode=mode)

        # Acceptance test
        # NOTE(review): the candidate is compared against the best score so far,
        # not against the score of the current state; `fooling_rate` is tracked
        # below but never consulted. Confirm this is intended.
        if random.random() < np.exp(
                (fooling_rate_ - fooling_rate_max) / temperature):
            print(j, fooling_rate_, fooling_rate_max, temperature,
                  np.linalg.norm(v_))
            v = v_.copy()
            if fooling_rate_max < fooling_rate_:
                fooling_rate_max = fooling_rate_
                v_max = v_.copy()
            fooling_rate = fooling_rate_

        temperature = 0.99 * temperature

    # Release the large arrays before returning
    del pred_y
    del x
    gc.collect()

    print(f"foolig_rate = {fooling_rate_max}")
    return v_max
def _compute(
    self,
    x: np.ndarray,
    x_init: np.ndarray,
    y: np.ndarray,
    mask: Optional[np.ndarray],
    eps: Union[int, float, np.ndarray],
    eps_step: Union[int, float, np.ndarray],
    project: bool,
    random_init: bool,
    batch_id_ext: Optional[int] = None,
) -> np.ndarray:
    """
    Apply one perturbation step to `x`, processing the samples in batches of `self.batch_size`.

    :param x: Current samples to perturb.
    :param x_init: Reference samples; when projecting, the perturbation is measured relative to them.
    :param y: Labels, sliced along the first axis together with `x`.
    :param mask: Optional mask multiplied into the random start and forwarded to `_compute_perturbation`.
                 It is sliced per batch only when it has as many dimensions as `x`; otherwise it is passed
                 on unchanged.
    :param eps: Radius passed to `random_sphere` and `projection`.
    :param eps_step: Step size forwarded to `_apply_perturbation`.
    :param project: Whether to project `x_adv - x_init` with `projection` after each batch step.
    :param random_init: Whether to start from `x` plus a `random_sphere` draw instead of from `x`.
    :param batch_id_ext: If given, stored in `self._batch_id` for every batch instead of the internal
                         batch counter.
    :return: The perturbed samples.
    """
    if random_init:
        nb_samples = x.shape[0]
        nb_features = np.prod(x.shape[1:]).item()

        noise = random_sphere(nb_samples, nb_features, eps, self.norm)
        noise = noise.reshape(x.shape).astype(ART_NUMPY_DTYPE)
        if mask is not None:
            noise = noise * (mask.astype(ART_NUMPY_DTYPE))
        x_adv = x.astype(ART_NUMPY_DTYPE) + noise

        if self.estimator.clip_values is not None:
            clip_min, clip_max = self.estimator.clip_values
            x_adv = np.clip(x_adv, clip_min, clip_max)
    elif x.dtype == object:
        # Object arrays are copied as they are instead of being cast
        x_adv = x.copy()
    else:
        x_adv = x.astype(ART_NUMPY_DTYPE)

    # eps and eps_step are sliced per batch only when both are arrays and eps
    # has the rank of x and one entry per sample along the first axis
    per_sample_eps = (
        isinstance(eps, np.ndarray)
        and isinstance(eps_step, np.ndarray)
        and len(eps.shape) == len(x.shape)
        and eps.shape[0] == x.shape[0]
    )

    # Compute perturbation with implicit batching
    nb_batches = int(np.ceil(x.shape[0] / float(self.batch_size)))
    for batch_id in range(nb_batches):
        self._batch_id = batch_id if batch_id_ext is None else batch_id_ext

        start = batch_id * self.batch_size
        stop = min((batch_id + 1) * self.batch_size, x.shape[0])
        batch = x_adv[start:stop]
        batch_labels = y[start:stop]

        # A mask with the rank of x is indexed for the current batch; any
        # other mask is meant to be broadcast and is kept as it is
        if mask is not None and len(mask.shape) == len(x.shape):
            mask_batch = mask[start:stop]
        else:
            mask_batch = mask

        # Get perturbation
        perturbation = self._compute_perturbation(batch, batch_labels, mask_batch)

        if per_sample_eps:
            batch_eps = eps[start:stop]
            batch_eps_step = eps_step[start:stop]
        else:
            batch_eps, batch_eps_step = eps, eps_step

        # Apply perturbation and clip
        x_adv[start:stop] = self._apply_perturbation(batch, perturbation, batch_eps_step)

        if not project:
            continue

        if x_adv.dtype == object:
            # Object arrays are projected one sample at a time
            for i_sample in range(start, stop):
                if isinstance(batch_eps, np.ndarray) and batch_eps.shape[0] == x_adv.shape[0]:
                    sample_eps = batch_eps[i_sample]
                else:
                    sample_eps = batch_eps
                perturbation = projection(x_adv[i_sample] - x_init[i_sample], sample_eps, self.norm)
                x_adv[i_sample] = x_init[i_sample] + perturbation
        else:
            perturbation = projection(x_adv[start:stop] - x_init[start:stop], batch_eps, self.norm)
            x_adv[start:stop] = x_init[start:stop] + perturbation

    return x_adv
def _batch_process(self, x_batch, y_batch, lr):
    """
    Run the FBF procedure on one batch: perturb the inputs, then take a single optimiser step on them.
    See class documentation for more information on the exact procedure.

    :param x_batch: batch of x.
    :type x_batch: `np.ndarray`
    :param y_batch: batch of y.
    :type y_batch: `np.ndarray`
    :param lr: learning rate written into the optimiser's first parameter group for this step.
    :type lr: `float`
    :return: `(train_loss, train_acc, train_n)`: the loss value multiplied by the batch size, the number of
             samples whose arg-max over axis 1 of the first model output equals the label, and the batch size.
    """
    import torch

    eps = self._eps
    classifier = self._classifier

    # Random start within the eps box followed by one signed-gradient step of
    # size 1.25 * eps, clipped back to [-eps, +eps]
    nb_samples = x_batch.shape[0]
    nb_features = np.prod(x_batch.shape[1:])
    delta = random_sphere(nb_samples, nb_features, eps, np.inf)
    delta = delta.reshape(x_batch.shape).astype(ART_NUMPY_DTYPE)
    delta_grad = classifier.loss_gradient(x_batch + delta, y_batch)
    delta = np.clip(delta + 1.25 * eps * np.sign(delta_grad), -eps, +eps)
    x_batch_pert = np.clip(x_batch + delta, classifier.clip_values[0], classifier.clip_values[1])

    # Apply preprocessing
    x_preprocessed, y_preprocessed = classifier._apply_preprocessing(x_batch_pert, y_batch, fit=True)

    # Check label shape
    if classifier._reduce_labels:
        y_preprocessed = np.argmax(y_preprocessed, axis=1)

    device = classifier._device
    i_batch = torch.from_numpy(x_preprocessed).to(device)
    o_batch = torch.from_numpy(y_preprocessed).to(device)

    optimizer = classifier._optimizer

    # Zero the parameter gradients
    optimizer.zero_grad()

    # Perform prediction and form the loss on the last model output
    model_outputs = classifier._model(i_batch)
    loss = classifier._loss(model_outputs[-1], o_batch)

    optimizer.param_groups[0].update(lr=lr)

    # Actual training
    if not self._use_amp:
        loss.backward()
    else:
        import apex.amp as amp

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

    # clip the gradients
    torch.nn.utils.clip_grad_norm_(classifier._model.parameters(), 0.5)
    optimizer.step()

    train_n = o_batch.size(0)
    train_loss = loss.item() * train_n
    train_acc = (model_outputs[0].max(1)[1] == o_batch).sum().item()

    return train_loss, train_acc, train_n