def __init__(self, inputs, labels, adv_inputs, adv_preds, targeted=False, target_label=None):
    """
    Validate the evaluation data and record which attacks succeeded.

    Args:
        inputs: Original samples, validated together with `labels`.
        labels: Labels of the original samples. The true class of each
            sample is taken as the argmax over axis 1.
        adv_inputs: Adversarial samples, validated together with `adv_preds`.
        adv_preds: Predictions on the adversarial samples. The adversarial
            class of each sample is taken as the argmax over axis 1.
        targeted (bool): Whether the attack is targeted. Default: False.
        target_label: Target class of each sample. Required when `targeted`
            is True. Default: None.

    Raises:
        ValueError: If `targeted` is True but `target_label` is None.
    """
    self._inputs, self._labels = check_pair_numpy_param(
        'inputs', inputs, 'labels', labels)
    self._adv_inputs, self._adv_preds = check_pair_numpy_param(
        'adv_inputs', adv_inputs, 'adv_preds', adv_preds)
    self._targeted = check_param_type('targeted', targeted, bool)

    if target_label is not None:
        target_label = check_numpy_param('target_label', target_label)
    self._target_label = target_label

    self._true_label = np.argmax(self._labels, axis=1)
    self._adv_label = np.argmax(self._adv_preds, axis=1)
    sample_ids = np.arange(self._adv_preds.shape[0])

    if not self._targeted:
        # Untargeted attack succeeds when the predicted class changes.
        self._success_idxes = sample_ids[self._adv_label != self._true_label]
        return

    if target_label is None:
        msg = 'targeted attack need target_label, but got None.'
        LOGGER.error(TAG, msg)
        raise ValueError(msg)
    self._adv_preds, self._target_label = check_pair_numpy_param(
        'adv_pred', self._adv_preds, 'target_label', target_label)
    # Targeted attack succeeds when the predicted class equals the target.
    self._success_idxes = sample_ids[self._adv_label == self._target_label]
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and origin/target labels.

    Each iteration accumulates the gradient into a momentum term, takes a
    signed step of size `eps_iter` and clips the total perturbation to the
    `eps` budget around the original inputs. If the instance defines a
    non-None `_prob` attribute, the current inputs are first passed through
    `_transform_inputs(inputs, self._prob)`.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references to
            create adversarial examples.
        labels (Union[numpy.ndarray, tuple]): Original/target labels.
            For each input if it has more than one label, it is wrapped in
            a tuple.

    Returns:
        numpy.ndarray, generated adversarial examples.

    Examples:
        >>> adv_x = attack.generate([[0.5, 0.2, 0.6],
        >>>                          [0.3, 0, 0.2]],
        >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        >>>                          [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]])
    """
    if isinstance(labels, tuple):
        for i, labels_item in enumerate(labels):
            inputs, _ = check_pair_numpy_param('inputs', inputs,
                                               'labels[{}]'.format(i), labels_item)
    else:
        inputs, _ = check_pair_numpy_param('inputs', inputs,
                                           'labels', labels)
    arr_x = inputs
    momentum = 0

    # The previous check `'self._prob' in globals()` looked for a module
    # global literally named "self._prob" and was therefore always False,
    # so the input transformation never ran. Test the instance instead.
    prob = getattr(self, '_prob', None)

    bounds = self._bounds
    if bounds is not None:
        clip_min, clip_max = bounds
        clip_diff = clip_max - clip_min

    for _ in range(self._nb_iter):
        if prob is not None:
            d_inputs = _transform_inputs(inputs, prob)
        else:
            d_inputs = inputs
        gradient = self._gradient(d_inputs, labels)
        momentum = self._decay_factor * momentum + gradient
        adv_x = d_inputs + self._eps_iter * np.sign(momentum)
        if bounds is not None:
            # Budget is expressed relative to the value range.
            perturs = np.clip(adv_x - arr_x, (0 - self._eps) * clip_diff,
                              self._eps * clip_diff)
            adv_x = arr_x + perturs
            adv_x = np.clip(adv_x, clip_min, clip_max)
        else:
            adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
        inputs = adv_x
    return adv_x
def generate(self, inputs, labels):
    """
    Iteratively generate adversarial examples based on BIM method. The
    perturbation is normalized by projected method with parameter norm_level.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references to
            create adversarial examples.
        labels (Union[numpy.ndarray, tuple]): Original/target labels.
            For each input if it has more than one label, it is wrapped in
            a tuple.

    Returns:
        numpy.ndarray, generated adversarial examples.

    Examples:
        >>> adv_x = attack.generate([[0.6, 0.2, 0.6],
        >>>                          [0.3, 0.3, 0.4]],
        >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        >>>                          [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    """
    if not isinstance(labels, tuple):
        inputs, _ = check_pair_numpy_param('inputs', inputs,
                                           'labels', labels)
    else:
        for pos, single_labels in enumerate(labels):
            inputs, _ = check_pair_numpy_param('inputs', inputs,
                                               'labels[{}]'.format(pos), single_labels)

    reference = inputs
    bounds = self._bounds
    if bounds is not None:
        clip_min, clip_max = bounds
        value_range = clip_max - clip_min

    for _ in range(self._nb_iter):
        candidate = self._attack.generate(inputs, labels)
        delta = _projection(candidate - reference,
                            self._eps,
                            norm_level=self._norm_level)
        if bounds is not None:
            # Budget is expressed relative to the value range.
            delta = np.clip(delta, (0 - self._eps) * value_range,
                            self._eps * value_range)
            adv_x = reference + delta
        else:
            adv_x = np.clip(reference + delta,
                            reference - self._eps,
                            reference + self._eps)
        inputs = adv_x
    return adv_x
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input samples and original/target
    labels.

    The perturbation is `eps * gradient`, or, when `alpha` is set,
    `(eps - alpha) * gradient` plus a random sign term scaled by `alpha`.
    With bounds configured the perturbation is scaled by the value range and
    the result is clipped into the bounds.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references to
            create adversarial examples.
        labels (numpy.ndarray): Original/target labels.

    Returns:
        numpy.ndarray, generated adversarial examples.
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    self._dtype = inputs.dtype
    gradient = self._gradient(inputs, labels)

    # use random method or not
    if self._alpha is None:
        perturbation = self._eps * gradient
    else:
        noise_sign = np.sign(
            np.random.normal(size=inputs.shape)).astype(self._dtype)
        random_part = self._alpha * noise_sign
        perturbation = (self._eps - self._alpha) * gradient + random_part

    if self._bounds is None:
        return inputs + perturbation

    clip_min, clip_max = self._bounds
    perturbation = perturbation * (clip_max - clip_min)
    return np.clip(inputs + perturbation, clip_min, clip_max)
def __init__(self, raw_preds, def_preds, raw_query_counts, def_query_counts,
             raw_query_time, def_query_time, def_detection_counts,
             true_labels, max_queries):
    """
    Validate and store the statistics used for black-box defense evaluation.

    Args:
        raw_preds: Predictions of the raw model, validated with `def_preds`.
        def_preds: Predictions of the defended model.
        raw_query_counts: Per-sample query counts on the raw model. Samples
            with a count greater than 0 are counted as adversarial samples.
        def_query_counts: Per-sample query counts on the defended model.
        raw_query_time: Per-sample query time on the raw model.
        def_query_time: Per-sample query time on the defended model.
        def_detection_counts: Detection counts on the defended model.
        true_labels: Ground-truth labels.
        max_queries: Maximum number of queries; must be a positive integer.
    """
    self._raw_preds, self._def_preds = check_pair_numpy_param(
        'raw_preds', raw_preds, 'def_preds', def_preds)
    self._num_samples = self._raw_preds.shape[0]

    # Every per-sample statistic is length-checked against raw_preds.
    per_sample_stats = (
        ('raw_query_counts', raw_query_counts),
        ('def_query_counts', def_query_counts),
        ('raw_query_time', raw_query_time),
        ('def_query_time', def_query_time),
    )
    for stat_name, stat_value in per_sample_stats:
        checked, _ = check_equal_length(stat_name, stat_value,
                                        'number of sample', self._raw_preds)
        setattr(self, '_' + stat_name, checked)

    attacked_mask = self._raw_query_counts > 0
    self._num_adv_samples = self._raw_query_counts[attacked_mask].shape[0]
    self._num_adv_samples = check_int_positive(
        'the number of adversarial samples', self._num_adv_samples)
    self._num_ben_samples = self._num_samples - self._num_adv_samples

    self._max_queries = check_int_positive('max_queries', max_queries)
    self._def_detection_counts = check_numpy_param('def_detection_counts',
                                                   def_detection_counts)
    self._true_labels = check_numpy_param('true_labels', true_labels)
def batch_defense(self, inputs, labels, batch_size=32, epochs=5):
    """
    Defense model with samples in batch.

    Only full batches are used: trailing samples that do not fill a whole
    batch are skipped in every epoch.

    Args:
        inputs (numpy.ndarray): Samples based on which adversarial examples
            are generated.
        labels (numpy.ndarray): Labels of input samples.
        batch_size (int): Number of samples in one batch.
        epochs (int): Number of epochs.

    Returns:
        numpy.ndarray, loss of the last `defense` call, or None if no batch
        was processed.

    Raises:
        ValueError: If batch_size is 0.
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                            labels)
    x_len = len(inputs)
    batch_size = check_int_positive('batch_size', batch_size)
    iters_per_epoch = int(x_len / batch_size)

    loss = None
    for _ in range(epochs):
        for step in range(iters_per_epoch):
            begin = step * batch_size
            end = (step + 1) * batch_size
            loss = self.defense(inputs[begin:end], labels[begin:end])
    return loss
def defense(self, inputs, labels):
    """
    Enhance model via training with adversarial examples generated from
    input samples.

    A random subset of the batch, sized `ceil(replace_ratio * batch size)`,
    is split evenly between the configured attacks and replaced by their
    adversarial counterparts before one training step is run. The caller's
    `inputs` array is left untouched.

    Args:
        inputs (numpy.ndarray): Input samples.
        labels (numpy.ndarray): Labels of input samples.

    Returns:
        numpy.ndarray, loss of adversarial defense operation.
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs, 'labels',
                                            labels)
    if not self._graph_initialized:
        # One forward pass on the clean batch before the first real step.
        self._train_net(Tensor(inputs), Tensor(labels))
        self._graph_initialized = True

    x_len = inputs.shape[0]
    n_adv = int(np.ceil(self._replace_ratio*x_len))
    n_adv_per_attack = int(n_adv / len(self._attacks))
    adv_ids = np.random.choice(x_len, size=n_adv, replace=False)

    # Work on a copy: assigning into `inputs` would overwrite the caller's
    # data (including slices handed in by batch_defense) with adversarial
    # samples, so later epochs would no longer see the clean inputs.
    mixed_inputs = np.copy(inputs)
    start = 0
    for attack in self._attacks:
        idx = adv_ids[start:start + n_adv_per_attack]
        # Index chunks are disjoint, so reading from the clean array is
        # equivalent to reading from the partially replaced one.
        mixed_inputs[idx] = attack.generate(inputs[idx], labels[idx])
        start += n_adv_per_attack

    loss = self._train_net(Tensor(mixed_inputs), Tensor(labels))
    return loss.asnumpy()
def _dist(self, before, after):
    """
    Calculate the distance between the model outputs of a raw sample and
    its smoothed counterpart.

    Args:
        before (numpy.ndarray): Model output of raw samples.
        after (numpy.ndarray): Model output of smoothed counterparts.

    Returns:
        list, one distance per sample. The norm order is 2 when the metric
        is 'l2' and 1 otherwise.
    """
    before, after = check_pair_numpy_param('before', before, 'after', after)
    before, after = check_equal_shape('before', before, 'after', after)
    # 'l1' and every unrecognised metric both fall back to the 1-norm.
    norm_order = 2 if self._metric == 'l2' else 1
    return [np.linalg.norm(sample_diff, ord=norm_order)
            for sample_diff in after - before]
def generate(self, inputs, labels):
    """
    Main algorithm for NES.

    Args:
        inputs (numpy.ndarray): Benign input samples.
        labels (numpy.ndarray): Target labels.

    Returns:
        - list, bool values for each attack result.

        - list, generated adversarial examples.

        - list, query times for each sample.

    Raises:
        ValueError: If the top_k is less than 1 in Label-Only or
            Partial-Info setting.
        ValueError: If the target_imgs is None in Label-Only or
            Partial-Info setting.
        ValueError: If scene is not in ['Label_Only', 'Partial_Info',
            'Query_Limit'].

    Examples:
        >>> advs = attack.generate([[0.2, 0.3, 0.4], [0.3, 0.3, 0.2]],
        >>>                        [1, 2])
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)

    if self._scene in ('Label_Only', 'Partial_Info'):
        if self._k < 1:
            msg = "In 'Label_Only' or 'Partial_Info' mode, 'top_k' must more than 0."
            LOGGER.error(TAG, msg)
            raise ValueError(msg)
        if self.target_imgs is None:
            msg = "In 'Label_Only' or 'Partial_Info' mode, 'target_imgs' must be set."
            LOGGER.error(TAG, msg)
            raise ValueError(msg)
    elif self._scene == 'Query_Limit':
        self._k = self._num_class
    else:
        msg = "scene must be string in 'Label_Only', 'Partial_Info' or 'Query_Limit' "
        LOGGER.error(TAG, msg)
        raise ValueError(msg)

    # Target images are only mandatory in the two scenes checked above. In
    # 'Query_Limit' they may be unset, and zipping over None would raise
    # TypeError, so pair every sample with a None placeholder instead.
    # NOTE(review): assumes _generate_one does not need target_img in the
    # 'Query_Limit' scene - confirm against its implementation.
    target_imgs = self.target_imgs
    if target_imgs is None:
        target_imgs = [None] * len(inputs)

    is_advs = []
    advs = []
    queries = []
    for sample, label, target_img in zip(inputs, labels, target_imgs):
        is_adv, adv, query = self._generate_one(sample, label, target_img)
        is_advs.append(is_adv)
        advs.append(adv)
        queries.append(query)

    return is_advs, advs, queries
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input samples and original/target
    labels.

    The perturbation is `eps * gradient`, or, when `alpha` is set,
    `(eps - alpha) * gradient` plus a random sign term scaled by `alpha`.
    With bounds configured the perturbation is scaled by the value range and
    the result is clipped into the bounds.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references to
            create adversarial examples.
        labels (Union[numpy.ndarray, tuple]): Original/target labels.
            For each input if it has more than one label, it is wrapped in
            a tuple.

    Returns:
        numpy.ndarray, generated adversarial examples.
    """
    if not isinstance(labels, tuple):
        inputs, _ = check_pair_numpy_param('inputs', inputs,
                                           'labels', labels)
    else:
        for pos, single_labels in enumerate(labels):
            inputs, _ = check_pair_numpy_param('inputs', inputs,
                                               'labels[{}]'.format(pos), single_labels)
    self._dtype = inputs.dtype
    gradient = self._gradient(inputs, labels)

    # use random method or not
    if self._alpha is None:
        perturbation = self._eps * gradient
    else:
        noise_sign = np.sign(
            np.random.normal(size=inputs.shape)).astype(self._dtype)
        random_part = self._alpha * noise_sign
        perturbation = (self._eps - self._alpha) * gradient + random_part

    if self._bounds is None:
        return inputs + perturbation

    clip_min, clip_max = self._bounds
    perturbation = perturbation * (clip_max - clip_min)
    return np.clip(inputs + perturbation, clip_min, clip_max)
def defense(self, inputs, labels):
    """
    Enhance model via training with input samples.

    Args:
        inputs (numpy.ndarray): Input samples.
        labels (numpy.ndarray): Labels of input samples.

    Returns:
        numpy.ndarray, loss of defense operation.
    """
    checked_inputs, checked_labels = check_pair_numpy_param(
        'inputs', inputs, 'labels', labels)
    step_loss = self._train_net(Tensor(checked_inputs),
                                Tensor(checked_labels))
    return step_loss.asnumpy()
def get_attack_model(features, labels, config, n_jobs=-1):
    """
    Get trained attack model specify by config.

    Args:
        features (numpy.ndarray): Loss and logits characteristics of each
            sample.
        labels (numpy.ndarray): Labels of each sample whether belongs to
            training set.
        config (dict): Config of attacker, with key in ["method", "params"].
            The format is
            {"method": "knn", "params": {"n_neighbors": [3, 5, 7]}},
            params of each method must within the range of changeable
            parameters. Tips of params implement can be found in
            "https://scikit-learn.org/0.16/modules/generated/sklearn.grid_search.GridSearchCV.html".
        n_jobs (int): Number of jobs run in parallel. -1 means using all
            processors, otherwise the value of n_jobs must be a positive
            integer.

    Returns:
        sklearn.BaseEstimator, trained model specify by config["method"].

    Raises:
        ValueError: If n_jobs is neither -1 nor a positive integer.
        NameError: If config["method"] is not one of "knn", "lr", "mlp" or
            "rf" (case-insensitive).

    Examples:
        >>> features = np.random.randn(10, 10)
        >>> labels = np.random.randint(0, 2, 10)
        >>> config = {"method": "knn", "params": {"n_neighbors": [3, 5, 7]}}
        >>> attack_model = get_attack_model(features, labels, config)
    """
    features, labels = check_pair_numpy_param("features", features,
                                              "labels", labels)
    config = check_param_type("config", config, dict)
    n_jobs = check_param_type("n_jobs", n_jobs, int)
    if n_jobs != -1 and not n_jobs > 0:
        msg = "Value of n_jobs must be -1 or positive integer."
        raise ValueError(msg)

    method = str.lower(config["method"])
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        trainers = {
            "knn": _attack_knn,
            "lr": _attack_lr,
            "mlp": _attack_mlpc,
            "rf": _attack_rf,
        }
        trainer = trainers.get(method)
        if trainer is not None:
            # config["params"] is only read once a supported method matched.
            return trainer(features, labels, config["params"], n_jobs)

    msg = "Method {} is not supported.".format(config["method"])
    LOGGER.error(TAG, msg)
    raise NameError(msg)
def fit(self, inputs, labels=None):
    """
    Train detector to decide the best radius.

    Starting from the initial radius, the radius grows by one search step
    as long as the accuracy of the region-based prediction stays within
    `degrade_limit` of the raw accuracy. The stored radius is one step below
    the radius at which the search stopped.

    Args:
        inputs (numpy.ndarray): Benign samples.
        labels (numpy.ndarray): Ground truth labels of the input samples.
            Default: None. The labels are compared with the model
            predictions below, so they are needed in practice.

    Returns:
        float, the best radius.
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    LOGGER.debug(TAG, 'enter fit() function.')
    time_start = time.time()

    n_steps = (self._max_radius - self._initial_radius) / self._search_step
    n_steps = np.round(n_steps).astype(int)
    radius = self._initial_radius

    model_out = self._model.predict(Tensor(inputs))
    raw_preds = np.argmax(model_out.asnumpy(), axis=1)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)
    raw_preds, labels = check_equal_shape('raw_preds', raw_preds, 'labels',
                                          labels)
    raw_acc = np.sum(raw_preds == labels) / inputs.shape[0]

    for _ in range(n_steps):
        rc_preds = self._rc_forward(inputs, radius)
        rc_preds, labels = check_equal_shape('rc_preds', rc_preds, 'labels',
                                             labels)
        def_acc = np.sum(rc_preds == labels) / inputs.shape[0]
        if def_acc >= raw_acc - self._degrade_limit:
            radius += self._search_step
        else:
            break

    self._radius = radius - self._search_step
    LOGGER.debug(TAG, 'best radius is: %s', self._radius)
    LOGGER.debug(TAG,
                 'time used to train detector of %d samples is: %s seconds',
                 inputs.shape[0],
                 time.time() - time_start)
    return self._radius
def generate(self, inputs, labels):
    """
    Generate adversarial images in a for loop.

    Args:
        inputs (numpy.ndarray): Origin images.
        labels (numpy.ndarray): Target labels. When not None they are stored
            as `self.y_targets`; when None the previously stored
            `self.y_targets` (if any) are used.

    Returns:
        - numpy.ndarray, bool values for each attack result.

        - numpy.ndarray, generated adversarial examples.

        - numpy.ndarray, query times for each sample.

    Examples:
        >>> generate([[0.1,0.2,0.2],[0.2,0.3,0.4]],[2,6])
    """
    if labels is not None:
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        if not self._sparse:
            labels = np.argmax(labels, axis=1)
        self.y_targets = labels

    success_flags, adv_images, query_counts = [], [], []
    for sample_idx, x_single in enumerate(inputs):
        # Per-sample state read by _generate_one.
        self.queries = 0
        if self.image_targets is not None:
            self.image_target = self.image_targets[sample_idx]
        if self.y_targets is not None:
            self.y_target = self.y_targets[sample_idx]
        outcome = self._generate_one(x_single)
        is_adv, adv_img, query_time = outcome
        adv_images.append(adv_img)
        success_flags.append(is_adv)
        query_counts.append(query_time)

    return (np.asarray(success_flags),
            np.asarray(adv_images),
            np.asarray(query_counts))
def generate(self, inputs, labels):
    """
    Simple iterative FGSM method to generate adversarial examples.

    Each iteration runs the wrapped single-step attack on the current
    inputs and clips the accumulated perturbation to the `eps` budget around
    the original inputs. If the instance defines a non-None `prob`
    attribute, the current inputs are first passed through
    `_transform_inputs(inputs, self.prob)`.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references to
            create adversarial examples.
        labels (numpy.ndarray): Original/target labels.

    Returns:
        numpy.ndarray, generated adversarial examples.

    Examples:
        >>> adv_x = attack.generate([[0.3, 0.2, 0.6],
        >>>                          [0.3, 0.2, 0.4]],
        >>>                         [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        >>>                          [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    arr_x = inputs

    # The previous check `'self.prob' in globals()` looked for a module
    # global literally named "self.prob" and was therefore always False,
    # so the input transformation never ran. Test the instance instead.
    prob = getattr(self, 'prob', None)

    bounds = self._bounds
    if bounds is not None:
        clip_min, clip_max = bounds
        clip_diff = clip_max - clip_min

    for _ in range(self._nb_iter):
        if prob is not None:
            d_inputs = _transform_inputs(inputs, prob)
        else:
            d_inputs = inputs
        adv_x = self._attack.generate(d_inputs, labels)
        if bounds is not None:
            # Budget is expressed relative to the value range.
            perturs = np.clip(adv_x - arr_x,
                              (0 - self._eps) * clip_diff,
                              self._eps * clip_diff)
            adv_x = arr_x + perturs
        else:
            adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
        inputs = adv_x
    return adv_x
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and target labels.

    Args:
        inputs (numpy.ndarray): The original, unperturbed inputs.
        labels (numpy.ndarray): The target labels.

    Returns:
        - numpy.ndarray, bool values for each attack result.

        - numpy.ndarray, generated adversarial examples.

        - numpy.ndarray, query times for each sample.

    Examples:
        >>> adv_list = attack.generate([[0.1, 0.2, 0.6],
        >>>                             [0.3, 0, 0.4]],
        >>>                            [[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        >>>                             [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]])
    """
    arr_x, arr_y = check_pair_numpy_param('inputs', inputs,
                                          'labels', labels)
    if not self._sparse:
        arr_y = np.argmax(arr_y, axis=1)

    is_adv_list = []
    adv_list = []
    query_times_each_adv = []
    for sample, label in zip(arr_x, arr_y):
        start_t = time.time()
        is_adv, perturbed, query_times = self._generate_one(sample, label)
        is_adv_list.append(is_adv)
        adv_list.append(perturbed)
        query_times_each_adv.append(query_times)
        # '{:.2f}' prints seconds with two decimals; the former '{:.2}' meant
        # two significant digits and rendered e.g. 12.3 as '1.2e+01'.
        LOGGER.info(
            TAG,
            'Finished one sample, adversarial is {}, '
            'cost time {:.2f}s'.format(is_adv, time.time() - start_t))

    is_adv_list = np.array(is_adv_list)
    adv_list = np.array(adv_list)
    query_times_each_adv = np.array(query_times_each_adv)
    return is_adv_list, adv_list, query_times_each_adv
def batch_generate(self, inputs, labels, batch_size=64):
    """
    Generate adversarial examples in batch, based on input samples and
    their labels.

    Args:
        inputs (numpy.ndarray): Samples based on which adversarial examples
            are generated.
        labels (numpy.ndarray): Labels of samples, whose values determined
            by specific attacks.
        batch_size (int): The number of samples in one batch.

    Returns:
        numpy.ndarray, generated adversarial examples

    Examples:
        >>> inputs = Tensor([[0.2, 0.4, 0.5, 0.2], [0.7, 0.2, 0.4, 0.3]])
        >>> labels = [3, 0]
        >>> advs = attack.batch_generate(inputs, labels, batch_size=2)
    """
    arr_x, arr_y = check_pair_numpy_param('inputs', inputs, 'labels', labels)
    len_x = arr_x.shape[0]
    batch_size = check_int_positive('batch_size', batch_size)

    res = []
    # Stepping by batch_size visits every full batch and then the shorter
    # tail batch, if there is one.
    for begin in range(0, len_x, batch_size):
        end = begin + batch_size
        generated = self.generate(arr_x[begin:end], arr_y[begin:end])
        # Black-attack methods will return 3 values, just get the second.
        if isinstance(generated, tuple):
            generated = generated[1]
        res.append(generated)

    return np.concatenate(res, axis=0)
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and origin/target
    labels.

    Each iteration accumulates the gradient into a momentum term, takes a
    signed step of size `eps_iter` and clips the total perturbation to the
    `eps` budget around the original inputs.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references to
            create adversarial examples.
        labels (numpy.ndarray): Original/target labels.

    Returns:
        numpy.ndarray, generated adversarial examples.

    Examples:
        >>> adv_x = attack.generate([[0.5, 0.2, 0.6],
        >>>                          [0.3, 0, 0.2]],
        >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        >>>                          [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]])
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    origin = inputs
    momentum = 0

    bounds = self._bounds
    if bounds is not None:
        clip_min, clip_max = bounds
        value_range = clip_max - clip_min

    for _ in range(self._nb_iter):
        gradient = self._gradient(inputs, labels)
        momentum = self._decay_factor * momentum + gradient
        stepped = inputs + self._eps_iter * np.sign(momentum)
        if bounds is not None:
            # Budget is expressed relative to the value range.
            delta = np.clip(stepped - origin, (0 - self._eps) * value_range,
                            self._eps * value_range)
            adv_x = np.clip(origin + delta, clip_min, clip_max)
        else:
            adv_x = np.clip(stepped, origin - self._eps, origin + self._eps)
        inputs = adv_x
    return adv_x
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and target labels.

    Every sample is optimized on its own by `_optimize`, starting from the
    configured `eps`.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references to
            create adversarial examples.
        labels (numpy.ndarray): Original/target labels.

    Returns:
        numpy.ndarray, generated adversarial examples.

    Examples:
        >>> adv = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [2, 2])
    """
    LOGGER.debug(TAG, 'start to generate adv image.')
    arr_x, arr_y = check_pair_numpy_param('inputs', inputs, 'labels', labels)
    self._dtype = arr_x.dtype
    return np.array([
        self._optimize(original_x, label_y, epsilon=self._eps)
        for original_x, label_y in zip(arr_x, arr_y)
    ])
def generate(self, inputs, labels):
    """
    Generate adversarial examples in batch.

    Args:
        inputs (numpy.ndarray): Input samples.
        labels (numpy.ndarray): Target labels. Converted with argmax over
            axis 1 when the attack is not configured as sparse.

    Returns:
        numpy.ndarray, adversarial samples.

    Examples:
        >>> advs = generate([[0.2, 0.3, 0.4], [0.3, 0.4, 0.5]], [1, 2])
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)
    LOGGER.debug(TAG, 'start to generate adversarial samples.')
    adv_samples = [self._generate_one(inputs[idx], labels[idx])
                   for idx in range(inputs.shape[0])]
    LOGGER.debug(TAG, 'finished.')
    return np.asarray(adv_samples)
def _best_logits_of_other_class(logits, target_class, value=1):
    """
    Choose the index of the largest logits exclude target class.

    `value` is subtracted from the target-class entry of every row before
    the argmax is taken.

    Args:
        logits (numpy.ndarray): Predict logits of samples.
        target_class (numpy.ndarray): Target labels.
        value (float): Maximum value of output logits. Default: 1.

    Returns:
        numpy.ndarray, the index of the largest logits exclude the target
        class.

    Examples:
        >>> other_class = _best_logits_of_other_class([[0.2, 0.3, 0.5],
        >>>                                            [0.3, 0.4, 0.3]], [2, 1])
    """
    LOGGER.debug(TAG, "enter the func _best_logits_of_other_class.")
    logits, target_class = check_pair_numpy_param('logits', logits,
                                                  'target_class', target_class)
    penalty = np.zeros_like(logits)
    for row in range(len(logits)):
        penalty[row, target_class[row]] = value
    return np.argmax(logits - penalty, axis=1)
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and targeted labels
    (or ground_truth labels).

    Samples are attacked one at a time with a particle swarm: a population
    of `pop_size` perturbed copies of the sample is moved for at most
    `t_max` iterations towards the best position of each particle and the
    best position of the whole swarm, and the search stops as soon as the
    swarm's best position is adversarial.

    Args:
        inputs (Union[numpy.ndarray, tuple]): Input samples. The format of
            inputs should be numpy.ndarray if model_type='classification'.
            The format of inputs can be (input1, input2, ...) or only one
            array if model_type='detection'.
        labels (Union[numpy.ndarray, tuple]): Targeted labels or
            ground-truth labels. The format of labels should be
            numpy.ndarray if model_type='classification'. The format of
            labels should be (gt_boxes, gt_labels) if
            model_type='detection'.

    Returns:
        - numpy.ndarray, bool values for each attack result.

        - numpy.ndarray, generated adversarial examples.

        - numpy.ndarray, query times for each sample.

    Raises:
        ValueError: If the attack is configured as sparse for classification
            but the labels, after squeezing, have two or more dimensions or
            a first dimension different from the number of inputs.

    Examples:
        >>> advs = attack.generate([[0.2, 0.3, 0.4], [0.3, 0.3, 0.2]],
        >>>                        [1, 2])
    """
    # inputs check
    if self._model_type == 'classification':
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        if self._sparse:
            # Sparse labels must reduce to one class index per sample.
            if labels.size > 1:
                label_squ = np.squeeze(labels)
            else:
                label_squ = labels
            if len(label_squ.shape) >= 2 or label_squ.shape[0] != inputs.shape[0]:
                msg = "The parameter 'sparse' of PSOAttack is True, but the input labels is not sparse style and " \
                      "got its shape as {}.".format(labels.shape)
                LOGGER.error(TAG, msg)
                raise ValueError(msg)
        else:
            # Non-sparse labels are reduced to class indices.
            labels = np.argmax(labels, axis=1)
        images = inputs
    elif self._model_type == 'detection':
        images, auxiliary_inputs, gt_boxes, gt_labels = check_detection_inputs(inputs, labels)

    # generate one adversarial each time
    adv_list = []
    success_list = []
    query_times_list = []
    for i in range(images.shape[0]):
        is_success = False
        q_times = 0
        x_ori = images[i]
        # When no bounds are configured they are derived from the current
        # sample and stored on the instance, so later samples reuse them.
        if not self._bounds:
            self._bounds = [np.min(x_ori), np.max(x_ori)]
        pixel_deep = self._bounds[1] - self._bounds[0]

        # One query for the score of the unperturbed sample.
        q_times += 1
        if self._model_type == 'classification':
            label_i = labels[i]
            confi_ori = self._confidence_cla(x_ori, label_i)
        elif self._model_type == 'detection':
            # Select sample i from every auxiliary input, keeping a leading
            # axis of length 1.
            auxiliary_input_i = tuple()
            for item in auxiliary_inputs:
                auxiliary_input_i += (np.expand_dims(item[i], axis=0),)
            gt_boxes_i, gt_labels_i = np.expand_dims(gt_boxes[i], axis=0), np.expand_dims(gt_labels[i], axis=0)
            inputs_i = (images[i],) + auxiliary_input_i
            confi_ori, gt_object_num = self._detection_scores(inputs_i, gt_boxes_i, gt_labels_i, self._model)
            LOGGER.info(TAG, 'The number of ground-truth objects is %s', gt_object_num[0])

        # step1, initializing
        # initial global optimum fitness value: -inf, so that the first
        # evaluated particle always replaces it
        best_fitness = -np.inf
        # initial global optimum position
        best_position = x_ori
        x_copies = np.repeat(x_ori[np.newaxis, :], self._pop_size, axis=0)
        # Random noise in [0, pixel_deep), limited to per_bounds*(|x| + 0.1).
        cur_noise = np.clip(np.random.random(x_copies.shape)*pixel_deep,
                            (0 - self._per_bounds)*(np.abs(x_copies) + 0.1),
                            self._per_bounds*(np.abs(x_copies) + 0.1))

        # initial advs
        par = np.clip(x_copies + cur_noise, self._bounds[0], self._bounds[1])
        # initial optimum positions for particles
        par_best_poi = np.copy(par)
        # initial optimum fitness values
        par_best_fit = -np.inf*np.ones(self._pop_size)
        # step2, optimization
        # initial velocities for particles
        v_particles = np.zeros(par.shape)
        is_mutation = False
        iters = 0
        while iters < self._t_max:
            last_best_fit = best_fitness
            ran_1 = np.random.random(par.shape)
            ran_2 = np.random.random(par.shape)
            # step_size scales the previous velocity and the pull towards
            # the global best; the pull towards each particle's own best is
            # added outside that factor.
            v_particles = self._step_size*(
                v_particles + self._c1*ran_1*(best_position - par)) \
                          + self._c2*ran_2*(par_best_poi - par)

            par += v_particles

            # Mutation only starts after the first iterations and only when
            # the previous iteration flagged it.
            if iters > 6 and is_mutation:
                par = self._mutation_op(par)
            # Keep particles within the per-sample perturbation budget and
            # then within the global bounds.
            par = np.clip(np.clip(par,
                                  x_copies - (np.abs(x_copies) + 0.1*pixel_deep)*self._per_bounds,
                                  x_copies + (np.abs(x_copies) + 0.1*pixel_deep)*self._per_bounds),
                          self._bounds[0], self._bounds[1])

            if self._model_type == 'classification':
                confi_adv = self._confidence_cla(par, label_i)
            elif self._model_type == 'detection':
                confi_adv, _ = self._detection_scores(
                    (par,) + auxiliary_input_i, gt_boxes_i, gt_labels_i, self._model)
            # One query per particle.
            q_times += self._pop_size
            fit_value = self._fitness(confi_ori, confi_adv, x_ori, par)
            for k in range(self._pop_size):
                if fit_value[k] > par_best_fit[k]:
                    par_best_fit[k] = fit_value[k]
                    par_best_poi[k] = par[k]
                if fit_value[k] > best_fitness:
                    best_fitness = fit_value[k]
                    # Copy: par is updated in place by `par += v_particles`,
                    # which would otherwise also change the stored best.
                    best_position = par[k].copy()
            iters += 1
            if best_fitness < -2:
                LOGGER.debug(TAG, 'best fitness value is %s, which is too small. We recommend that you decrease '
                                  'the value of the initialization parameter c.', best_fitness)
            if iters < 3 and best_fitness > 100:
                LOGGER.debug(TAG, 'best fitness value is %s, which is too large. We recommend that you increase '
                                  'the value of the initialization parameter c.', best_fitness)
            # Flag mutation for the next iteration when the improvement of
            # the global best is below 5% of its previous value.
            is_mutation = False
            if (best_fitness - last_best_fit) < last_best_fit*0.05:
                is_mutation = True

            # One query to test the current global best.
            q_times += 1
            if self._model_type == 'classification':
                cur_pre = self._model.predict(best_position)
                cur_label = np.argmax(cur_pre)
                if (self._targeted and cur_label == label_i) or (not self._targeted and cur_label != label_i):
                    is_success = True
            elif self._model_type == 'detection':
                _, correct_nums_adv = self._detection_scores(
                    (best_position,) + auxiliary_input_i, gt_boxes_i, gt_labels_i, self._model)
                LOGGER.info(TAG, 'The number of correctly detected objects in adversarial image is %s',
                            correct_nums_adv[0])
                # Success when at most reserve_ratio of the ground-truth
                # objects are still detected correctly.
                if correct_nums_adv <= int(gt_object_num*self._reserve_ratio):
                    is_success = True

            if is_success:
                LOGGER.debug(TAG, 'successfully find one adversarial '
                                  'sample and start Reduction process')
                # step3, reduction
                if self._model_type == 'classification':
                    best_position, q_times = self._reduction(x_ori, q_times, label_i, best_position, self._model,
                                                             targeted_attack=self._targeted)

                break
        # NOTE(review): for detection the reduction runs after the search
        # loop whether or not the attack succeeded - confirm the nesting
        # level against the formatted source.
        if self._model_type == 'detection':
            best_position, q_times = self._fast_reduction(x_ori, best_position, q_times,
                                                          auxiliary_input_i, gt_boxes_i, gt_labels_i, self._model)
        if not is_success:
            LOGGER.debug(TAG,
                         'fail to find adversarial sample, iteration '
                         'times is: %d and query times is: %d',
                         iters,
                         q_times)
        adv_list.append(best_position)
        success_list.append(is_success)
        query_times_list.append(q_times)
        # Drop the per-sample population arrays before the next sample.
        del x_copies, cur_noise, par, par_best_poi
    return np.asarray(success_list), \
           np.asarray(adv_list), \
           np.asarray(query_times_list)
def __init__(self, raw_preds, def_preds, true_labels):
    """
    Validate and store the predictions and labels used for evaluation.

    Args:
        raw_preds: Predictions of the raw model, validated together with
            `def_preds`.
        def_preds: Predictions of the defended model.
        true_labels: Ground-truth labels; their length is stored as the
            number of samples.
    """
    checked_raw, checked_def = check_pair_numpy_param(
        'raw_preds', raw_preds, 'def_preds', def_preds)
    self._raw_preds = checked_raw
    self._def_preds = checked_def
    self._true_labels = check_numpy_param('true_labels', true_labels)
    self._num_samples = len(true_labels)
def generate(self, inputs, labels):
    """
    Search one adversarial example per input sample with a particle swarm.

    Every sample is attacked on its own: a population of perturbed copies
    is initialised around it, moved for at most `self._t_max` rounds, and
    as soon as the best particle gets the wanted prediction it is handed
    to `self._reduction`.

    Args:
        inputs (numpy.ndarray): Input samples.
        labels (numpy.ndarray): Targeted labels or ground_truth labels.
            When `self._sparse` is false they are reduced with argmax
            along axis 1.

    Returns:
        - numpy.ndarray, bool values for each attack result.

        - numpy.ndarray, generated adversarial examples.

        - numpy.ndarray, query times for each sample.

    Examples:
        >>> advs = attack.generate([[0.2, 0.3, 0.4], [0.3, 0.3, 0.2]],
        >>> [1, 2])
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)

    adv_list, success_list, query_times_list = [], [], []
    pixel_deep = self._bounds[1] - self._bounds[0]

    # one adversarial example is generated at a time
    for i in range(inputs.shape[0]):
        is_success = False
        x_ori = inputs[i]
        confidences = self._model.predict(np.expand_dims(x_ori, axis=0))[0]
        q_times = 1
        # Target label (targeted attack) or ground-truth label (untargeted).
        label_i = labels[i]
        if self._targeted:
            confi_ori = confidences[label_i]
        else:
            confi_ori = max(confidences)

        # step1, initializing
        # global optimum fitness starts at -inf, it cannot be set to 0
        best_fitness = -np.inf
        # global optimum position
        best_position = x_ori
        x_copies = np.repeat(x_ori[np.newaxis, :], self._pop_size, axis=0)
        raw_noise = (np.random.random(x_copies.shape) - 0.5)*self._step_size
        cur_noise = np.clip(raw_noise,
                            (0 - self._per_bounds)*(x_copies + 0.1),
                            self._per_bounds*(x_copies + 0.1))
        # initial advs
        par = np.clip(x_copies + cur_noise,
                      x_copies*(1 - self._per_bounds),
                      x_copies*(1 + self._per_bounds))
        par_ori = np.copy(par)
        # per-particle optimum positions and fitness values
        par_best_poi = np.copy(par)
        par_best_fit = -np.inf*np.ones(self._pop_size)

        # step2, optimization
        v_particles = np.zeros(par.shape)
        is_mutation = False
        iters = 0
        while iters < self._t_max:
            last_best_fit = best_fitness
            ran_1 = np.random.random(par.shape)
            ran_2 = np.random.random(par.shape)
            pull_global = self._c1*ran_1*(best_position - par)
            pull_personal = self._c2*ran_2*(par_best_poi - par)
            v_particles = self._step_size*(v_particles + pull_global) \
                + pull_personal
            anchor = par_ori + 0.1*pixel_deep
            par = np.clip(par + v_particles,
                          anchor*(1 - self._per_bounds),
                          anchor*(1 + self._per_bounds))
            if iters > 30 and is_mutation:
                par = self._mutation_op(par)

            swarm_preds = self._model.predict(par)
            if self._targeted:
                confi_adv = swarm_preds[:, label_i]
            else:
                confi_adv = np.max(swarm_preds, axis=1)
            q_times += self._pop_size

            fit_value = self._fitness(confi_ori, confi_adv, x_ori, par)
            for k in range(self._pop_size):
                fit_k = fit_value[k]
                if fit_k > par_best_fit[k]:
                    par_best_fit[k] = fit_k
                    par_best_poi[k] = par[k]
                if fit_k > best_fitness:
                    best_fitness = fit_k
                    best_position = par[k]
            iters += 1

            cur_pre = self._model.predict(
                np.expand_dims(best_position, axis=0))[0]
            q_times += 1
            cur_label = np.argmax(cur_pre)
            # Mutation is switched on for the next round when the best
            # fitness gained less than 5% of its previous value.
            is_mutation = bool(
                (best_fitness - last_best_fit) < last_best_fit*0.05)

            if self._targeted:
                is_success = bool(cur_label == label_i)
            else:
                is_success = bool(cur_label != label_i)

            if is_success:
                LOGGER.debug(TAG, 'successfully find one adversarial '
                                  'sample and start Reduction process')
                # step3, reduction
                best_position, q_times = self._reduction(
                    x_ori, q_times, label_i, best_position)
                break

        if not is_success:
            LOGGER.debug(TAG,
                         'fail to find adversarial sample, iteration '
                         'times is: %d and query times is: %d',
                         iters,
                         q_times)
        adv_list.append(best_position)
        success_list.append(is_success)
        query_times_list.append(q_times)
        del x_copies, cur_noise, par, par_ori, par_best_poi

    return np.asarray(success_list), \
        np.asarray(adv_list), \
        np.asarray(query_times_list)
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and targeted labels.

    For each binary-search step a fresh perturbation (in attack space) is
    optimized with `_AdamOptimizer` against `self._loss_function`. After
    the inner loop, the per-sample trade-off constant is moved between its
    lower and upper bound depending on whether an adversarial example has
    been found for that sample.

    Args:
        inputs (numpy.ndarray): Input samples.
        labels (numpy.ndarray): The ground truth label of input samples
            or target labels.

    Returns:
        numpy.ndarray, generated adversarial examples.

    Examples:
        >>> advs = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [1, 2])
    """
    LOGGER.debug(TAG, "enter the func generate.")
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)
    self._dtype = inputs.dtype
    att_original = self._to_attack_space(inputs)
    reconstructed_original, _ = self._to_model_space(att_original)

    # find an adversarial sample
    const = np.ones_like(labels, dtype=self._dtype) * self._initial_const
    lower_bound = np.zeros_like(labels, dtype=self._dtype)
    upper_bound = np.ones_like(labels, dtype=self._dtype) * np.inf
    adversarial_res = inputs.copy()
    adversarial_loss = np.ones_like(labels, dtype=self._dtype) * np.inf
    samples_num = labels.shape[0]
    # Set to a truthy value once a sample has been adversarial at least once.
    adv_flag = np.zeros_like(labels)

    for binary_search_step in range(self._bin_search_steps):
        if (binary_search_step == self._bin_search_steps - 1) and \
                (self._bin_search_steps >= 10):
            # `upper_bound` holds one value per sample, so the cap has to be
            # applied element-wise; the builtin min() cannot compare a
            # scalar with an array and would not return a per-sample array.
            const = np.minimum(1e10, upper_bound)
        LOGGER.debug(TAG,
                     'starting optimization with const = %s',
                     str(const))

        att_perturbation = np.zeros_like(att_original, dtype=self._dtype)
        loss_at_previous_check = \
            np.ones_like(labels, dtype=self._dtype) * np.inf

        # create a new optimizer to minimize the perturbation
        optimizer = _AdamOptimizer(att_perturbation.shape)

        for iteration in range(self._max_iterations):
            x_input, dxdp = self._to_model_space(
                att_original + att_perturbation)
            logits = self._network(Tensor(x_input)).asnumpy()

            current_l2_loss, current_loss, dldx = self._loss_function(
                logits, x_input, reconstructed_original,
                labels, const, self._confidence)

            # check if attack success (include all examples)
            if self._targeted:
                is_adv = (np.argmax(logits, axis=1) == labels)
            else:
                is_adv = (np.argmax(logits, axis=1) != labels)

            # Keep, per sample, the adversarial input with the smallest
            # perturbation norm seen so far.
            for i in range(samples_num):
                if is_adv[i]:
                    adv_flag[i] = True
                    if current_l2_loss[i] < adversarial_loss[i]:
                        adversarial_res[i] = x_input[i]
                        adversarial_loss[i] = current_l2_loss[i]

            if np.all(adv_flag):
                if self._fast:
                    LOGGER.debug(TAG, "succeed find adversarial examples.")
                    msg = 'iteration: {}, logits_att: {}, ' \
                          'loss: {}, l2_dist: {}' \
                        .format(iteration,
                                np.argmax(logits, axis=1),
                                current_loss, current_l2_loss)
                    LOGGER.debug(TAG, msg)
                    return adversarial_res

            dldx, inputs = check_equal_shape('dldx', dldx, 'inputs', inputs)

            # Chain rule: gradient w.r.t. the attack-space perturbation.
            gradient = dldx * dxdp
            att_perturbation += \
                optimizer(gradient, self._learning_rate)

            # check if should stop iteration early
            flag = True
            iter_check = iteration % (np.ceil(
                self._max_iterations * self._abort_early_check_ratio))
            if self._abort_early and iter_check == 0:
                # check progress
                for i in range(inputs.shape[0]):
                    if current_loss[i] <= .9999 * loss_at_previous_check[i]:
                        flag = False
                # stop Adam if all samples has no progress
                if flag:
                    LOGGER.debug(TAG,
                                 'step:%d, no progress yet, stop iteration',
                                 binary_search_step)
                    break
                loss_at_previous_check = current_loss

        for i in range(samples_num):
            # update bound based on search result
            if adv_flag[i]:
                LOGGER.debug(TAG,
                             'example %d, found adversarial with const=%f',
                             i, const[i])
                upper_bound[i] = const[i]
            else:
                LOGGER.debug(TAG,
                             'example %d, failed to find adversarial'
                             ' with const=%f',
                             i, const[i])
                lower_bound[i] = const[i]

            # No upper bound known yet: grow the constant; otherwise bisect.
            if upper_bound[i] == np.inf:
                const[i] *= 10
            else:
                const[i] = (lower_bound[i] + upper_bound[i]) / 2

    return adversarial_res
def _loss_function(self, logits, new_x, org_x, org_or_target_class,
                   constant, confidence):
    """
    Evaluate the attack loss and its gradient w.r.t. the inputs.

    The loss is the squared perturbation norm plus `constant` times a
    hinge term built from the logit of the original/target class and the
    logit selected by `_best_logits_of_other_class`.

    Args:
        logits (numpy.ndarray): The output of network before softmax.
        new_x (numpy.ndarray): Adversarial examples.
        org_x (numpy.ndarray): Original benign input samples.
        org_or_target_class (numpy.ndarray): Original/target labels.
        constant: Trade-off constant balancing the hinge term against the
            perturbation norm; it is indexed per sample (`constant[i]`).
        confidence (float): Margin added inside the hinge term.

    Returns:
        numpy.ndarray, norm of perturbation, sum of the loss and the
        norm, and gradients of the sum w.r.t inputs.

    Raises:
        ValueError: If loss is less than 0.

    Examples:
        >>> L2_loss, total_loss, dldx = self._loss_function([0.2 , 0.3,
        >>> 0.5], [0.1, 0.2, 0.2, 0.4], [0.12, 0.2, 0.25, 0.4], [1], 2, 0)
    """
    LOGGER.debug(TAG, "enter the func _loss_function.")
    logits = check_numpy_param('logits', logits)
    org_x = check_numpy_param('org_x', org_x)
    new_x, org_or_target_class = check_pair_numpy_param(
        'new_x', new_x, 'org_or_target_class', org_or_target_class)
    new_x, org_x = check_equal_shape('new_x', new_x, 'org_x', org_x)

    other_class_index = _best_logits_of_other_class(
        logits, org_or_target_class, value=np.inf)

    # Squared distance, summed over every axis except the first one.
    delta = new_x - org_x
    loss1 = np.sum(delta**2, axis=tuple(range(1, len(new_x.shape))))
    loss2 = np.zeros_like(loss1, dtype=self._dtype)
    loss2_grade = np.zeros_like(new_x, dtype=self._dtype)
    jaco_grad = jacobian_matrix(self._net_grad, new_x, self._num_classes)

    num_samples = org_or_target_class.shape[0]
    for i in range(num_samples):
        own_cls = org_or_target_class[i]
        other_cls = other_class_index[i]
        # Targeted: push the other class below the target class.
        # Untargeted: push the original class below the other class.
        if self._targeted:
            raised, lowered = other_cls, own_cls
        else:
            raised, lowered = own_cls, other_cls
        loss2[i] = max(0, logits[i][raised] - logits[i][lowered] + confidence)
        loss2_grade[i] = constant[i] * (
            jaco_grad[raised][i] - jaco_grad[lowered][i])

    total_loss = loss1 + constant * loss2
    loss1_grade = 2 * delta

    for i in range(num_samples):
        if loss2[i] < 0:
            msg = 'loss value should greater than or equal to 0, ' \
                  'but got loss2 {}'.format(loss2[i])
            LOGGER.error(TAG, msg)
            raise ValueError(msg)
        # An inactive hinge contributes no gradient.
        if loss2[i] == 0:
            loss2_grade[i, ...] = 0

    total_loss_grade = loss1_grade + loss2_grade
    return loss1, total_loss, total_loss_grade
def batch_generate(self, inputs, labels, batch_size=64):
    """
    Generate adversarial examples in batch, based on input samples and
    their labels.

    The samples are cut into consecutive slices of `batch_size` (the last
    slice may be shorter), `self.generate` is called on each slice and the
    results are concatenated along axis 0.

    Args:
        inputs (numpy.ndarray): Samples based on which adversarial
            examples are generated.
        labels (Union[numpy.ndarray, tuple]): Original/target labels.
            For each input if it has more than one label, it is wrapped
            in a tuple.
        batch_size (int): The number of samples in one batch.

    Returns:
        numpy.ndarray, generated adversarial examples

    Examples:
        >>> inputs = np.array([[0.2, 0.4, 0.5, 0.2], [0.7, 0.2, 0.4, 0.3]])
        >>> labels = np.array([3, 0])
        >>> advs = attack.batch_generate(inputs, labels, batch_size=2)
    """
    if isinstance(labels, tuple):
        for i, labels_item in enumerate(labels):
            arr_x, _ = check_pair_numpy_param('inputs', inputs,
                                              'labels[{}]'.format(i),
                                              labels_item)
    else:
        arr_x, _ = check_pair_numpy_param('inputs', inputs,
                                          'labels', labels)
    arr_y = labels
    len_x = arr_x.shape[0]
    batch_size = check_int_positive('batch_size', batch_size)

    res = []
    # One stepped loop covers the full batches and the shorter trailing
    # batch alike, so tuple labels are sliced per sub-array in every batch
    # (the trailing batch used to slice the tuple itself).
    for start in range(0, len_x, batch_size):
        stop = start + batch_size
        x_batch = arr_x[start:stop]
        if isinstance(arr_y, tuple):
            y_batch = tuple(sub_labels[start:stop] for sub_labels in arr_y)
        else:
            y_batch = arr_y[start:stop]
        adv_x = self.generate(x_batch, y_batch)
        # Black-attack methods will return 3 values, just get the second.
        res.append(adv_x[1] if isinstance(adv_x, tuple) else adv_x)

    adv_x = np.concatenate(res, axis=0)
    return adv_x
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input samples and targeted
    labels.

    A starting point is requested for every sample from
    `self._initialize_starting_point`; samples whose starting point is
    flagged as usable are refined with `self._decision_optimize`, the
    others are returned as they came back from the initialisation.

    Args:
        inputs (numpy.ndarray): Benign input samples used as references
            to create adversarial examples.
        labels (numpy.ndarray): For targeted attack, labels are
            adversarial target labels. For untargeted attack, labels are
            ground-truth labels.

    Returns:
        - numpy.ndarray, bool values for each attack result.

        - numpy.ndarray, generated adversarial examples.

        - numpy.ndarray, query times for each sample.

    Examples:
        >>> is_adv_list, adv_list, query_times_each_adv = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [2, 3])
    """

    def _predicted_label(single_sample):
        # Label the model assigns to one un-batched sample.
        batch = np.expand_dims(single_sample, axis=0)
        return np.argmax(self._model.predict(batch)[0])

    arr_x, arr_y = check_pair_numpy_param('inputs', inputs,
                                          'labels', labels)
    if not self._sparse:
        arr_y = np.argmax(arr_y, axis=1)

    ini_bool, ini_advs, ini_count = self._initialize_starting_point(
        arr_x, arr_y)

    flags = []
    advs = []
    queries = []
    per_sample = zip(arr_x, arr_y, ini_advs, ini_bool, ini_count)
    for sample, sample_label, start_adv, ite_bool, ite_c in per_sample:
        if not ite_bool:
            LOGGER.info(TAG, 'Initial sample is not adversarial, pass.')
            flags.append(False)
            advs.append(start_adv)
            queries.append(ite_c)
            continue

        LOGGER.info(TAG, 'Start optimizing.')
        ori_label = _predicted_label(sample)
        ini_label = _predicted_label(start_adv)
        is_adv, adv_x, query_times = self._decision_optimize(
            sample, sample_label, start_adv)
        adv_label = _predicted_label(adv_x)

        LOGGER.info(TAG, 'before ini attack label is :{}'.format(ori_label))
        LOGGER.info(TAG, 'after ini attack label is :{}'.format(ini_label))
        LOGGER.info(TAG, 'INPUT optimize label is :{}'.format(sample_label))
        LOGGER.info(TAG,
                    'after pointwise attack label is :{}'.format(adv_label))

        flags.append(is_adv)
        advs.append(adv_x)
        # Queries spent on initialisation are added to the optimisation ones.
        queries.append(query_times + ite_c)

    is_adv_list = np.array(flags)
    adv_list = np.array(advs)
    query_times_each_adv = np.array(queries)
    LOGGER.info(TAG, 'ret list is: {}'.format(adv_list))
    return is_adv_list, adv_list, query_times_each_adv
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input samples and original
    labels.

    While at least one sample still keeps its original label and fewer
    than `self._max_iters` rounds have run, each such sample is pushed
    towards the class whose linearised decision boundary is closest, and
    the accumulated perturbation is re-applied to the original inputs.

    Args:
        inputs (numpy.ndarray): Input samples.
        labels (numpy.ndarray): Original labels.

    Returns:
        numpy.ndarray, adversarial examples.

    Raises:
        NotImplementedError: If norm_level is not in [2, np.inf, '2', 'inf'].

    Examples:
        >>> advs = generate([[0.2, 0.3, 0.4], [0.3, 0.4, 0.5]], [1, 2])
    """
    inputs, labels = check_pair_numpy_param('inputs', inputs,
                                            'labels', labels)
    if not self._sparse:
        labels = np.argmax(labels, axis=1)
    inputs_dtype = inputs.dtype
    iteration = 0
    origin_labels = labels
    cur_labels = origin_labels.copy()
    weight = np.squeeze(np.zeros(inputs.shape[1:]))
    r_tot = np.zeros(inputs.shape)
    x_origin = inputs
    while np.any(cur_labels == origin_labels) \
            and iteration < self._max_iters:
        preds = self._network(Tensor(inputs)).asnumpy()
        grads = jacobian_matrix(self._net_grad, inputs, self._num_classes)
        for idx in range(inputs.shape[0]):
            diff_w = np.inf
            label = origin_labels[idx]
            # Samples that already changed label are left alone.
            if cur_labels[idx] != label:
                continue
            for k in range(self._num_classes):
                if k == label:
                    continue
                w_k = grads[k, idx, ...] - grads[label, idx, ...]
                f_k = preds[idx, k] - preds[idx, label]
                if self._norm_level == 2 or self._norm_level == '2':
                    diff_w_k = abs(f_k) / (np.linalg.norm(w_k) + 1e-8)
                elif self._norm_level == np.inf \
                        or self._norm_level == 'inf':
                    # The L1 norm is taken over the flattened gradient:
                    # with ord=1, np.linalg.norm treats a 2-D array as a
                    # matrix and rejects arrays with more than 2 dims.
                    diff_w_k = abs(f_k) \
                        / (np.linalg.norm(w_k.ravel(), ord=1) + 1e-8)
                else:
                    msg = 'ord {} is not available.' \
                        .format(str(self._norm_level))
                    LOGGER.error(TAG, msg)
                    raise NotImplementedError(msg)
                # Remember the class with the smallest distance so far.
                if diff_w_k < diff_w:
                    diff_w = diff_w_k
                    weight = w_k

            if self._norm_level == 2 or self._norm_level == '2':
                r_i = diff_w * weight / (np.linalg.norm(weight) + 1e-8)
            elif self._norm_level == np.inf or self._norm_level == 'inf':
                # Same flattening as above so multi-dimensional samples
                # are normalised by their vector L1 norm.
                r_i = diff_w*np.sign(weight) \
                    / (np.linalg.norm(np.ravel(weight), ord=1) + 1e-8)
            else:
                msg = 'ord {} is not available in normalization.' \
                    .format(str(self._norm_level))
                LOGGER.error(TAG, msg)
                raise NotImplementedError(msg)
            r_tot[idx, ...] = r_tot[idx, ...] + r_i

        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            inputs = x_origin + (1 + self._overshoot) * r_tot \
                * (clip_max - clip_min)
            inputs = np.clip(inputs, clip_min, clip_max)
        else:
            inputs = x_origin + (1 + self._overshoot) * r_tot
        cur_labels = np.argmax(
            self._network(Tensor(inputs.astype(inputs_dtype))).asnumpy(),
            axis=1)
        iteration += 1
        # r_tot is float64, so cast back to the dtype the caller passed in.
        inputs = inputs.astype(inputs_dtype)
        del preds, grads
    return inputs
def generate(self, inputs, labels):
    """
    Generate adversarial examples based on input data and targeted
    labels (or ground_truth labels).

    Each sample is attacked on its own: a population of `self._pop_size`
    perturbed copies is evolved for at most `self._max_steps` rounds of
    selection, crossover and mutation, stopping early once a member of
    the population satisfies the success test of the current model type.

    Args:
        inputs (Union[numpy.ndarray, tuple]): Input samples. The format of
            inputs should be numpy.ndarray if model_type='classification'.
            The format of inputs can be (input1, input2, ...) or only one
            array if model_type='detection'.
        labels (Union[numpy.ndarray, tuple]): Targeted labels or
            ground-truth labels. The format of labels should be
            numpy.ndarray if model_type='classification'. The format of
            labels should be (gt_boxes, gt_labels) if
            model_type='detection'.

    Returns:
        - numpy.ndarray, bool values for each attack result.

        - numpy.ndarray, generated adversarial examples.

        - numpy.ndarray, query times for each sample.

    Raises:
        ValueError: If `self._sparse` is set for a classification model
            and the squeezed labels have two or more dimensions or a
            first dimension different from that of `inputs`.

    Examples:
        >>> advs = attack.generate([[0.2, 0.3, 0.4],
        >>>                         [0.3, 0.3, 0.2]],
        >>>                        [1, 2])
    """
    if self._model_type == 'classification':
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        if self._sparse:
            # Sparse labels must squeeze down to one value per sample.
            if labels.size > 1:
                label_squ = np.squeeze(labels)
            else:
                label_squ = labels
            if len(label_squ.shape) >= 2 or label_squ.shape[0] != inputs.shape[0]:
                msg = "The parameter 'sparse' of GeneticAttack is True, but the input labels is not sparse style " \
                      "and got its shape as {}.".format(labels.shape)
                LOGGER.error(TAG, msg)
                raise ValueError(msg)
        else:
            labels = np.argmax(labels, axis=1)
        images = inputs
    elif self._model_type == 'detection':
        images, auxiliary_inputs, gt_boxes, gt_labels = check_detection_inputs(inputs, labels)
    adv_list = []
    success_list = []
    query_times_list = []
    for i in range(images.shape[0]):
        is_success = False
        x_ori = images[i]
        # NOTE(review): when no bounds are configured they are taken from
        # the first sample processed and stored on the instance, so later
        # samples and later calls reuse them - confirm this is intended.
        if not self._bounds:
            self._bounds = [np.min(x_ori), np.max(x_ori)]
        pixel_deep = self._bounds[1] - self._bounds[0]

        if self._model_type == 'classification':
            label_i = labels[i]
        elif self._model_type == 'detection':
            # Re-add a leading axis of size 1 to every per-sample item.
            auxiliary_input_i = tuple()
            for item in auxiliary_inputs:
                auxiliary_input_i += (np.expand_dims(item[i], axis=0),)
            gt_boxes_i, gt_labels_i = np.expand_dims(gt_boxes[i], axis=0), np.expand_dims(gt_labels[i], axis=0)
            inputs_i = (images[i],) + auxiliary_input_i
            confi_ori, gt_object_num = self._detection_scores(inputs_i, gt_boxes_i, gt_labels_i, model=self._model)
            LOGGER.info(TAG, 'The number of ground-truth objects is %s', gt_object_num[0])

        # generate particles
        ori_copies = np.repeat(x_ori[np.newaxis, :], self._pop_size, axis=0)
        # initial perturbations
        cur_pert = np.random.uniform(self._bounds[0], self._bounds[1], ori_copies.shape)
        cur_pop = ori_copies + cur_pert
        query_times = 0
        iters = 0

        while iters < self._max_steps:
            iters += 1
            # Keep the population within the allowed perturbation around the
            # original sample first, then within the value bounds.
            cur_pop = np.clip(np.clip(cur_pop,
                                      ori_copies - pixel_deep*self._per_bounds,
                                      ori_copies + pixel_deep*self._per_bounds),
                              self._bounds[0], self._bounds[1])

            if self._model_type == 'classification':
                pop_preds = self._model.predict(cur_pop)
                query_times += cur_pop.shape[0]
                all_preds = np.argmax(pop_preds, axis=1)
                if self._targeted:
                    success_pop = np.equal(label_i, all_preds).astype(np.int32)
                else:
                    success_pop = np.not_equal(label_i, all_preds).astype(np.int32)
                # is_success becomes an int32 0/1 here rather than a bool.
                is_success = max(success_pop)
                best_idx = np.argmax(success_pop)
                # Fitness compares the score of label_i with the sum of all
                # other scores; the sign depends on the attack direction.
                target_preds = pop_preds[:, label_i]
                others_preds_sum = np.sum(pop_preds, axis=1) - target_preds
                if self._targeted:
                    fit_vals = target_preds - others_preds_sum
                else:
                    fit_vals = others_preds_sum - target_preds

            elif self._model_type == 'detection':
                confi_adv, correct_nums_adv = self._detection_scores(
                    (cur_pop,) + auxiliary_input_i, gt_boxes_i, gt_labels_i, model=self._model)
                LOGGER.info(TAG, 'The number of correctly detected objects in adversarial image is %s',
                            np.min(correct_nums_adv))
                query_times += self._pop_size
                # Score change minus a distance penalty weighted by self._c.
                fit_vals = abs(
                    confi_ori - confi_adv) - self._c / self._pop_size * np.linalg.norm(
                        (cur_pop - x_ori).reshape(cur_pop.shape[0], -1), axis=1)

                # Halve the penalty weight (stored on the instance) when no
                # individual reaches a non-negative fitness.
                if np.max(fit_vals) < 0:
                    self._c /= 2

                if np.max(fit_vals) < -2:
                    LOGGER.debug(TAG,
                                 'best fitness value is %s, which is too small. We recommend that you decrease '
                                 'the value of the initialization parameter c.', np.max(fit_vals))
                if iters < 3 and np.max(fit_vals) > 100:
                    LOGGER.debug(TAG,
                                 'best fitness value is %s, which is too large. We recommend that you increase '
                                 'the value of the initialization parameter c.', np.max(fit_vals))

                # Success once few enough ground-truth objects are still
                # detected correctly for the best individual.
                if np.min(correct_nums_adv) <= int(gt_object_num*self._reserve_ratio):
                    is_success = True
                    best_idx = np.argmin(correct_nums_adv)

            if is_success:
                LOGGER.debug(TAG, 'successfully find one adversarial sample '
                                  'and start Reduction process.')
                final_adv = cur_pop[best_idx]
                if self._model_type == 'classification':
                    final_adv, query_times = self._reduction(x_ori, query_times, label_i, final_adv,
                                                             model=self._model,
                                                             targeted_attack=self._targeted)
                break

            best_fit = max(fit_vals)

            # Plateau bookkeeping lives on the instance, so it carries over
            # between samples and between calls.
            if best_fit > self._best_fit:
                self._best_fit = best_fit
                self._plateau_times = 0
            else:
                self._plateau_times += 1
            adap_threshold = (lambda z: 100 if z > -0.4 else 300)(best_fit)
            if self._plateau_times > adap_threshold:
                self._adap_times += 1
                self._plateau_times = 0
            if self._adaptive:
                # Decayed step/probability, floored at the configured values.
                step_noise = max(self._step_size, 0.4*(0.9**self._adap_times))
                step_p = max(self._mutation_rate, 0.5*(0.9**self._adap_times))
            else:
                step_noise = self._step_size
                step_p = self._mutation_rate
            step_temp = self._temp
            # The fittest individual is carried over unchanged.
            elite = cur_pop[np.argmax(fit_vals)]
            select_probs = softmax(fit_vals/step_temp)
            select_args = np.arange(self._pop_size)
            # Draw two parents (with replacement) for each of the
            # pop_size - 1 children.
            parents_arg = np.random.choice(
                a=select_args, size=2*(self._pop_size - 1),
                replace=True, p=select_probs)
            parent1 = cur_pop[parents_arg[:self._pop_size - 1]]
            parent2 = cur_pop[parents_arg[self._pop_size - 1:]]
            parent1_probs = select_probs[parents_arg[:self._pop_size - 1]]
            parent2_probs = select_probs[parents_arg[self._pop_size - 1:]]
            parent2_probs = parent2_probs / (parent1_probs + parent2_probs)
            # duplicate the probabilities to all features of each particle.
            dims = len(x_ori.shape)
            for _ in range(dims):
                parent2_probs = parent2_probs[:, np.newaxis]
            parent2_probs = np.tile(parent2_probs, ((1,) + x_ori.shape))
            # cross_probs is 1 where the feature is taken from parent1 and
            # 0 where it is taken from parent2.
            cross_probs = (np.random.random(parent1.shape) >
                           parent2_probs).astype(np.int32)
            childs = parent1*cross_probs + parent2*(1 - cross_probs)
            mutated_childs = self._mutation(
                childs, step_noise=self._per_bounds*step_noise,
                prob=step_p)
            cur_pop = np.concatenate((mutated_childs, elite[np.newaxis, :]))

        if not is_success:
            LOGGER.debug(TAG, 'fail to find adversarial sample.')
            # NOTE(review): elite is only bound after one full round of the
            # loop above, so this presumably relies on self._max_steps >= 1.
            final_adv = elite
        if self._model_type == 'detection':
            final_adv, query_times = self._fast_reduction(
                x_ori, final_adv, query_times, auxiliary_input_i, gt_boxes_i, gt_labels_i, model=self._model)
        adv_list.append(final_adv)

        LOGGER.debug(TAG,
                     'iteration times is: %d and query times is: %d',
                     iters,
                     query_times)
        success_list.append(is_success)
        query_times_list.append(query_times)
        del ori_copies, cur_pert, cur_pop
    return np.asarray(success_list), \
           np.asarray(adv_list), \
           np.asarray(query_times_list)