def __init__(self, attack_name, model, batch_size, dataset_name, goal, distance_metric, session, **kwargs):
    '''
    Set up an AttackBenchmark: build the attack and the label-prediction graph.

    :param attack_name: The attack method's name. All valid values are ``'fgsm'``, ``'bim'``, ``'pgd'``, ``'mim'``,
        ``'cw'``, ``'deepfool'``, ``'nes'``, ``'spsa'``, ``'nattack'``, ``'boundary'``, ``'evolutionary'``.
    :param model: The classifier model to run the attack on.
    :param batch_size: Batch size for attack.
    :param dataset_name: The dataset's name. All valid values are ``'cifar10'`` and ``'imagenet'``.
    :param goal: The adversarial goal for the attack method. All valid values are ``'t'`` for targeted attack,
        ``'tm'`` for targeted misclassification attack, and ``'ut'`` for untargeted attack.
    :param distance_metric: The adversarial distance metric for the attack method. All valid values are ``'l_2'``
        and ``'l_inf'``.
    :param session: The ``tf.Session`` instance for the attack to run in.
    :param kwargs: Other keyword arguments to pass to the attack method's initialization function.
    '''
    # Remember the benchmark configuration on the instance.
    self.model = model
    self.attack_name = attack_name
    self.dataset_name = dataset_name
    self.batch_size = batch_size
    self.goal = goal
    self.distance_metric = distance_metric
    self.session = session

    attack_config = {
        'model': model,
        'batch_size': batch_size,
        # use same batch_size for nes, spsa, nattack's samples
        'samples_batch_size': batch_size,
        'goal': goal,
        'distance_metric': distance_metric,
        'session': session,
    }
    # Caller-supplied options are applied last, so they override the entries above.
    attack_config.update(kwargs)
    self.attack = load_attack(attack_name, attack_config)

    # Placeholder with a free batch dimension, plus the model's predicted labels for it.
    input_shape = (None,) + tuple(model.x_shape)
    self.xs_ph = tf.placeholder(model.x_dtype, shape=input_shape)
    self.xs_label = model.labels(self.xs_ph)
def __init__(self, attack_name, model, batch_size, goal, distance_metric, session, distortion,
             confidence=0.0, search_steps=5, binsearch_steps=10,
             nes_lr_factor=None, nes_min_lr_factor=None, spsa_lr_factor=None, **kwargs):
    '''
    Initialize DistortionBenchmark.

    :param attack_name: The attack method's name. All valid values are 'fgsm', 'bim', 'pgd', 'mim', 'cw',
        'deepfool', 'nes', 'spsa', 'nattack'.
    :param model: The classifier model to run the attack on.
    :param batch_size: Batch size for attack.
    :param goal: The adversarial goal for the attack method. All valid values are 't' for targeted attack, 'tm'
        for targeted misclassification attack, and 'ut' for untargeted attack.
    :param distance_metric: The adversarial distance metric for the attack method. All valid values are 'l_2' and
        'l_inf'.
    :param session: The `tf.Session` instance for the attack to run in.
    :param distortion: Initial distortion. When doing search on attack magnitude, it is used as the starting
        point.
    :param confidence: For white box attacks, consider the adversarial as succeed only when the margin between
        top-2 logits is larger than the confidence.
    :param search_steps: Search steps for finding an initial adversarial distortion.
    :param binsearch_steps: Binary search steps for refining the initial adversarial distortion.
    :param nes_lr_factor: The nes attack's `lr` parameter is set to `nes_lr_factor * magnitude`.
    :param nes_min_lr_factor: The nes attack's `min_lr` parameter is set to `nes_min_lr_factor * magnitude`.
    :param spsa_lr_factor: The spsa attack's `lr` parameter is set to `spsa_lr_factor * magnitude`.
    :param kwargs: Other keyword arguments to pass to the attack method's initialization function.
    :raises NotImplementedError: If `attack_name` is not one of the attacks listed above. This is raised after
        the attack has been loaded and the input placeholder has been created.
    '''
    # Search configuration.
    self.init_distortion = distortion
    self.confidence = confidence
    self.search_steps = search_steps
    self.binsearch_steps = binsearch_steps

    init_kwargs = {
        'model': model,
        'batch_size': batch_size,
        # use same batch_size for nes, spsa, nattack's samples
        'samples_batch_size': batch_size,
        'goal': goal,
        'distance_metric': distance_metric,
        'session': session,
    }
    # Extra keyword arguments are applied last so they override the entries above.
    init_kwargs.update(kwargs)

    self.model = model
    self.attack_name = attack_name
    self.batch_size = batch_size
    self.goal = goal
    self.distance_metric = distance_metric
    self.attack = load_attack(attack_name, init_kwargs)

    self.nes_lr_factor = nes_lr_factor
    self.nes_min_lr_factor = nes_min_lr_factor
    self.spsa_lr_factor = spsa_lr_factor

    self._session = session
    self._xs_ph = tf.placeholder(self.model.x_dtype, shape=(None, *self.model.x_shape))

    # Pick the search routine for this attack. Every branch except 'cw'/'deepfool' also builds a logits
    # tensor on the input placeholder.
    if self.attack_name == 'fgsm':
        self._logits = self.model.logits(self._xs_ph)
        self._run = self._run_binsearch
    elif self.attack_name in ('bim', 'pgd', 'mim'):
        self._logits = self.model.logits(self._xs_ph)
        self._run = self._run_binsearch_alpha
    elif self.attack_name in ('cw', 'deepfool'):
        self._run = self._run_optimized
    elif self.attack_name in ('nes', 'spsa', 'nattack'):
        self._logits = self.model.logits(self._xs_ph)
        self._run = self._run_binsearch_nes_family
    else:
        raise NotImplementedError(
            "distortion benchmark does not support attack {!r}; supported attacks are 'fgsm', 'bim', 'pgd', "
            "'mim', 'cw', 'deepfool', 'nes', 'spsa', 'nattack'".format(self.attack_name)
        )
def __init__(self, iteration, attack_name, model, batch_size, dataset_name, goal, distance_metric, session,
             cw_n_points=10, **kwargs):
    '''
    Initialize IterationBenchmark.

    :param iteration: The iteration count. For 'bim', 'pgd', 'mim', 'cw', 'deepfool' attack, it would be passed to
        the attack as the `iteration` configuration parameter. For 'nes', 'spsa', 'nattack', 'boundary',
        'evolutionary' attack, it would be passed to the attack as the `max_queries` configuration parameter.
    :param attack_name: The attack method's name. All valid values are 'bim', 'pgd', 'mim', 'cw', 'deepfool',
        'nes', 'spsa', 'nattack', 'boundary', 'evolutionary'.
    :param model: The classifier model to run the attack on.
    :param batch_size: Batch size for attack.
    :param dataset_name: The dataset's name. All valid values are 'cifar10' and 'imagenet'.
    :param goal: The adversarial goal for the attack method. All valid values are 't' for targeted attack, 'tm'
        for targeted misclassification attack, and 'ut' for untargeted attack.
    :param distance_metric: The adversarial distance metric for the attack method. All valid values are 'l_2' and
        'l_inf'.
    :param session: The `tf.Session` instance for the attack to run in.
    :param cw_n_points: How many times should we run 'cw' attack for the benchmark. To get the benchmark result
        for 'cw' attack, we need to run it for each iteration parameter we are interested in. Since the
        computation cost for C&W attack is huge, we select `cw_n_points` numbers between 0 and `iteration`
        uniformly as the iteration parameter to reduce the computation cost.
    :param kwargs: Other keyword arguments to pass to the attack method's initialization function.
    :raises NotImplementedError: If `attack_name` is not one of the attacks listed above. This is raised after
        the attack has been loaded.
    '''
    self.iteration = iteration

    def iteration_callback(xs, xs_adv):
        # Build tensors for the predicted labels of `xs_adv` and the per-example distance between `xs_adv`
        # and `xs` under the configured distance metric.
        labels = model.labels(xs_adv)
        # Flatten each example's perturbation into one row.
        delta_xs = tf.reshape(xs_adv - xs, (xs.shape[0], -1))
        if distance_metric == 'l_2':
            dists = tf.linalg.norm(delta_xs, axis=1)
        else:  # 'l_inf'
            dists = tf.reduce_max(tf.abs(delta_xs), axis=1)
        return labels, dists

    init_kwargs = {
        'model': model,
        'batch_size': batch_size,
        # use same batch_size for nes, spsa, nattack's samples
        'samples_batch_size': batch_size,
        'goal': goal,
        'distance_metric': distance_metric,
        'session': session,
    }
    init_kwargs.update(kwargs)
    # Set after the caller's kwargs so the benchmark's own callback always takes effect.
    init_kwargs['iteration_callback'] = iteration_callback

    self.model = model
    self.attack_name = attack_name
    self.dataset_name = dataset_name
    self.batch_size = batch_size
    self.goal = goal
    self.distance_metric = distance_metric
    self.attack = load_attack(attack_name, init_kwargs)

    self.cw_n_points = cw_n_points
    self._session = session

    # Pick the run routine for this attack and build whatever extra graph nodes that branch sets up.
    if self.attack_name in ('bim', 'pgd', 'mim'):
        self._run = self._run_basic
    elif self.attack_name == 'cw':
        # Fixed-size batch placeholders fed through the same label/distance computation.
        self._xs_ph = tf.placeholder(self.model.x_dtype, shape=(self.batch_size, *self.model.x_shape))
        self._xs_adv_ph = tf.placeholder(self.model.x_dtype, shape=(self.batch_size, *self.model.x_shape))
        self._cw_data = iteration_callback(self._xs_ph, self._xs_adv_ph)
        self._run = self._run_cw
    elif self.attack_name == 'deepfool':
        self._run = self._run_deepfool
    elif self.attack_name in ('nes', 'spsa', 'nattack'):
        # Single-example placeholders, reshaped to a batch of one for the callback.
        self._x_ph = tf.placeholder(self.model.x_dtype, shape=self.model.x_shape)
        self._x_adv_ph = tf.placeholder(self.model.x_dtype, shape=self.model.x_shape)
        self._score_based_data = iteration_callback(
            tf.reshape(self._x_ph, (1, *self.model.x_shape)),
            tf.reshape(self._x_adv_ph, (1, *self.model.x_shape)),
        )
        self._run = self._run_score_based
    elif self.attack_name in ('boundary', 'evolutionary'):
        self._xs_ph = tf.placeholder(model.x_dtype, shape=(None, *model.x_shape))
        self._xs_label = model.labels(self._xs_ph)
        self._run = self._run_decision_based
    else:
        raise NotImplementedError(
            "iteration benchmark does not support attack {!r}; supported attacks are 'bim', 'pgd', 'mim', "
            "'cw', 'deepfool', 'nes', 'spsa', 'nattack', 'boundary', 'evolutionary'".format(self.attack_name)
        )