示例#1
0
    def __init__(self, attack_name, model, batch_size, dataset_name, goal,
                 distance_metric, session, **kwargs):
        ''' Set up an AttackBenchmark: store the configuration, build the attack and the label tensor.

        :param attack_name: Name of the attack method. Valid values are ``'fgsm'``, ``'bim'``, ``'pgd'``, ``'mim'``,
            ``'cw'``, ``'deepfool'``, ``'nes'``, ``'spsa'``, ``'nattack'``, ``'boundary'``, ``'evolutionary'``.
        :param model: The classifier model to attack.
        :param batch_size: Batch size used by the attack.
        :param dataset_name: Name of the dataset. Valid values are ``'cifar10'`` and ``'imagenet'``.
        :param goal: Adversarial goal of the attack. Valid values are ``'t'`` for targeted attack, ``'tm'`` for
            targeted misclassification attack, and ``'ut'`` for untargeted attack.
        :param distance_metric: Adversarial distance metric of the attack. Valid values are ``'l_2'`` and
            ``'l_inf'``.
        :param session: The ``tf.Session`` instance the attack runs in.
        :param kwargs: Extra keyword arguments forwarded to the attack's initialization function. They are merged
            after the standard arguments, so a key given here replaces the standard value of the same name.
        '''
        # Keep the benchmark configuration on the instance.
        self.model = model
        self.attack_name = attack_name
        self.dataset_name = dataset_name
        self.batch_size = batch_size
        self.goal = goal
        self.distance_metric = distance_metric
        self.session = session

        # Standard arguments for the attack's constructor. `samples_batch_size` reuses `batch_size`
        # for the sample batches of nes, spsa and nattack.
        attack_config = {
            'model': model,
            'batch_size': batch_size,
            'samples_batch_size': batch_size,
            'goal': goal,
            'distance_metric': distance_metric,
            'session': session,
        }
        attack_config.update(kwargs)
        self.attack = load_attack(attack_name, attack_config)

        # Input placeholder with an unspecified batch dimension, and the result of `model.labels` on it.
        self.xs_ph = tf.placeholder(model.x_dtype, shape=(None, *model.x_shape))
        self.xs_label = model.labels(self.xs_ph)
示例#2
0
    def __init__(self,
                 attack_name,
                 model,
                 batch_size,
                 goal,
                 distance_metric,
                 session,
                 distortion,
                 confidence=0.0,
                 search_steps=5,
                 binsearch_steps=10,
                 nes_lr_factor=None,
                 nes_min_lr_factor=None,
                 spsa_lr_factor=None,
                 **kwargs):
        ''' Initialize DistortionBenchmark.

        Stores the search configuration, builds the attack through ``load_attack``, creates the input placeholder
        and selects the ``_run_*`` method matching ``attack_name``.

        :param attack_name: The attack method's name. All valid values are 'fgsm', 'bim', 'pgd', 'mim', 'cw',
            'deepfool', 'nes', 'spsa', 'nattack'.
        :param model: The classifier model to run the attack on.
        :param batch_size: Batch size for attack.
        :param goal: The adversarial goal for the attack method. All valid values are 't' for targeted attack, 'tm' for
            targeted misclassification attack, and 'ut' for untargeted attack.
        :param distance_metric: The adversarial distance metric for the attack method. All valid values are 'l_2' and
            'l_inf'.
        :param session: The `tf.Session` instance for the attack to run in.
        :param distortion: Initial distortion. When doing search on attack magnitude, it is used as the starting point.
        :param confidence: For white box attacks, consider the adversarial as succeed only when the margin between top-2
            logits is larger than the confidence.
        :param search_steps: Search steps for finding an initial adversarial distortion.
        :param binsearch_steps: Binary search steps for refining the initial adversarial distortion.
        :param nes_lr_factor: The nes attack's `lr` parameter is set to `nes_lr_factor * magnitude`.
        :param nes_min_lr_factor: The nes attack's `min_lr` parameter is set to `nes_min_lr_factor * magnitude`.
        :param spsa_lr_factor: The spsa attack's `lr` parameter is set to `spsa_lr_factor * magnitude`.
        :param kwargs: Other keyword arguments to pass to the attack method's initialization function. They are merged
            after the standard arguments, so a key given here replaces the standard value of the same name.
        :raises NotImplementedError: If `attack_name` is not one of the supported attacks. The check runs after the
            attack and the input placeholder have been created.
        '''
        # Search configuration.
        self.init_distortion = distortion
        self.confidence = confidence
        self.search_steps = search_steps
        self.binsearch_steps = binsearch_steps

        # Standard arguments for the attack's constructor. `samples_batch_size` reuses `batch_size`
        # for nes, spsa, nattack's samples.
        init_kwargs = {
            'model': model,
            'batch_size': batch_size,
            'samples_batch_size': batch_size,
            'goal': goal,
            'distance_metric': distance_metric,
            'session': session,
        }
        init_kwargs.update(kwargs)

        self.model = model
        self.attack_name = attack_name
        self.batch_size, self.goal, self.distance_metric = batch_size, goal, distance_metric
        self.attack = load_attack(attack_name, init_kwargs)
        self.nes_lr_factor, self.nes_min_lr_factor = nes_lr_factor, nes_min_lr_factor
        self.spsa_lr_factor = spsa_lr_factor

        self._session = session

        # Input placeholder with an unspecified batch dimension.
        self._xs_ph = tf.placeholder(self.model.x_dtype,
                                     shape=(None, *self.model.x_shape))

        # Pick the run strategy for the attack. Every branch except 'cw'/'deepfool' also builds
        # a logits tensor on the placeholder.
        if self.attack_name == 'fgsm':
            self._logits = self.model.logits(self._xs_ph)
            self._run = self._run_binsearch
        elif self.attack_name in ('bim', 'pgd', 'mim'):
            self._logits = self.model.logits(self._xs_ph)
            self._run = self._run_binsearch_alpha
        elif self.attack_name in ('cw', 'deepfool'):
            # No logits tensor is created for these attacks.
            self._run = self._run_optimized
        elif self.attack_name in ('nes', 'spsa', 'nattack'):
            self._logits = self.model.logits(self._xs_ph)
            self._run = self._run_binsearch_nes_family
        else:
            # Name the rejected attack so the failure is actionable.
            raise NotImplementedError(
                'DistortionBenchmark does not support attack {!r}'.format(self.attack_name))
示例#3
0
    def __init__(self,
                 iteration,
                 attack_name,
                 model,
                 batch_size,
                 dataset_name,
                 goal,
                 distance_metric,
                 session,
                 cw_n_points=10,
                 **kwargs):
        ''' Initialize IterationBenchmark.

        Builds the attack through ``load_attack`` with an ``iteration_callback`` that reports labels and distances,
        then creates the placeholders needed by the selected attack family and picks the matching ``_run_*`` method.

        :param iteration: The iteration count. For 'bim', 'pgd', 'mim', 'cw', 'deepfool' attack, it would be passed to
            the attack as the `iteration` configuration parameter. For 'nes', 'spsa', 'nattack', 'boundary',
            'evolutionary' attack, it would be passed to the attack as the `max_queries` configuration parameter.
        :param attack_name: The attack method's name. All valid values are 'bim', 'pgd', 'mim', 'cw', 'deepfool', 'nes',
            'spsa', 'nattack', 'boundary', 'evolutionary'.
        :param model: The classifier model to run the attack on.
        :param batch_size: Batch size for attack.
        :param dataset_name: The dataset's name. All valid values are 'cifar10' and 'imagenet'.
        :param goal: The adversarial goal for the attack method. All valid values are 't' for targeted attack, 'tm' for
            targeted misclassification attack, and 'ut' for untargeted attack.
        :param distance_metric: The adversarial distance metric for the attack method. All valid values are 'l_2' and
            'l_inf'.
        :param session: The `tf.Session` instance for the attack to run in.
        :param cw_n_points: How many times should we run 'cw' attack for the benchmark. To get the benchmark result for
            'cw' attack, we need to run it for each iteration parameter we are interested in. Since the computation cost
            for C&W attack is huge, we select `cw_n_points` numbers between 0 and `iteration` uniformly as the iteration
            parameter to reduce the computation cost.
        :param kwargs: Other keyword arguments to pass to the attack method's initialization function. An
            `iteration_callback` entry given here is replaced by the benchmark's own callback.
        :raises NotImplementedError: If `attack_name` is not one of the supported attacks. The check runs after the
            attack has been created.
        '''
        self.iteration = iteration

        def iteration_callback(xs, xs_adv):
            ''' Return tensors for the labels of `xs_adv` and the per-example distance between `xs_adv` and `xs`. '''
            labels = model.labels(xs_adv)

            # Flatten each example's perturbation into one row. The row count is taken from the
            # static shape `xs.shape[0]`.
            # NOTE(review): this appears to require `xs` to have a known leading dimension - confirm
            # that the attacks never call the callback with an unknown batch size.
            delta_xs = tf.reshape(xs_adv - xs, (xs.shape[0], -1))
            if distance_metric == 'l_2':
                dists = tf.linalg.norm(delta_xs, axis=1)
            else:  # 'l_inf' (any value other than 'l_2' also ends up here)
                dists = tf.reduce_max(tf.abs(delta_xs), axis=1)

            return labels, dists

        # Standard arguments for the attack's constructor. `samples_batch_size` reuses `batch_size`
        # for nes, spsa, nattack's samples.
        init_kwargs = {
            'model': model,
            'batch_size': batch_size,
            'samples_batch_size': batch_size,
            'goal': goal,
            'distance_metric': distance_metric,
            'session': session,
        }
        init_kwargs.update(kwargs)
        # Set after merging kwargs so the benchmark's callback always wins.
        init_kwargs['iteration_callback'] = iteration_callback

        self.model = model
        self.attack_name, self.dataset_name = attack_name, dataset_name
        self.batch_size, self.goal, self.distance_metric = batch_size, goal, distance_metric
        self.attack = load_attack(attack_name, init_kwargs)
        self.cw_n_points = cw_n_points

        self._session = session

        if attack_name in ('bim', 'pgd', 'mim'):
            self._run = self._run_basic
        elif attack_name == 'cw':
            # Placeholders with a fixed batch dimension of `batch_size`, fed through the same
            # label/distance computation the attack receives as its callback.
            self._xs_ph = tf.placeholder(model.x_dtype,
                                         shape=(batch_size, *model.x_shape))
            self._xs_adv_ph = tf.placeholder(model.x_dtype,
                                             shape=(batch_size, *model.x_shape))
            self._cw_data = iteration_callback(self._xs_ph, self._xs_adv_ph)
            self._run = self._run_cw
        elif attack_name == 'deepfool':
            self._run = self._run_deepfool
        elif attack_name in ('nes', 'spsa', 'nattack'):
            # Single-example placeholders, reshaped to a batch of one for the callback.
            self._x_ph = tf.placeholder(model.x_dtype, shape=model.x_shape)
            self._x_adv_ph = tf.placeholder(model.x_dtype, shape=model.x_shape)
            self._score_based_data = iteration_callback(
                tf.reshape(self._x_ph, (1, *model.x_shape)),
                tf.reshape(self._x_adv_ph, (1, *model.x_shape)),
            )
            self._run = self._run_score_based
        elif attack_name in ('boundary', 'evolutionary'):
            # Placeholder with an unspecified batch dimension and the result of `model.labels` on it.
            self._xs_ph = tf.placeholder(model.x_dtype,
                                         shape=(None, *model.x_shape))
            self._xs_label = model.labels(self._xs_ph)
            self._run = self._run_decision_based
        else:
            # Name the rejected attack so the failure is actionable.
            raise NotImplementedError(
                'IterationBenchmark does not support attack {!r}'.format(attack_name))