示例#1
0
    def __init__(self, model, batch_size, loss, goal, distance_metric,
                 session):
        ''' Build the single-step FGSM attack graph.

        :param model: The model to attack. A ``ares.model.Classifier`` instance.
        :param batch_size: Batch size for the ``batch_attack()`` method.
        :param loss: The loss function to optimize. A ``ares.loss.Loss`` instance.
        :param goal: Adversarial goal. One of ``'t'``, ``'tm'`` or ``'ut'``; any other value raises
            ``NotImplementedError``.
        :param distance_metric: Adversarial distance metric. Either ``'l_2'`` or ``'l_inf'``; any other value raises
            ``NotImplementedError``.
        :param session: The ``tf.Session`` to run the attack in. The ``model`` should be loaded into this session.
        '''
        self.model = model
        self.batch_size = batch_size
        self._session = session
        self.loss = loss
        self.goal = goal
        self.distance_metric = distance_metric

        # inputs fed by batch_attack()
        self.xs_ph = get_xs_ph(model, batch_size)
        self.ys_ph = get_ys_ph(model, batch_size)

        # one magnitude per example: fed through eps_ph, kept in eps_var
        x_dtype = model.x_dtype
        eps_shape = (batch_size, )
        self.eps_ph = tf.placeholder(x_dtype, eps_shape)
        self.eps_var = tf.Variable(tf.zeros(eps_shape, dtype=x_dtype))

        # gradient of the loss with respect to the input batch
        loss_value = self.loss(self.xs_ph, self.ys_ph)
        input_grad = tf.gradients(loss_value, self.xs_ph)[0]
        if goal in ('t', 'tm'):
            # for 't' and 'tm' the gradient direction is reversed
            input_grad = -input_grad
        elif goal != 'ut':
            raise NotImplementedError

        # one row per example, so the per-example eps broadcasts over features
        flat_grad = tf.reshape(input_grad, (batch_size, -1))
        if distance_metric == 'l_2':
            direction = get_unit(flat_grad)
            perturbation = tf.expand_dims(self.eps_var, 1) * direction
        elif distance_metric == 'l_inf':
            eps_column = tf.expand_dims(self.eps_var, 1)
            perturbation = eps_column * tf.sign(flat_grad)
        else:
            raise NotImplementedError
        # back to the model's input shape
        perturbation = tf.reshape(perturbation, (batch_size, *model.x_shape))

        # keep the perturbed input inside [x_min, x_max]
        self.xs_adv = tf.clip_by_value(self.xs_ph + perturbation,
                                       model.x_min, model.x_max)

        # op that copies the fed eps_ph into eps_var
        self.config_eps_step = self.eps_var.assign(self.eps_ph)
示例#2
0
文件: bim.py 项目: ShawnXYang/ares
    def __init__(self,
                 model,
                 batch_size,
                 loss,
                 goal,
                 distance_metric,
                 session,
                 iteration_callback=None):
        ''' Build the iterative BIM attack graph.

        :param model: The model to attack. A ``ares.model.Classifier`` instance.
        :param batch_size: Batch size for the ``batch_attack()`` method.
        :param loss: The loss function to optimize. A ``ares.loss.Loss`` instance.
        :param goal: Adversarial goal. One of ``'t'``, ``'tm'`` or ``'ut'``; any other value raises
            ``NotImplementedError``.
        :param distance_metric: Adversarial distance metric. Either ``'l_2'`` or ``'l_inf'``; any other value raises
            ``NotImplementedError``.
        :param session: The ``tf.Session`` to run the attack in. The ``model`` should be loaded into this session.
        :param iteration_callback: A function accepting a ``xs`` ``tf.Tensor`` (the original examples) and a ``xs_adv``
            ``tf.Tensor`` (the adversarial examples for ``xs``). It is called once here to build its tensor; during
            ``batch_attack()`` that result is run after each iteration and yielded back to the caller. Defaults to
            ``None``.
        '''
        self.model = model
        self.batch_size = batch_size
        self._session = session
        self.loss = loss
        self.goal = goal
        self.distance_metric = distance_metric

        # inputs fed by batch_attack()
        self.xs_ph = get_xs_ph(model, batch_size)
        self.ys_ph = get_ys_ph(model, batch_size)

        # examples are kept flattened as (batch_size, D) inside the graph
        flat_shape = (batch_size, np.prod(model.x_shape))
        vector_shape = (batch_size, )
        x_dtype = model.x_dtype

        # state lives in variables to reduce memory copy between tensorflow and python:
        # original examples, labels, and the current adversarial examples
        self.xs_var = tf.Variable(tf.zeros(shape=flat_shape, dtype=x_dtype))
        self.ys_var = tf.Variable(
            tf.zeros(shape=vector_shape, dtype=model.y_dtype))
        self.xs_adv_var = tf.Variable(
            tf.zeros(shape=flat_shape, dtype=x_dtype))

        # per-example magnitude (eps) and step size (alpha)
        self.eps_ph = tf.placeholder(x_dtype, vector_shape)
        self.eps_var = tf.Variable(tf.zeros(vector_shape, dtype=x_dtype))
        self.alpha_ph = tf.placeholder(x_dtype, vector_shape)
        self.alpha_var = tf.Variable(tf.zeros(vector_shape, dtype=x_dtype))

        # column vectors broadcast against the (batch_size, D) tensors
        eps_col = tf.expand_dims(self.eps_var, 1)
        alpha_col = tf.expand_dims(self.alpha_var, 1)

        # the model consumes the adversarial examples in its own input shape
        model_shape = (batch_size, *model.x_shape)
        self.xs_adv_model = tf.reshape(self.xs_adv_var, model_shape)
        # note: self.loss is rebound from the loss function to the loss tensor
        self.loss = loss(self.xs_adv_model, self.ys_var)
        # gradient with respect to the flattened adversarial examples
        step_grad = tf.gradients(self.loss, self.xs_adv_var)[0]
        if goal in ('t', 'tm'):
            # for 't' and 'tm' the gradient direction is reversed
            step_grad = -step_grad
        elif goal != 'ut':
            raise NotImplementedError

        # take one step, then project back onto the eps-ball around xs_var
        if distance_metric == 'l_2':
            unit_grad = get_unit(step_grad)
            accumulated = self.xs_adv_var - self.xs_var
            stepped = alpha_col * unit_grad
            delta = accumulated + stepped
            # limit the l_2 norm of the total perturbation
            clipped_delta = tf.clip_by_norm(delta, eps_col, axes=[1])
            candidate = self.xs_var + clipped_delta
        elif distance_metric == 'l_inf':
            lower = self.xs_var - eps_col
            upper = self.xs_var + eps_col
            signed_grad = tf.sign(step_grad)
            moved = self.xs_adv_var + alpha_col * signed_grad
            # limit the l_inf norm of the total perturbation
            candidate = tf.clip_by_value(moved, lower, upper)
        else:
            raise NotImplementedError
        # keep the result inside [x_min, x_max]
        candidate = tf.clip_by_value(candidate, model.x_min, model.x_max)

        # ops driven by the attack loop
        self.update_xs_adv_step = self.xs_adv_var.assign(candidate)
        self.config_eps_step = self.eps_var.assign(self.eps_ph)
        self.config_alpha_step = self.alpha_var.assign(self.alpha_ph)
        # both the original and the adversarial variable start from xs_ph
        self.setup_xs = [
            target.assign(tf.reshape(self.xs_ph, flat_shape))
            for target in (self.xs_var, self.xs_adv_var)
        ]
        self.setup_ys = self.ys_var.assign(self.ys_ph)
        self.iteration = None

        if iteration_callback is None:
            self.iteration_callback = None
        else:
            xs_model = tf.reshape(self.xs_var, model_shape)
            # store the callback's result, not the callback itself
            self.iteration_callback = iteration_callback(
                xs_model, self.xs_adv_model)
示例#3
0
文件: cw.py 项目: Fugoes/realsafe
    def __init__(self,
                 model,
                 batch_size,
                 goal,
                 distance_metric,
                 session,
                 cw_loss_c=99999.0,
                 confidence=0.0,
                 learning_rate=0.01):
        ''' Initialize CW.

        Builds the graph that minimizes ``dists + c * score`` with Adam over a perturbation ``d_ws`` applied in
        tanh space.

        :param model: The model to attack. A ``ares.model.ClassifierWithLogits`` instance.
        :param batch_size: Batch size for the ``batch_attack()`` method.
        :param goal: Adversarial goals. All supported values are ``'t'``, ``'tm'``, and ``'ut'``.
        :param distance_metric: Adversarial distance metric. Only ``'l_2'`` is supported.
        :param session: The ``tf.Session`` to run the attack in. The ``model`` should be loaded into this session.
        :param cw_loss_c: The ``c`` parameter for ``ares.loss.CWLoss``.
        :param confidence: The minimum margin between the target logit and the second largest logit that we consider the
            example as adversarial.
        :param learning_rate: Learning rate for the ``AdamOptimizer``.
        :raises NotImplementedError: If ``goal`` or ``distance_metric`` is not one of the supported values.
        '''
        self.model, self.batch_size, self._session = model, batch_size, session
        self.goal, self.distance_metric = goal, distance_metric
        self.confidence = confidence

        # flatten shape of xs_ph
        xs_shape_flatten = (self.batch_size, np.prod(self.model.x_shape))
        # placeholder for batch_attack's input
        self.xs_ph, self.ys_ph = get_xs_ph(model, self.batch_size), get_ys_ph(
            model, self.batch_size)
        # store adversarial examples and labels in variables to reduce memory copy between tensorflow and python
        xs_var = tf.Variable(
            tf.zeros(shape=xs_shape_flatten, dtype=self.model.x_dtype))
        ys_var = tf.Variable(tf.zeros_like(self.ys_ph))
        # placeholder for c (the per-example weight of the score term in the loss below)
        self.cs_ph = tf.placeholder(self.model.x_dtype, (self.batch_size, ))
        cs_var = tf.Variable(tf.zeros_like(self.cs_ph))
        # xs_adv = tanh(ws), where ws is the original example mapped through atanh plus the trainable offset d_ws
        # (presumably _scale_to_tanh / _scale_to_model convert between the model's input range and tanh's range;
        # they are defined outside this method -- confirm)
        d_ws = tf.Variable(
            tf.zeros(shape=xs_shape_flatten, dtype=self.model.x_dtype))
        ws = tf.atanh(self._scale_to_tanh(xs_var)) + d_ws
        self.xs_adv = self._scale_to_model(tf.tanh(ws))
        self.xs_adv_model = tf.reshape(self.xs_adv,
                                       (self.batch_size, *self.model.x_shape))
        # the C&W loss term; cw_loss_c is forwarded so that the constructor argument actually takes effect
        cw_loss = CWLoss(self.model, c=cw_loss_c)(self.xs_adv_model, ys_var)
        self.logits = self.model.logits(self.xs_adv_model)
        if self.goal == 't' or self.goal == 'tm':
            self.score = tf.maximum(0.0, cw_loss + confidence)
        elif self.goal == 'ut':
            self.score = tf.maximum(0.0, tf.negative(cw_loss) + confidence)
        else:
            raise NotImplementedError
        # the distance term: squared l_2 distance between adversarial and original examples
        if self.distance_metric == 'l_2':
            self.dists = tf.reduce_sum(tf.square(self.xs_adv - xs_var), axis=1)
        else:
            raise NotImplementedError
        # the loss
        loss = self.dists + cs_var * self.score
        # minimize the loss using Adam; only d_ws is trained
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.optimizer_step = optimizer.minimize(loss, var_list=[d_ws])
        # op that re-initializes the optimizer's own variables
        self.setup_optimizer = tf.variables_initializer(optimizer.variables())

        # ops that load a new batch / new c values and reset the perturbation to zero
        self.setup_xs = xs_var.assign(tf.reshape(self.xs_ph, xs_shape_flatten))
        self.setup_ys = ys_var.assign(self.ys_ph)
        self.setup_cs = cs_var.assign(self.cs_ph)
        self.setup_d_ws = d_ws.assign(tf.zeros_like(d_ws))

        # provides default values
        self.iteration = 50
        self.search_steps = 2
        self.binsearch_steps = 10

        self.details = {}
        self.logger = None
示例#4
0
    def __init__(self,
                 model,
                 batch_size,
                 distance_metric,
                 session,
                 iteration_callback=None):
        ''' Initialize DeepFool.

        Builds the graph for a single DeepFool update of the adversarial examples; the per-class gradients are
        materialized into a variable through the ``assign_grads`` ops before each update.

        :param model: The model to attack. A ``ares.model.ClassifierWithLogits`` instance.
        :param batch_size: Batch size for the ``batch_attack()`` method.
        :param distance_metric: Adversarial distance metric. All supported values are ``'l_2'`` and ``'l_inf'``.
        :param session: The ``tf.Session`` to run the attack in. The ``model`` should be loaded into this session.
        :param iteration_callback: A function accept a ``xs`` ``tf.Tensor`` (the original examples) and a ``xs_adv``
            ``tf.Tensor`` (the adversarial examples for ``xs``). During ``batch_attack()``, this callback function would
            be run after each iteration, and its return value would be yielded back to the caller. By default,
            ``iteration_callback`` is ``None``.
        :raises NotImplementedError: If ``distance_metric`` is neither ``'l_2'`` nor ``'l_inf'``.
        '''
        self.model, self.batch_size, self._session = model, batch_size, session
        # overshoot factor: starts at 0.02 and can be overwritten through setup_overshot / overshot_ph
        self.overshot = tf.Variable(0.02)
        self.overshot_ph = tf.placeholder(tf.float32)
        # placeholder for batch_attack's input
        self.xs_ph = get_xs_ph(model, batch_size)
        self.ys_ph = get_ys_ph(model, batch_size)
        # store xs, xs_adv and ys in variables to reduce memory copy between tensorflow and python
        # flatten shape of xs_ph
        xs_flatten_shape = (batch_size, np.prod(self.model.x_shape))
        # variable for the original example with shape of (batch_size, D)
        self.xs_var = tf.Variable(
            tf.zeros(shape=xs_flatten_shape, dtype=self.model.x_dtype))
        # variable for labels
        self.ys_var = tf.Variable(
            tf.zeros(shape=(batch_size, ), dtype=self.model.y_dtype))
        # variable for the (hopefully) adversarial example with shape of (batch_size, D)
        self.xs_adv_var = tf.Variable(
            tf.zeros(shape=xs_flatten_shape, dtype=self.model.x_dtype))
        # get the adversarial example's logits and labels
        # (the shape is built by unpacking x_shape, as in the callback reshapes below, so any iterable
        # x_shape works rather than only a tuple)
        logits, self.labels = self.model.logits_and_labels(
            xs=tf.reshape(self.xs_adv_var,
                          (batch_size, *self.model.x_shape)))
        # we need to calculate the jacobian step by step
        # grads_var has shape (batch_size, n_class, D)
        self.grads_var = tf.Variable(
            tf.zeros((self.batch_size, self.model.n_class,
                      np.prod(self.model.x_shape)),
                     dtype=self.model.x_dtype))
        # calculating jacobian would construct a large graph:
        # one tf.gradients call and one slice assignment per class
        self.assign_grads = [
            self.grads_var[:, i, :].assign(
                tf.gradients(logits[:, i], self.xs_adv_var)[0])
            for i in range(self.model.n_class)
        ]
        # get the target label's logits and jacobian
        # k0s pairs each batch index with its label in ys_var, for use with gather_nd
        k0s = tf.stack((tf.range(self.batch_size), self.ys_var), axis=1)
        yk0s = tf.expand_dims(tf.gather_nd(logits, k0s), axis=1)
        gradk0s = tf.expand_dims(tf.gather_nd(self.grads_var, k0s), axis=1)

        # per-class logit gap and gradient difference relative to the label class
        fs = tf.abs(yk0s - logits)
        ws = self.grads_var - gradk0s

        # NOTE(review): tf.norm defaults to the euclidean norm and ws_norm is used by both branches below; the
        # DeepFool paper's l_inf variant uses the l_1 norm of ws -- confirm this is intended.
        ws_norm = tf.norm(ws, axis=-1)
        # for index = k0, ws_norm = 0.0, fs = 0.0, ls = 0.0 / 0.0 = NaN, and tf.argmin would ignore NaN
        ls = fs / ws_norm
        # ks: per example, the class index with the smallest fs / ws_norm ratio
        ks = tf.argmin(ls, axis=1, output_type=self.model.y_dtype)
        ks = tf.stack((tf.range(self.batch_size), ks), axis=1)

        fsks = tf.gather_nd(fs, ks)
        ws_normks = tf.gather_nd(ws_norm, ks)
        # rs: the perturbation to add for the selected class
        if distance_metric == 'l_2':
            wsks = tf.gather_nd(ws, ks)
            rs = tf.reshape(fsks / tf.square(ws_normks),
                            (self.batch_size, 1)) * wsks
        elif distance_metric == 'l_inf':
            ws_sign_ks = tf.gather_nd(tf.sign(ws), ks)
            rs = tf.reshape(fsks / ws_normks,
                            (self.batch_size, 1)) * ws_sign_ks
        else:
            raise NotImplementedError

        # if the xs_adv is adversarial, we do early stop.
        # flags is (1 + overshot) where the current label still equals ys_var and 0 elsewhere, so examples whose
        # label has already changed receive no further update
        self.eqs = tf.equal(self.labels, self.ys_var)
        flags = tf.reshape(
            tf.cast(self.eqs, self.model.x_dtype) * (1 + self.overshot),
            (self.batch_size, 1))
        xs_adv_next = self.xs_adv_var + flags * rs
        # clip by (x_min, x_max)
        xs_adv_next = tf.clip_by_value(xs_adv_next, self.model.x_min,
                                       self.model.x_max)

        self.update_xs_adv_step = self.xs_adv_var.assign(xs_adv_next)
        # ops that reset the gradients and load a new batch into the variables
        self.setup = [
            self.grads_var.initializer,
            self.xs_var.assign(tf.reshape(self.xs_ph, self.xs_var.shape)),
            self.xs_adv_var.assign(
                tf.reshape(self.xs_ph, self.xs_adv_var.shape)),
            self.ys_var.assign(self.ys_ph),
        ]
        self.setup_overshot = self.overshot.assign(self.overshot_ph)

        self.iteration_callback = None
        if iteration_callback is not None:
            xs_model = tf.reshape(self.xs_var,
                                  (self.batch_size, *self.model.x_shape))
            xs_adv_model = tf.reshape(self.xs_adv_var,
                                      (self.batch_size, *self.model.x_shape))
            # store the callback's result, not the callback itself
            self.iteration_callback = iteration_callback(
                xs_model, xs_adv_model)

        self.iteration = None
        self.details = {}