Example #1
    def body(x_in, y_in, domain_in, i_in, cond_in, predictions):
        logits = model.get_logits(x_in)
        preds = tf.nn.softmax(logits)
        preds_onehot = tf.one_hot(tf.argmax(preds, axis=1), depth=nb_classes)

        # Record this iteration's softmax output in the prediction history
        # tensor of shape [1, max_iters * 10] (the literal 10 appears to
        # assume nb_classes == 10)
        tensor1 = tf.zeros((1, i_in * 10))
        tensor2 = tf.zeros((1, (max_iters - 1 - i_in) * 10))
        reshaped_preds = tf.concat([tensor1, preds, tensor2], 1)
        predictions = tf.add(predictions, reshaped_preds)

        # Create the Jacobian graph: one gradient of the logits per class
        list_derivatives = []
        for class_ind in xrange(nb_classes):
            derivatives = tf.gradients(logits[:, class_ind], x_in)
            list_derivatives.append(derivatives[0])

        # Compute the Jacobian components and reshape the target class to
        # [nb_classes, -1, 1] so it broadcasts against the Jacobian
        grads = tf.reshape(tf.stack(list_derivatives),
                           shape=[nb_classes, -1, nb_features])

        if attack == "tjsma":
            # TJSMA additionally scales the Jacobian by (1 - x_in), which
            # down-weights features whose value is already close to 1
            grads = tf.reshape(1 - x_in, shape=[1, nb_features]) * grads

        target_class = tf.reshape(tf.transpose(y_in, perm=[1, 0]),
                                  shape=[nb_classes, -1, 1])
        other_classes = tf.cast(tf.not_equal(target_class, 1), tf_dtype)

        grads_target = reduce_sum(grads * target_class, axis=0)

        # WJSMA and TJSMA weight the non-target gradients by the predicted
        # class probabilities; plain JSMA sums them unweighted
        if attack in ("wjsma", "tjsma"):
            grads_other = reduce_sum(
                grads * other_classes *
                tf.reshape(preds, shape=[nb_classes, -1, 1]),
                axis=0)
        else:
            grads_other = reduce_sum(grads * other_classes, axis=0)

        # Remove the already-used input features from the search space by
        # pushing their saliency values out of range
        increase_coef = (4 * int(increase) - 2) * tf.cast(
            tf.equal(domain_in, 0), tf_dtype)

        target_tmp = grads_target
        target_tmp -= increase_coef * reduce_max(
            tf.abs(grads_target), axis=1, keepdims=True)
        target_sum = tf.reshape(target_tmp, shape=[-1, nb_features, 1]) + \
            tf.reshape(target_tmp, shape=[-1, 1, nb_features])

        other_tmp = grads_other
        other_tmp += increase_coef * reduce_max(
            tf.abs(grads_other), axis=1, keepdims=True)
        other_sum = tf.reshape(other_tmp, shape=[-1, nb_features, 1]) + \
            tf.reshape(other_tmp, shape=[-1, 1, nb_features])

        # Keep only feature pairs whose gradients match the attack direction
        if increase:
            scores_mask = ((target_sum > 0) & (other_sum < 0))
        else:
            scores_mask = ((target_sum < 0) & (other_sum > 0))

        # Score every candidate pair; zero_diagonal discards pairs (p, p)
        scores = tf.cast(scores_mask,
                         tf_dtype) * (-target_sum * other_sum) * zero_diagonal

        # Extract the best pair of features
        best = tf.argmax(tf.reshape(scores,
                                    shape=[-1, nb_features * nb_features]),
                         axis=1)

        p1 = tf.mod(best, nb_features)
        p2 = tf.floordiv(best, nb_features)
        p1_one_hot = tf.one_hot(p1, depth=nb_features)
        p2_one_hot = tf.one_hot(p2, depth=nb_features)

        # Check whether more modification is needed for each sample
        mod_not_done = tf.equal(reduce_sum(y_in * preds_onehot, axis=1), 0)
        cond = mod_not_done & (reduce_sum(domain_in, axis=1) >= 2)

        # Update the search domain
        cond_float = tf.reshape(tf.cast(cond, tf_dtype), shape=[-1, 1])
        to_mod = (p1_one_hot + p2_one_hot) * cond_float

        domain_out = domain_in - to_mod

        # Apply the modification to the images
        to_mod_reshape = tf.reshape(to_mod,
                                    shape=([-1] + x_in.shape[1:].as_list()))

        if increase:
            x_out = tf.minimum(clip_max, x_in + to_mod_reshape * theta)
        else:
            x_out = tf.maximum(clip_min, x_in - to_mod_reshape * theta)

        # Increase the iterator and check whether any sample still needs work
        i_out = tf.add(i_in, 1)
        cond_out = reduce_any(cond)

        return x_out, y_in, domain_out, i_out, cond_out, predictions
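
This body is meant to be the loop body of a tf.while_loop; the surrounding attack function supplies the closure variables (model, nb_classes, nb_features, max_iters, increase, theta, clip_min, clip_max, zero_diagonal and the reduce_* helpers). Below is a minimal sketch of how it is typically wired into the loop; the names x, y_target, search_domain and the exact loop variables are assumptions, not the original source:

    # Sketch only (assumed wiring): run body until every sample is
    # misclassified as its target or the search domain is exhausted.
    def condition(x_in, y_in, domain_in, i_in, cond_in, predictions):
        return tf.logical_and(tf.less(i_in, max_iters), cond_in)

    predictions_init = tf.zeros((1, max_iters * 10))

    x_adv, _, _, _, _, predictions_out = tf.while_loop(
        condition, body,
        [x, y_target, search_domain, 0, True, predictions_init],
        parallel_iterations=1)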
Example #2
    def body(x_in, y_in, domain_in, i_in, cond_in):

        preds = model.get_probs(x_in)
        preds_onehot = tf.one_hot(tf.argmax(preds, axis=1), depth=nb_classes)

        # create the Jacobian graph
        list_derivatives = []
        for class_ind in xrange(nb_classes):
            derivatives = tf.gradients(preds[:, class_ind], x_in)
            list_derivatives.append(derivatives[0])
        grads = tf.reshape(tf.stack(list_derivatives),
                           shape=[nb_classes, -1, nb_features])

        # Compute the Jacobian components
        # To help with the computation later, reshape the target_class
        # and other_class to [nb_classes, -1, 1].
        # The last dimension is added to allow broadcasting later.
        target_class = tf.reshape(tf.transpose(y_in, perm=[1, 0]),
                                  shape=[nb_classes, -1, 1])
        other_classes = tf.cast(tf.not_equal(target_class, 1), tf_dtype)

        grads_target = reduce_sum(grads * target_class, axis=0)
        grads_other = reduce_sum(grads * other_classes, axis=0)

        # Remove the already-used input features from the search space
        # Subtract 2 times the maximum value from those values so that
        # they won't be picked later
        increase_coef = (4 * int(increase) - 2) \
            * tf.cast(tf.equal(domain_in, 0), tf_dtype)

        target_tmp = grads_target
        target_tmp -= increase_coef \
            * reduce_max(tf.abs(grads_target), axis=1, keepdims=True)
        target_sum = tf.reshape(target_tmp, shape=[-1, nb_features, 1]) \
            + tf.reshape(target_tmp, shape=[-1, 1, nb_features])

        other_tmp = grads_other
        other_tmp += increase_coef \
            * reduce_max(tf.abs(grads_other), axis=1, keepdims=True)
        other_sum = tf.reshape(other_tmp, shape=[-1, nb_features, 1]) \
            + tf.reshape(other_tmp, shape=[-1, 1, nb_features])

        # Create a mask to only keep features that match conditions
        if increase:
            scores_mask = ((target_sum > 0) & (other_sum < 0))
        else:
            scores_mask = ((target_sum < 0) & (other_sum > 0))

        # Create a 2D tensor of scores for each pair of candidate features
        scores = tf.cast(scores_mask, tf_dtype) \
            * (-target_sum * other_sum) * zero_diagonal

        # Extract the best two pixels
        best = tf.argmax(tf.reshape(scores,
                                    shape=[-1, nb_features * nb_features]),
                         axis=1)

        p1 = tf.mod(best, nb_features)
        p2 = tf.floordiv(best, nb_features)
        p1_one_hot = tf.one_hot(p1, depth=nb_features)
        p2_one_hot = tf.one_hot(p2, depth=nb_features)

        # Check if more modification is needed for each sample
        mod_not_done = tf.equal(reduce_sum(y_in * preds_onehot, axis=1), 0)
        cond = mod_not_done & (reduce_sum(domain_in, axis=1) >= 2)

        # Update the search domain
        cond_float = tf.reshape(tf.cast(cond, tf_dtype), shape=[-1, 1])
        to_mod = (p1_one_hot + p2_one_hot) * cond_float

        domain_out = domain_in - to_mod

        # Apply the modification to the images
        to_mod_reshape = tf.reshape(to_mod,
                                    shape=([-1] + x_in.shape[1:].as_list()))
        if increase:
            x_out = tf.minimum(clip_max, x_in + to_mod_reshape * theta)
        else:
            x_out = tf.maximum(clip_min, x_in - to_mod_reshape * theta)

        # Increase the iterator, and check if all misclassifications are done
        i_out = tf.add(i_in, 1)
        cond_out = reduce_any(cond)

        return x_out, y_in, domain_out, i_out, cond_out
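
The free variables referenced in both bodies (nb_features, search_domain, zero_diagonal, tf_dtype, the clip bounds) are built once before the loop. The following is a sketch of that setup in the style of the CleverHans jsma_symbolic helper; the variable names here are assumptions rather than the original code:

    # Sketch only (assumed setup before the while_loop)
    import numpy as np

    nb_classes = int(y_target.get_shape().as_list()[-1])
    nb_features = int(np.prod(x.get_shape().as_list()[1:]))

    # Features that are still allowed to move: not yet at the clip boundary
    if increase:
        search_domain = tf.reshape(
            tf.cast(x < clip_max, tf_dtype), [-1, nb_features])
    else:
        search_domain = tf.reshape(
            tf.cast(x > clip_min, tf_dtype), [-1, nb_features])

    # Mask with a zero diagonal so a feature is never paired with itself
    tmp = np.ones((nb_features, nb_features), int)
    np.fill_diagonal(tmp, 0)
    zero_diagonal = tf.constant(tmp, tf_dtype)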