def body(x_in, y_in, domain_in, i_in, cond_in, predictions):
    """Run one attack iteration: score feature pairs, perturb the best pair.

    Besides the perturbation step, this variant also writes the current
    softmax probabilities into the ``predictions`` buffer at the slot
    selected by ``i_in``.

    The function relies on names from the enclosing scope (``model``,
    ``attack``, ``nb_classes``, ``nb_features``, ``max_iters``, ``increase``,
    ``theta``, ``clip_min``, ``clip_max``, ``zero_diagonal``, ``tf_dtype``
    and the ``reduce_*`` helpers).
    NOTE(review): presumably used as the body of a TensorFlow while-loop;
    confirm against the caller.

    Args:
        x_in: Current inputs. Flattened to ``nb_features`` values per sample
            for the gradient computations.
        y_in: Per-sample class indicator (compared against 1 to separate the
            selected class from the others). Iteration continues for a
            sample while its predicted class does not overlap with ``y_in``.
        domain_in: Per-feature search domain. Entries equal to 0 are
            penalised so they are not selected; the features picked in this
            step are subtracted from it.
        i_in: Iteration counter.
        cond_in: Unused; kept so the signature matches the returned tuple.
        predictions: Running buffer that accumulates the probabilities of
            every iteration.

    Returns:
        Tuple ``(x_out, y_in, domain_out, i_out, cond_out, predictions)``
        where ``cond_out`` is true while at least one sample still needs
        (and can receive) another modification.
    """
    logits = model.get_logits(x_in)
    preds = tf.nn.softmax(logits)
    preds_onehot = tf.one_hot(tf.argmax(preds, axis=1), depth=nb_classes)

    # Place this iteration's probabilities in their slot of the buffer by
    # zero-padding on both sides and adding.
    # NOTE(review): the leading dimension 1 and the factor 10 are hard-coded,
    # so this only lines up for a single sample and 10 classes. The buffer
    # shape is fixed by the caller, so it is left untouched here -- confirm
    # and replace 10 by nb_classes together with the buffer allocation.
    tensor1 = tf.zeros((1, i_in * 10))
    tensor2 = tf.zeros((1, (max_iters - 1 - i_in) * 10))
    reshaped_preds = tf.concat([tensor1, preds, tensor2], 1)
    predictions = tf.add(predictions, reshaped_preds)

    # Jacobian of the logits w.r.t. the input, one gradient per class,
    # laid out as [nb_classes, batch, nb_features].
    list_derivatives = []
    for class_ind in xrange(nb_classes):
        derivatives = tf.gradients(logits[:, class_ind], x_in)
        list_derivatives.append(derivatives[0])
    grads = tf.reshape(tf.stack(list_derivatives),
                       shape=[nb_classes, -1, nb_features])

    if attack == "tjsma":
        # Weight every gradient by (1 - x). Reshaping to [-1, nb_features]
        # (instead of a fixed leading 1) keeps the per-sample weights aligned
        # with the batch axis of the Jacobian; for one sample it is the same.
        # NOTE(review): the weight is (1 - x) regardless of `increase` and
        # uses the literal 1 rather than clip_max -- confirm this is intended.
        grads = tf.reshape(1 - x_in, shape=[-1, nb_features]) * grads

    # Class-major masks, shaped [nb_classes, batch, 1] so they broadcast
    # over the feature axis of the Jacobian.
    target_class = tf.reshape(tf.transpose(y_in, perm=[1, 0]),
                              shape=[nb_classes, -1, 1])
    other_classes = tf.cast(tf.not_equal(target_class, 1), tf_dtype)

    grads_target = reduce_sum(grads * target_class, axis=0)
    if attack in ("tjsma", "wjsma"):
        # Weight the non-target gradients by the class probabilities.
        # Transpose before reshaping so that row c holds class c's
        # probability for every sample, exactly like target_class above; a
        # bare reshape only coincides with this layout for a single sample.
        class_probs = tf.reshape(tf.transpose(preds, perm=[1, 0]),
                                 shape=[nb_classes, -1, 1])
        grads_other = reduce_sum(grads * other_classes * class_probs, axis=0)
    else:
        grads_other = reduce_sum(grads * other_classes, axis=0)

    # Features whose domain entry is 0 get shifted by twice the per-sample
    # maximum absolute gradient, so any pair containing one of them fails
    # the sign test of the mask below.
    increase_coef = (4 * int(increase) - 2) * tf.cast(
        tf.equal(domain_in, 0), tf_dtype)

    target_tmp = grads_target - increase_coef * reduce_max(
        tf.abs(grads_target), axis=1, keepdims=True)
    target_sum = (tf.reshape(target_tmp, shape=[-1, nb_features, 1]) +
                  tf.reshape(target_tmp, shape=[-1, 1, nb_features]))

    other_tmp = grads_other + increase_coef * reduce_max(
        tf.abs(grads_other), axis=1, keepdims=True)
    other_sum = (tf.reshape(other_tmp, shape=[-1, nb_features, 1]) +
                 tf.reshape(other_tmp, shape=[-1, 1, nb_features]))

    # Keep only the pairs whose summed gradients have the required signs.
    if increase:
        scores_mask = ((target_sum > 0) & (other_sum < 0))
    else:
        scores_mask = ((target_sum < 0) & (other_sum > 0))

    # Score of every (feature, feature) pair; zero_diagonal comes from the
    # enclosing scope.
    scores = (tf.cast(scores_mask, tf_dtype) *
              (-target_sum * other_sum) * zero_diagonal)

    # Best pair per sample, decoded from the flattened pair index.
    best = tf.argmax(
        tf.reshape(scores, shape=[-1, nb_features * nb_features]), axis=1)
    p1 = tf.mod(best, nb_features)
    p2 = tf.floordiv(best, nb_features)
    p1_one_hot = tf.one_hot(p1, depth=nb_features)
    p2_one_hot = tf.one_hot(p2, depth=nb_features)

    # A sample is modified only while its prediction does not overlap with
    # y_in and its domain still sums to at least 2.
    mod_not_done = tf.equal(reduce_sum(y_in * preds_onehot, axis=1), 0)
    cond = mod_not_done & (reduce_sum(domain_in, axis=1) >= 2)

    # Remove the selected features from the domain of the active samples.
    cond_float = tf.reshape(tf.cast(cond, tf_dtype), shape=[-1, 1])
    to_mod = (p1_one_hot + p2_one_hot) * cond_float
    domain_out = domain_in - to_mod

    # Apply the perturbation in the original input shape, clipped to range.
    to_mod_reshape = tf.reshape(
        to_mod, shape=([-1] + x_in.shape[1:].as_list()))
    if increase:
        x_out = tf.minimum(clip_max, x_in + to_mod_reshape * theta)
    else:
        x_out = tf.maximum(clip_min, x_in - to_mod_reshape * theta)

    # Advance the counter; keep going while any sample is still active.
    i_out = tf.add(i_in, 1)
    cond_out = reduce_any(cond)

    return x_out, y_in, domain_out, i_out, cond_out, predictions
def body(x_in, y_in, domain_in, i_in, cond_in):
    """Run one attack iteration: score feature pairs, perturb the best pair.

    Relies on names from the enclosing scope (``model``, ``nb_classes``,
    ``nb_features``, ``increase``, ``theta``, ``clip_min``, ``clip_max``,
    ``zero_diagonal``, ``tf_dtype`` and the ``reduce_*`` helpers).
    NOTE(review): presumably used as the body of a TensorFlow while-loop;
    confirm against the caller.

    Args:
        x_in: Current inputs. Flattened to ``nb_features`` values per sample
            for the gradient computations.
        y_in: Per-sample class indicator (compared against 1 to separate the
            selected class from the others). Iteration continues for a
            sample while its predicted class does not overlap with ``y_in``.
        domain_in: Per-feature search domain. Entries equal to 0 are
            penalised so they are not selected; the features picked in this
            step are subtracted from it.
        i_in: Iteration counter.
        cond_in: Unused; kept so the signature matches the returned tuple.

    Returns:
        Tuple ``(x_out, y_in, domain_out, i_out, cond_out)`` where
        ``cond_out`` is true while at least one sample still needs (and can
        receive) another modification.
    """
    probs = model.get_probs(x_in)
    predicted_onehot = tf.one_hot(tf.argmax(probs, axis=1), depth=nb_classes)

    # Jacobian graph: one gradient tensor per class, stacked and flattened
    # to [nb_classes, batch, nb_features].
    per_class_grads = [
        tf.gradients(probs[:, k], x_in)[0] for k in xrange(nb_classes)
    ]
    jacobian = tf.reshape(tf.stack(per_class_grads),
                          shape=[nb_classes, -1, nb_features])

    # Class-major masks. The trailing dimension of size 1 lets them
    # broadcast over the feature axis of the Jacobian.
    is_target = tf.reshape(tf.transpose(y_in, perm=[1, 0]),
                           shape=[nb_classes, -1, 1])
    is_other = tf.cast(tf.not_equal(is_target, 1), tf_dtype)
    target_grad = reduce_sum(jacobian * is_target, axis=0)
    other_grad = reduce_sum(jacobian * is_other, axis=0)

    # Features already removed from the search domain (entry == 0) are
    # shifted by twice the per-sample maximum absolute gradient so that they
    # will not be picked later.
    penalty = (4 * int(increase) - 2) * tf.cast(
        tf.equal(domain_in, 0), tf_dtype)

    def pair_sums(per_feature):
        # Sum of the per-feature values for every (feature, feature) pair.
        as_column = tf.reshape(per_feature, shape=[-1, nb_features, 1])
        as_row = tf.reshape(per_feature, shape=[-1, 1, nb_features])
        return as_column + as_row

    target_shifted = target_grad - penalty * reduce_max(
        tf.abs(target_grad), axis=1, keepdims=True)
    target_sum = pair_sums(target_shifted)

    other_shifted = other_grad + penalty * reduce_max(
        tf.abs(other_grad), axis=1, keepdims=True)
    other_sum = pair_sums(other_shifted)

    # Only pairs whose summed gradients have the required signs are kept.
    if increase:
        keep = (target_sum > 0) & (other_sum < 0)
    else:
        keep = (target_sum < 0) & (other_sum > 0)

    # Score tensor over all candidate feature pairs; zero_diagonal comes
    # from the enclosing scope.
    pair_scores = (tf.cast(keep, tf_dtype) *
                   (-target_sum * other_sum) * zero_diagonal)

    # Pick the best pair per sample and decode the flattened index.
    flat_scores = tf.reshape(pair_scores,
                             shape=[-1, nb_features * nb_features])
    best_flat = tf.argmax(flat_scores, axis=1)
    first_idx = tf.mod(best_flat, nb_features)
    second_idx = tf.floordiv(best_flat, nb_features)
    first_one_hot = tf.one_hot(first_idx, depth=nb_features)
    second_one_hot = tf.one_hot(second_idx, depth=nb_features)

    # A sample needs more modification while its prediction does not overlap
    # with y_in and its search domain still sums to at least 2.
    not_done = tf.equal(reduce_sum(y_in * predicted_onehot, axis=1), 0)
    has_room = reduce_sum(domain_in, axis=1) >= 2
    cond = not_done & has_room

    # Update the search domain of the samples that are still active.
    active = tf.reshape(tf.cast(cond, tf_dtype), shape=[-1, 1])
    to_mod = (first_one_hot + second_one_hot) * active
    domain_out = domain_in - to_mod

    # Apply the modification in the original input shape, clipped to range.
    input_shape = [-1] + x_in.shape[1:].as_list()
    delta = tf.reshape(to_mod, shape=input_shape) * theta
    if increase:
        x_out = tf.minimum(clip_max, x_in + delta)
    else:
        x_out = tf.maximum(clip_min, x_in - delta)

    # Advance the iterator and report whether any sample is still active.
    return x_out, y_in, domain_out, tf.add(i_in, 1), reduce_any(cond)