def test_train_pgd(self, model_fn):
  w = np.array([[4.0], [-3.0]])
  x0 = np.array([[2.0, 3.0]])
  y0 = np.array([[0.0]])
  adv_multiplier = 0.2
  adv_step_size = 0.01
  learning_rate = 0.01
  pgd_iterations = 3
  pgd_epsilon = 2.5 * adv_step_size
  adv_config = configs.make_adv_reg_config(
      multiplier=adv_multiplier,
      adv_step_size=adv_step_size,
      adv_grad_norm='infinity',
      pgd_iterations=pgd_iterations,
      pgd_epsilon=pgd_epsilon)

  y_hat = np.dot(x0, w)
  # The adversarial perturbation is constant across PGD iterations, so the
  # accumulated step (pgd_iterations * adv_step_size = 0.03 per feature) is
  # clipped back to the pgd_epsilon = 0.025 ball under the infinity norm.
  x_adv = x0 + pgd_epsilon * np.sign((y_hat - y0) * w.T)
  y_hat_adv = np.dot(x_adv, w)
  grad_w_labeled_loss = 2. * (y_hat - y0) * x0.T
  grad_w_adv_loss = adv_multiplier * 2. * (y_hat_adv - y0) * x_adv.T
  w_new = w - learning_rate * (grad_w_labeled_loss + grad_w_adv_loss)

  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = model_fn(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(tf.keras.optimizers.SGD(learning_rate), loss='MSE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))

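# test_train_pgd is parameterized over `model_fn`. A minimal sketch of a
# compatible builder (the name is hypothetical; the real test supplies its own
# builders via parameterization): a bias-free linear model whose kernel is
# initialized to the given weights, so predictions equal np.dot(x, w).
def build_linear_keras_model(input_shape, weights):
  return tf.keras.Sequential([
      tf.keras.layers.Dense(
          weights.shape[-1],
          input_shape=input_shape,
          use_bias=False,
          kernel_initializer=tf.keras.initializers.Constant(weights)),
  ])
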
def test_train_with_feature_column_input(self):
  x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
  w = np.array([[2.], [3.], [6.]])
  y = np.array([0.])
  inputs = {'x1': x1, 'x2': x2, 'label': y}
  lr, adv_step_size = 0.001, 0.1

  feature_columns = [
      tf.feature_column.numeric_column('x1', shape=[1]),
      tf.feature_column.numeric_column('x2', shape=[2]),
  ]
  model = tf.keras.Sequential([
      tf.keras.layers.DenseFeatures(feature_columns),
      tf.keras.layers.Dense(
          1,
          use_bias=False,
          kernel_initializer=tf.keras.initializers.Constant(w)),
  ])
  adv_config = configs.make_adv_reg_config(
      multiplier=1.0, adv_step_size=adv_step_size, adv_grad_norm='l2')
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MAE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  # DenseFeatures concatenates its inputs in alphabetical order of the keys.
  x = np.concatenate([x1, x2], axis=-1)
  # loss = |x * w|, gradient(loss, x) = w
  x_adv = x + adv_step_size * w.T / np.linalg.norm(w, ord=2)
  # gradient(loss, w) = x
  w_new = w - lr * (x + x_adv).T
  self.assertAllClose(
      w_new, tf.keras.backend.get_value(model.layers[1].weights[0]))

def test_train_with_2_inputs(self, name1, name2):
  x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
  w1, w2 = np.array([[2.]]), np.array([[3.], [6.]])
  y = np.array([0.])
  inputs = {name1: x1, name2: x2, 'label': y}
  lr, adv_step_size = 0.001, 0.1

  input1 = tf.keras.Input(shape=(1,), name=name1)
  input2 = tf.keras.Input(shape=(2,), name=name2)
  dense1 = tf.keras.layers.Dense(
      w1.shape[-1],
      use_bias=False,
      kernel_initializer=tf.keras.initializers.Constant(w1))
  dense2 = tf.keras.layers.Dense(
      w2.shape[-1],
      use_bias=False,
      kernel_initializer=tf.keras.initializers.Constant(w2))
  output = tf.keras.layers.Add()([dense1(input1), dense2(input2)])
  model = tf.keras.Model(inputs=[input1, input2], outputs=output)
  adv_config = configs.make_adv_reg_config(
      multiplier=1.0, adv_step_size=adv_step_size, adv_grad_norm='l2')
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MAE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  # loss = |x1 * w1 + x2 * w2|, gradient(loss, [x1, x2]) = [w1, w2].
  # The l2 norm is computed jointly over the concatenation of both inputs.
  w_norm = np.sqrt(np.sum(w1 * w1) + np.sum(w2 * w2))
  x1_adv = x1 + adv_step_size * w1.T / w_norm
  x2_adv = x2 + adv_step_size * w2.T / w_norm
  # gradient(loss, [w1, w2]) = [x1, x2]
  w1_new = w1 - lr * (x1 + x1_adv).T
  w2_new = w2 - lr * (x2 + x2_adv).T
  self.assertAllClose(w1_new, tf.keras.backend.get_value(dense1.weights[0]))
  self.assertAllClose(w2_new, tf.keras.backend.get_value(dense2.weights[0]))

def test_adversarial_wrapper_adds_regularization_fgm(self):
  # Renamed with an _fgm suffix so it does not collide with the parameterized
  # FGM/PGD variant below.
  # base model: y = w*x+b = 4*x1 + 3*x2 + 2
  weight = np.array([[4.0], [3.0]], dtype=np.float32)
  bias = np.array([2.0], dtype=np.float32)
  x0, y0 = np.array([[1.0, 1.0]]), np.array([8.0])
  adv_step_size = 0.1
  learning_rate = 0.01

  base_est = self.build_linear_regressor(weight=weight, bias=bias)
  adv_config = nsl_configs.make_adv_reg_config(
      multiplier=1.0,  # equal weight on original and adv examples
      adv_step_size=adv_step_size)
  adv_est = nsl_estimator.add_adversarial_regularization(
      base_est,
      optimizer_fn=lambda: tf.train.GradientDescentOptimizer(learning_rate),
      adv_config=adv_config)
  input_fn = single_batch_input_fn({FEATURE_NAME: x0}, y0)
  adv_est.train(input_fn=input_fn, steps=1)

  # Computes the gradients on original and adversarial examples.
  orig_pred = np.dot(x0, weight) + bias  # [9.0]
  orig_grad_w = 2 * (orig_pred - y0) * x0.T  # [[2.0], [2.0]]
  orig_grad_b = 2 * (orig_pred - y0).reshape((1,))  # [2.0]
  grad_x = 2 * (orig_pred - y0) * weight.T  # [[8.0, 6.0]]
  perturbation = adv_step_size * grad_x / np.linalg.norm(grad_x)
  x_adv = x0 + perturbation  # [[1.08, 1.06]]
  adv_pred = np.dot(x_adv, weight) + bias  # [9.5]
  adv_grad_w = 2 * (adv_pred - y0) * x_adv.T  # [[3.24], [3.18]]
  adv_grad_b = 2 * (adv_pred - y0).reshape((1,))  # [3.0]

  new_bias = bias - learning_rate * (orig_grad_b + adv_grad_b)
  new_weight = weight - learning_rate * (orig_grad_w + adv_grad_w)
  self.assertAllClose(new_bias, adv_est.get_variable_value(BIAS_VARIABLE))
  self.assertAllClose(new_weight, adv_est.get_variable_value(WEIGHT_VARIABLE))

# Test cases: single-step FGM, and 3-step PGD capped at epsilon=0.25. The
# parameter values match the expected results noted in the comments below.
@parameterized.named_parameters([
    ('fgm', 0.1, 1, None),
    ('pgd', 0.1, 3, 0.25),
])
def test_adversarial_wrapper_adds_regularization(self, adv_step_size,
                                                 pgd_iterations, pgd_epsilon):
  # base model: y = w*x+b = 4*x1 + 3*x2 + 2
  weight = np.array([[4.0], [3.0]], dtype=np.float32)
  bias = np.array([2.0], dtype=np.float32)
  x0, y0 = np.array([[1.0, 1.0]]), np.array([8.0])
  learning_rate = 0.01

  base_est = self.build_linear_regressor(weight=weight, bias=bias)
  adv_config = nsl_configs.make_adv_reg_config(
      multiplier=1.0,  # equal weight on original and adv examples
      adv_step_size=adv_step_size,
      pgd_iterations=pgd_iterations,
      pgd_epsilon=pgd_epsilon)
  adv_est = nsl_estimator.add_adversarial_regularization(
      base_est,
      optimizer_fn=lambda: tf.train.GradientDescentOptimizer(learning_rate),
      adv_config=adv_config)
  input_fn = single_batch_input_fn({FEATURE_NAME: x0}, y0)
  adv_est.train(input_fn=input_fn, steps=1)

  # Computes the gradients on original and adversarial examples.
  orig_pred = np.dot(x0, weight) + bias  # [9.0]
  orig_grad_w = 2 * (orig_pred - y0) * x0.T  # [[2.0], [2.0]]
  orig_grad_b = 2 * (orig_pred - y0).reshape((1,))  # [2.0]
  grad_x = 2 * (orig_pred - y0) * weight.T  # [[8.0, 6.0]]
  # The gradient direction is independent of x, so perturbing for multiple
  # iterations is the same as scaling the perturbation.
  perturbation_magnitude = pgd_iterations * adv_step_size
  if pgd_epsilon is not None:
    perturbation_magnitude = np.minimum(perturbation_magnitude, pgd_epsilon)
  perturbation = perturbation_magnitude * grad_x / np.linalg.norm(grad_x)
  x_adv = x0 + perturbation  # fgm: [[1.08, 1.06]]; pgd: [[1.20, 1.15]]
  adv_pred = np.dot(x_adv, weight) + bias  # fgm: [9.5]; pgd: [10.25]
  adv_grad_w = 2 * (adv_pred - y0) * x_adv.T  # fgm: [[3.24], [3.18]]
  adv_grad_b = 2 * (adv_pred - y0).reshape((1,))  # fgm: [3.0]; pgd: [4.5]

  new_bias = bias - learning_rate * (orig_grad_b + adv_grad_b)
  new_weight = weight - learning_rate * (orig_grad_w + adv_grad_w)
  self.assertAllClose(new_bias, adv_est.get_variable_value(BIAS_VARIABLE))
  self.assertAllClose(new_weight, adv_est.get_variable_value(WEIGHT_VARIABLE))

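# The two estimator tests above depend on module-level helpers not shown in
# this excerpt. Minimal sketches under stated assumptions: the variable-name
# constants follow the naming of a canned tf.estimator.LinearRegressor with a
# single numeric feature 'x', and single_batch_input_fn is assumed to wrap the
# numpy features and labels as a single batch.
FEATURE_NAME = 'x'
WEIGHT_VARIABLE = 'linear/linear_model/x/weights'
BIAS_VARIABLE = 'linear/linear_model/bias_weights'


def single_batch_input_fn(features, labels):
  def input_fn():
    # A one-element dataset; the arrays already carry the batch dimension.
    return tf.data.Dataset.from_tensors((features, labels))
  return input_fn
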
def _set_up_linear_regression(self, sample_weight=1.0):
  """Sets up a linear regression problem and its expected post-train weights."""
  w = np.array([[4.0], [-3.0]])
  x0 = np.array([[2.0, 3.0]])
  y0 = np.array([[0.0]])
  adv_multiplier = 0.2
  adv_step_size = 0.01
  learning_rate = 0.01
  adv_config = configs.make_adv_reg_config(
      multiplier=adv_multiplier,
      adv_step_size=adv_step_size,
      adv_grad_norm='infinity')

  y_hat = np.dot(x0, w)
  # FGM under the infinity norm: a signed step of adv_step_size per feature.
  x_adv = x0 + adv_step_size * np.sign((y_hat - y0) * w.T)
  y_hat_adv = np.dot(x_adv, w)
  grad_w_labeled_loss = sample_weight * 2. * (y_hat - y0) * x0.T
  grad_w_adv_loss = (
      adv_multiplier * sample_weight * 2. * (y_hat_adv - y0) * x_adv.T)
  w_new = w - learning_rate * (grad_w_labeled_loss + grad_w_adv_loss)
  return w, x0, y0, learning_rate, adv_config, w_new

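# A sketch of how the helper above is consumed (hypothetical test body;
# `model_fn` is assumed to be supplied by parameterization, e.g. a builder
# like the one sketched after test_train_pgd):
def test_train(self, model_fn):
  w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = model_fn(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MSE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)
  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))
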
def setUp(self):
  super(AdversarialLossTest, self).setUp()
  self.adv_step_size = 0.01
  self.adv_config = configs.make_adv_reg_config(
      adv_step_size=self.adv_step_size, adv_grad_norm='infinity')