def test_train_with_feature_column_input(self):
  x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
  w = np.array([[2.], [3.], [6.]])
  y = np.array([0.])
  inputs = {'x1': x1, 'x2': x2, 'label': y}
  lr, adv_step_size = 0.001, 0.1
  feature_columns = [
      tf.feature_column.numeric_column('x1', shape=[1]),
      tf.feature_column.numeric_column('x2', shape=[2]),
  ]
  model = tf.keras.Sequential([
      tf.keras.layers.DenseFeatures(feature_columns),
      tf.keras.layers.Dense(
          1,
          use_bias=False,
          kernel_initializer=tf.keras.initializers.Constant(w)),
  ])
  adv_config = configs.make_adv_reg_config(
      multiplier=1.0, adv_step_size=adv_step_size, adv_grad_norm='l2')
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MAE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  x = np.concatenate([x1, x2], axis=-1)
  # loss = |x * w|, so gradient(loss, x) = w.
  x_adv = x + adv_step_size * w.T / np.linalg.norm(w, ord=2)
  # gradient(loss, w) = x, evaluated at both the original and adversarial x.
  w_new = w - lr * (x + x_adv).T
  self.assertAllClose(
      w_new, tf.keras.backend.get_value(model.layers[1].weights[0]))

def test_train_with_2_inputs(self, name1, name2):
  x1, x2 = np.array([[1.]]), np.array([[4., 5.]])
  w1, w2 = np.array([[2.]]), np.array([[3.], [6.]])
  y = np.array([0.])
  inputs = {name1: x1, name2: x2, 'label': y}
  lr, adv_step_size = 0.001, 0.1
  input1 = tf.keras.Input(shape=(1,), name=name1)
  input2 = tf.keras.Input(shape=(2,), name=name2)
  dense1 = tf.keras.layers.Dense(
      w1.shape[-1],
      use_bias=False,
      kernel_initializer=tf.keras.initializers.Constant(w1))
  dense2 = tf.keras.layers.Dense(
      w2.shape[-1],
      use_bias=False,
      kernel_initializer=tf.keras.initializers.Constant(w2))
  output = tf.keras.layers.Add()([dense1(input1), dense2(input2)])
  model = tf.keras.Model(inputs=[input1, input2], outputs=output)
  adv_config = configs.make_adv_reg_config(
      multiplier=1.0, adv_step_size=adv_step_size, adv_grad_norm='l2')
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MAE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  # loss = |x1 * w1 + x2 * w2|, so gradient(loss, [x1, x2]) = [w1, w2].
  w_norm = np.sqrt(np.sum(w1 * w1) + np.sum(w2 * w2))
  x1_adv = x1 + adv_step_size * w1.T / w_norm
  x2_adv = x2 + adv_step_size * w2.T / w_norm
  # gradient(loss, [w1, w2]) = [x1, x2]
  w1_new = w1 - lr * (x1 + x1_adv).T
  w2_new = w2 - lr * (x2 + x2_adv).T
  self.assertAllClose(w1_new, tf.keras.backend.get_value(dense1.weights[0]))
  self.assertAllClose(w2_new, tf.keras.backend.get_value(dense2.weights[0]))

def test_train_subclassed_base_model_with_label_input(self):
  w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}

  class BaseModel(tf.keras.Model):

    def __init__(self):
      super(BaseModel, self).__init__()
      self.dense = tf.keras.layers.Dense(
          w.shape[-1],
          use_bias=False,
          kernel_initializer=tf.keras.initializers.Constant(w))
      self.seen_input_keys = set()

    def call(self, inputs):
      self.seen_input_keys |= set(inputs.keys())
      return self.dense(inputs['feature'])

  model = BaseModel()
  adv_model = adversarial_regularization.AdversarialRegularization(
      model,
      label_keys=['label'],
      adv_config=adv_config,
      base_with_labels_in_features=True)
  adv_model.compile(
      optimizer=tf.keras.optimizers.SGD(lr), loss='MSE', metrics=['mae'])
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  self.assertIn('label', model.seen_input_keys)

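# Many tests in this file unpack `self._set_up_linear_regression()`, a helper
# defined elsewhere in the file and not shown in this excerpt. Below is a
# minimal sketch of such a helper, reconstructed from the values and the FGSM
# update used in test_train_pgd and test_perturb_on_batch (an assumption, not
# necessarily the exact original).
def _set_up_linear_regression(self, adv_multiplier=0.2, adv_step_size=0.01):
  w = np.array([[4.0], [-3.0]])
  x0 = np.array([[2.0, 3.0]])
  y0 = np.array([[0.0]])
  lr = 0.01
  adv_config = configs.make_adv_reg_config(
      multiplier=adv_multiplier,
      adv_step_size=adv_step_size,
      adv_grad_norm='infinity')
  # Expected weight after one step of adversarially regularized SGD on the
  # squared loss: perturb x0 by one FGSM step under the infinity norm, then
  # apply the gradients from both the original and the adversarial example.
  y_hat = np.dot(x0, w)
  x_adv = x0 + adv_step_size * np.sign((y_hat - y0) * w.T)
  y_hat_adv = np.dot(x_adv, w)
  grad_w_labeled_loss = 2. * (y_hat - y0) * x0.T
  grad_w_adv_loss = adv_multiplier * 2. * (y_hat_adv - y0) * x_adv.T
  w_new = w - lr * (grad_w_labeled_loss + grad_w_adv_loss)
  return w, x0, y0, lr, adv_config, w_new
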
def test_evaluate_binary_classification_metrics(self):
  # Multi-label binary classification model.
  w = np.array([[4.0, 1.0, -5.0], [-3.0, 1.0, 2.0]])
  x0 = np.array([[2.0, 3.0]])
  y0 = np.array([[0.0, 1.0, 1.0]])
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = build_linear_keras_sequential_model(input_shape=(2,), weights=w)
  model.add(tf.keras.layers.Lambda(tf.sigmoid))
  adv_model = adversarial_regularization.AdversarialRegularization(model)
  adv_model.compile(
      optimizer=tf.keras.optimizers.SGD(0.1),
      loss='squared_hinge',
      metrics=['accuracy', 'ce'])

  metrics_values = adv_model.evaluate(inputs, steps=1)
  results = dict(zip(adv_model.metrics_names, metrics_values))

  y_hat = 1. / (1. + np.exp(-np.dot(x0, w)))  # [[0.26894, 0.99331, 0.01799]]
  accuracy = np.mean(np.sign(y_hat - 0.5) == np.sign(y0 - 0.5))  # (1+1+0) / 3
  cross_entropy = np.mean(
      y0 * -np.log(y_hat) + (1 - y0) * -np.log(1 - y_hat))
  self.assertIn('binary_accuracy', results)
  self.assertIn('binary_crossentropy', results)
  self.assertAllClose(accuracy, results['binary_accuracy'])
  self.assertAllClose(cross_entropy, results['binary_crossentropy'])

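# The tests in this file build their base models through
# build_linear_keras_sequential_model and build_linear_keras_functional_model,
# which are defined elsewhere in the file. The sketches below are minimal
# versions consistent with how the tests use them (assumptions, not the exact
# originals): a single no-bias Dense layer with fixed initial weights.
def build_linear_keras_sequential_model(input_shape, weights):
  return tf.keras.Sequential([
      tf.keras.layers.Dense(
          weights.shape[-1],
          use_bias=False,
          input_shape=input_shape,
          kernel_initializer=tf.keras.initializers.Constant(weights)),
  ])


def build_linear_keras_functional_model(input_shape, weights,
                                        input_name='feature'):
  # Same linear model, built with the functional API and a named input so
  # tests can feed dictionaries keyed by the input name.
  inputs = tf.keras.Input(shape=input_shape, name=input_name)
  outputs = tf.keras.layers.Dense(
      weights.shape[-1],
      use_bias=False,
      kernel_initializer=tf.keras.initializers.Constant(weights))(inputs)
  return tf.keras.Model(inputs=inputs, outputs=outputs)
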
def test_train_pgd(self, model_fn):
  w = np.array([[4.0], [-3.0]])
  x0 = np.array([[2.0, 3.0]])
  y0 = np.array([[0.0]])
  adv_multiplier = 0.2
  adv_step_size = 0.01
  learning_rate = 0.01
  pgd_iterations = 3
  pgd_epsilon = 2.5 * adv_step_size
  adv_config = configs.make_adv_reg_config(
      multiplier=adv_multiplier,
      adv_step_size=adv_step_size,
      adv_grad_norm='infinity',
      pgd_iterations=pgd_iterations,
      pgd_epsilon=pgd_epsilon)
  y_hat = np.dot(x0, w)
  # For this linear model the sign of the loss gradient w.r.t. x is the same
  # in every PGD iteration, so each of the 3 iterations moves each coordinate
  # of x by adv_step_size = 0.01 in a fixed direction. The total movement
  # (0.03) exceeds pgd_epsilon = 0.025, so the projection clips the
  # perturbation of each coordinate to exactly pgd_epsilon.
  x_adv = x0 + pgd_epsilon * np.sign((y_hat - y0) * w.T)
  y_hat_adv = np.dot(x_adv, w)
  grad_w_labeled_loss = 2. * (y_hat - y0) * x0.T
  grad_w_adv_loss = adv_multiplier * 2. * (y_hat_adv - y0) * x_adv.T
  w_new = w - learning_rate * (grad_w_labeled_loss + grad_w_adv_loss)

  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = model_fn(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(tf.keras.optimizers.SGD(learning_rate), loss='MSE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))

def test_evaluate_classification_metrics(self):
  # Multi-class logistic regression model.
  w = np.array([[4.0, 1.0, -5.0], [-3.0, 1.0, 2.0]])
  x0 = np.array([[2.0, 3.0]])
  y0 = np.array([[1]])
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = build_linear_keras_sequential_model(input_shape=(2,), weights=w)
  model.add(tf.keras.layers.Softmax())
  adv_model = adversarial_regularization.AdversarialRegularization(model)
  adv_model.compile(
      optimizer=tf.keras.optimizers.SGD(0.1),
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy', 'ce'])

  metrics_values = adv_model.evaluate(inputs, steps=1)
  results = dict(zip(adv_model.metrics_names, metrics_values))

  logit = np.dot(x0, w)  # [[-1., 5., -4.]]
  accuracy = np.mean(np.argmax(logit, axis=-1) == y0)
  cross_entropy = np.log(np.sum(np.exp(logit))) - np.reshape(logit[:, y0], ())
  self.assertIn('sparse_categorical_accuracy', results)
  self.assertIn('sparse_categorical_crossentropy', results)
  self.assertAllClose(accuracy, results['sparse_categorical_accuracy'])
  self.assertAllClose(cross_entropy,
                      results['sparse_categorical_crossentropy'])

def test_train_with_2_outputs(self):
  w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
  inputs = {
      'feature': tf.constant(x0),
      'label1': tf.constant(y0),
      'label2': tf.constant(-y0)
  }
  input_layer = tf.keras.Input(shape=(2,), name='feature')
  layer1 = tf.keras.layers.Dense(
      w.shape[-1],
      use_bias=False,
      kernel_initializer=tf.keras.initializers.Constant(w))
  layer2 = tf.keras.layers.Dense(
      w.shape[-1],
      use_bias=False,
      kernel_initializer=tf.keras.initializers.Constant(-w))
  model = tf.keras.Model(
      inputs={'feature': input_layer},
      outputs=[layer1(input_layer), layer2(input_layer)])
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label1', 'label2'], adv_config=adv_config)
  adv_model.compile(
      optimizer=tf.keras.optimizers.SGD(lr),
      loss='MSE',
      metrics=[tf.keras.metrics.MeanAbsoluteError()])
  history = adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  # Both outputs have the same absolute error because layer2's weights and
  # label2 are the negations of layer1's weights and label1.
  expected_metric = np.abs(y0 - np.dot(x0, w)).mean()
  self.assertAllClose(expected_metric,
                      history.history['mean_absolute_error_label1'][0])
  self.assertAllClose(expected_metric,
                      history.history['mean_absolute_error_label2'][0])

def test_predict_by_base_model(self, model_fn):
  model = model_fn(input_shape=(2,), weights=np.array([[1.0], [-1.0]]))
  inputs = {'feature': tf.constant([[5.0, 3.0]])}
  adv_model = adversarial_regularization.AdversarialRegularization(model)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(0.01), loss='MSE')

  prediction = model.predict(x=inputs, steps=1, batch_size=1)

  # 5.0 * 1.0 + 3.0 * (-1.0) = 2.0
  self.assertAllEqual([[2.0]], prediction)

def test_train_fgsm(self, model_fn):
  w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = model_fn(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MSE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))

def test_train_with_loss_object(self):
  w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(
      optimizer=tf.keras.optimizers.SGD(lr),
      loss=tf.keras.losses.MeanSquaredError())
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))

def test_perturb_on_batch(self, model_fn):
  w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
  inputs = {'feature': x0, 'label': y0}
  model = model_fn(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss=['MSE'])

  adv_inputs = adv_model.perturb_on_batch(inputs)

  # One FGSM step under the infinity norm: move each coordinate of x0 by
  # adv_step_size in the direction of the sign of the loss gradient.
  y_hat = np.dot(x0, w)
  adv_step_size = adv_config.adv_neighbor_config.adv_step_size
  x_adv = x0 + adv_step_size * np.sign((y_hat - y0) * w.T)
  self.assertAllClose(x_adv, adv_inputs['feature'])
  self.assertAllClose(y0, adv_inputs['label'])

def test_train_with_duplicated_metrics(self):
  w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(
      optimizer=tf.keras.optimizers.SGD(lr), loss=['MSE'], metrics=[['MSE']])
  history = adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  # The metric duplicates the loss, so the duplicated name gets a numeric
  # suffix; both series should report the same values.
  self.assertIn('mean_squared_error', history.history)
  self.assertIn('mean_squared_error_2', history.history)
  self.assertEqual(history.history['mean_squared_error'],
                   history.history['mean_squared_error_2'])

def test_perturb_on_batch_custom_config(self):
  w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
  inputs = {'feature': x0, 'label': y0}
  model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss=['MSE'])

  adv_step_size = 0.2  # A different value from adv_config's adv_step_size.
  adv_inputs = adv_model.perturb_on_batch(inputs, adv_step_size=adv_step_size)

  y_hat = np.dot(x0, w)
  x_adv = x0 + adv_step_size * np.sign((y_hat - y0) * w.T)
  self.assertAllClose(x_adv, adv_inputs['feature'])
  self.assertAllClose(y0, adv_inputs['label'])

def test_train_fgsm_functional_model_diff_feature_key(self):
  # This test asserts that AdversarialRegularization works regardless of the
  # alphabetical order of the feature and label keys in the input dictionary.
  # This matters specifically for Keras functional models because those
  # models sort their inputs by key.
  w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
  inputs = {'the_feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = build_linear_keras_functional_model(
      input_shape=(2,), weights=w, input_name='the_feature')
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MSE')
  adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))

def test_train_with_metric_object(self):
  w, x0, y0, lr, adv_config, _ = self._set_up_linear_regression()
  inputs = {'feature': tf.constant(x0), 'label': tf.constant(y0)}
  model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
  adv_model = adversarial_regularization.AdversarialRegularization(
      model, label_keys=['label'], adv_config=adv_config)
  adv_model.compile(
      optimizer=tf.keras.optimizers.SGD(lr),
      loss='MSE',
      metrics=[tf.keras.metrics.MeanAbsoluteError()])
  history = adv_model.fit(x=inputs, batch_size=1, steps_per_epoch=1)

  actual_metric = history.history['mean_absolute_error'][0]
  expected_metric = np.abs(y0 - np.dot(x0, w)).mean()
  self.assertAllClose(expected_metric, actual_metric)

def test_train_with_loss_object_distributed(self):
  # Renamed from test_train_with_loss_object to avoid colliding with the
  # non-distributed test of the same name above.
  w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
  inputs = tf.data.Dataset.from_tensor_slices({
      'feature': x0,
      'label': y0
  }).batch(NUM_REPLICAS)
  strategy = self._get_mirrored_strategy()
  with strategy.scope():
    model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
    adv_model = adversarial_regularization.AdversarialRegularization(
        model, label_keys=['label'], adv_config=adv_config)
    adv_model.compile(
        optimizer=tf.keras.optimizers.SGD(lr),
        loss=tf.keras.losses.MeanSquaredError())
  adv_model.fit(x=inputs)

  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))

def test_train_with_distribution_strategy(self, model_fn):
  w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
  inputs = tf.data.Dataset.from_tensor_slices({
      'feature': x0,
      'label': y0
  }).batch(1)
  strategy = tf.distribute.MirroredStrategy()
  with strategy.scope():
    model = model_fn(input_shape=(2,), weights=w)
    adv_model = adversarial_regularization.AdversarialRegularization(
        model, label_keys=['label'], adv_config=adv_config)
    adv_model.compile(
        optimizer=tf.keras.optimizers.SGD(lr), loss='MSE', metrics=['mae'])
  adv_model.fit(x=inputs)

  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))

def test_train_with_distribution_strategy_multi_replica(self):
  # Renamed from test_train_with_distribution_strategy to avoid colliding
  # with the parameterized test of the same name above.
  w, x0, y0, lr, adv_config, w_new = self._set_up_linear_regression()
  inputs = tf.data.Dataset.from_tensor_slices({
      'feature': x0,
      'label': y0
  }).batch(NUM_REPLICAS)
  strategy = self._get_mirrored_strategy()
  with strategy.scope():
    # Makes sure we are running on multiple devices.
    self.assertEqual(NUM_REPLICAS, strategy.num_replicas_in_sync)
    model = build_linear_keras_functional_model(input_shape=(2,), weights=w)
    adv_model = adversarial_regularization.AdversarialRegularization(
        model, label_keys=['label'], adv_config=adv_config)
    adv_model.compile(optimizer=tf.keras.optimizers.SGD(lr), loss='MSE')
  adv_model.fit(x=inputs)

  # The updated weights should be the same regardless of the number of
  # devices.
  self.assertAllClose(w_new, tf.keras.backend.get_value(model.weights[0]))
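
# The two distribution-strategy tests above rely on a NUM_REPLICAS constant
# and a _get_mirrored_strategy() helper that are defined elsewhere in the
# file. A minimal sketch of what they might look like (an assumption, not the
# repository's exact definitions):
NUM_REPLICAS = 2


def _get_mirrored_strategy(self):
  # Pin the strategy to a fixed list of (logical) CPU devices so the test
  # exercises multi-replica training deterministically on any host.
  return tf.distribute.MirroredStrategy(
      devices=['/cpu:%d' % i for i in range(NUM_REPLICAS)])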