def perturb_on_batch(self, x, **config_kwargs):
  """Perturbs the given input to generate adversarial examples.

  Args:
    x: Input examples to be perturbed, in a dictionary of NumPy arrays,
      `Tensor`, `SparseTensor`, or `RaggedTensor` objects. The first dimension
      of all tensors or arrays should be the same (i.e. batch size).
    **config_kwargs: (optional) hyperparameters for generating adversarial
      perturbation. Any keyword argument here will overwrite the corresponding
      field in `nsl.configs.AdvNeighborConfig` specified in `__init__`.
      Acceptable keys: `feature_mask`, `adv_step_size`, `adv_grad_norm`,
      `clip_value_min`, `clip_value_max`, `pgd_iterations`, and `pgd_epsilon`.

  Returns:
    A dictionary of NumPy arrays, `SparseTensor`, or `RaggedTensor` objects of
    the generated adversarial examples.
  """
  inputs = tf.nest.map_structure(
      tf.convert_to_tensor, x, expand_composites=True)
  labels, sample_weights = self._extract_labels_and_weights(inputs)
  _, labeled_loss, _, tape = self._forward_pass(inputs, labels,
                                                sample_weights,
                                                {'training': False})
  config_kwargs = {k: v for k, v in config_kwargs.items() if v is not None}
  config = attr.evolve(self.adv_config.adv_neighbor_config, **config_kwargs)
  adv_inputs, _ = nsl_lib.gen_adv_neighbor(
      inputs,
      labeled_loss,
      config=config,
      gradient_tape=tape,
      pgd_model_fn=self._call_base_model,
      pgd_loss_fn=self._compute_total_loss,
      pgd_labels=labels)

  if tf.executing_eagerly():
    # Converts `Tensor` objects to NumPy arrays and keeps other objects (e.g.
    # `SparseTensor`) as-is.
    adv_inputs = tf.nest.map_structure(
        lambda t: t.numpy() if hasattr(t, 'numpy') else t,
        adv_inputs,
        expand_composites=False)
  else:
    adv_inputs = tf.keras.backend.function([], adv_inputs)([])

  # Inserts the labels and sample_weights back into the input dictionary, so
  # the returned input has the same structure as the original input.
  for label_key, label in zip(self.label_keys, labels):
    adv_inputs[label_key] = label
  if self.sample_weight_key is not None:
    adv_inputs[self.sample_weight_key] = sample_weights
  return adv_inputs
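For context, here is a minimal usage sketch. It assumes the method lives on an
`nsl.keras.AdversarialRegularization` wrapper (consistent with the
`label_keys`, `adv_config`, and `sample_weight_key` attributes referenced
above); the base model, feature names, and hyperparameter values are
illustrative assumptions, not part of the code above.

```python
import numpy as np
import tensorflow as tf
import neural_structured_learning as nsl

# A toy base model; the input name 'feature' is a hypothetical key.
inputs = tf.keras.Input(shape=(4,), name='feature')
hidden = tf.keras.layers.Dense(16, activation='relu')(inputs)
outputs = tf.keras.layers.Dense(3, activation='softmax')(hidden)
base_model = tf.keras.Model(inputs, outputs)

adv_model = nsl.keras.AdversarialRegularization(
    base_model,
    label_keys=['label'],
    adv_config=nsl.configs.make_adv_reg_config(multiplier=0.2,
                                               adv_step_size=0.05))
adv_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

batch = {'feature': np.random.rand(32, 4).astype(np.float32),
         'label': np.random.randint(0, 3, size=(32,))}
adv_model.fit(tf.data.Dataset.from_tensors(batch), epochs=1)

# Generates adversarial versions of the batch. Keyword arguments override
# the corresponding AdvNeighborConfig fields for this call only.
adv_batch = adv_model.perturb_on_batch(batch, adv_step_size=0.1)
```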
def adversarial_loss(features,
                     labels,
                     model,
                     loss_fn,
                     sample_weights=None,
                     adv_config=None,
                     predictions=None,
                     labeled_loss=None,
                     gradient_tape=None,
                     model_kwargs=None):
  """Computes the adversarial loss for `model` given `features` and `labels`.

  This utility function adds adversarial perturbations to the input
  `features`, runs the `model` on the perturbed features for predictions, and
  returns the corresponding loss `loss_fn(labels, model(perturbed_features))`.
  This function can be used in a Keras subclassed model and a custom training
  loop. It can also be used as a standalone helper function in eager execution
  mode.

  The adversarial perturbation is based on the gradient of the labeled loss on
  the original input features, i.e. `loss_fn(labels, model(features))`.
  Therefore, this function needs to compute the model's predictions on the
  input features as `model(features)`, and the labeled loss as
  `loss_fn(labels, predictions)`. If predictions or labeled loss have already
  been computed, they can be passed in via the `predictions` and
  `labeled_loss` arguments to save computational resources. Note that in eager
  execution mode, `gradient_tape` needs to be set accordingly when passing in
  `predictions` or `labeled_loss`, so that the gradient can be computed
  correctly.

  Example:

  ```python
  # A linear regression model (for demonstrating the usage only)
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(2,))])
  loss_fn = tf.keras.losses.MeanSquaredError()
  optimizer = tf.keras.optimizers.SGD()

  # Custom training loop. (The actual training data is omitted for clarity.)
  for x, y in train_dataset:
    with tf.GradientTape() as tape_w:
      # A separate GradientTape is needed for watching the input.
      with tf.GradientTape() as tape_x:
        tape_x.watch(x)
        # Regular forward pass.
        labeled_loss = loss_fn(y, model(x))
      # Calculates the adversarial loss. This will reuse labeled_loss and
      # will consume tape_x.
      adv_loss = nsl.keras.adversarial_loss(
          x, y, model, loss_fn, labeled_loss=labeled_loss,
          gradient_tape=tape_x)
      # Combines both losses. This could also be a weighted combination.
      total_loss = labeled_loss + adv_loss
    # Regular backward pass.
    gradients = tape_w.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  ```

  Arguments:
    features: Input features, should be a `Tensor` or a collection of `Tensor`
      objects. If it is a collection, the first dimension of all `Tensor`
      objects inside should be the same (i.e. batch size).
    labels: Target labels.
    model: A callable that takes `features` as inputs and computes
      `predictions` as outputs. An example would be a `tf.keras.Model` object.
    loss_fn: A callable which calculates the labeled loss from `labels`,
      `predictions`, and `sample_weights`. An example would be a
      `tf.keras.losses.Loss` object.
    sample_weights: (optional) A 1-D `Tensor` of weights for the examples,
      with the same length as the first dimension of `features`.
    adv_config: (optional) An `nsl.configs.AdvRegConfig` object for
      adversarial regularization hyperparameters. Use
      `nsl.configs.make_adv_reg_config` to construct one.
    predictions: (optional) Precomputed value of `model(features)`. If set,
      the value will be reused when calculating adversarial regularization. In
      eager mode, `gradient_tape` has to be set as well.
    labeled_loss: (optional) Precomputed value of `loss_fn(labels,
      model(features))`. If set, the value will be reused when calculating
      adversarial regularization. In eager mode, `gradient_tape` has to be set
      as well.
    gradient_tape: (optional) A `tf.GradientTape` object watching `features`.
    model_kwargs: (optional) A dictionary of additional keyword arguments to
      be passed to the `model`.

  Returns:
    A `Tensor` for the adversarial regularization loss, i.e. the labeled loss
    on adversarially perturbed features.
  """
  if adv_config is None:
    adv_config = nsl_configs.AdvRegConfig()

  if model_kwargs is not None:
    model = functools.partial(model, **model_kwargs)

  # Calculates labeled_loss if not provided.
  if labeled_loss is None:
    # Reuses the tape if provided; otherwise creates a new tape.
    gradient_tape = gradient_tape or tf.GradientTape()
    with gradient_tape:
      gradient_tape.watch(tf.nest.flatten(features))
      # Calculates predictions if not provided.
      predictions = predictions if predictions is not None else model(features)
      labeled_loss = loss_fn(labels, predictions, sample_weights)

  adv_input, adv_sample_weights = nsl_lib.gen_adv_neighbor(
      features,
      labeled_loss,
      config=adv_config.adv_neighbor_config,
      gradient_tape=gradient_tape)
  adv_output = model(adv_input)
  if sample_weights is not None:
    adv_sample_weights = tf.math.multiply(sample_weights, adv_sample_weights)
  adv_loss = loss_fn(labels, adv_output, adv_sample_weights)
  return adv_loss
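The docstring's example covers a custom training loop; below is a hedged
sketch of the Keras subclassed-model usage it also mentions. The class name,
layer sizes, and returned metric key are assumptions for illustration.

```python
import tensorflow as tf
import neural_structured_learning as nsl

class AdvRegModel(tf.keras.Model):
  """Hypothetical subclassed model adding nsl.keras.adversarial_loss."""

  def __init__(self):
    super().__init__()
    self.dense = tf.keras.layers.Dense(1)
    self.loss_fn = tf.keras.losses.MeanSquaredError()

  def call(self, inputs):
    return self.dense(inputs)

  def train_step(self, data):
    x, y = data
    with tf.GradientTape() as tape_w:
      # Inner tape watches the input so the perturbation gradient can be
      # taken; it is consumed by adversarial_loss below.
      with tf.GradientTape() as tape_x:
        tape_x.watch(x)
        labeled_loss = self.loss_fn(y, self(x, training=True))
      adv_loss = nsl.keras.adversarial_loss(
          x, y, self, self.loss_fn,
          labeled_loss=labeled_loss, gradient_tape=tape_x)
      total_loss = labeled_loss + adv_loss
    grads = tape_w.gradient(total_loss, self.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
    return {'loss': total_loss}
```

After `compile(optimizer=...)`, such a model trains with the regular
`fit(dataset)` API while adding the adversarial term to every step.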
def adv_model_fn(features, labels, mode, params=None, config=None):
  """The adversarial-regularized model_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
      or `dict` of same.
    labels: This is the second item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
      or `dict` of same (for multi-head models). If mode is
      `tf.estimator.ModeKeys.PREDICT`, `labels=None` will be passed. If the
      `model_fn`'s signature does not accept `mode`, the `model_fn` must still
      be able to handle `labels=None`.
    mode: Optional. Specifies if this is training, evaluation, or prediction.
      See `tf.estimator.ModeKeys`.
    params: Optional `dict` of hyperparameters. Will receive what is passed to
      Estimator in the `params` parameter. This allows users to configure
      Estimators from hyperparameter tuning.
    config: Optional `estimator.RunConfig` object. Will receive what is passed
      to Estimator as its `config` parameter, or a default value. Allows
      setting up things in the `model_fn` based on configuration such as
      `num_ps_replicas` or `model_dir`. Currently unused.

  Returns:
    A `tf.estimator.EstimatorSpec` with adversarial regularization.
  """

  # Uses the same variable scope for calculating the original objective and
  # the adversarial regularization.
  with tf.compat.v1.variable_scope(
      tf.compat.v1.get_variable_scope(),
      reuse=tf.compat.v1.AUTO_REUSE,
      auxiliary_name_scope=False):
    # If no 'params' is passed, then it is possible for base_model_fn not to
    # accept a 'params' argument. See documentation for tf.estimator.Estimator
    # for additional context.
    if params:
      original_spec = base_model_fn(features, labels, mode, params, config)
    else:
      original_spec = base_model_fn(features, labels, mode, config)

    # Adversarial regularization only happens in training.
    if mode != tf.estimator.ModeKeys.TRAIN:
      return original_spec

    adv_neighbor, _ = nsl_lib.gen_adv_neighbor(features, original_spec.loss,
                                               adv_config.adv_neighbor_config)

    # Runs the base model again to compute loss on adv_neighbor, passing the
    # same arguments as in the original call above.
    if params:
      adv_spec = base_model_fn(adv_neighbor, labels, mode, params, config)
    else:
      adv_spec = base_model_fn(adv_neighbor, labels, mode, config)

    final_loss = original_spec.loss + adv_config.multiplier * adv_spec.loss

    if not optimizer_fn:
      # Defaults to the Adagrad optimizer, the same as the canned
      # DNNEstimator.
      optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.05)
    else:
      optimizer = optimizer_fn()

    final_train_op = optimizer.minimize(
        loss=final_loss, global_step=tf.compat.v1.train.get_global_step())

  return original_spec._replace(loss=final_loss, train_op=final_train_op)
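The closure variables `base_model_fn`, `adv_config`, and `optimizer_fn` are
supplied by the wrapper this `model_fn` is defined in. A sketch of how such a
wrapper is typically invoked, via `nsl.estimator.add_adversarial_regularization`;
the feature columns and hyperparameter values here are illustrative
assumptions.

```python
import tensorflow as tf
import neural_structured_learning as nsl

# A canned estimator as the base model; column names are hypothetical.
feature_columns = [tf.feature_column.numeric_column('feature', shape=(4,))]
base_estimator = tf.estimator.DNNClassifier(
    hidden_units=[16, 8], feature_columns=feature_columns, n_classes=3)

# Wraps the base estimator. Its model_fn plays the role of base_model_fn in
# adv_model_fn above, and the returned estimator trains on final_loss.
adv_estimator = nsl.estimator.add_adversarial_regularization(
    base_estimator,
    optimizer_fn=lambda: tf.compat.v1.train.AdagradOptimizer(0.05),
    adv_config=nsl.configs.make_adv_reg_config(multiplier=0.2,
                                               adv_step_size=0.05))

# Training then proceeds as with any estimator:
# adv_estimator.train(input_fn=train_input_fn, steps=1000)
```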
def adv_model_fn(features, labels, mode, params=None, config=None):
  """The adversarial-regularized model_fn.

  Args:
    features: This is the first item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
      or `dict` of same.
    labels: This is the second item returned from the `input_fn` passed to
      `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
      or `dict` of same (for multi-head models). If mode is
      `tf.estimator.ModeKeys.PREDICT`, `labels=None` will be passed. If the
      `model_fn`'s signature does not accept `mode`, the `model_fn` must still
      be able to handle `labels=None`.
    mode: Optional. Specifies if this is training, evaluation, or prediction.
      See `tf.estimator.ModeKeys`.
    params: Optional `dict` of hyperparameters. Will receive what is passed to
      Estimator in the `params` parameter. This allows users to configure
      Estimators from hyperparameter tuning.
    config: Optional `estimator.RunConfig` object. Will receive what is passed
      to Estimator as its `config` parameter, or a default value. Allows
      setting up things in the `model_fn` based on configuration such as
      `num_ps_replicas` or `model_dir`. Currently unused.

  Returns:
    A `tf.estimator.EstimatorSpec` with adversarial regularization.
  """

  # Parameters 'params' and 'config' are optional. If they are not passed,
  # then it is possible for base_model_fn not to accept these arguments. See
  # documentation for tf.estimator.Estimator for additional context.
  kwargs = {'mode': mode}
  if 'params' in base_model_fn_args:
    kwargs['params'] = params
  if 'config' in base_model_fn_args:
    kwargs['config'] = config
  base_fn = functools.partial(base_model_fn, **kwargs)

  # Uses the same variable scope for calculating the original objective and
  # the adversarial regularization.
  with tf.compat.v1.variable_scope(
      tf.compat.v1.get_variable_scope(),
      reuse=tf.compat.v1.AUTO_REUSE,
      auxiliary_name_scope=False):
    original_spec = base_fn(features, labels)

    # Adversarial regularization only happens in training.
    if mode != tf.estimator.ModeKeys.TRAIN:
      return original_spec

    adv_neighbor, _ = nsl_lib.gen_adv_neighbor(
        features,
        original_spec.loss,
        adv_config.adv_neighbor_config,
        # The pgd_model_fn is a dummy identity function since the loss is
        # directly available from base_fn.
        pgd_model_fn=lambda features: features,
        pgd_loss_fn=lambda labels, features: base_fn(features, labels).loss,
        pgd_labels=labels,
        use_while_loop=False)

    # Runs the base model again to compute loss on adv_neighbor.
    adv_spec = base_fn(adv_neighbor, labels)

    scaled_adversarial_loss = adv_config.multiplier * adv_spec.loss
    tf.compat.v1.summary.scalar('loss/scaled_adversarial_loss',
                                scaled_adversarial_loss)

    supervised_loss = original_spec.loss
    tf.compat.v1.summary.scalar('loss/supervised_loss', supervised_loss)

    final_loss = supervised_loss + scaled_adversarial_loss

    if not optimizer_fn:
      # Defaults to the Adagrad optimizer, the same as the canned
      # DNNEstimator.
      optimizer = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.05)
    else:
      optimizer = optimizer_fn()

    train_op = optimizer.minimize(
        loss=final_loss, global_step=tf.compat.v1.train.get_global_step())

    update_ops = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    if update_ops:
      train_op = tf.group(train_op, *update_ops)

  return original_spec._replace(loss=final_loss, train_op=train_op)
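This revision passes `pgd_model_fn`, `pgd_loss_fn`, and `pgd_labels` to
`gen_adv_neighbor`, enabling multi-step PGD perturbations. A sketch of a
configuration that exercises that path, built from the `AdvNeighborConfig`
fields listed in `perturb_on_batch` above; the numeric values are
illustrative, not recommendations.

```python
import neural_structured_learning as nsl

# Multi-step PGD configuration (values are illustrative assumptions).
neighbor_config = nsl.configs.AdvNeighborConfig(
    adv_step_size=0.01,        # per-step perturbation size
    adv_grad_norm='infinity',  # norm used to normalize the gradient
    pgd_iterations=10,         # number of PGD gradient steps
    pgd_epsilon=0.05)          # bound on the total perturbation
adv_config = nsl.configs.AdvRegConfig(
    multiplier=0.2, adv_neighbor_config=neighbor_config)
```

With `pgd_iterations=1` and `pgd_epsilon` unset, this reduces to the
single-step FGSM-style perturbation used by the earlier `adv_model_fn`.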