def _rewrite_output_as_tensor(body_grad_graph, grad_output_slices):
  """Swaps an IndexedSlices output of the grad graph for a dense Tensor.

  The dense tensor is created inside `body_grad_graph`, stored in place of
  `grad_output_slices` in the graph's structured outputs, and the flat
  `outputs` list is rebuilt from the updated structure.

  Args:
    body_grad_graph: _WhileBodyGradFuncGraph.
    grad_output_slices: IndexedSlices output of body_grad_graph.
  """
  with body_grad_graph.as_default():
    dense_output = ops.convert_to_tensor_v2(grad_output_slices)

  structured = body_grad_graph.structured_outputs
  position = structured.index(grad_output_slices)
  structured[position] = dense_output
  # Keep the flat output list in sync with the structured outputs.
  body_grad_graph.outputs = func_graph.flatten(structured)
def _prepare_local(self, var_device, var_dtype, apply_state):
  """Populates per-(device, dtype) optimizer state for the Adam update.

  On top of what the parent class stores, this adds one decayed learning
  rate per entry of `self.pattern_lrs` (key `lr-<i>_t`), the beta
  coefficients and their powers, epsilon, and a bias-corrected copy of every
  learning rate stored under the key with `_t` removed.

  Args:
    var_device: Device key of the state entry to fill.
    var_dtype: Dtype key of the state entry; also the dtype of the values.
    apply_state: Dict keyed by `(var_device, var_dtype)` holding the state.
  """
  super(AdamMultilr, self)._prepare_local(var_device, var_dtype, apply_state)
  state = apply_state[(var_device, var_dtype)]

  if self.pattern_lrs:
    for i, pair in enumerate(self.pattern_lrs):
      state[f"lr-{i}_t"] = array_ops.identity(
          self._decayed_multi_lr(pair["lr"], var_dtype))

  local_step = math_ops.cast(self.iterations + 1, var_dtype)
  beta_1_t = array_ops.identity(self._get_hyper("beta_1", var_dtype))
  beta_2_t = array_ops.identity(self._get_hyper("beta_2", var_dtype))
  beta_1_power = math_ops.pow(beta_1_t, local_step)
  beta_2_power = math_ops.pow(beta_2_t, local_step)

  # Apply Adam's bias correction to every learning rate currently in the
  # state (any key containing "lr").
  updated_lrs = {}
  for lr_name in state:
    if "lr" not in lr_name:
      continue
    corrected_lr = state[lr_name] * (
        math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power))
    updated_lrs[lr_name.replace("_t", "")] = corrected_lr

  new_entries = dict(
      epsilon=ops.convert_to_tensor_v2(self.epsilon, var_dtype),
      beta_1_t=beta_1_t,
      beta_1_power=beta_1_power,
      one_minus_beta_1_t=1 - beta_1_t,
      beta_2_t=beta_2_t,
      beta_2_power=beta_2_power,
      one_minus_beta_2_t=1 - beta_2_t,
      **updated_lrs,
  )
  state.update(new_entries)
def test_metric_dict(self):
  """Checks names and results of metrics given as per-output dicts."""
  metric_container = compile_utils.MetricsContainer(
      metrics={'out1': 'mse', 'out2': 'mae'},
      weighted_metrics={'out1': 'mse', 'out2': 'mae'})

  ones = array_ops.ones((10, 1))
  y_t = {'out1': ones, 'out2': array_ops.zeros((10, 1))}
  y_p = {'out1': array_ops.ones((10, 1)), 'out2': 2 * array_ops.ones((10, 1))}
  sw = ops.convert_to_tensor_v2([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

  metric_container.update_state(y_t, y_p, sample_weight=sw)

  # (expected name, expected result), in the container's metric order.
  expectations = [
      ('out1_mse', 0.),
      ('out2_mae', 2.),
      ('weighted_out1_mse', 0.),
      ('weighted_out2_mae', 2.),
  ]
  for position, (expected_name, expected_result) in enumerate(expectations):
    metric = metric_container.metrics[position]
    self.assertEqual(metric.name, expected_name)
    self.assertEqual(metric.result().numpy(), expected_result)
def test_add_update_in_model(self):
  """Checks the updates a subclassed model registers via `add_update`."""

  class MyModel(keras.Model):

    def __init__(self):
      super(MyModel, self).__init__()
      self.b = self.add_weight('bias', (10, ))
      self.c = self.add_weight('bias2', (10, ))

    def call(self, inputs):
      # Update that does not depend on the inputs.
      doubled_bias = self.b.assign(self.b * 2)
      self.add_update(doubled_bias)
      # Update that is conditional on `inputs`.
      row_assign = self.c.assign(inputs[1, :])
      self.add_update(row_assign, inputs)
      return inputs + self.b + self.c

  x = ops.convert_to_tensor_v2(np.ones((10, 10), 'float32'))
  model = MyModel()
  model(x)

  if context.executing_eagerly():
    self.assertEqual(0, len(model.updates))
    return

  self.assertEqual(2, len(model.updates))
  self.assertEqual(1, len(model.get_updates_for(None)))
  self.assertEqual(1, len(model.get_updates_for(x)))
def test_list_of_metrics_list_of_outputs(self):
  """Checks that list-valued metrics are broadcast to every output."""
  metric_container = compile_utils.MetricsContainer(
      metrics=['mse', 'mae'],  # Should broadcast to both outputs.
      weighted_metrics=['accuracy'])  # Should broadcast to both outputs.

  y_t = [array_ops.ones((10, 1)), array_ops.zeros((10, 1))]
  y_p = [array_ops.ones((10, 1)), 2 * array_ops.ones((10, 1))]
  sw = ops.convert_to_tensor_v2([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

  metric_container.update_state(y_t, y_p, sample_weight=sw)
  self.assertLen(metric_container.metrics, 6)

  # (expected name, expected result, expected wrapped fn or None), in the
  # container's metric order.
  expectations = [
      ('output_1_mse', 0., None),
      ('output_1_mae', 0., None),
      ('output_1_accuracy', 1., metrics_mod.binary_accuracy),
      ('output_2_mse', 4., None),
      ('output_2_mae', 2., None),
      ('output_2_accuracy', 0., metrics_mod.binary_accuracy),
  ]
  for position, expectation in enumerate(expectations):
    expected_name, expected_result, expected_fn = expectation
    metric = metric_container.metrics[position]
    self.assertEqual(metric.name, expected_name)
    self.assertEqual(metric.result().numpy(), expected_result)
    if expected_fn is not None:
      self.assertEqual(metric._fn, expected_fn)
def __call__(self, step):
  """Computes the linear-cosine decayed learning rate for `step`.

  The step is clipped to `decay_steps`; the result is
  `initial_learning_rate * ((alpha + linear) * cosine + beta)`, where
  `linear` falls from 1 to 0 over `decay_steps` and `cosine` is
  `0.5 * (1 + cos(pi * 2 * num_periods * step / decay_steps))`.

  Args:
    step: Current training step (cast to the learning rate's dtype).

  Returns:
    The decayed learning rate tensor.
  """
  with ops.name_scope_v2(self.name or "LinearCosineDecay") as name:
    base_lr = ops.convert_to_tensor_v2(
        self.initial_learning_rate, name="initial_learning_rate")
    lr_dtype = base_lr.dtype

    def _to_lr_dtype(value):
      return math_ops.cast(value, lr_dtype)

    total_steps = _to_lr_dtype(self.decay_steps)
    periods = _to_lr_dtype(self.num_periods)
    alpha = _to_lr_dtype(self.alpha)
    beta = _to_lr_dtype(self.beta)

    # Steps beyond `decay_steps` behave like the final step.
    clipped_step = math_ops.minimum(_to_lr_dtype(step), total_steps)

    linear_term = (total_steps - clipped_step) / total_steps
    progress = clipped_step / total_steps
    cycles = 2.0 * periods * progress
    cosine_term = 0.5 * (
        1.0 + math_ops.cos(constant_op.constant(math.pi) * cycles))

    decay_factor = (alpha + linear_term) * cosine_term + beta
    return math_ops.multiply(base_lr, decay_factor, name=name)
def test_validation_split_shuffled(self, use_numpy):
  """Checks `train_validation_split` keeps x, y and sample weights aligned.

  The fixtures satisfy `y == 2 * x` and `sw == 2 * y`, so any row-wise
  misalignment between the three arrays is detectable in both the training
  and the validation split.

  Args:
    use_numpy: If True the inputs are NumPy arrays, otherwise tensors.
  """
  if use_numpy:
    x = np.array([0, 1, 2, 3, 4])
    y = np.array([0, 2, 4, 6, 8])
    sw = np.array([0, 4, 8, 12, 16])
  else:
    x = ops.convert_to_tensor_v2([0, 1, 2, 3, 4])
    y = ops.convert_to_tensor_v2([0, 2, 4, 6, 8])
    sw = ops.convert_to_tensor_v2([0, 4, 8, 12, 16])

  (train_x, train_y, train_sw), (val_x, val_y, val_sw) = (
      data_adapter.train_validation_split((x, y, sw), validation_split=0.2))

  self.assertEqual(int(train_x.shape[0]), 4)
  self.assertEqual(int(train_y.shape[0]), 4)
  self.assertEqual(int(train_sw.shape[0]), 4)
  for i in range(4):
    # Check that all arrays were shuffled in identical order.
    self.assertEqual(2 * train_x[i].numpy(), train_y[i].numpy())
    self.assertEqual(2 * train_y[i].numpy(), train_sw[i].numpy())

  self.assertEqual(int(val_x.shape[0]), 1)
  self.assertEqual(int(val_y.shape[0]), 1)
  self.assertEqual(int(val_sw.shape[0]), 1)
  for i in range(1):
    # Check that all arrays were shuffled in identical order. This must
    # inspect the validation arrays; checking the training arrays again
    # would leave the validation split unverified.
    self.assertEqual(2 * val_x[i].numpy(), val_y[i].numpy())
    self.assertEqual(2 * val_y[i].numpy(), val_sw[i].numpy())

  # Check that arrays contain expected values.
  self.assertEqual(
      sorted(array_ops.concat([train_x, val_x], axis=0).numpy().tolist()),
      sorted(ops.convert_to_tensor_v2(x).numpy().tolist()))
  self.assertEqual(
      sorted(array_ops.concat([train_y, val_y], axis=0).numpy().tolist()),
      sorted(ops.convert_to_tensor_v2(y).numpy().tolist()))
  self.assertEqual(
      sorted(array_ops.concat([train_sw, val_sw], axis=0).numpy().tolist()),
      sorted(ops.convert_to_tensor_v2(sw).numpy().tolist()))
def build(self, input_shape):
  """Stores the constant tensor 2.0 on `self.b`; `input_shape` is unused."""
  constant_two = ops.convert_to_tensor_v2(2.0)
  self.b = constant_two
def transform(images,
              transforms,
              fill_mode='reflect',
              interpolation='bilinear',
              output_shape=None,
              name=None):
  """Applies the given projective transform(s) to the image(s).

  Args:
    images: A tensor of shape (num_images, num_rows, num_columns,
      num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or
      (num_rows, num_columns) (HW). The rank must be statically known (the
      shape is not `TensorShape(None)`).
    transforms: Projective transform matrix/matrices. A vector of length 8 or
      tensor of size N x 8. If one row of transforms is
      [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
      `(x, y)` to a transformed *input* point
      `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
      `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the
      transform mapping input points to output points. Note that gradients
      are not backpropagated into transformation parameters.
    fill_mode: Points outside the boundaries of the input are filled
      according to the given mode (one of `{'constant', 'reflect', 'wrap'}`).
    interpolation: Interpolation mode. Supported values: "nearest",
      "bilinear".
    output_shape: Output dimension after the transform, [height, width]. If
      None, output is the same size as the input image.
    name: The name of the op.

  ## Fill mode.
  Behavior for each valid value is as follows:

  reflect (d c b a | a b c d | d c b a)
  The input is extended by reflecting about the edge of the last pixel.

  constant (k k k k | a b c d | k k k k)
  The input is extended by filling all values beyond the edge with the same
  constant value k = 0.

  wrap (a b c d | a b c d | a b c d)
  The input is extended by wrapping around to the opposite edge.

  Input shape:
    4D tensor with shape: `(samples, height, width, channels)`,
      data_format='channels_last'.
  Output shape:
    4D tensor with shape: `(samples, height, width, channels)`,
      data_format='channels_last'.

  Returns:
    Image(s) with the same type and shape as `images`, with the given
    transform(s) applied. Transformed coordinates outside of the input image
    are filled according to `fill_mode`.

  Raises:
    TypeError: Possibly raised by the underlying op for an invalid `images`
      type (it is not raised directly in this function).
    ValueError: If output shape is not a 1-D int32 Tensor of 2 elements.
  """
  with ops.name_scope(name, 'transform'):
    if output_shape is None:
      # Default to the size of `images` along dims 1 and 2; outside eager
      # mode prefer the statically known value when there is one.
      dynamic_hw = array_ops.shape(images)[1:3]
      static_hw = None
      if not context.executing_eagerly():
        static_hw = tensor_util.constant_value(dynamic_hw)
      output_shape = dynamic_hw if static_hw is None else static_hw

    output_shape = ops.convert_to_tensor_v2(
        output_shape, dtypes.int32, name='output_shape')

    shape_is_valid = output_shape.get_shape().is_compatible_with([2])
    if not shape_is_valid:
      raise ValueError('output_shape must be a 1-D Tensor of 2 elements: '
                       'new_height, new_width, instead got '
                       '{}'.format(output_shape))

    return image_ops.image_projective_transform_v2(
        images,
        output_shape=output_shape,
        transforms=transforms,
        fill_mode=fill_mode.upper(),
        interpolation=interpolation.upper())
def _fused_batch_norm(self, inputs, training):
  """Returns the output of fused batch norm.

  Runs `nn.fused_batch_norm` in its training or inference form depending on
  `training` (selected through `tf_utils.smart_cond`). Unless `training` is
  statically False, it also registers deferred updates of the moving mean and
  moving variance via `self.add_update`.

  Args:
    inputs: Tensor to normalize.
    training: Training flag. Its static value may be unknown, in which case
      `tf_utils.constant_value` returns None and both branches are built.

  Returns:
    The normalized output produced by `nn.fused_batch_norm`.
  """
  # Fall back to the constant beta/gamma when centering/scaling is disabled.
  beta = self.beta if self.center else self._beta_const
  gamma = self.gamma if self.scale else self._gamma_const

  # TODO(b/129279393): Support zero batch input in non DistributionStrategy
  # code as well.
  if self._support_zero_size_input():
    inputs_size = array_ops.size(inputs)
  else:
    inputs_size = None

  def _fused_batch_norm_training():
    return nn.fused_batch_norm(
        inputs,
        gamma,
        beta,
        epsilon=self.epsilon,
        data_format=self._data_format)

  def _fused_batch_norm_inference():
    return nn.fused_batch_norm(
        inputs,
        gamma,
        beta,
        mean=self.moving_mean,
        variance=self.moving_variance,
        epsilon=self.epsilon,
        is_training=False,
        data_format=self._data_format)

  output, mean, variance = tf_utils.smart_cond(
      training, _fused_batch_norm_training, _fused_batch_norm_inference)
  if not self._bessels_correction_test_only:
    # Remove Bessel's correction to be consistent with non-fused batch norm.
    # Note that the variance computed by fused batch norm is
    # with Bessel's correction.
    sample_size = math_ops.cast(
        array_ops.size(inputs) / array_ops.size(variance), variance.dtype)
    factor = (sample_size - math_ops.cast(1.0, variance.dtype)) / sample_size
    variance *= factor

  training_value = tf_utils.constant_value(training)
  if training_value is None:
    # `training` is only known at run time: use a momentum of 1.0 in the
    # inference branch (presumably this leaves the moving averages
    # unchanged -- confirm against `_assign_moving_average`).
    momentum = tf_utils.smart_cond(training,
                                   lambda: self.momentum,
                                   lambda: 1.0)
  else:
    momentum = ops.convert_to_tensor_v2(self.momentum)
  # Register updates unless `training` is statically False.
  if training_value or training_value is None:
    def mean_update():
      return self._assign_moving_average(self.moving_mean, mean, momentum,
                                         inputs_size)

    def variance_update():
      """Update self.moving_variance with the most recent data point."""
      if self.renorm:
        # We apply epsilon as part of the moving_stddev to mirror the training
        # code path.
        moving_stddev = self._assign_moving_average(
            self.moving_stddev, math_ops.sqrt(variance + self.epsilon),
            momentum, inputs_size)
        return self._assign_new_value(
            self.moving_variance,
            # Apply relu in case floating point rounding causes it to go
            # negative.
            K.relu(moving_stddev * moving_stddev - self.epsilon))
      else:
        return self._assign_moving_average(self.moving_variance, variance,
                                           momentum, inputs_size)

    # The update callables are passed unevaluated to `add_update`.
    self.add_update(mean_update)
    self.add_update(variance_update)

  return output
def _fn(*fargs, **fkwargs):
  """Calls `fn` and returns its result paired with its tensor conversion.

  The result object is given the tensor's `shape` and `get_shape`
  attributes before being returned.
  """
  result = fn(*fargs, **fkwargs)
  as_tensor = ops.convert_to_tensor_v2(result)
  for attr_name in ('shape', 'get_shape'):
    setattr(result, attr_name, getattr(as_tensor, attr_name))
  return result, as_tensor
def _get_noise_shape(self, inputs):
  """Returns the tensor `[1, last_dim]`, with `last_dim` taken from `inputs`."""
  last_dim = array_ops.shape(inputs)[-1]
  noise_shape = [1, last_dim]
  return ops.convert_to_tensor_v2(noise_shape)
def _fused_batch_norm(self, inputs, training):
  """Returns the output of fused batch norm.

  Runs `nn.fused_batch_norm` in training or inference form depending on
  `training`. When `use_fused_avg_updates` is enabled, the fused op is handed
  the moving statistics and an `exponential_avg_factor`, and the update
  callables simply assign the returned mean/variance; otherwise they call
  `_assign_moving_average`. Updates are registered via `self.add_update`
  unless `training` is statically False.

  Args:
    inputs: Tensor to normalize.
    training: Training flag. Its static value may be unknown, in which case
      `tf_utils.constant_value` returns None.

  Returns:
    The normalized output.
  """
  # Fall back to the constant beta/gamma when centering/scaling is disabled.
  beta = self.beta if self.center else self._beta_const
  gamma = self.gamma if self.scale else self._gamma_const

  # TODO(b/129279393): Support zero batch input in non DistributionStrategy
  # code as well.
  if self._support_zero_size_input():
    inputs_size = array_ops.size(inputs)
  else:
    inputs_size = None

  # TODO(rmlarsen): Support using fused avg updates for non-eager execution
  # after fixing graph pattern matching and enabling fused_batch_norm to
  # take exponential_avg_factor as a tensor input.
  use_fused_avg_updates = (
      compat.forward_compatible(2020, 3, 6) and
      ops.executing_eagerly_outside_functions())
  if use_fused_avg_updates:
    exponential_avg_factor = 1.0 - self.momentum
  else:
    exponential_avg_factor = None

  def _maybe_add_or_remove_bessels_correction(variance, remove=True):
    r"""Add or remove Bessel's correction."""
    # Removes Bessel's correction if remove == True, adds it otherwise.
    # This is to be consistent with non-fused batch norm. Note that the
    # variance computed by fused batch norm is with Bessel's correction.
    # This is only used in legacy V1 batch norm tests.
    if self._bessels_correction_test_only:
      return variance
    sample_size = math_ops.cast(
        array_ops.size(inputs) / array_ops.size(variance), variance.dtype)
    if remove:
      factor = (sample_size -
                math_ops.cast(1.0, variance.dtype)) / sample_size
    else:
      factor = sample_size / (
          sample_size - math_ops.cast(1.0, variance.dtype))
    return variance * factor

  def _fused_batch_norm_training():
    return nn.fused_batch_norm(
        inputs,
        gamma,
        beta,
        mean=self.moving_mean,
        variance=_maybe_add_or_remove_bessels_correction(
            self.moving_variance, remove=False),
        epsilon=self.epsilon,
        is_training=True,
        data_format=self._data_format,
        exponential_avg_factor=exponential_avg_factor)

  def _fused_batch_norm_training_empty():
    # Pass the inputs and the current moving statistics through untouched.
    return inputs, self.moving_mean, self.moving_variance

  def _fused_batch_norm_inference():
    return nn.fused_batch_norm(
        inputs,
        gamma,
        beta,
        mean=self.moving_mean,
        variance=self.moving_variance,
        epsilon=self.epsilon,
        is_training=False,
        data_format=self._data_format)

  train_op = _fused_batch_norm_training
  if use_fused_avg_updates and inputs_size is not None:
    # With fused updates and a known input size, skip the fused op entirely
    # for empty inputs.
    train_op = lambda: tf_utils.smart_cond(inputs_size > 0,
                                           _fused_batch_norm_training,
                                           _fused_batch_norm_training_empty)

  output, mean, variance = tf_utils.smart_cond(training, train_op,
                                               _fused_batch_norm_inference)
  variance = _maybe_add_or_remove_bessels_correction(variance, remove=True)

  training_value = tf_utils.constant_value(training)
  # Register updates unless `training` is statically False.
  if training_value or training_value is None:
    if not use_fused_avg_updates:
      if training_value is None:
        # `training` is only known at run time: use a momentum of 1.0 in the
        # inference branch (presumably a no-op update -- confirm against
        # `_assign_moving_average`).
        momentum = tf_utils.smart_cond(training,
                                       lambda: self.momentum,
                                       lambda: 1.0)
      else:
        momentum = ops.convert_to_tensor_v2(self.momentum)

    def mean_update():
      """Update self.moving_mean with the most recent data point."""
      if use_fused_avg_updates:
        return self._assign_new_value(self.moving_mean, mean)
      else:
        return self._assign_moving_average(self.moving_mean, mean, momentum,
                                           inputs_size)

    def variance_update():
      """Update self.moving_variance with the most recent data point."""
      if use_fused_avg_updates:
        return self._assign_new_value(self.moving_variance, variance)
      else:
        return self._assign_moving_average(self.moving_variance, variance,
                                           momentum, inputs_size)

    # The update callables are passed unevaluated to `add_update`.
    self.add_update(mean_update)
    self.add_update(variance_update)

  return output
def gamma(self):
  """Returns strongly convex parameter, gamma (the float32 constant 1)."""
  strong_convexity = _ops.convert_to_tensor_v2(1, dtype=tf.float32)
  return strong_convexity
def __init__(self, input_):
  """Keeps `input_` and a fixed `[[42.]]` tensor as `self.value`."""
  self._input = input_
  fixed_value = ops.convert_to_tensor_v2([[42.]])
  self.value = fixed_value
def test_enables_nontensor_plumbing(self):
  """Checks that a registered non-Tensor type survives model plumbing.

  A custom `Foo` type is registered as tensor-convertible and symbolic; the
  test then verifies that both the model output and the prediction passed to
  a custom loss are `Foo` instances rather than Tensors.
  """
  if context.executing_eagerly():
    self.skipTest('`compile` functionality changed.')

  # Setup: a tensor-convertible wrapper type.
  class Foo(object):

    def __init__(self, input_):
      self._input = input_
      self.value = ops.convert_to_tensor_v2([[42.]])

    @property
    def dtype(self):
      return self.value.dtype

  def _foo_as_tensor(x, *args, **kwargs):
    return x.value

  ops.register_tensor_conversion_function(Foo, _foo_as_tensor)
  tf_utils.register_symbolic_tensor_type(Foo)

  class PlumbingLayer(keras.layers.Lambda):

    def __init__(self, fn, **kwargs):

      def _fn(*fargs, **fkwargs):
        produced = fn(*fargs, **fkwargs)
        as_tensor = ops.convert_to_tensor_v2(produced)
        produced.shape = as_tensor.shape
        produced.get_shape = as_tensor.get_shape
        return produced, as_tensor

      super(PlumbingLayer, self).__init__(_fn, **kwargs)
      self._enter_dunder_call = False

    def __call__(self, inputs, *args, **kwargs):
      # Flag that `call` is being reached through `__call__`, so it returns
      # the (object, tensor) pair; only the object is handed back here.
      self._enter_dunder_call = True
      produced, _ = super(PlumbingLayer, self).__call__(
          inputs, *args, **kwargs)
      self._enter_dunder_call = False
      return produced

    def call(self, inputs, *args, **kwargs):
      produced, as_tensor = super(PlumbingLayer, self).call(
          inputs, *args, **kwargs)
      return (produced, as_tensor) if self._enter_dunder_call else produced

  # User-land.
  plumbing = PlumbingLayer(Foo)  # Makes a `Foo` object.
  model = keras.Sequential([keras.layers.InputLayer((1, )), plumbing])
  # Let's ensure Keras graph history is preserved by composing the models.
  model = keras.Model(model.inputs, model(model.outputs))

  # Now we instantiate the model and verify we have a `Foo` object, not a
  # `Tensor`.
  prediction = model(ops.convert_to_tensor_v2([[7.]]))
  self.assertIsInstance(prediction, Foo)

  # Confirm that (custom) loss sees `Foo` instance, not Tensor.
  seen_predictions = [None]

  def custom_loss(y_obs, y_pred):
    del y_obs
    seen_predictions[0] = y_pred
    return y_pred

  # Apparently `compile` calls the loss function enough to trigger the
  # side-effect.
  model.compile('SGD', loss=custom_loss)
  self.assertIsInstance(seen_predictions[0], Foo)
def value(self):
  """Returns the constant 42.0 as a tensor."""
  answer = ops.convert_to_tensor_v2(42.)
  return answer
def build(self, input_shape):
  """Creates `self.variable` (initialised to 2.0 * 1.0) and its tensor copy.

  `input_shape` is unused.
  """
  one = ops.convert_to_tensor_v2(1.0)
  initial_value = 2.0 * one
  self.variable = variables.Variable(initial_value)
  # Tensor conversion of the variable just created.
  self.constant = ops.convert_to_tensor_v2(self.variable)
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregated output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     A tuple `(outs, total_loss, output_losses, masks)`: the flattened model
     outputs, the total loss, the per-output loss metric results (only
     populated when the model has more than one output), and the
     `_keras_mask` of each output (None where absent). The total loss
     includes regularization losses and applies masking and sample weighting
     to the loss value.

  Raises:
     ValueError: If the model has neither loss functions nor layer losses.
  """
  # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
  # Used to keep track of the total loss value (stateless).
  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  # Unwrap a single input unless inputs are given as a dict.
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor_v2, inputs)

  outs = model(inputs, **kwargs)
  outs = nest.flatten(outs)

  if targets:
    targets = training_utils.cast_if_floating_dtype_and_mismatch(
        targets, outs)
  # TODO(sallymatson/psv): check if we should do same mismatch fix for weights
  if sample_weights:
    sample_weights = [
        training_utils.cast_if_floating_dtype(
            ops.convert_to_tensor_v2(val))
        if val is not None else None for val in sample_weights
    ]

  masks = [getattr(t, '_keras_mask', None) for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses.
  output_losses = []

  with backend.name_scope('loss'):
    loss_fns = [
        loss_fn for loss_fn in model.loss_functions if loss_fn is not None
    ]
    custom_losses = model.losses  # Regularization losses

    if not loss_fns and not custom_losses:
      if training:
        raise ValueError('The model cannot be trained '
                         'because it has no loss to optimize.')
      else:
        raise ValueError('The model cannot be evaluated '
                         'because it has no loss to compute.')

    for i, loss_fn in enumerate(loss_fns):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            weights = math_ops.cast(weights, outs[i].dtype)
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(
                    mask, sample_weight=weights))
            weights *= mask

        if hasattr(loss_fn, 'reduction'):
          # Compute unreduced per-sample losses first so the weights can be
          # applied before the reduction.
          per_sample_losses = loss_fn.call(targets[i], outs[i])
          weighted_losses = losses_utils.compute_weighted_loss(
              per_sample_losses,
              sample_weight=weights,
              reduction=losses_utils.ReductionV2.NONE)
          loss_reduction = loss_fn.reduction

          # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
          # compile use cases.
          if loss_reduction == losses_utils.ReductionV2.AUTO:
            loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(
              weighted_losses, reduction=loss_reduction)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
          loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        # Keep track of the stateful output loss result.
        output_losses.append(output_loss_metrics[i](output_loss))

      # Scale output loss for distribution. For custom losses we assume
      # reduction was mean.
      if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
        output_loss = losses_utils.scale_loss_for_distribution(
            output_loss)
      total_loss += model._loss_weights_list[i] * output_loss

    # Add regularization losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))
  return outs, total_loss, output_losses, masks
def adapt(self, data, reset_state=True):
  """Fits the state of the preprocessing layer to the data being passed.

  The data is consumed element by element; each element is folded into an
  accumulator with `self._combiner.compute`, and the final accumulator is
  turned into state variable updates via `self._combiner.extract`. The layer
  is built from the first element's shape if it has not been built yet.

  Arguments:
    data: The data to train on. It can be passed either as a tf.data Dataset,
      or as a numpy array. Lists and tuples are converted to a tensor first;
      Tensors and RaggedTensors are also accepted.
    reset_state: Optional argument specifying whether to clear the state of
      the layer at the start of the call to `adapt`, or whether to start from
      the existing state. Subclasses may choose to throw if reset_state is set
      to 'False'.

  Raises:
    ValueError: If `data` is of an unsupported type, or is a dataset with an
      infinite number of elements.
  """
  if reset_state:
    accumulator = None
  else:
    accumulator = self._combiner.restore(self._restore_updates())
  if isinstance(data, (list, tuple)):
    data = ops.convert_to_tensor_v2(data)
  if not isinstance(data,
                    (dataset_ops.DatasetV2,
                     np.ndarray,
                     ops.Tensor,
                     ragged_tensor.RaggedTensor)):
    raise ValueError(
        '`adapt()` requires a batched Dataset, a Tensor, '
        'or a Numpy array as input, '
        'got {}'.format(type(data)))

  # Build `next_data`, a zero-argument callable yielding the next element.
  if isinstance(data, dataset_ops.DatasetV2):
    # Validate the datasets to try and ensure we haven't been passed one with
    # infinite size. That would cause an infinite loop here.
    if tf_utils.dataset_is_infinite(data):
      raise ValueError(
          'The dataset passed to `adapt()` has an infinite number of '
          'elements. Please use `dataset.take(...)` to make the number '
          'of elements finite.')
    next_data = self._get_dataset_iterator(data)
    # TODO(fchollet): consider checking if the dataset is already batched
    # and otherwise batching it.
  elif isinstance(data, (ops.Tensor, ragged_tensor.RaggedTensor)):
    next_data = self._get_dataset_iterator(
        dataset_ops.Dataset.from_tensor_slices(data).batch(512))
  else:
    generator, _ = training_generator.convert_to_generator_like(
        data, batch_size=512)
    # If the data is not a dataset, we can iterate over it using next(foo);
    # here, we wrap that into a callable.
    next_data = lambda: next(generator)

  # TODO(momernick): Some sort of status bar?
  # TODO(momernick): Implement parallel processing here?
  try:
    data_element = next_data()

    # First, see if the layer is built or not. If it is not, then we must
    # build it.
    if not self.built:
      try:
        # If this is a Numpy array or tensor, we can get shape from .shape.
        # If not, an attribute error will be thrown.
        data_shape = data_element.shape
        data_shape_nones = tuple([None]*len(data_element.shape))
      except AttributeError:
        # The input has an unknown number of dimensions.
        data_shape = None
        data_shape_nones = None

      # TODO (b/159261555): move this to base layer build.
      batch_input_shape = getattr(self, '_batch_input_shape', None)
      if batch_input_shape is None:
        # Set the number of dimensions.
        self._batch_input_shape = data_shape_nones

      self.build(data_shape)

    # Once we have built the Layer, we can process the input data. We do so
    # until we've gotten an exception indicating that we have no more data.
    while True:
      accumulator = self._combiner.compute(data_element, accumulator)
      data_element = next_data()
  # Note that this belongs to the outer indentation of 'try' - we need to
  # catch exceptions resulting from the first 'next_data()' invocation as
  # well.
  except (StopIteration, errors.OutOfRangeError):
    # End of data is the normal exit from the loop above.
    pass

  updates = self._combiner.extract(accumulator)
  self._set_state_variables(updates)
def test_dense_dtype(self):
  """Checks a float32 Dense layer yields float32 output for integer input."""
  integer_data = np.random.randint(low=0, high=7, size=(2, 2))
  inputs = ops.convert_to_tensor_v2(integer_data)
  dense = keras.layers.Dense(5, dtype='float32')
  self.assertEqual(dense(inputs).dtype, 'float32')
def call(self, inputs, training=None):
  """Applies batch normalization to `inputs`.

  Delegates to `self._fused_batch_norm` when `self.fused` is set. Otherwise
  it computes batch moments (or uses the moving statistics when `training` is
  statically False), optionally applies the `adjustment` and renorm
  transforms to scale/offset, registers moving-average updates via
  `self.add_update`, and normalizes with `nn.batch_normalization`.

  Args:
    inputs: Tensor to normalize.
    training: Training flag, resolved through `self._get_training_value`.

  Returns:
    The normalized tensor, with its static shape set to that of `inputs`
    (after any virtual-batch reshape has been undone).
  """
  training = self._get_training_value(training)

  if self.virtual_batch_size is not None:
    # Virtual batches (aka ghost batches) can be simulated by reshaping the
    # Tensor and reusing the existing batch norm implementation
    original_shape = [-1] + inputs.shape.as_list()[1:]
    expanded_shape = [self.virtual_batch_size, -1] + original_shape[1:]

    # Will cause errors if virtual_batch_size does not divide the batch size
    inputs = array_ops.reshape(inputs, expanded_shape)

    def undo_virtual_batching(outputs):
      outputs = array_ops.reshape(outputs, original_shape)
      return outputs

  if self.fused:
    outputs = self._fused_batch_norm(inputs, training=training)
    if self.virtual_batch_size is not None:
      # Currently never reaches here since fused_batch_norm does not support
      # virtual batching
      outputs = undo_virtual_batching(outputs)
    return outputs

  # Compute the axes along which to reduce the mean / variance
  input_shape = inputs.shape
  ndims = len(input_shape)
  reduction_axes = [i for i in range(ndims) if i not in self.axis]
  if self.virtual_batch_size is not None:
    del reduction_axes[1]  # Do not reduce along virtual batch dim

  # Broadcasting only necessary for single-axis batch norm where the axis is
  # not the last dimension
  broadcast_shape = [1] * ndims
  broadcast_shape[self.axis[0]] = input_shape.dims[self.axis[0]].value

  def _broadcast(v):
    # Reshape `v` to `broadcast_shape` unless it is None, already has `ndims`
    # dimensions, or the reduction covers all but the last axis.
    if (v is not None and len(v.shape) != ndims and
        reduction_axes != list(range(ndims - 1))):
      return array_ops.reshape(v, broadcast_shape)
    return v

  scale, offset = _broadcast(self.gamma), _broadcast(self.beta)

  def _compose_transforms(scale, offset, then_scale, then_offset):
    # Composes (x * scale + offset) followed by (* then_scale + then_offset).
    if then_scale is not None:
      scale *= then_scale
      offset *= then_scale
    if then_offset is not None:
      offset += then_offset
    return (scale, offset)

  # Determine a boolean value for `training`: could be True, False, or None.
  training_value = tf_utils.constant_value(training)
  if training_value == False:  # pylint: disable=singleton-comparison,g-explicit-bool-comparison
    mean, variance = self.moving_mean, self.moving_variance
  else:
    if self.adjustment:
      adj_scale, adj_bias = self.adjustment(array_ops.shape(inputs))
      # Adjust only during training.
      adj_scale = tf_utils.smart_cond(training,
                                      lambda: adj_scale,
                                      lambda: array_ops.ones_like(adj_scale))
      adj_bias = tf_utils.smart_cond(training,
                                     lambda: adj_bias,
                                     lambda: array_ops.zeros_like(adj_bias))
      scale, offset = _compose_transforms(adj_scale, adj_bias, scale, offset)

    # Some of the computations here are not necessary when training==False
    # but not a constant. However, this makes the code simpler.
    keep_dims = self.virtual_batch_size is not None or len(self.axis) > 1
    mean, variance = self._moments(
        math_ops.cast(inputs, self._param_dtype),
        reduction_axes,
        keep_dims=keep_dims)

    moving_mean = self.moving_mean
    moving_variance = self.moving_variance

    # Use batch statistics when training, moving statistics otherwise.
    mean = tf_utils.smart_cond(training,
                               lambda: mean,
                               lambda: ops.convert_to_tensor_v2(moving_mean))
    variance = tf_utils.smart_cond(
        training,
        lambda: variance,
        lambda: ops.convert_to_tensor_v2(moving_variance))

    if self.virtual_batch_size is not None:
      # This isn't strictly correct since in ghost batch norm, you are
      # supposed to sequentially update the moving_mean and moving_variance
      # with each sub-batch. However, since the moving statistics are only
      # used during evaluation, it is more efficient to just update in one
      # step and should not make a significant difference in the result.
      new_mean = math_ops.reduce_mean(mean, axis=1, keepdims=True)
      new_variance = math_ops.reduce_mean(variance, axis=1, keepdims=True)
    else:
      new_mean, new_variance = mean, variance

    if self._support_zero_size_input():
      inputs_size = array_ops.size(inputs)
    else:
      inputs_size = None
    if self.renorm:
      r, d, new_mean, new_variance = self._renorm_correction_and_moments(
          new_mean, new_variance, training, inputs_size)
      # When training, the normalized values (say, x) will be transformed as
      # x * gamma + beta without renorm, and (x * r + d) * gamma + beta
      # = x * (r * gamma) + (d * gamma + beta) with renorm.
      r = _broadcast(array_ops.stop_gradient(r, name='renorm_r'))
      d = _broadcast(array_ops.stop_gradient(d, name='renorm_d'))
      scale, offset = _compose_transforms(r, d, scale, offset)

    def _do_update(var, value):
      """Compute the updates for mean and variance."""
      return self._assign_moving_average(var, value, self.momentum,
                                         inputs_size)

    def mean_update():
      # Only move the average when training; otherwise return it unchanged.
      true_branch = lambda: _do_update(self.moving_mean, new_mean)
      false_branch = lambda: self.moving_mean
      return tf_utils.smart_cond(training, true_branch, false_branch)

    def variance_update():
      """Update the moving variance."""

      def true_branch_renorm():
        # We apply epsilon as part of the moving_stddev to mirror the training
        # code path.
        moving_stddev = _do_update(self.moving_stddev,
                                   math_ops.sqrt(new_variance + self.epsilon))
        return self._assign_new_value(
            self.moving_variance,
            # Apply relu in case floating point rounding causes it to go
            # negative.
            K.relu(moving_stddev * moving_stddev - self.epsilon))

      if self.renorm:
        true_branch = true_branch_renorm
      else:
        true_branch = lambda: _do_update(self.moving_variance, new_variance)

      false_branch = lambda: self.moving_variance
      return tf_utils.smart_cond(training, true_branch, false_branch)

    # The update callables are passed unevaluated to `add_update`.
    self.add_update(mean_update)
    self.add_update(variance_update)

  # Statistics and affine parameters are cast to the input dtype before use.
  mean = math_ops.cast(mean, inputs.dtype)
  variance = math_ops.cast(variance, inputs.dtype)
  if offset is not None:
    offset = math_ops.cast(offset, inputs.dtype)
  if scale is not None:
    scale = math_ops.cast(scale, inputs.dtype)
  # TODO(reedwm): Maybe do math in float32 if given float16 inputs, if doing
  # math in float16 hurts validation accuracy of popular models like resnet.
  outputs = nn.batch_normalization(inputs,
                                   _broadcast(mean),
                                   _broadcast(variance),
                                   offset,
                                   scale,
                                   self.epsilon)
  # If some components of the shape got lost due to adjustments, fix that.
  outputs.set_shape(input_shape)

  if self.virtual_batch_size is not None:
    outputs = undo_virtual_batching(outputs)
  return outputs
def _prepare_local(self, var_device, var_dtype, apply_state):
    """Adds the Adagrad-specific entries to the per-(device, dtype) state.

    After the parent class has populated `apply_state`, the entry keyed by
    `(var_device, var_dtype)` is extended with:
      * `epsilon`: `self.epsilon` converted to a tensor of `var_dtype`.
      * `neg_lr_t`: the negation of the already-present `lr_t` entry.
      * `zero`: a scalar int64 zero tensor.

    Args:
      var_device: Device component of the state key.
      var_dtype: Dtype component of the state key.
      apply_state: Dict mapping `(device, dtype)` to a dict of prepared values.
    """
    super(Adagrad, self)._prepare_local(var_device, var_dtype, apply_state)
    local_state = apply_state[(var_device, var_dtype)]
    local_state.update({
        'epsilon': ops.convert_to_tensor_v2(self.epsilon, var_dtype),
        'neg_lr_t': -local_state['lr_t'],
        'zero': array_ops.zeros((), dtype=dtypes.int64),
    })
def call(self, y_true, y_pred):
    """Invokes the `Loss` instance.

    Computes a sparse softmax cross-entropy between `y_true` and `y_pred` and
    multiplies it by a focal modulation term built from `self.gamma`.

    Args:
      y_true: Ground truth values. Cast to `int64` before being used as
        labels and as gather indices.
      y_pred: The predicted values. Treated as logits when `self.from_logits`
        is true (or when they are recognised as the output of a `Softmax`
        op); otherwise they are clipped and passed through `log` first.

    Returns:
      Loss values in the form of a Tensor

    Raises:
      ValueError: If the rank of `y_pred` is unknown and `axis` is not -1.
        (`axis` is fixed to -1 in this method, so this branch is not
        reachable as written.)
    """
    gamma = self.gamma
    from_logits = self.from_logits
    # The class axis is hard-coded to the last dimension.
    axis = -1

    y_true = tf.cast(y_true, y_pred.dtype)
    y_true = ops.convert_to_tensor_v2(y_true)
    y_pred = ops.convert_to_tensor_v2(y_pred)
    # Keep a handle on the predictions exactly as they were passed in; `y_pred`
    # is rebound several times below, `probs` is not.
    # NOTE(review): when `self.from_logits` is true, `probs` holds raw logits
    # rather than probabilities -- confirm that the focal term below is
    # intended to be computed from them.
    probs = y_pred

    # Reformat y_pred shapes
    # If a graph tensor was produced by a `Softmax` op (and carries no
    # `_keras_history`), use that op's single input as logits instead.
    if (not from_logits and not isinstance(
            y_pred, (ops.EagerTensor, variables_module.Variable))
            and y_pred.op.type == 'Softmax') and not hasattr(
                y_pred, '_keras_history'):
        assert len(y_pred.op.inputs) == 1
        y_pred = y_pred.op.inputs[0]
        from_logits = True

    # Clip y_pred to a minimum and maximum value
    # Clipping to [epsilon, 1 - epsilon] keeps the argument of `log` strictly
    # positive.
    if not from_logits:
        epsilon_ = constant_op.constant(K.epsilon(), y_pred.dtype.base_dtype)
        y_pred = clip_ops.clip_by_value(y_pred, epsilon_, 1 - epsilon_)
        y_pred = math_ops.log(y_pred)

    # Get dimensions of predictions tensor
    if isinstance(y_pred.shape, (tuple, list)):
        output_rank = len(y_pred.shape)
    else:
        output_rank = y_pred.shape.ndims
    if output_rank is not None:
        # Normalise a negative axis, then move it last if it is not already.
        axis %= output_rank
        if axis != output_rank - 1:
            permutation = list(
                itertools.chain(range(axis), range(axis + 1, output_rank), [axis]))
            y_pred = array_ops.transpose(y_pred, perm=permutation)
    elif axis != -1:
        raise ValueError(
            'Cannot compute sparse categorical crossentropy with `axis={}` on an '
            'output tensor with unknown rank'.format(axis))

    # Reformat y_true shape and data type.
    y_true = cast(y_true, 'int64')
    output_shape = array_ops.shape_v2(y_pred)
    target_rank = y_true.shape.ndims
    # Flattening is needed only when both ranks are known and the labels do
    # not already have exactly one dimension fewer than the predictions.
    update_shape = (target_rank is not None and output_rank is not None
                    and target_rank != output_rank - 1)
    if update_shape:
        y_true = flatten(y_true)
        y_pred = array_ops.reshape(y_pred, [-1, output_shape[-1]])

    # Calculate cross-entropy loss
    # Both branches issue the same op; symbolic inputs are handled inside the
    # graph returned by `get_graph()`.
    if py_any(_is_symbolic_tensor(v) for v in [y_true, y_pred]):
        with get_graph().as_default():
            loss = nn.sparse_softmax_cross_entropy_with_logits_v2(
                labels=y_true, logits=y_pred)
    else:
        loss = nn.sparse_softmax_cross_entropy_with_logits_v2(
            labels=y_true, logits=y_pred)

    # Restore the leading dimensions that were collapsed by the flatten above.
    if update_shape and output_rank >= 3:
        loss = array_ops.reshape(loss, output_shape[:-1])

    # Calculate focal modulation to be applied
    gamma = tf.convert_to_tensor(gamma, dtype=tf.dtypes.float32)
    scalar_gamma = gamma.shape.rank == 0
    y_true_rank = y_true.shape.rank
    # A non-scalar gamma is indexed by the labels; a scalar one is used as is.
    if not scalar_gamma:
        gamma = tf.gather(gamma, y_true, axis=0, batch_dims=y_true_rank)
    # NOTE(review): the modulation is (1 - mean(probs, axis=1)) ** gamma, then
    # gathered with `y_true` as indices. This reduces over axis 1 of the
    # original predictions rather than selecting the true-class probability,
    # and `y_true` may have been flattened above while `probs` was not --
    # confirm shapes and intent against the callers.
    focal_modulation = K.pow(1 - tf.math.reduce_mean(probs, axis=1), gamma)
    focal_modulation = tf.gather(focal_modulation,
                                 y_true,
                                 axis=0,
                                 batch_dims=y_true_rank)
    loss = focal_modulation * loss
    return loss
def _convert_non_tensor(x): # Don't call `ops.convert_to_tensor_v2` on all `inputs` because # `SparseTensors` can't be converted to `Tensor`. if isinstance(x, (np.ndarray, float, int)): return ops.convert_to_tensor_v2(x) return x
def _prepare_local(self, var_device, var_dtype, apply_state):
    """Adds the Adadelta-specific entries to the per-(device, dtype) state.

    After the parent class has populated `apply_state`, the entry keyed by
    `(var_device, var_dtype)` is extended with:
      * `epsilon`: `self.epsilon` converted to a tensor of `var_dtype`.
      * `rho`: an identity op over the `rho` hyperparameter in `var_dtype`.

    Args:
      var_device: Device component of the state key.
      var_dtype: Dtype component of the state key.
      apply_state: Dict mapping `(device, dtype)` to a dict of prepared values.
    """
    super(Adadelta, self)._prepare_local(var_device, var_dtype, apply_state)
    local_state = apply_state[(var_device, var_dtype)]
    epsilon_t = ops.convert_to_tensor_v2(self.epsilon, var_dtype)
    rho_t = array_ops.identity(self._get_hyper('rho', var_dtype))
    local_state.update({'epsilon': epsilon_t, 'rho': rho_t})
def call(self, inputs):
    """Applies the layer: `cos(inputs @ kernel + bias)` computed in float32.

    Args:
      inputs: Value convertible to a tensor of `self.dtype`.

    Returns:
      The cosine of the bias-shifted matrix product of the float32-cast
      inputs with `self.kernel`.
    """
    as_tensor = ops.convert_to_tensor_v2(inputs, dtype=self.dtype)
    as_float32 = gen_math_ops.cast(as_tensor, dtypes.float32)
    projected = gen_math_ops.mat_mul(as_float32, self.kernel)
    shifted = nn.bias_add(projected, self.bias)
    return gen_math_ops.cos(shifted)
def generator():
    """Yields single-element tuples of step tensors: steps 0, 1, 2, twice.

    Each yielded item is a 1-tuple whose only member is the tensor built from
    the one-element list `[step]`.
    """
    num_passes, steps_per_pass = 2, 3
    for flat_index in range(num_passes * steps_per_pass):
        # Wrap around so the second pass repeats steps 0..2.
        step = flat_index % steps_per_pass
        step_tensor = ops.convert_to_tensor_v2([step])
        yield (step_tensor, )
def _split(t, indices): if t is None: return t t = ops.convert_to_tensor_v2(t) return array_ops.gather_v2(t, indices)
def testSaveWithSignatures(self):
    """Checks that custom signatures survive a SavedModel save/load round trip.

    A small Sequential model is trained for one batch and saved in the 'tf'
    format with two signatures: `predict`, which takes a float tensor, and
    `parse_and_predict`, which parses a serialized example first. The test
    then loads the model back and compares the outputs of both signatures
    against the in-memory model.
    """
    model = keras.models.Sequential()
    model.add(
        keras.layers.Dense(5,
                           input_shape=(3, ),
                           kernel_regularizer=regularizers.get('l2')))
    model.add(keras.layers.Dropout(0.5))
    model.add(
        keras.layers.Dense(4, kernel_regularizer=regularizers.get('l2')))

    input_arr = np.random.random((2, 3))
    target_arr = np.random.random((2, 4))

    model.compile(loss='mse', optimizer='rmsprop')
    model.train_on_batch(input_arr, target_arr)

    @def_function.function(
        input_signature=[tensor_spec.TensorSpec((None, 3))])
    def predict(inputs):
        return {'predictions': model(inputs)}

    feature_configs = {
        'inputs': parsing_ops.FixedLenFeature(shape=[2, 3],
                                              dtype=dtypes.float32)
    }

    @def_function.function(
        input_signature=[tensor_spec.TensorSpec([None], dtypes.string)])
    def parse_and_predict(examples):
        # Only the first serialized example of the batch is parsed.
        features = parsing_ops.parse_single_example(
            examples[0], feature_configs)
        return {
            'predictions': model(features['inputs']),
            'layer_1_outputs': model.layers[0](features['inputs'])
        }

    # Save exactly once, into the directory supplied by the fixture, so the
    # test does not write to any hard-coded filesystem location.
    saved_model_dir = self._save_model_dir()
    model.save(saved_model_dir,
               save_format='tf',
               signatures={
                   'predict': predict,
                   'parse_and_predict': parse_and_predict
               })

    loaded = keras_load.load(saved_model_dir)

    # Signature taking a plain float32 tensor.
    self.assertAllClose(
        model.predict(input_arr),
        loaded.signatures['predict'](ops.convert_to_tensor_v2(
            input_arr.astype('float32')))['predictions'])

    # Signature taking a serialized example built from the same inputs.
    feature = {
        'inputs':
        feature_pb2.Feature(float_list=feature_pb2.FloatList(
            value=input_arr.astype('float32').flatten()))
    }
    example = example_pb2.Example(features=feature_pb2.Features(
        feature=feature))
    outputs = loaded.signatures['parse_and_predict'](
        ops.convert_to_tensor_v2([example.SerializeToString()]))
    self.assertAllClose(model.predict(input_arr), outputs['predictions'])
    self.assertAllClose(model.layers[0](input_arr),
                        outputs['layer_1_outputs'])
def autocast_to_tensor(v):
    """Swaps a Python/NumPy literal for a placeholder of matching dtype/shape.

    Args:
      v: Any value.

    Returns:
      A placeholder with the dtype and shape that `v` has once converted to
      a tensor, when `v` is an int, float, bool, str, list, tuple,
      `np.ndarray` or `np.generic`; otherwise `v` unchanged.
    """
    literal_types = (int, float, bool, str, list, tuple, np.ndarray,
                     np.generic)
    if not isinstance(v, literal_types):
        return v
    # The converted tensor is used only to learn the dtype and shape.
    init_val = ops.convert_to_tensor_v2(v)
    return array_ops.placeholder(init_val.dtype, init_val.shape)