Example #1
    def test_apply_gradients(self, optimizer_cls, init_args,
                             expect_variable_names):
        optimizer = optimizer_cls(mesh=self.mesh, **init_args)

        self.assertEqual(self.evaluate(optimizer.iterations), 0)
        self.assertEqual(
            optimizer.iterations.layout,
            dtensor.Layout.replicated(self.mesh, rank=0),
        )

        variable_init_value = tf.ones([4, 4], dtype=tf.float32)
        variable_init_value = dtensor.copy_to_mesh(
            variable_init_value,
            layout=dtensor.Layout.replicated(self.mesh, rank=2),
        )
        model_variable = dtensor.DVariable(variable_init_value, trainable=True)

        grads = tf.ones_like(variable_init_value)
        optimizer.apply_gradients(zip([grads], [model_variable]))
        optimizer_variables = optimizer.variables

        self.assertEqual(self.evaluate(optimizer.iterations), 1)

        all_names = [var._shared_name for var in optimizer_variables]
        expect_variable_names.extend(["iteration", "learning_rate"])
        self.assertCountEqual(all_names, expect_variable_names)
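
The test above relies on a `self.mesh` fixture and a parameterized `optimizer_cls`. Below is a minimal, hedged sketch of the same flow outside the test harness; the virtual-CPU setup, the device list, and the `devices=` argument of `dtensor.create_mesh` are assumptions based on the public DTensor guide, and `optimizers` is assumed to be Keras' DTensor optimizer module.

import tensorflow as tf
from tensorflow.experimental import dtensor
from keras.dtensor import optimizers  # assumption: the module used by these tests

# Assumption: carve 4 virtual CPU devices out of the first physical CPU so a
# small single-host mesh can be built.
tf.config.set_logical_device_configuration(
    tf.config.list_physical_devices("CPU")[0],
    [tf.config.LogicalDeviceConfiguration()] * 4)
mesh = dtensor.create_mesh([("batch", 4)],
                           devices=[f"CPU:{i}" for i in range(4)])

optimizer = optimizers.Adam(mesh=mesh)

init = dtensor.copy_to_mesh(
    tf.ones([4, 4], dtype=tf.float32),
    dtensor.Layout.replicated(mesh, rank=2))
var = dtensor.DVariable(init, trainable=True)

grads = tf.ones_like(init)
optimizer.apply_gradients(zip([grads], [var]))
# optimizer.iterations has now advanced from 0 to 1, as the test asserts.
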
Example #2
 def _build_learning_rate(self, learning_rate):
     if isinstance(learning_rate,
                   learning_rate_schedule.LearningRateSchedule):
         # Create a variable to hold the current learning rate.
         # Note that the init value `learning_rate(self.iterations)` should have
         # the correct layout information from self.iterations.
         self._current_learning_rate = dtensor.DVariable(
             learning_rate(self.iterations),
             name='learning_rate',
             dtype=tf.float32)
         return learning_rate
     init_val = tf.constant(learning_rate, dtype=tf.float32)
     if self._mesh:
         init_val = dtensor.copy_to_mesh(
             init_val, dtensor.Layout.replicated(self._mesh, rank=0))
     return dtensor.DVariable(init_val, name='learning_rate')
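
A brief, hedged sketch of how the two branches above are reached from the public constructor (reusing `mesh` and `optimizers` from the sketch after Example #1; `ExponentialDecay` is just one example of a `LearningRateSchedule`):

# Constant float: the second branch wraps it in a replicated scalar DVariable.
adam_const = optimizers.Adam(learning_rate=0.001, mesh=mesh)

# Schedule: the schedule object itself is returned, and an extra
# `learning_rate` DVariable initialized from `schedule(iterations)` is kept so
# the current value shares the (replicated, rank-0) layout of `iterations`.
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=100, decay_rate=0.9)
adam_sched = optimizers.Adam(learning_rate=schedule, mesh=mesh)
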
Example #3
 def _create_iteration_variable(self):
     init_val = tf.constant(0, dtype=tf.int64)
     if self._mesh:
         init_val = dtensor.copy_to_mesh(
             init_val, dtensor.Layout.replicated(self._mesh, rank=0))
     with tf.init_scope():
         # Lift the variable creation to init scope to avoid environment issues.
         self._iterations = dtensor.DVariable(init_val, name='iteration')
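
A hedged aside on the `tf.init_scope()` used above: it lifts variable creation out of any surrounding `tf.function` trace, so the counter is created eagerly and exactly once. A generic illustration of that pattern (plain TensorFlow, not Keras code):

class Counter:
    def __init__(self):
        self._iterations = None

    def _create_iteration_variable(self):
        if self._iterations is None:
            with tf.init_scope():
                # Created eagerly even when first touched inside a tf.function.
                self._iterations = tf.Variable(0, dtype=tf.int64,
                                               name="iteration")

    @tf.function
    def step(self):
        self._create_iteration_variable()
        return self._iterations.assign_add(1)

counter = Counter()
counter.step()  # the counter variable is lifted out of the traced graph
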
Example #4
 def test_build_index_dict(self):
   optimizer = optimizers.Adam(mesh=self.mesh)
   variable_init_value = tf.ones(
       shape=(), dtype=tf.float32,
       layout=dtensor.Layout.replicated(self.mesh, rank=0))
   var_list = [dtensor.DVariable(variable_init_value, name=f'var{i}')
               for i in range(10)]
   optimizer._build_index_dict(var_list)
   self.assertEqual(optimizer._index_dict[optimizer._var_key(var_list[7])], 7)
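
A hedged sketch of why this index dict exists: in the experimental Keras optimizers these tests target, per-variable slots are kept in plain lists and looked up by position through `_index_dict`. The slot list name `_momentums` is an assumption about Adam's internals; `mesh` and `optimizers` are as in the sketch after Example #1.

opt = optimizers.Adam(mesh=mesh)
layout = dtensor.Layout.replicated(mesh, rank=0)
var_list = [
    dtensor.DVariable(dtensor.copy_to_mesh(tf.constant(1.0), layout),
                      name=f"var{i}")
    for i in range(3)
]
opt.build(var_list)  # calls _build_index_dict internally, among other setup
idx = opt._index_dict[opt._var_key(var_list[1])]
momentum_for_var1 = opt._momentums[idx]  # the slot paired with var_list[1]
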
Example #5
 def test_add_variable_from_reference(self):
   optimizer = optimizers.Adam(mesh=self.mesh)
   variable_init_value = tf.ones(
       [4, 4], dtype=tf.float32,
       layout=dtensor.Layout.replicated(self.mesh, rank=2))
   model_variable = dtensor.DVariable(variable_init_value,
                                      trainable=True,
                                      name='tmp')
   state_variable = optimizer.add_variable_from_reference(
       model_variable, 'test')
   self.assertEqual(state_variable._shared_name, 'test/tmp')
   self.assertAllClose(self.evaluate(state_variable), tf.zeros([4, 4]))
   # Make sure the variable contains the correct layout info
   self.assertEqual(state_variable.layout, model_variable.layout)
Example #6
def _create_dvariable(layout_map, object_path, variable):
  """Create a new variable instead of using the LazyInitVariable.

  We choose to do this because, even though the LazyInitVariable might behave
  like a normal tf.Variable/DVariable, it is not future-proof against any new
  changes to the variable class. It would also fail isinstance checks in
  Python, which could affect users' code when they filter variables by type.

  Args:
    layout_map: a LayoutMap which contains the variable_object_path (string) ->
      Layout.
    object_path: string, the object attribute path for the variable.
    variable: LazyInitVariable which will be replaced by the newly created
      tf.Variable.
  Returns:
    A new tf.Variable with correct layout information.
  """
  # TODO(b/228209108): Revisit this in future and see if we can just reuse the
  # LazyInitVariable rather than creating a new tf.Variable instance.
  layout = layout_map[object_path]
  if layout is None:
    variable_rank = variable.shape.rank
    layout = dtensor.Layout.replicated(
        mesh=layout_map.get_default_mesh(),
        rank=variable_rank)
  init_val = variable._initial_value  # pylint: disable=protected-access
  if callable(init_val):
    with lazy_variable.disable_init_variable_creator():
      init_val = utils.call_with_layout(init_val, layout)
  else:
    # The init value is probably already created as a tensor, we will just copy
    # it to mesh and give it a proper layout.
    init_val = dtensor.copy_to_mesh(init_val, layout)
  # Use the original variable name for the new DVariable. TF appends a ":0"
  # suffix to the variable name, which we strip here.
  variable_name = variable.name
  if variable_name.endswith(':0'):
    variable_name = variable_name[:-2]
  new_variable = dtensor.DVariable(init_val,
                                   trainable=variable.trainable,
                                   name=variable_name)
  return new_variable
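
A hedged sketch of the LayoutMap lookup this function depends on, assuming the `LayoutMap` class from keras.dtensor.layout_map (the module this helper lives in); exact import paths may differ across Keras versions, and `mesh` is as in the sketch after Example #1.

from keras.dtensor import layout_map as layout_map_lib

layout_map = layout_map_lib.LayoutMap(mesh=mesh)
layout_map["d1.kernel"] = dtensor.Layout([dtensor.UNSHARDED, "batch"], mesh)

# A path with an entry gets that layout; an unknown path returns None, in
# which case _create_dvariable falls back to a fully replicated layout of the
# variable's rank on the map's default mesh.
assert layout_map["d1.kernel"] is not None
assert layout_map["d1.bias"] is None
assert layout_map.get_default_mesh() is mesh
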
Example #7
    def add_variable_from_reference(self,
                                    model_variable,
                                    variable_name,
                                    initial_value=None):
        """Create an optimizer variable from model variable.

        Create an optimizer variable based on the information of the model
        variable. For example, in the SGD optimizer with momentum, for each
        model variable a corresponding momentum variable is created with the
        same shape and dtype.

        Args:
          model_variable: The corresponding model variable to the optimizer variable
            to be created.
          variable_name: The name prefix of the optimizer variable to be
            created. The created variable's name will follow the pattern
            `{variable_name}/{model_variable.name}`, e.g., `momentum/dense_1`.
          initial_value: The initial value of the optimizer variable. If None,
            the value defaults to 0.

        Returns:
          An optimizer variable.
        """
        if initial_value is None:
            # Use tf.zeros_like which will propagate the layout information from the
            # model weights if any.
            initial_value = tf.zeros_like(model_variable)
        elif isinstance(initial_value, tf.Tensor):
            initial_value = dtensor.copy_to_mesh(
                initial_value,
                dtensor.Layout.replicated(self._mesh,
                                          rank=initial_value.shape.rank),
            )
        return dtensor.DVariable(
            initial_value=initial_value,
            name=f"{variable_name}/{model_variable._shared_name}",
            dtype=model_variable.dtype,
            trainable=False,
        )
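
A hedged sketch of the explicit `initial_value` branch, which the test in Example #5 does not cover (reusing `mesh` and `optimizers` from the sketch after Example #1, with a model variable like the one in Example #5):

opt = optimizers.Adam(mesh=mesh)
layout = dtensor.Layout.replicated(mesh, rank=2)
weights = dtensor.DVariable(
    dtensor.copy_to_mesh(tf.ones([4, 4]), layout), trainable=True, name="tmp")

# An eager host tensor passed as initial_value is copied to the mesh with a
# replicated layout of matching rank before the non-trainable slot is built.
average = opt.add_variable_from_reference(
    weights, "average", initial_value=tf.fill([4, 4], 0.5))
# `average` starts at 0.5 everywhere instead of the zeros_like default, and
# its name follows the `average/tmp` pattern described in the docstring above.
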
Example #8
def make_variable(
    name,
    shape=None,
    dtype=tf.float32,
    initializer=None,
    trainable=None,
    caching_device=None,
    validate_shape=True,
    constraint=None,
    use_resource=None,
    collections=None,
    synchronization=tf.VariableSynchronization.AUTO,
    aggregation=tf.VariableAggregation.NONE,
    partitioner=None,
    layout=None,
):
    """Temporary util to create a variable (relies on `variable_scope.variable`).

    Some reuse-related technicalities prevent us from using
    `variable_scope.get_variable()` directly, so we use a subcomponent
    that has fewer constraints (`variable_scope.variable()`).

    In the longer term, it seems like a similar "default variable creator"
    method should exist in `Trackable` instead. When this happens, we can get
    rid of this temporary solution.

    TODO(fchollet): remove this method when no longer needed.

    Args:
      name: Variable name.
      shape: Variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: Initializer instance (callable).
      trainable: Whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable. `trainable` defaults to `True` unless
        `synchronization` is set to `ON_READ`.
      caching_device: Passed to `tf.Variable`.
      validate_shape: Passed to `tf.Variable`.
      constraint: Constraint instance (callable).
      use_resource: Whether to use a `ResourceVariable`.
      collections: List of graph collections keys. The new variable is added to
        these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses
        when to synchronize. If `synchronization` is set to `ON_READ`,
        `trainable` must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: Not handled at this time.
      layout: Optional DTensor layout, used for creating a DVariable.

    Returns:
      Variable instance.
    """
    initializing_from_value = False
    if initializer is not None and not callable(initializer):
        initializing_from_value = True

    if initializing_from_value:
        init_val = initializer
        variable_dtype = None
    else:
        # Instantiate initializer if provided initializer is a type object.
        if tf_inspect.isclass(initializer):
            initializer = initializer()
        if layout:
            init_val = functools.partial(initializer,
                                         shape,
                                         dtype=dtype,
                                         layout=layout)
        else:
            init_val = functools.partial(initializer, shape, dtype=dtype)
        variable_dtype = dtype.base_dtype

    variable_shape = tf.TensorShape(shape)

    if use_resource is None:
        use_resource = True

    if layout is None:
        # In theory, if `use_resource` is True and `collections` is empty
        # (that is to say, in TF2), we can use tf.Variable.
        # However, this breaks legacy (Estimator) checkpoints because
        # it changes variable names. Remove this when V1 is fully deprecated.
        return tf1.Variable(
            initial_value=init_val,
            name=name,
            trainable=trainable,
            caching_device=caching_device,
            dtype=variable_dtype,
            validate_shape=validate_shape,
            constraint=constraint,
            use_resource=use_resource,
            collections=collections,
            synchronization=synchronization,
            aggregation=aggregation,
            shape=variable_shape if variable_shape else None,
        )
    else:
        return dtensor.DVariable(
            initial_value=init_val,
            name=name,
            trainable=trainable,
            caching_device=caching_device,
            dtype=variable_dtype,
            validate_shape=validate_shape,
            constraint=constraint,
            collections=collections,
            synchronization=synchronization,
            aggregation=aggregation,
            shape=variable_shape if variable_shape else None,
        )
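
A hedged usage sketch for `make_variable`, contrasting the plain and DTensor paths (reusing `mesh` from the earlier sketches; that the Keras initializer accepts a `layout=` kwarg is implied by the `functools.partial` call above):

glorot = tf.keras.initializers.GlorotUniform()

# Without a layout: a regular (resource) variable created via tf1.Variable.
kernel = make_variable(
    "kernel", shape=[4, 8], dtype=tf.float32, initializer=glorot,
    trainable=True)

# With a layout: the initializer is invoked with `layout=...` and the result
# is wrapped in a dtensor.DVariable carrying that layout.
d_kernel = make_variable(
    "d_kernel", shape=[4, 8], dtype=tf.float32, initializer=glorot,
    trainable=True,
    layout=dtensor.Layout.replicated(mesh, rank=2))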