Example #1
    def testSparseRepeatedIndices(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with ops.Graph().as_default():
            for dtype in _DATA_TYPES:
                var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)

                repeated_index_update_var = variables.Variable(var_np,
                                                               dtype=dtype)
                aggregated_update_var = variables.Variable(var_np, dtype=dtype)
                grad_repeated_index = indexed_slices.IndexedSlices(
                    constant_op.constant([0.1, 0.1], shape=[2, 1],
                                         dtype=dtype),
                    constant_op.constant([1, 1]), constant_op.constant([2, 1]))
                grad_aggregated = indexed_slices.IndexedSlices(
                    constant_op.constant([0.2], shape=[1, 1], dtype=dtype),
                    constant_op.constant([1]), constant_op.constant([2, 1]))
                repeated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_repeated_index, repeated_index_update_var)
                ])
                aggregated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_aggregated, aggregated_update_var)
                ])
                self.evaluate(variables.global_variables_initializer())
                self.assertAllClose(self.evaluate(aggregated_update_var),
                                    self.evaluate(repeated_index_update_var))
                for _ in range(3):
                    self.evaluate(repeated_update)
                    self.evaluate(aggregated_update)
                    self.assertAllClose(
                        self.evaluate(aggregated_update_var),
                        self.evaluate(repeated_index_update_var))
    def applyOptimizer(self, opt, steps=5, is_sparse=False):
        if is_sparse:
            var0 = variables.Variable([[1.0], [2.0]])
            var1 = variables.Variable([[3.0], [4.0]])
            grads0 = indexed_slices.IndexedSlices(
                constant_op.constant([0.1], shape=[1, 1]),
                constant_op.constant([0]), constant_op.constant([2, 1]))
            grads1 = indexed_slices.IndexedSlices(
                constant_op.constant([0.02], shape=[1, 1]),
                constant_op.constant([1]), constant_op.constant([2, 1]))
        else:
            var0 = variables.Variable([1.0, 2.0])
            var1 = variables.Variable([3.0, 4.0])
            grads0 = constant_op.constant([0.1, 0.2])
            grads1 = constant_op.constant([0.01, 0.02])

        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())

        v0_val, v1_val = self.evaluate([var0, var1])
        if is_sparse:
            self.assertAllClose([[1.0], [2.0]], v0_val)
            self.assertAllClose([[3.0], [4.0]], v1_val)
        else:
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

        # Run ProximalAdagrad for a few steps
        for _ in range(steps):
            update.run()

        v0_val, v1_val = self.evaluate([var0, var1])
        return v0_val, v1_val
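
The testSparseRepeatedIndices case above relies on sparse gradients with duplicate indices being aggregated by summation, so two 0.1 updates aimed at row 1 behave like a single 0.2 update. A minimal numpy sketch of that aggregation (illustrative only, not part of the test):

import numpy as np

dense = np.zeros((2, 1))
np.add.at(dense, [1, 1], [[0.1], [0.1]])   # duplicate indices are summed
assert np.allclose(dense, [[0.0], [0.2]])  # equivalent to a single 0.2 update
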
Example #3
  def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
    var_device, var_dtype = var.device, var.dtype.base_dtype
    coefficients = ((apply_state or {}).get((var_device, var_dtype)) or
                    self._fallback_apply_state(var_device, var_dtype))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, 'm')
    m_scaled_g_values = grad * coefficients['one_minus_beta_1_t']
    m.assign(m * coefficients['beta_1_t'])
    m.scatter_add(indexed_slices.IndexedSlices(m_scaled_g_values, indices))

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, 'v')
    v_scaled_g_values = (grad * grad) * coefficients['one_minus_beta_2_t']
    v.assign(v * coefficients['beta_2_t'])
    v.scatter_add(indexed_slices.IndexedSlices(v_scaled_g_values, indices))

    if not self.amsgrad:
      var.assign_sub(coefficients['lr'] * m /
                     (math_ops.sqrt(v) + coefficients['epsilon']))
    else:
      v_hat = self.get_slot(var, 'vhat')
      v_hat.assign(math_ops.maximum(v_hat, v))
      var.assign_sub(coefficients['lr'] * m /
                     (math_ops.sqrt(v_hat) + coefficients['epsilon']))
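
The comments in _resource_apply_sparse above state the Adam moment updates. A minimal numpy sketch of the same equations in their dense form (illustrative; it assumes coefficients['lr'] already folds in any bias correction and uses generic hyper-parameter names):

import numpy as np

def adam_step_numpy(var, g, m, v, lr, beta_1=0.9, beta_2=0.999, epsilon=1e-7):
  # m_t = beta1 * m + (1 - beta1) * g_t
  m = beta_1 * m + (1.0 - beta_1) * g
  # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
  v = beta_2 * v + (1.0 - beta_2) * (g * g)
  # var_t = var - lr * m_t / (sqrt(v_t) + epsilon)
  var = var - lr * m / (np.sqrt(v) + epsilon)
  return var, m, v
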
  def testSparseBasicWithLearningRateDecay(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with ops.Graph().as_default():
      for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
        var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
        var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
        grads0 = indexed_slices.IndexedSlices(
            constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
            constant_op.constant([0]), constant_op.constant([2, 1]))
        grads1 = indexed_slices.IndexedSlices(
            constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
            constant_op.constant([1]), constant_op.constant([2, 1]))
        sgd_op = gradient_descent.SGD(
            3.0, decay=0.5).apply_gradients(
                zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())
        # Run 2 steps of sgd
        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                           self.evaluate(var0))
        self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                           self.evaluate(var1))

        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType(
            [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], self.evaluate(var1))
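
The assertions above encode the decaying learning rate: assuming the legacy inverse-time schedule lr_t = lr / (1 + decay * t), the first step uses 3.0 and the second uses 2.0. A short illustrative check:

lr, decay, grad = 3.0, 0.5, 0.1
step1 = 1.0 - (lr / (1 + decay * 0)) * grad    # 1.0 - 3.0 * 0.1
step2 = step1 - (lr / (1 + decay * 1)) * grad  # ... - 2.0 * 0.1
assert abs(step2 - (1.0 - 3.0 * 0.1 - 2.0 * 0.1)) < 1e-12
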
Example #5
 def testSparseBasic(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         # train.GradientDescentOptimizer is a V1-only API.
         with ops.Graph().as_default(), self.cached_session():
             var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
             var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
             grads0 = indexed_slices.IndexedSlices(
                 constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
                 constant_op.constant([0]), constant_op.constant([2, 1]))
             grads1 = indexed_slices.IndexedSlices(
                 constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
                 constant_op.constant([1]), constant_op.constant([2, 1]))
             sgd_op = gradient_descent.GradientDescentOptimizer(
                 3.0).apply_gradients(zip([grads0, grads1], [var0, var1]))
             self.evaluate(variables.global_variables_initializer())
             # Fetch params to validate initial values
             self.assertAllCloseAccordingToType([[1.0], [2.0]],
                                                self.evaluate(var0))
             self.assertAllCloseAccordingToType([[3.0], [4.0]],
                                                self.evaluate(var1))
             # Run 1 step of sgd
             sgd_op.run()
             # Validate updated params
             self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                                self.evaluate(var0))
             self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                                self.evaluate(var1))
 def testSparseRepeatedIndices(self):
     with ops.Graph().as_default():
         for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
             with self.cached_session():
                 repeated_index_update_var = variables.Variable(
                     [[1.0], [2.0]], dtype=dtype)
                 aggregated_update_var = variables.Variable([[1.0], [2.0]],
                                                            dtype=dtype)
                 grad_repeated_index = indexed_slices.IndexedSlices(
                     constant_op.constant([0.1, 0.1],
                                          shape=[2, 1],
                                          dtype=dtype),
                     constant_op.constant([1, 1]),
                     constant_op.constant([2, 1]))
                 grad_aggregated = indexed_slices.IndexedSlices(
                     constant_op.constant([0.2], shape=[1, 1], dtype=dtype),
                      constant_op.constant([1]), constant_op.constant([2, 1]))
                 repeated_update = adagrad.AdagradOptimizer(
                     3.0).apply_gradients([(grad_repeated_index,
                                            repeated_index_update_var)])
                 aggregated_update = adagrad.AdagradOptimizer(
                     3.0).apply_gradients([(grad_aggregated,
                                            aggregated_update_var)])
                 self.evaluate(variables.global_variables_initializer())
                 self.assertAllClose(
                     aggregated_update_var,
                     self.evaluate(repeated_index_update_var))
                 for _ in range(3):
                     repeated_update.run()
                     aggregated_update.run()
                     self.assertAllClose(
                         aggregated_update_var,
                         self.evaluate(repeated_index_update_var))
 def testSparseBasic(self):
     with ops.Graph().as_default():
         for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
             with self.cached_session():
                 var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
                 var1 = variables.Variable([[3.0], [4.0]], dtype=dtype)
                 grads0 = indexed_slices.IndexedSlices(
                     constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
                      constant_op.constant([0]), constant_op.constant([2, 1]))
                 grads1 = indexed_slices.IndexedSlices(
                      constant_op.constant([0.01], shape=[1, 1], dtype=dtype),
                      constant_op.constant([1]), constant_op.constant([2, 1]))
                 ada_opt = adagrad.AdagradOptimizer(
                     3.0, initial_accumulator_value=0.1)
                 ada_update = ada_opt.apply_gradients(
                     zip([grads0, grads1], [var0, var1]))
                 self.evaluate(variables.global_variables_initializer())
                 # Fetch params to validate initial values
                 self.assertAllClose([[1.0], [2.0]], self.evaluate(var0))
                 self.assertAllClose([[3.0], [4.0]], self.evaluate(var1))
                  # Run 3 steps of Adagrad
                 for _ in range(3):
                     ada_update.run()
                 # Validate updated params
                 self.assertAllCloseAccordingToType(
                     np.array([[-1.6026098728179932], [2.0]]),
                     self.evaluate(var0))
                 self.assertAllCloseAccordingToType(
                     np.array([[3.0], [3.715679168701172]]),
                     self.evaluate(var1))
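
The numbers asserted above follow from the standard Adagrad update with initial_accumulator_value=0.1: each step does accum += g**2 and var -= lr * g / sqrt(accum). A short numpy check (illustrative only):

import numpy as np

var, accum, g, lr = 1.0, 0.1, 0.1, 3.0
for _ in range(3):
  accum += g * g
  var -= lr * g / np.sqrt(accum)
print(var)  # ~ -1.60261, matching the first element of var0 above
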
Example #8
    def applyOptimizer(self, opt, dtype, steps=5, is_sparse=False):
        if is_sparse:
            var0 = variables.Variable([[0.0], [0.0]], dtype=dtype)
            var1 = variables.Variable([[0.0], [0.0]], dtype=dtype)
            grads0 = indexed_slices.IndexedSlices(
                constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
                constant_op.constant([0]), constant_op.constant([2, 1]))
            grads1 = indexed_slices.IndexedSlices(
                constant_op.constant([0.02], shape=[1, 1], dtype=dtype),
                constant_op.constant([1]), constant_op.constant([2, 1]))
        else:
            var0 = variables.Variable([0.0, 0.0], dtype=dtype)
            var1 = variables.Variable([0.0, 0.0], dtype=dtype)
            grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
            grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())

        sess = ops.get_default_session()
        v0_val, v1_val = self.evaluate([var0, var1])
        if is_sparse:
            self.assertAllCloseAccordingToType([[0.0], [0.0]], v0_val)
            self.assertAllCloseAccordingToType([[0.0], [0.0]], v1_val)
        else:
            self.assertAllCloseAccordingToType([0.0, 0.0], v0_val)
            self.assertAllCloseAccordingToType([0.0, 0.0], v1_val)

        # Run Ftrl for a few steps
        for _ in range(steps):
            update.run()

        v0_val, v1_val = self.evaluate([var0, var1])
        return v0_val, v1_val
Example #9
    def doTestSparse(self, use_resource=False):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                if use_resource:
                    var0 = resource_variable_ops.ResourceVariable(var0_np)
                    var1 = resource_variable_ops.ResourceVariable(var1_np)
                else:
                    var0 = variables.RefVariable(var0_np)
                    var1 = variables.RefVariable(var1_np)
                grads0_np_indices = np.array([0, 1], dtype=np.int32)
                grads0 = indexed_slices.IndexedSlices(
                    constant_op.constant(grads0_np),
                    constant_op.constant(grads0_np_indices),
                    constant_op.constant([2]))
                grads1_np_indices = np.array([0, 1], dtype=np.int32)
                grads1 = indexed_slices.IndexedSlices(
                    constant_op.constant(grads1_np),
                    constant_op.constant(grads1_np_indices),
                    constant_op.constant([2]))
                opt = adam.AdamOptimizer()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                beta1_power, beta2_power = opt._get_beta_accumulators()

                # Run 3 steps of Adam
                for t in range(1, 4):
                    self.assertAllCloseAccordingToType(
                        0.9**t, self.evaluate(beta1_power))
                    self.assertAllCloseAccordingToType(
                        0.999**t, self.evaluate(beta2_power))
                    update.run()

                    var0_np, m0, v0 = adam_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adam_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
 def replica_fn():
   value1 = indexed_slices.IndexedSlices(
       values=array_ops.identity([[1.0]]),
       indices=array_ops.identity([0]),
       dense_shape=array_ops.identity([5, 1]))
   value2 = indexed_slices.IndexedSlices(
       values=array_ops.identity([[2.0]]),
       indices=array_ops.identity([0]),
       dense_shape=array_ops.identity([5, 1]))
   rep_ctx = ds_context.get_replica_context()
   reduced = rep_ctx.all_reduce(reduce_util.ReduceOp.SUM, [value1, value2])
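   # `reduced` mirrors the input structure: a list of two IndexedSlices,
   # each summed across replicas.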
   return reduced
Example #11
    def testSparseBasic(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with ops.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0, 0.01],
                                     dtype=dtype.as_numpy_dtype)

                var0 = variables.Variable(var0_np)
                var1 = variables.Variable(var1_np)
                grads0_np_indices = np.array([0, 2], dtype=np.int32)
                grads0 = indexed_slices.IndexedSlices(
                    constant_op.constant(grads0_np[grads0_np_indices]),
                    constant_op.constant(grads0_np_indices),
                    constant_op.constant([3]))
                grads1_np_indices = np.array([0, 2], dtype=np.int32)
                grads1 = indexed_slices.IndexedSlices(
                    constant_op.constant(grads1_np[grads1_np_indices]),
                    constant_op.constant(grads1_np_indices),
                    constant_op.constant([3]))
                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate)
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

                accum0_np = np.array([0.1, 0.1, 0.1],
                                     dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1, 0.1],
                                     dtype=dtype.as_numpy_dtype)

                # Run 3 steps of Adagrad
                for _ in range(3):
                    self.evaluate(ada_update)

                    var0_np, accum0_np = sparse_adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np_indices,
                        grads0_np[grads0_np_indices], learning_rate)
                    var1_np, accum1_np = sparse_adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np_indices,
                        grads1_np[grads1_np_indices], learning_rate)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
Example #12
    def testResourceSparse(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with ops.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype)  # pylint: disable=cell-var-from-loop
                m0, v0 = zero_slots(), zero_slots()
                m1, v1 = zero_slots(), zero_slots()
                var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = variables.Variable(var0_np)
                var1 = variables.Variable(var1_np)

                grads0_np_indices = np.array([0, 1], dtype=np.int32)
                grads0 = indexed_slices.IndexedSlices(
                    constant_op.constant(grads0_np),
                    constant_op.constant(grads0_np_indices),
                    constant_op.constant([3]))
                grads1_np_indices = np.array([2, 1], dtype=np.int32)
                grads1 = indexed_slices.IndexedSlices(
                    constant_op.constant(grads1_np),
                    constant_op.constant(grads1_np_indices),
                    constant_op.constant([3]))
                opt = adamax.Adamax()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0, 3.0], var0)
                self.assertAllClose([4.0, 5.0, 6.0], var1)

                beta1_power = get_beta_accumulators(opt, dtype)

                # Run 3 steps of Adamax
                for t in range(3):
                    self.assertAllCloseAccordingToType(0.9**(t + 1),
                                                       beta1_power)
                    update.run()

                    var0_np, m0, v0 = adamax_sparse_update_numpy(
                        var0_np, grads0_np_indices, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_sparse_update_numpy(
                        var1_np, grads1_np_indices, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)
Example #13
 def replica_fn():
     value = (array_ops.identity(1.0),
              indexed_slices.IndexedSlices(
                  values=array_ops.identity([[1.0]]),
                  indices=array_ops.identity([0]),
                  dense_shape=array_ops.identity([5, 1])),
              array_ops.identity(2.0),
              indexed_slices.IndexedSlices(
                  values=array_ops.identity([[2.0]]),
                  indices=array_ops.identity([1]),
                  dense_shape=array_ops.identity([5, 1])))
     reduced = strategy.extended._replica_ctx_all_reduce(
         reduce_util.ReduceOp.SUM, value)
     return reduced
  def _VariableRankTest(self,
                        tf_scatter,
                        vtype,
                        itype,
                        repeat_indices=False,
                        updates_are_scalar=False,
                        method=False):
    np.random.seed(8)
    with self.cached_session(use_gpu=False):
      for indices_shape in (2,), (3, 7), (3, 4, 7):
        for extra_shape in (), (5,), (5, 9):
          # Generate random indices with no duplicates for easy numpy comparison
          sparse_dim = len(indices_shape) - 1
          indices = np.random.randint(
              indices_shape[sparse_dim], size=indices_shape, dtype=itype)
          updates = _AsType(
              np.random.randn(*(indices_shape + extra_shape)), vtype)

          old = _AsType(np.random.randn(*(indices_shape + extra_shape)), vtype)

          # Scatter via numpy
          new = old.copy()
          np_scatter = _TF_OPS_TO_NUMPY[tf_scatter]
          np_scatter(new, indices, updates)
          # Scatter via tensorflow
          ref = variables.Variable(old)
          self.evaluate(variables.variables_initializer([ref]))

          if method:
            # IndexedSlices takes (values, indices); pass `updates` as the values.
            ref.batch_scatter_update(
                indexed_slices.IndexedSlices(updates, indices))
          else:
            self.evaluate(tf_scatter(ref, indices, updates))
          self.assertAllClose(ref, new)
def _ragged_gather_grad(op, *grads):
    """Gradient for RaggedGather op."""
    param_nested_splits = op.inputs[:-2]
    param_inner_values = op.inputs[-2]
    indices = op.inputs[-1]
    grad_inner_values = grads[-1]

    # For each row in `params`, find the range of values in `params.inner_values`
    # that is covered by that row.  In particular, the values in row `i` are
    # `param_inner_values[combined_splits[i]:combined_splits[i+1]]`.
    combined_splits = param_nested_splits[0]
    for row_splits in param_nested_splits[1:]:
        combined_splits = array_ops.gather(row_splits, combined_splits)

    # The outer dimensions of `indices` correspond 1:1 with the outer dimensions
    # of `ragged_grad` that are encoded by `grad_nested_splits`.  Thus, the
    # flattened `indices` correspond 1:1 with `grad_inner_values`.
    flat_indices = array_ops.reshape(indices, [-1])

    # Build the indices for an IndexedSlices whose values will be
    # `grad_inner_values`.
    grad_indices = ragged_math_ops.range(
        array_ops.gather(combined_splits, flat_indices),
        array_ops.gather(combined_splits[1:], flat_indices)).values

    param_inner_values_grad = indexed_slices.IndexedSlices(
        values=grad_inner_values,
        indices=grad_indices,
        dense_shape=array_ops.shape(param_inner_values))
    return [None
            for _ in param_nested_splits] + [param_inner_values_grad, None]
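
A toy numpy illustration of the index arithmetic described in the comments above (hypothetical splits, not library code): with combined_splits [0, 2, 5, 6], row i of params covers inner values [splits[i], splits[i+1]), so gathering rows [2, 0] spreads the gradient over inner-value indices [5] and [0, 1].

import numpy as np

combined_splits = np.array([0, 2, 5, 6])
flat_indices = np.array([2, 0])
grad_indices = np.concatenate(
    [np.arange(combined_splits[i], combined_splits[i + 1])
     for i in flat_indices])
print(grad_indices)  # [5 0 1]
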
def _make_indexed_slices(values, indices, dense_shape, device):
    with ops.device(device):
        tensor = indexed_slices_lib.IndexedSlices(
            values=constant_op.constant(values),
            indices=constant_op.constant(indices),
            dense_shape=constant_op.constant(dense_shape))
    return tensor
Example #17
def aggregate_indexed_slices_gradients(grads):
  """Aggregates gradients containing `IndexedSlices`s."""
  if len(grads) < 1:
    return None
  if len(grads) == 1:
    return grads[0]
  grads = [g for g in grads if g is not None]
  # If any gradient is a `Tensor`, sum them up and return a dense tensor
  # object.
  if any(isinstance(g, ops.Tensor) for g in grads):
    return math_ops.add_n(grads)

  # The following `_as_indexed_slices_list` casts ids of IndexedSlices into
  # int64. It is to make sure the inputs of `concat` all have the same data
  # type.
  grads = math_ops._as_indexed_slices_list(grads)  # pylint: disable=protected-access

  grads = [flatten_nested_indexed_slices(x) for x in grads]
  # Form IndexedSlices out of the concatenated values and indices.
  concat_grad = indexed_slices.IndexedSlices(
      array_ops.concat([x.values for x in grads], axis=0),
      array_ops.concat([x.indices for x in grads], axis=0),
      grads[0].dense_shape)

  return concat_grad
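
A usage sketch of the aggregation above (illustrative; it uses the public tf.IndexedSlices API rather than the internal modules): two sparse gradients over the same dense shape are combined by concatenating values and indices, and duplicate indices are only summed when the result is densified.

import tensorflow as tf

g1 = tf.IndexedSlices(tf.constant([[1.0]]), tf.constant([0]), tf.constant([4, 1]))
g2 = tf.IndexedSlices(tf.constant([[2.0]]), tf.constant([3]), tf.constant([4, 1]))
agg = tf.IndexedSlices(
    tf.concat([g1.values, g2.values], axis=0),
    tf.concat([g1.indices, g2.indices], axis=0),
    g1.dense_shape)
print(tf.convert_to_tensor(agg))  # dense: [[1.], [0.], [0.], [2.]]
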
Example #18
 def testSparseStability(self):
     with ops.Graph().as_default():
         for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
             with self.cached_session():
                 shape = [1, 6]
                 var0 = variables.Variable([[
                     0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
                     -0.0105945
                 ]],
                                           dtype=dtype)
                 grads0 = indexed_slices.IndexedSlices(
                     constant_op.constant([[
                         -5.91278e-05, 5.31673e-05, -2.5779e-06,
                         4.29153e-05, -8.4877e-05, -9.48906e-05
                     ]],
                                          shape=shape,
                                          dtype=dtype),
                     constant_op.constant([0]), constant_op.constant(shape))
                 ada_opt = adagrad.AdagradOptimizer(
                     1.0, initial_accumulator_value=0.1)
                 ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
                 self.assertEqual(["accumulator"], ada_opt.get_slot_names())
                 slot0 = ada_opt.get_slot(var0, "accumulator")
                 init = variables.global_variables_initializer()
                 for _ in range(100):
                     init.run()
                     ada_update.run()
                     self.assertAllCloseAccordingToType(
                         np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
                         self.evaluate(slot0))
                     self.assertAllCloseAccordingToType(
                         np.array([[
                             0.00891194, -0.10712013, 0.11047515,
                             0.22636929, -0.0144573, -0.01029443
                         ]]), self.evaluate(var0))
def _rewrite_grad_indexed_slices_output(old_output_slices, new_input_slices):
    """Creates a new version of old_output_slices with new_input_slices as input.

  This method assumes that old_output_slices.{values,indices} are produced by
  concatenating the incoming gradient Tensor input with the IndexedSlices
  produced by the gradient computation of the while body. See
  backprop.aggregate_indexed_slices_gradients for where these concats are
  constructed. We build new concats that use new_input_slices instead of the
  original Tensor input.

  Args:
    old_output_slices: original IndexedSlices output of while gradient.
    new_input_slices: new IndexedSlices to use as input to while gradient.

  Returns:
    A new IndexedSlices to replace old_output_slices.
  """
    def rewrite(old_output, new_input):
        assert old_output.type == "Identity"
        concat_op = old_output.inputs[0].op
        assert concat_op.type == "ConcatV2"
        # Don't include axis arg
        old_concat_args = concat_op.inputs[:-1]
        # We assume that the original gradient input was the first argument to the
        # concat op.
        # TODO(skyewm): do this in a more robust way.
        return array_ops.concat([new_input] + old_concat_args[1:], 0)

    values = rewrite(old_output_slices.values.op, new_input_slices.values)
    indices = rewrite(old_output_slices.indices.op, new_input_slices.indices)
    return indexed_slices.IndexedSlices(
        values=values,
        indices=indices,
        dense_shape=new_input_slices.dense_shape)
Example #20
 def testSparseStability(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     with ops.Graph().as_default():
         for dtype in [dtypes.half]:
             shape = [1, 6]
             var0_np = np.array([[
                 0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
                 -0.0105945
             ]],
                                dtype=dtype.as_numpy_dtype)
             var0 = variables.Variable(var0_np)
             grads0_np = np.array([[
                 -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
                 -8.4877e-05, -9.48906e-05
             ]],
                                  dtype=dtype.as_numpy_dtype)
             grads0 = indexed_slices.IndexedSlices(
                 constant_op.constant(grads0_np), constant_op.constant([0]),
                 constant_op.constant(shape))
             ada_opt = adagrad.Adagrad(1.0)
             ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
             slot0 = ada_opt.get_slot(var0, "accumulator")
             init = variables.global_variables_initializer()
             for _ in range(100):
                 self.evaluate(init)
                 self.evaluate(ada_update)
                 self.assertAllCloseAccordingToType(
                     np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
                     self.evaluate(slot0))
                 self.assertAllCloseAccordingToType(
                     np.array([[
                         0.00891194, -0.10712013, 0.11047515, 0.22636929,
                         -0.0144573, -0.01029443
                     ]]), self.evaluate(var0))
Example #21
 def testIndexedSlices(self):
   x = indexed_slices.IndexedSlices(
       constant_op.constant([1, 2, 3]), constant_op.constant([10, 20, 30]))
   x_value = indexed_slices.IndexedSlicesValue(
       np.array([1, 2, 3]), np.array([10, 20, 30]), np.array([100]))
   self.assertTrue(tensor_util.is_tf_type(x))
   self.assertFalse(tensor_util.is_tf_type(x_value))
Example #22
    def testSparse(self, strategy, tf_function):
        if tf_function is combinations.no_tf_function:
            self.skipTest('Skip IndexedSlices + eager combination.')

        @tf_function
        def fn():
            def replica_fn():
                value = indexed_slices.IndexedSlices(
                    values=array_ops.identity([[1.0]]),
                    indices=array_ops.identity([0]),
                    dense_shape=array_ops.identity([5, 1]))
                rep_ctx = ds_context.get_replica_context()
                reduced = rep_ctx.all_reduce(reduce_util.ReduceOp.MEAN, value)
                return reduced

            return strategy.experimental_local_results(
                strategy.run(replica_fn))

        got = fn()[0]

        if not strategy_test_lib.is_tpu_strategy(strategy):
            self.assertIsInstance(got, indexed_slices.IndexedSlices)
        expect = indexed_slices.IndexedSlices(
            values=array_ops.identity([[1.0]]),
            indices=array_ops.identity([0]),
            dense_shape=array_ops.identity([5, 1]))
        self.assertAllEqual(ops.convert_to_tensor(got),
                            ops.convert_to_tensor(expect))
Example #23
  def testApplyGradtInt32IndicesAndShape(self):
    with self.cached_session() as sess:
      q = data_flow_ops.SparseConditionalAccumulator(
          dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3]))
      accum_op = q.apply_grad(
          grad_indices=constant_op.constant(
              [0, 2], dtype=dtypes_lib.int32),
          grad_values=constant_op.constant(
              [[0, 0, 1], [3, 0, 4]], dtype=dtypes_lib.float32),
          grad_shape=constant_op.constant(
              [3, 3], dtype=dtypes_lib.int32))
      accum_op.run()
      accum_op = q.apply_indexed_slices_grad(
          indexed_slices.IndexedSlices(
              indices=constant_op.constant(
                  [0, 2], dtype=dtypes_lib.int32),
              values=constant_op.constant(
                  [[0, 0, 1], [3, 0, 4]], dtype=dtypes_lib.float32),
              dense_shape=constant_op.constant(
                  [3, 3], dtype=dtypes_lib.int32)))
      accum_op.run()
      self.assertEqual(q.num_accumulated().eval(), 2)

      val = self.evaluate(q.take_indexed_slices_grad(1))
      self.assertAllEqual(val.indices, [0, 2])
      self.assertAllEqual(val.values, [[0, 0, 1], [3, 0, 4]])
      self.assertAllEqual(val.dense_shape, [3, 3])
Example #24
    def testSparse(self, strategy, tf_function):
        if tf_function is combinations.no_tf_function:
            self.skipTest('Skip IndexedSlices + eager combination.')

        @tf_function
        def fn():
            def replica_fn():
                value = indexed_slices.IndexedSlices(
                    values=array_ops.identity([[1.0]]),
                    indices=array_ops.identity([0]),
                    dense_shape=array_ops.identity([5, 1]))
                reduced = strategy.extended._replica_ctx_all_reduce(
                    reduce_util.ReduceOp.SUM, value)
                return reduced

            return strategy.experimental_local_results(
                strategy.run(replica_fn))

        got = fn()[0]
        expect = indexed_slices.IndexedSlices(
            values=array_ops.identity([[1.0 * strategy.num_replicas_in_sync]]),
            indices=array_ops.identity([0]),
            dense_shape=array_ops.identity([5, 1]))
        self.assertAllEqual(ops.convert_to_tensor(got),
                            ops.convert_to_tensor(expect))
def make_per_replica_value(value_fn, devices):
  """Creates a `PerReplica` object whose values reside in `devices`.

  Args:
    value_fn: a callable that takes one argument (`device_idx`) and should
      return the value that is going to be created on devices[device_idx].
    devices: a list of device strings to create `PerReplica` values on.

  Returns:
    A `PerReplica` object.
  """
  values = []
  for device_idx, device in enumerate(devices):
    v = value_fn(device_idx)
    if isinstance(v, indexed_slices.IndexedSlicesValue):
      with ops.device(device):
        values.append(
            indexed_slices.IndexedSlices(
                values=array_ops.identity(v.values),
                indices=array_ops.identity(v.indices),
                dense_shape=array_ops.identity(v.dense_shape)))
    else:
      with ops.device(device):
        values.append(array_ops.identity(v))
  return value_lib.PerReplica(values)
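
A hypothetical usage sketch of make_per_replica_value with IndexedSlicesValue inputs (the device strings and the value_fn name are illustrative assumptions, not taken from the source):

import numpy as np

def sparse_value_fn(device_idx):
  return indexed_slices.IndexedSlicesValue(
      values=np.array([[float(device_idx)]]),
      indices=np.array([0]),
      dense_shape=np.array([5, 1]))

per_replica = make_per_replica_value(
    sparse_value_fn, ["/device:CPU:0", "/device:CPU:0"])
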
Example #26
def divide_by_n_tensors_or_indexed_slices(value, n):
    if isinstance(value, indexed_slices.IndexedSlices):
        value = backprop.flatten_nested_indexed_slices(value)
        return indexed_slices.IndexedSlices(value.values / n, value.indices,
                                            value.dense_shape)
    else:
        return value / n
 def slices(val, index):
     return indexed_slices.IndexedSlices(
         values=constant_op.constant(
             val, dtype=dtypes.float32),
         indices=constant_op.constant(
             index, dtype=dtypes.int32),
         dense_shape=constant_op.constant(
             [2], dtype=dtypes.int32))
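
A short illustrative sketch tying the two helpers above together: build an IndexedSlices with `slices` and scale it by a stand-in replica count without densifying it.

avg = divide_by_n_tensors_or_indexed_slices(slices([2.0, 4.0], [0, 1]), n=2)
# avg.values == [1.0, 2.0]; avg.indices and avg.dense_shape are unchanged.
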
Example #28
 def testConvertIndexedSlicesWithIncorrectDtype(self):
     converter = self.makePythonTensorConverter()
     x = indexed_slices.IndexedSlices(
         constant_op.constant([[1, 2, 3]], dtypes.int32, name="x_values"),
         constant_op.constant([1], dtypes.int64, name="x_indices"),
         constant_op.constant([3, 3], dtypes.int64, name="x_shape"))
     with self.assertRaises((ValueError, TypeError)):
         converter.Convert(x, types_pb2.DT_FLOAT)
 def loop_fn(i):
   slices = indexed_slices.IndexedSlices(
       indices=i,
       values=array_ops.reshape(i, [1]),
       dense_shape=[3, 1])
   # Note that returning the components inside the slice avoids
   # densification, which may be more efficient.
   return slices.values, slices.indices
Example #30
 def testAcceptsIndexedSlices(self):
   values = constant_op.constant([2, 3, 5, 7, 0, -1], shape=[3, 2])
   indices = constant_op.constant([0, 2, 5])
   x = math_ops.scalar_mul(-3, indexed_slices.IndexedSlices(values, indices))
   with test_util.device(use_gpu=True):
     self.assertAllEqual(
         self.evaluate(x.values), [[-6, -9], [-15, -21], [0, 3]])
     self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5])