Example #1
 def testScatterAdd(self):
   with self.test_session():
     handle = resource_variable_ops.var_handle_op(
         dtype=dtypes.int32, shape=[1, 1])
     resource_variable_ops.assign_variable_op(
         handle, constant_op.constant([[1]], dtype=dtypes.int32)).run()
     resource_variable_ops.resource_scatter_add(
         handle, [0], constant_op.constant([[2]], dtype=dtypes.int32)).run()
     read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
     self.assertEqual(read.eval(), [[3]])
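The same semantics can be exercised eagerly in TF 2.x. A minimal sketch (not from the original source; assumes the default resource-backed tf.Variable):

 import tensorflow as tf
 from tensorflow.python.ops import resource_variable_ops

 v = tf.Variable([[1]], dtype=tf.int32)  # resource-backed by default in TF 2.x
 # resource_scatter_add mutates the variable in place through its handle.
 resource_variable_ops.resource_scatter_add(
     v.handle, [0], tf.constant([[2]], dtype=tf.int32))
 print(v.numpy())  # expected: [[3]]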
Example #2
    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        # Accumulate squared gradients, then read the updated slices; the
        # control dependency ensures the scatter-add completes before the read.
        acc = self.get_slot(var, 'accumulator')
        with ops.control_dependencies([
                resource_variable_ops.resource_scatter_add(
                    acc.handle, indices, math_ops.square(grad))
        ]):
            acc_t_slice = acc.sparse_read(indices)
        # Adagrad-style step: var += neg_lr_t * grad / (sqrt(acc) + epsilon).
        var_update = resource_variable_ops.resource_scatter_add(
            var.handle, indices, coefficients['neg_lr_t'] * grad /
            (math_ops.sqrt(acc_t_slice) + coefficients['epsilon']))
        return var_update
Example #3
 def _resource_scatter_add(self, x, i, v):
     #
     # Daqi - handles the incompatibility between the old Variable and the new
     # ResourceVariable; for now they refer to different C++ implementations.
     # Future releases should see scatter_add and resource_scatter_add merged.
     #
     with tf.control_dependencies(
         [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
         return x.value()
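As a sketch of the dispatch this comment implies (hypothetical helper, not part of the original source; assumes tf.compat.v1.scatter_add for legacy ref variables):

 def scatter_add_compat(var, indices, updates):
     # Hypothetical shim: route to the kernel that matches the variable type.
     if isinstance(var, resource_variable_ops.ResourceVariable):
         # Resource variables take the raw handle; the op returns no value,
         # so read the variable under a control dependency.
         with tf.control_dependencies([
             resource_variable_ops.resource_scatter_add(
                 var.handle, indices, updates)]):
             return var.value()
     # Legacy ref variables still use the old scatter_add kernel.
     return tf.compat.v1.scatter_add(var, indices, updates)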
Example #4
 def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
   if self._momentum:
     return super(SGD, self)._resource_apply_sparse_duplicate_indices(
         grad, var, indices)
   else:
     return resource_variable_ops.resource_scatter_add(
         var.handle, indices, -grad * math_ops.cast(
             self._get_hyper("learning_rate"), grad.dtype.base_dtype))
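The duplicate_indices variants are safe to route through this op because the scatter-add kernel accumulates repeated indices instead of overwriting them. An illustrative sketch (not from the original source):

 v = tf.Variable([0.0, 0.0])
 resource_variable_ops.resource_scatter_add(
     v.handle, [0, 0], tf.constant([1.0, 2.0]))
 # Both updates land on index 0 and accumulate: v is now [3.0, 0.0].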
Example #5
 def _resource_scatter_add(self, x, i, v):
     #
     # We use x.handle for ResourceVariables.
     # resource_scatter_add and scatter_add refer to different ops in C++.
     #
     with tf.control_dependencies(
         [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
         return x.value()
Example #6
 def _resource_apply_sparse_duplicate_indices(self, grad, var, indices, state):
   if self._use_momentum:
     return super(SGD, self)._resource_apply_sparse_duplicate_indices(
         grad, var, indices, state)
   else:
     lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
     return resource_variable_ops.resource_scatter_add(var.handle, indices,
                                                       -grad * lr)
Example #7
 def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
   if self._momentum:
     return super(SGD, self)._resource_apply_sparse_duplicate_indices(
         grad, var, indices)
   else:
     var_dtype = var.dtype.base_dtype
     lr_t = self._decayed_lr(var_dtype)
     return resource_variable_ops.resource_scatter_add(var.handle, indices,
                                                       -grad * lr_t)
Example #8
 def testScatterAdd(self):
   handle = resource_variable_ops.var_handle_op(
       dtype=dtypes.int32, shape=[1, 1])
   self.evaluate(resource_variable_ops.assign_variable_op(
       handle, constant_op.constant([[1]], dtype=dtypes.int32)))
   self.evaluate(resource_variable_ops.resource_scatter_add(
       handle, [0], constant_op.constant([[2]], dtype=dtypes.int32)))
   read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
   self.assertEqual(self.evaluate(read), [[3]])
Example #9
    def _resource_apply_sparse_duplicate_indices(self, grad, var, indices, **kwargs):
        if self._momentum:
            return super(CustomSGD, self)._resource_apply_sparse_duplicate_indices(grad, var, indices, **kwargs)
        else:
            var_device, var_dtype = var.device, var.dtype.base_dtype
            coefficients = (kwargs.get("apply_state") or {}).get(
                (var_device, var_dtype)
            ) or self._fallback_apply_state(var_device, var_dtype)

            return resource_variable_ops.resource_scatter_add(var.handle, indices, -grad * coefficients["lr_t"])
Example #10
 def _assign_sub(self, ref, updates, indices=None):
   if indices is not None:
     if isinstance(ref, tf.Variable):
       return tf.scatter_sub(
           ref, indices, updates, use_locking=self._use_locking)
     elif isinstance(ref, resource_variable_ops.ResourceVariable):
       with tf.control_dependencies([
           resource_variable_ops.resource_scatter_add(
               ref.handle, indices, -updates)]):
         return ref.value()
     else:
       raise TypeError("did not expect type %r" % type(ref))
   else:
     return tf.assign_sub(ref, updates, use_locking=self._use_locking)
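Newer TensorFlow releases also expose a dedicated subtract kernel, so the negate-and-add workaround above can be avoided. A sketch, assuming tf.raw_ops.ResourceScatterSub is available:

 tf.raw_ops.ResourceScatterSub(resource=ref.handle, indices=indices, updates=updates)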
Example #11
 def testScatterAddScalar(self):
   with self.test_session() as sess, self.test_scope():
     handle = resource_variable_ops.var_handle_op(
         dtype=dtypes.int32, shape=[1, 1])
     sess.run(
         resource_variable_ops.assign_variable_op(
             handle, constant_op.constant([[1]], dtype=dtypes.int32)))
     sess.run(
         resource_variable_ops.resource_scatter_add(
             handle, [0], constant_op.constant(2, dtype=dtypes.int32)))
     read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
     self.assertEqual(self.evaluate(read), [[3]])
Example #12
 def testScatterAddScalar(self):
   with self.session() as sess, self.test_scope():
     handle = resource_variable_ops.var_handle_op(
         dtype=dtypes.int32, shape=[1, 1])
     sess.run(
         resource_variable_ops.assign_variable_op(
             handle, constant_op.constant([[1]], dtype=dtypes.int32)))
     sess.run(
         resource_variable_ops.resource_scatter_add(
             handle, [0], constant_op.constant(2, dtype=dtypes.int32)))
     read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
     self.assertEqual(self.evaluate(read), [[3]])
Example #13
 def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
     if isinstance(handle, embedding_variable_ops.EmbeddingVariable):
         global_step = training_util.get_or_create_global_step()
         return gen_ev_ops.ev_sparse_apply_gradient_descent(
             handle.handle,
             math_ops.cast(self._learning_rate_tensor,
                           grad.dtype.base_dtype),
             grad,
             indices,
             global_step,
             use_locking=self._use_locking)
     else:
         return resource_variable_ops.resource_scatter_add(
             handle.handle, indices, -grad * self._learning_rate)
Example #14
    def _resource_apply_sparse(self, grad, var, indices):
        momentum_buffer = self.get_slot(var, "momentum")
        learning_rate = math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype)
        momentum = math_ops.cast(self._momentum_tensor, var.dtype.base_dtype)
        nu = math_ops.cast(self._nu_tensor, var.dtype.base_dtype)

        momentum_op = training_ops.resource_sparse_apply_momentum(
            var.handle,
            momentum_buffer.handle,
            nu * (1.0 - momentum) * learning_rate,
            grad,
            indices,
            momentum,
            use_locking=self._use_locking,
            use_nesterov=False,
        )

        with ops.control_dependencies([momentum_op]):
            delta = (nu - 1.0) * learning_rate * grad
            gd_op = resource_variable_ops.resource_scatter_add(var.handle, indices, delta)

        return control_flow_ops.group(momentum_op, gd_op)
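Assuming the standard semantics of resource_sparse_apply_momentum (accum = momentum * accum + grad; var -= lr_arg * accum), the two ops above appear to compose a quasi-hyperbolic-momentum-style step, with \mu the momentum, \nu the weighting, and \eta the learning rate:

 a_t = \mu a_{t-1} + g_t
 \theta_t = \theta_{t-1} - \eta \left[ \nu (1 - \mu)\, a_t + (1 - \nu)\, g_t \right]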
Example #15
 def _resource_scatter_add(self, x, i, v):
   with ops.control_dependencies(
       [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
     return x.value()
Example #16
 def _resource_scatter_add(self, x, i, v, _=None):
      # The unused last argument absorbs one extra positional argument so this
      # helper matches the signature of state_ops.scatter_add.
     with ops.control_dependencies(
         [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
         return x.value()
Example #17
 def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices,
                                              state):
     lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
     return resource_variable_ops.resource_scatter_add(
         handle.handle, indices, -grad * lr)
Example #18
 def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
     return resource_variable_ops.resource_scatter_add(
         handle.handle, indices, -grad * self._get_hyper("learning_rate"))
Example #19
 def _wrapWOAccu(accuGrads, grad, var, indices, apply_state):
     # Scatter-adds grad * 0.0: effectively a no-op update on var.
     return resource_variable_ops.resource_scatter_add(
         var.handle, indices, grad * 0.0)
Example #20
 def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
     return resource_variable_ops.resource_scatter_add(
         handle.handle, indices, -grad *
         math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype))
Example #21
 def _resource_scatter_add(x, i, v):
     dependencies = [
         resource_variable_ops.resource_scatter_add(x.handle, i, v)
     ]
     with ops.control_dependencies(dependencies):
         return x.value()
Example #22
 def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
   return resource_variable_ops.resource_scatter_add(
       handle.handle, indices, -grad * self._learning_rate)
Example #23
 def _resource_scatter_add(self, x, i, v, _=None):
     with ops.control_dependencies(
         [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
         return tf.convert_to_tensor(x)
Example #24
 def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
     return resource_variable_ops.resource_scatter_add(
         var.handle, indices, -grad * math_ops.cast(
             self._get_hyper("learning_rate"), var.dtype.base_dtype))
Example #25
 def _resource_apply_sparse_duplicate_indices(
     self, grad, handle, indices, state):
   lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
   return resource_variable_ops.resource_scatter_add(
       handle.handle, indices, -grad * lr)
Example #26
 def _resource_apply_sparse(self, grad, handle, indices):
     return resource_variable_ops.resource_scatter_add(
         handle, indices, -grad * self._learning_rate)
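A closing note: some of these examples receive a variable and pass var.handle (or handle.handle), while this last one receives the raw handle directly; resource_scatter_add always takes the DT_RESOURCE tensor itself. A minimal sketch (not from the original source):

 v = tf.Variable([1.0])
 h = v.handle  # DT_RESOURCE tensor; this is what resource_scatter_add expects
 resource_variable_ops.resource_scatter_add(h, [0], tf.constant([2.0]))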