Example #1
 def testUnknownUnconnectedGradientsValueGiven(self):
   with ops.Graph().as_default():
     x = constant(1.0)
     y = constant(1.0)
     with self.assertRaisesRegexp(
         ValueError, "Unknown value for unconnected_gradients: 'nonsense'"):
       gradients.gradients([y], [x], unconnected_gradients="nonsense")
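A minimal sketch of the two values that are accepted for unconnected_gradients, "none" and "zero", using the public tf.compat.v1 API instead of the internal gradients module exercised by the test above:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.constant(1.0, shape=[2, 2])
y = tf.constant(3.0, shape=[3, 1])  # y does not depend on x

g_none = tf.gradients([y], [x], unconnected_gradients="none")  # the default
g_zero = tf.gradients([y], [x], unconnected_gradients="zero")

with tf.Session() as sess:
  print(g_none[0])            # None: x is not connected to y
  print(sess.run(g_zero[0]))  # [[0. 0.] [0. 0.]]: zeros shaped like x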
Example #2
 def testRealOnly(self):
   x = constant_op.constant(7+3j, dtype=dtypes.complex64)
   y = math_ops.square(x)
   with self.assertRaisesRegexp(
       TypeError,
       r"Gradients of complex tensors must set grad_ys "
       r"\(y\.dtype = tf\.complex64\)"):
     gradients.gradients(y, x)
Example #3
 def testPartialDerivatives(self):
   with self.test_session():
     x = constant_op.constant(1.)
     y = 2 * x
     z = x + y
     totalg = gradients.gradients(z, [x, y])
     self.assertEqual([3.0, 1.0], [g.eval() for g in totalg])
     partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
     self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
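A minimal sketch (public tf.compat.v1 API, graph mode assumed) of the behaviour the assertions above check: stop_gradients=[x, y] treats x and y as constants that are independent of each other, which is equivalent to differentiating a graph where tf.stop_gradient has been applied to those tensors:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.constant(1.0)
y = 2 * x
z = x + y

total = tf.gradients(z, [x, y])                           # [3.0, 1.0]
partial = tf.gradients(z, [x, y], stop_gradients=[x, y])  # [1.0, 1.0]

# The same partial derivatives written with tf.stop_gradient on the inputs:
x_c, y_c = tf.stop_gradient(x), tf.stop_gradient(y)
partial_2 = tf.gradients(x_c + y_c, [x_c, y_c])           # [1.0, 1.0]

with tf.Session() as sess:
  print(sess.run([total, partial, partial_2]))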
Example #4
 def testFloorDivGrad(self):
     with self.test_session():
         a = variables.Variable(2.0)
         b = variables.Variable(4.0)
         with self.test_session() as sess:
             sess.run(variables.initialize_all_variables())
             c_grad = gradients.gradients(math_ops.div_deprecated(a, b), [a, b])
             self.assertAllEqual([x.eval() for x in c_grad], [0.25, -0.125])
             c_grad = gradients.gradients(math_ops.div(a, b), [a, b])
             self.assertAllEqual([x.eval() for x in c_grad], [0.25, -0.125])
             c_grad = gradients.gradients(math_ops.floordiv(a, b), [a, b])
             self.assertAllEqual([None if x is None else x.eval() for x in c_grad], [None, None])
Example #5
 def testFloorDivGrad(self):
   with self.test_session():
     a = variables.Variable(2.)
     b = variables.Variable(4.)
     with self.test_session() as sess:
       sess.run(variables.global_variables_initializer())
       c_grad = gradients.gradients(math_ops.divide(a, b), [a, b])
       self.assertAllEqual([x.eval() for x in c_grad], [.25, -.125])
       c_grad = gradients.gradients(math_ops.div(a, b), [a, b])
       self.assertAllEqual([x.eval() for x in c_grad], [.25, -.125])
       c_grad = gradients.gradients(math_ops.floordiv(a, b), [a, b])
       self.assertAllEqual([None if x is None else x.eval()
                            for x in c_grad], [None, None])
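The expected values in the two FloorDivGrad tests follow from the analytic derivatives: for divide(a, b) with a = 2 and b = 4, d/da = 1/b = 0.25 and d/db = -a/b**2 = -0.125, while floordiv is piecewise constant, so tf.gradients returns None for both inputs. A quick check of the arithmetic:

a, b = 2.0, 4.0
assert (1.0 / b, -a / b ** 2) == (0.25, -0.125)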
Example #6
 def testDependentYs(self):
   with self.test_session():
     x = constant_op.constant(3.0)
     y = math_ops.square(x)
     y1 = math_ops.square(y)
     y2 = math_ops.square(y1)
     g = gradients.gradients([y, y2], x)
     self.assertAllClose(17502.0, g[0].eval())
     g = gradients.gradients(y + y2, x)
     self.assertAllClose(17502.0, g[0].eval())
     z = array_ops.identity(y)
     z2 = array_ops.identity(y2)
     g = gradients.gradients([z, z2], x)
     self.assertAllClose(17502.0, g[0].eval())
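Why 17502: with y = x**2, y1 = y**2 and y2 = y1**2 = x**8, tf.gradients sums the contributions of every tensor in ys, so the expected value at x = 3 is d/dx(x**2) + d/dx(x**8) = 2*x + 8*x**7:

x = 3.0
assert 2 * x + 8 * x ** 7 == 17502.0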
Example #7
 def test_jacobian_fixed_shape(self):
   x = random_ops.random_uniform([2, 2])
   y = math_ops.matmul(x, x, transpose_a=True)
   jacobian_pfor = gradients.jacobian(y, x, use_pfor=True)
   jacobian_while = gradients.jacobian(y, x, use_pfor=False)
   answer = ops.convert_to_tensor([[
       gradient_ops.gradients(y[0][0], x)[0],
       gradient_ops.gradients(y[0][1], x)[0]
   ], [
       gradient_ops.gradients(y[1][0], x)[0],
       gradient_ops.gradients(y[1][1], x)[0]
   ]])
   self.run_and_assert_equal(answer, jacobian_pfor)
   self.run_and_assert_equal(answer, jacobian_while)
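The jacobian helper above comes from the parallel_for gradients module; a roughly equivalent computation with the public API, assuming TF 2.x eager execution, uses tf.GradientTape.jacobian:

import tensorflow as tf

x = tf.random.uniform([2, 2])
with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.matmul(x, x, transpose_a=True)
jac = tape.jacobian(y, x)  # shape [2, 2, 2, 2]: d y[i, j] / d x[k, l]
print(jac.shape)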
Example #8
 def testColocateGradientsWithAggregation(self):
   with ops.Graph().as_default() as g:
     with g.device("/gpu:1"):
       w = constant(1.0, shape=[1, 1])
     x = constant(1.0, shape=[1, 2])
     y = constant(1.0, shape=[1, 2])
     wx = math_ops.matmul(w, x)
     wy = math_ops.matmul(w, y)
     with g.device("/gpu:0"):
       z = wx + wy
     gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
     self.assertEquals("/gpu:1", gw1.device)
     gw2 = gradients.gradients(z, [w], colocate_gradients_with_ops=False)[0]
     self.assertEquals(None, gw2.device)
Example #9
  def testCustomGradientErrors(self):

    @custom_gradient.custom_gradient
    def F(x):

      def Grad(_):
        raise RuntimeError("x")

      return x, Grad

    with ops.Graph().as_default():
      x = constant(1.0)
      y = F(x)
      with self.assertRaises(RuntimeError):
        gradients.gradients(y, x)
Example #10
 def testBatchNormGradImpl(self):
     x_shape = [7, 5, 4, 6]
     param_shape = [6]
     np.random.seed(1)  # Make it reproducible.
     x_val = np.random.random_sample(x_shape).astype(np.float32)
     m_val = np.random.random_sample(param_shape).astype(np.float32)
     v_val = np.random.random_sample(param_shape).astype(np.float32)
     beta_val = np.random.random_sample(param_shape).astype(np.float32)
     gamma_val = np.random.random_sample(param_shape).astype(np.float32)
     backprop_val = np.random.random_sample(x_shape).astype(np.float32)
     for use_gpu in [False, True]:
         with self.test_session(use_gpu=use_gpu) as sess:
             x = constant_op.constant(x_val, name="x")
             m = constant_op.constant(m_val, name="m")
             v = constant_op.constant(v_val, name="v")
             beta = constant_op.constant(beta_val, name="beta")
             gamma = constant_op.constant(gamma_val, name="gamma")
             backprop = constant_op.constant(backprop_val, name="backprop")
             epsilon = 0.001
             for scale_after_normalization in [True, False]:
                 dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad(
                     x, m, v, gamma, backprop, epsilon, scale_after_normalization
                 )
                 on = self._opsBatchNorm(x, m, v, beta, gamma, epsilon, scale_after_normalization)
                 odx, odm, odv, odb, odg = gradients.gradients([on], [x, m, v, beta, gamma], [backprop])
                 if scale_after_normalization:
                     all_grads = sess.run([dx, dm, dv, db, dg, odx, odm, odv, odb, odg])
                     to_check = ["dx", "dm", "dv", "db", "dg"]
                 else:
                     all_grads = sess.run([dx, dm, dv, db, odx, odm, odv, odb])
                     to_check = ["dx", "dm", "dv", "db"]
                 for i, n in enumerate(to_check):
                     print(n)
                     self.assertAllClose(all_grads[i + len(to_check)], all_grads[i], atol=0.000001)
Example #11
  def get_gradients(self, loss, params):
    """Returns gradients of `loss` with respect to `params`.

    Arguments:
      loss: Loss tensor.
      params: List of variables.

    Returns:
      List of gradient tensors.

    Raises:
      ValueError: In case any gradient cannot be computed (e.g. if gradient
        function not implemented).
    """
    params = nest.flatten(params)
    with backend.get_graph().as_default():
      grads = gradients.gradients(loss, params)
    for grad, param in zip(grads, params):
      if grad is None:
        raise ValueError("Variable {} has `None` for gradient. "
                         "Please make sure that all of your ops have a "
                         "gradient defined (i.e. are differentiable). "
                         "Common ops without gradient: "
                         "K.argmax, K.round, K.eval.".format(param))
    if hasattr(self, "clipnorm"):
      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
    if hasattr(self, "clipvalue"):
      grads = [
          clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
          for g in grads
      ]
    return grads
Example #12
  def testThatBackpropRuns(self):
    """Run optimization to ensure that gradients can be computed."""

    batch_size = 1
    image_height = 9
    image_width = 12
    image = variables.Variable(
        np.float32(
            np.random.uniform(size=[batch_size, image_height, image_width, 3])))
    control_point_locations = [[3., 3.]]
    control_point_locations = constant_op.constant(
        np.float32(np.expand_dims(control_point_locations, 0)))
    control_point_displacements = [[0.25, -0.5]]
    control_point_displacements = constant_op.constant(
        np.float32(np.expand_dims(control_point_displacements, 0)))
    warped_image, _ = sparse_image_warp.sparse_image_warp(
        image,
        control_point_locations,
        control_point_locations + control_point_displacements,
        num_boundary_points=3)

    loss = math_ops.reduce_mean(math_ops.abs(warped_image - image))
    optimizer = momentum.MomentumOptimizer(0.001, 0.9)
    grad = gradients.gradients(loss, [image])
    grad, _ = clip_ops.clip_by_global_norm(grad, 1.0)
    opt_func = optimizer.apply_gradients(zip(grad, [image]))
    init_op = variables.global_variables_initializer()

    with self.test_session() as sess:
      sess.run(init_op)
      for _ in range(5):
        sess.run([loss, opt_func])
Example #13
def _compute_gradients(tensor, var_list):
  grads = gradients.gradients(tensor, var_list)
  # tf.gradients sometimes returns `None` when it should return 0.
  return [
      grad if grad is not None else array_ops.zeros_like(var)
      for var, grad in zip(var_list, grads)
  ]
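In TensorFlow versions that support the unconnected_gradients argument, tf.gradients can perform this None-to-zeros substitution itself; a self-contained sketch with the public tf.compat.v1 API:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

x = tf.constant(1.0)
y = tf.constant(2.0)  # unconnected to the loss
loss = 3.0 * x

grads = tf.gradients(loss, [x, y], unconnected_gradients="zero")
with tf.Session() as sess:
  print(sess.run(grads))  # [3.0, 0.0]: the unconnected gradient is zero-filled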
Example #14
    def _get_train_ops(self, features, targets):
        """See base class."""
        global_step = contrib_variables.get_global_step()
        assert global_step
        logits = self._logits(features, is_training=True)
        if self._enable_centered_bias:
            centered_bias_step = [self._centered_bias_step(targets, features)]
        else:
            centered_bias_step = []
        with ops.control_dependencies(centered_bias_step):
            loss = self._loss(logits, targets, features)
        logging_ops.scalar_summary("loss", loss)

        linear_vars = self._get_linear_vars()
        dnn_vars = self._get_dnn_vars()
        grads = gradients.gradients(loss, dnn_vars + linear_vars)
        if self._gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, self._gradient_clip_norm)

        dnn_grads = grads[0 : len(dnn_vars)]
        linear_grads = grads[len(dnn_vars) :]

        train_ops = self._get_linear_training_ops(linear_grads, linear_vars) + self._get_dnn_training_ops(
            dnn_grads, dnn_vars
        )

        train_step = control_flow_ops.group(*train_ops, name="combined_training_op")
        with ops.control_dependencies([train_step]):
            with ops.get_default_graph().colocate_with(global_step):
                return state_ops.assign_add(global_step, 1).op, loss
Example #15
  def testAggregateGradients(self):

    def fn(x):
      ind1 = tensor.Tensor(np.array([0, 1]))
      ind2 = tensor.Tensor(np.array([2, 3]))
      ind3 = tensor.Tensor(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * tensor.Tensor(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = tensor.Tensor(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]

    with context.graph_mode(), self.test_session():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
Example #16
  def test_zero_grad_tf_gradients(self):
    if context.executing_eagerly():
      self.skipTest("tf.gradients not supported in eager.")

    x = constant_op.constant([-1., 0., 1.])
    g = self.evaluate(gradients.gradients(math_ops.pow(x, 2), x)[0])
    self.assertAllClose([-2., 0., 2.], g)
Example #17
 def _Gradient(tensors, devices):
   inputs = [array_ops.placeholder(t.dtype, t.shape) for t in tensors]
   reduce_tensors = nccl_reduce(inputs, devices)
   losses = _DeviceTensors(tensors, [t.device for t in reduce_tensors])
   grads = gradients.gradients(
       reduce_tensors, inputs, losses, colocate_gradients_with_ops=True)
   return [g for g in grads if g is not None]
Example #18
  def test_interpolation_gradient(self):
    """Make sure that backprop can run. Correctness of gradients is assumed.

    Here, we use a small 'training' set and a more densely-sampled
    set of query points, for which we know the true value in advance. The goal
    is to choose x locations for the training data such that interpolating using
    this training data yields the best reconstruction for the function
    values at the query points. The training data locations are optimized
    iteratively using gradient descent.
    """
    tp = _QuadraticPlusSinProblemND()
    (query_points, query_values, train_points,
     train_values) = tp.get_problem(optimizable=True)

    regularization = 0.001
    for interpolation_order in (1, 2, 3, 4):
      interpolator = interpolate_spline.interpolate_spline(
          train_points, train_values, query_points, interpolation_order,
          regularization)

      loss = math_ops.reduce_mean(math_ops.square(query_values - interpolator))

      optimizer = momentum.MomentumOptimizer(0.001, 0.9)
      grad = gradients.gradients(loss, [train_points])
      grad, _ = clip_ops.clip_by_global_norm(grad, 1.0)
      opt_func = optimizer.apply_gradients(zip(grad, [train_points]))
      init_op = variables.global_variables_initializer()

      with self.cached_session() as sess:
        sess.run(init_op)
        for _ in range(100):
          sess.run([loss, opt_func])
Example #19
  def get_gradients(self, loss, params):
    """Returns gradients of `loss` with respect to `params`.

    Arguments:
      loss: Loss tensor.
      params: List of variables.

    Returns:
      List of gradient tensors.

    Raises:
      ValueError: In case any gradient cannot be computed (e.g. if gradient
        function not implemented).
    """
    loss = self._scale_loss(loss)
    grads = gradients.gradients(loss, params)
    if None in grads:
      raise ValueError("An operation has `None` for gradient. "
                       "Please make sure that all of your ops have a "
                       "gradient defined (i.e. are differentiable). "
                       "Common ops without gradient: "
                       "K.argmax, K.round, K.eval.")
    if hasattr(self, "clipnorm"):
      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
    if hasattr(self, "clipvalue"):
      grads = [
          clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
          for g in grads
      ]
    return grads
Example #20
  def testPlaysNicelyWithDefunSeparateGradientScope(self):
    with self.test_session(graph=ops.Graph()) as sess:
      with jit.experimental_jit_scope(True):

        @function.Defun(
            compiled=True, noinline=True, separate_compiled_gradients=True)
        def mulop(x1, x2):
          return x1 * x2

        x = constant_op.constant(1.0)
        r = mulop(x, x)
        g_r = gradients.gradients(r, x, name="GA")[0]

      # Ensure the forward function is compiled.
      graph_def = r.graph.as_graph_def()
      func_attrs = graph_def.library.function[0].attr
      self.assertTrue(func_attrs["_XlaCompile"].b)
      self.assertEqual(b"jit_scope_0", func_attrs["_XlaScope"].s)

      # Ensure the gradient (SymbolicGradient) is compiled, with a different
      # _XlaScope from the function itself.
      grad_op = g_r.op.inputs[0].op
      self.assertTrue(grad_op.get_attr("_XlaCompile"))
      self.assertEqual(b"jit_scope_0_grad_GA",
                       grad_op.get_attr("_XlaScope"))

      # Ensure the ops run: grad(x1*x1) = 2*x1
      self.assertAllClose([1.0, 1.0, 2.0], sess.run([x, r, g_r]))
Example #21
  def testColocateGradientsWithAggregation(self):
    with ops.Graph().as_default() as g:
      with g.device("/device:GPU:1"):
        w = constant(1.0, shape=[1, 1])
      x = constant(1.0, shape=[1, 2])
      y = constant(1.0, shape=[1, 2])
      wx = math_ops.matmul(w, x)
      wy = math_ops.matmul(w, y)
      with g.device("/device:GPU:0"):
        z = wx + wy

      gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
      self.assertEqual(gw1.op.colocation_groups(), wx.op.colocation_groups())

      gw2 = gradients.gradients(z, [w], colocate_gradients_with_ops=False)[0]
      self.assertTrue(wx.op.colocation_groups() != gw2.op.colocation_groups())
Example #22
  def test_tensor_array_grad(self):
    inp = constant_op.constant(np.random.rand(3, 4, 2), dtype=dtypes.float32)
    ta = tensor_array_ops.TensorArray(dtypes.float32, size=3)
    ta = ta.unstack(inp)

    def loop_fn(i):

      def body(j, x):
        value = ta.gather([j])
        value = array_ops.gather(array_ops.reshape(value, [4, 2]), i)
        return j + 1, x + value

      _, out = control_flow_ops.while_loop(lambda j, _: j < 3, body,
                                           (0, array_ops.zeros([2])))
      out = math_ops.reduce_prod(out)
      return out, gradient_ops.gradients(out, inp)[0]

    pfor_out, pfor_out_grad = pfor_control_flow_ops.pfor(loop_fn, 4)
    # Note that tf.while_loop does not work in the setup above. So we manually
    # construct the equivalent computation of the above loops here.
    real_out = math_ops.reduce_sum(inp, axis=[0])
    real_out = math_ops.reduce_prod(real_out, axis=[1])
    # Note that gradients of real_out will accumulate the gradients across the
    # output value. Hence we do the same aggregation on pfor_out_grad.
    real_out_grad = gradient_ops.gradients(real_out, inp)[0]
    sum_pfor_out_grad = math_ops.reduce_sum(pfor_out_grad, axis=[0])

    with session.Session() as sess:
      v1, v2, v1_grad, v2_grad = sess.run(
          [pfor_out, real_out, sum_pfor_out_grad, real_out_grad])
      self.assertAllClose(v1, v2)
      self.assertAllClose(v1_grad, v2_grad)
Example #23
 def approximate_hessian(self, grads_and_vars, name=None,
                         gate_gradients=Optimizer.GATE_OP,
                         aggregation_method=None,
                         colocate_gradients_with_ops=False):
   """
   I haven't tested this yet so I have no idea if it works, but even if it
   does it's probably super slow, and either way nothing else has been modified
   to deal with it.
   """
   
   gv = 0
   var_refs = []
   for g_t, x_tm1 in grads_and_vars:
     var_refs.append(x_tm1.ref())
     if g_t is None:
       continue
     with ops.name_scope('update_' + x_tm1.op.name), ops.device(x_tm1.device):
       if isinstance(g_t, ops.Tensor):
         gv += math_ops.reduce_sum(g_t * random_ops.random_normal(g_t.get_shape()))
       else:
         idxs, idxs_ = array_ops.unique(g_t.indices)
         g_t_ = math_ops.unsorted_segment_sum(g_t.values, idxs_, array_ops.size(idxs))
         gv += math_ops.reduce_sum(g_t_ * random_ops.random_normal(g_t_.get_shape()))
   hesses = gradients.gradients(gv, var_refs,
                                gate_gradients=(gate_gradients == Optimizer.GATE_OP),
                                aggregation_method=aggregation_method,
                                colocate_gradients_with_ops=colocate_gradients_with_ops)
   return zip([g_t for g_t, _ in grads_and_vars], [x_tm1 for _, x_tm1 in grads_and_vars], hesses)
Example #24
 def loop_fn(i):
   image = array_ops.gather(images, i)
   label = array_ops.gather(labels, i)
   logits = array_ops.reshape(model(image, training=training), [-1])
   loss = losses.softmax_cross_entropy(
       logits=logits, onehot_labels=label, reduction=losses.Reduction.NONE)
   return gradient_ops.gradients(loss, variables.trainable_variables())
Example #25
 def compute_gradients(self, loss, var_list=None, gate_gradients=GATE_OP,
                       aggregation_method=None, colocate_gradients_with_ops=False):
   """"""
   
   # Error checking
   if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                             Optimizer.GATE_GRAPH]:
     raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, " +
       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" % gate_gradients)
   self._assert_valid_dtypes([loss])
   if var_list is None:
     var_list = variables.trainable_variables()
   for x_tm1 in var_list:
     if not isinstance(x_tm1, variables.Variable):
       raise TypeError("Argument is not a tf.Variable: %s" % x_tm1)
   if not var_list:
     raise ValueError("No variables to optimize")
   
   # The actual stuff
   var_refs = [x_tm1.ref() for x_tm1 in var_list]
   grads = gradients.gradients(loss, var_refs,
                               gate_gradients=(gate_gradients == Optimizer.GATE_OP),
                               aggregation_method=aggregation_method,
                               colocate_gradients_with_ops=colocate_gradients_with_ops)
   if gate_gradients == Optimizer.GATE_GRAPH:
     grads = control_flow_ops.tuple(grads)
   grads_and_vars = list(zip(grads, var_list))
   self._assert_valid_dtypes([x_tm1 for g_t, x_tm1 in grads_and_vars if g_t is not None])
   return grads_and_vars
Example #26
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))
Example #27
 def testUnconnectedGradientsNoneUnconnectedGradients(self):
   with ops.Graph().as_default():
     x = constant(1.0, shape=[2, 2])
     y = constant(3.0, shape=[3, 1])
     grad = gradients.gradients(
         [y], [x], unconnected_gradients="none")
   self.assertIsNone(grad[0])
Example #28
  def compute_gradients(self, loss, var_list=None,
                        gate_gradients=GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKey.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
    if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                              Optimizer.GATE_GRAPH]:
      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                       gate_gradients)
    self._assert_valid_dtypes([loss])
    if grad_loss is not None:
      self._assert_valid_dtypes([grad_loss])
    if var_list is None:
      var_list = (
          variables.trainable_variables() +
          ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
    processors = [_get_processor(v) for v in var_list]
    if not var_list:
      raise ValueError("No variables to optimize.")
    var_refs = [p.target() for p in processors]
    grads = gradients.gradients(
        loss, var_refs, grad_ys=grad_loss,
        gate_gradients=(gate_gradients == Optimizer.GATE_OP),
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops)
    if gate_gradients == Optimizer.GATE_GRAPH:
      grads = control_flow_ops.tuple(grads)
    grads_and_vars = list(zip(grads, var_list))
    self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
    return grads_and_vars
Example #29
  def test_gradients_exist(self):
    """Check that backprop can run.

    The correctness of the gradients is assumed, since the forward propagation
    is tested to be correct and we only use built-in tf ops.
    However, we perform a simple test to make sure that backprop can actually
    run. We treat the flows as a tf.Variable and optimize them to minimize
    the difference between the interpolated image and the input image.
    """

    batch_size, height, width, numchannels = [4, 5, 6, 7]
    image_shape = [batch_size, height, width, numchannels]
    image = random_ops.random_normal(image_shape)
    flow_shape = [batch_size, height, width, 2]
    init_flows = np.float32(np.random.normal(size=flow_shape) * 0.25)
    flows = variables.Variable(init_flows)

    interp = dense_image_warp.dense_image_warp(image, flows)
    loss = math_ops.reduce_mean(math_ops.square(interp - image))

    optimizer = adam.AdamOptimizer(1.0)
    grad = gradients.gradients(loss, [flows])
    opt_func = optimizer.apply_gradients(zip(grad, [flows]))
    init_op = variables.global_variables_initializer()

    with self.test_session() as sess:
      sess.run(init_op)
      for _ in range(10):
        sess.run(opt_func)
Example #30
  def testCustomGradientWithVariables(self):

    @custom_gradient.custom_gradient
    def F(x):
      out = core_layers.dense(x, 3, use_bias=False)

      def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
        self.assertEqual(1, len(variables))
        grads = gradients.gradients(out, [x, variables[0]], grad_ys=out_grad)
        return grads[0], [array_ops.ones((4, 3))]

      return out, Grad

    with ops.Graph().as_default():
      x = array_ops.ones((2, 4))
      with variable_scope.variable_scope("f", use_resource=True) as vs:
        y = F(x)
        all_vars = vs.global_variables()
        assert len(all_vars) == 1
      grads = gradients.gradients(y, [x, all_vars[0]])
      for g in grads:
        self.assertTrue(g is not None)
      with session.Session() as sess:
        sess.run(variables.global_variables_initializer())
        dw = sess.run(math_ops.reduce_sum(grads[1]))
        self.assertEqual(12., dw)
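The test above exercises the variables keyword that a custom gradient function must accept when the decorated function creates variables; the basic pattern with the public tf.custom_gradient API (a minimal eager-mode sketch without variables) looks like this:

import tensorflow as tf

@tf.custom_gradient
def double(x):
  def grad(upstream):
    return 2.0 * upstream  # analytic d(2x)/dx = 2
  return 2.0 * x, grad

x = tf.constant(3.0)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = double(x)
print(tape.gradient(y, x))  # tf.Tensor(2.0, ...)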
Example #31
  def compute_gradients(self,
                        loss,
                        var_list,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None,
                        stop_gradients=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize or a callable taking no
        arguments which returns the value to minimize. When eager execution is
        enabled it must be a callable.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph under
        the key `GraphKeys.TRAINABLE_VARIABLES`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with the
        corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
        through.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid, or var_list is None.
      RuntimeError: If called with eager execution enabled and `loss` is
        not callable.

    @compatibility(eager)
    When eager execution is enabled, `aggregation_method`, and
    `colocate_gradients_with_ops` are ignored.
    @end_compatibility
    """
    var_list = nest.flatten(var_list)
    # TODO(josh11b): Test that we handle weight decay in a reasonable way.
    if callable(loss):
      with backprop.GradientTape() as tape:
        tape.watch(var_list)
        loss_value = loss()
      grads = tape.gradient(loss_value, var_list, grad_loss)
    else:
      if context.executing_eagerly():
        raise RuntimeError("`loss` passed to Optimizer.compute_gradients "
                           "should be a function when eager execution is "
                           "enabled.")
      self._assert_valid_dtypes([loss])
      if grad_loss is not None:
        self._assert_valid_dtypes([grad_loss])
      grads = gradients.gradients(
          loss,
          var_list,
          grad_ys=grad_loss,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          stop_gradients=stop_gradients)

    grads_and_vars = list(zip(grads, var_list))
    self._assert_valid_dtypes([
        v for g, v in grads_and_vars
        if g is not None and v.dtype != dtypes.resource
    ])

    return grads_and_vars
Example #32
def _linear_classifier_model_fn(features, targets, mode, params):
    """Linear classifier model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.
      * joint_weights: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    parent_scope = "linear"
    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(features.values(),
                                          parent_scope,
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=[parent_scope],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
Example #33
 def loop_fn(i):
     out_i = array_ops.gather(out, i)
     return gradient_ops.gradients(out_i, x)[0]
Example #34
 def testVariableAsGraphElementGradient(self):
     with ops.Graph().as_default() as graph:
         init = constant_op.constant(100.0)
         var = variables.Variable(init)
         gradient = gradients.gradients(graph.as_graph_element(var), var)
         self.assertIsNotNone(gradient)
Example #35
def _linear_classifier_model_fn(features, targets, mode, params):
    """Estimator's linear model_fn."""
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    feature_columns = params["feature_columns"]
    optimizer = params["optimizer"]
    gradient_clip_norm = params.get("gradient_clip_norm", None)
    enable_centered_bias = params.get("enable_centered_bias", True)
    num_ps_replicas = params.get("num_ps_replicas", 0)
    joint_weights = params.get("joint_weights", False)

    if not isinstance(features, dict):
        features = {"": features}

    num_label_columns = 1 if n_classes == 2 else n_classes
    loss_fn = _softmax_cross_entropy_loss
    if n_classes == 2:
        loss_fn = _log_loss_with_two_classes

    feat_values = (features.values()
                   if isinstance(features, dict) else [features])
    partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas, min_slice_size=64 << 20)
    with variable_scope.variable_op_scope(feat_values,
                                          "linear",
                                          partitioner=partitioner) as scope:
        if joint_weights:
            logits, _, _ = (layers.joint_weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=["linear"],
                scope=scope))
        else:
            logits, _, _ = (layers.weighted_sum_from_feature_columns(
                columns_to_tensors=features,
                feature_columns=feature_columns,
                num_outputs=num_label_columns,
                weight_collections=["linear"],
                scope=scope))

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    loss = None
    if mode != estimator.ModeKeys.INFER:
        loss = loss_fn(logits, targets)
        if weight_column_name:
            weight_tensor = array_ops.reshape(math_ops.to_float(
                features[weight_column_name]),
                                              shape=(-1, ))
            loss = _weighted_loss(loss, weight_tensor)
        else:
            loss = math_ops.reduce_mean(loss, name="loss")
        logging_ops.scalar_summary("loss", loss)

    train_ops = []
    if mode == estimator.ModeKeys.TRAIN:
        global_step = contrib_variables.get_global_step()

        my_vars = ops.get_collection("linear")
        grads = gradients.gradients(loss, my_vars)
        if gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
        train_ops.append(
            optimizer.apply_gradients(zip(grads, my_vars),
                                      global_step=global_step))
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

    predictions = {}
    if n_classes == 2:
        predictions[_LOGISTIC] = math_ops.sigmoid(logits)
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)

    return predictions, loss, control_flow_ops.group(*train_ops)
Example #36
 def model_fn(inps, init_state):
     state = init_state
     for inp in inps:
         _, state = cell(inp, state)
     output = nn.l2_loss(state.c)
     return gradient_ops.gradients(output, variables.trainable_variables())
Example #37
    def _TestCompareFoldAndUnfolded(self, relu, relu_op_name, with_bypass,
                                    has_scaling, fused_batch_norm,
                                    freeze_batch_norm_delay):
        """Tests that running folded and unfolded BN returns the same results.

    Args:
      relu: Callable that returns an Operation, a factory method for the Relu*.
      relu_op_name: String, name of the Relu* operation.
      with_bypass: Bool, when true there is an extra connection added from
        inputs to just before Relu*.
      has_scaling: Bool, when true the batch norm has scaling.
      fused_batch_norm: Bool, when true the batch norm is fused.
      freeze_batch_norm_delay: None or the number of steps after which training
        switches to using frozen mean and variance.
    """
        random_seed.set_random_seed(1234)
        unfolded_g = ops.Graph()
        with unfolded_g.as_default():
            batch_size, height, width = 5, 128, 128
            inputs = random_ops.random_uniform((batch_size, height, width, 3),
                                               dtype=dtypes.float32,
                                               seed=1234)
            out_depth = 3 if with_bypass else 32
            stride = 1 if with_bypass else 2
            activation_fn = None if with_bypass else relu
            scope = 'test/test2' if with_bypass else 'test'
            node = conv2d(inputs,
                          out_depth, [5, 5],
                          stride=stride,
                          padding='SAME',
                          weights_initializer=self._WeightInit(0.09),
                          activation_fn=activation_fn,
                          normalizer_fn=batch_norm,
                          normalizer_params=self._BatchNormParams(
                              scale=has_scaling, fused=fused_batch_norm),
                          scope=scope)
            if with_bypass:
                node = math_ops.add(inputs, node, name='test/Add')
            relu_node = relu(node, name='test/' + relu_op_name)
        folded_g = self._CopyGraph(unfolded_g)
        with folded_g.as_default():
            fold_batch_norms.FoldBatchNorms(
                folded_g,
                is_training=True,
                freeze_batch_norm_delay=freeze_batch_norm_delay)
        with session.Session(graph=unfolded_g) as sess:
            sess.run(variables.global_variables_initializer())
            grad_node = gradients.gradients(relu_node, inputs)
            results = sess.run([relu_node, grad_node])
            unfolded_forward, unfolded_backward = results[0], results[1]

        with session.Session(graph=folded_g) as sess:
            sess.run(variables.global_variables_initializer())
            relu_node = folded_g.get_tensor_by_name(relu_node.name)
            inputs = folded_g.get_tensor_by_name(inputs.name)
            grad_node = gradients.gradients(relu_node, inputs)
            results = sess.run([relu_node, grad_node])
            folded_forward, folded_backward = results[0], results[1]

        # Check that the folded and unfolded results match.
        self.assertAllClose(unfolded_forward, folded_forward, atol=1e-3)
        self.assertAllClose(unfolded_backward, folded_backward, atol=1e-3)
Example #38
    def compute_gradients(self,
                          loss,
                          var_list=None,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):
        """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize or a callable taking
        no arguments which returns the value to minimize. When eager execution
        is enabled it must be a callable.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKeys.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
      RuntimeError: If called with eager execution enabled and `loss` is
        not callable.

    @compatibility(eager)
    When eager execution is enabled, `gate_gradients`, `aggregation_method`,
    and `colocate_gradients_with_ops` are ignored.
    @end_compatibility
    """
        if callable(loss):
            with backprop.GradientTape() as tape:
                if var_list is not None:
                    tape.watch(var_list)
                loss_value = loss()
            if var_list is None:
                var_list = tape.watched_variables()
            grads = tape.gradient(loss_value, var_list, grad_loss)
            return list(zip(grads, var_list))
        if context.in_eager_mode():
            raise RuntimeError(
                "`loss` passed to Optimizer.compute_gradients should "
                "be a function when eager execution is enabled.")
        if gate_gradients not in [
                Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH
        ]:
            raise ValueError(
                "gate_gradients must be one of: Optimizer.GATE_NONE, "
                "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                gate_gradients)
        self._assert_valid_dtypes([loss])
        if grad_loss is not None:
            self._assert_valid_dtypes([grad_loss])
        if var_list is None:
            var_list = (
                variables.trainable_variables() +
                ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        else:
            var_list = nest.flatten(var_list)
        # pylint: disable=protected-access
        var_list += ops.get_collection(ops.GraphKeys._STREAMING_MODEL_PORTS)
        # pylint: enable=protected-access
        processors = [_get_processor(v) for v in var_list]
        if not var_list:
            raise ValueError("No variables to optimize.")
        var_refs = [p.target() for p in processors]
        grads = gradients.gradients(
            loss,
            var_refs,
            grad_ys=grad_loss,
            gate_gradients=(gate_gradients == Optimizer.GATE_OP),
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops)
        if gate_gradients == Optimizer.GATE_GRAPH:
            grads = control_flow_ops.tuple(grads)
        grads_and_vars = list(zip(grads, var_list))
        self._assert_valid_dtypes([
            v for g, v in grads_and_vars
            if g is not None and v.dtype != dtypes.resource
        ])
        return grads_and_vars
Example #39
 def loop_fn(i):
     y_i = array_ops.gather(y, i)
     grad = gradient_ops.gradients(y_i, x)[0]
     return array_ops.gather(grad, i)
Example #40
 def loop_fn(i):
     out_i = array_ops.gather(out, i, axis=1)
     return array_ops.reshape(gradient_ops.gradients(out_i, x)[0], [-1])
Example #41
 def _get_fx(self, f, i, x):
     if isinstance(f, list):
         return f[0], f[1]
     fx = f(i, x)
     grad = gradients.gradients(fx, x)[0]
     return fx, grad
Example #42
    def compute_gradients(self,
                          loss,
                          var_list=None,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):
        """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKey.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
        if gate_gradients not in [
                Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH
        ]:
            raise ValueError(
                "gate_gradients must be one of: Optimizer.GATE_NONE, "
                "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                gate_gradients)
        self._assert_valid_dtypes([loss])
        if grad_loss is not None:
            self._assert_valid_dtypes([grad_loss])
        if var_list is None:
            var_list = (
                variables.trainable_variables() +
                ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        processors = [_get_processor(v) for v in var_list]
        if not var_list:
            raise ValueError("No variables to optimize.")
        var_refs = [p.target() for p in processors]
        grads = gradients.gradients(
            loss,
            var_refs,
            grad_ys=grad_loss,
            gate_gradients=(gate_gradients == Optimizer.GATE_OP),
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops)
        if gate_gradients == Optimizer.GATE_GRAPH:
            grads = control_flow_ops.tuple(grads)
        grads_and_vars = list(zip(grads, var_list))
        self._assert_valid_dtypes(
            [v for g, v in grads_and_vars if g is not None])
        return grads_and_vars
Example #43
def _compute_gradients(tensor, var_list):
  grads = gradients.gradients(tensor, var_list)
  # tf.gradients sometimes returns `None` when it should return 0.
  return [grad if grad is not None else array_ops.zeros_like(var)
          for var, grad in zip(var_list, grads)]
Example #44
 def loop_fn(i):
   a = array_ops.gather(x, i)
   y = nn.bias_add(a, bias, data_format=data_format)
   loss = math_ops.reduce_sum(y * y)
   return y, gradient_ops.gradients(loss, bias)
Example #45
 def model_fn(activation):
   for layer in layers:
     activation = layer(activation)
   activation = projection(activation)
   activation = nn.l2_loss(activation)
   return gradient_ops.gradients(activation, variables.trainable_variables())
Example #46
 def testPreventGradient(self):
     with ops.Graph().as_default():
         inp = constant(1.0, shape=[100, 32], name="in")
         out = array_ops.prevent_gradient(inp)
         with self.assertRaisesRegexp(LookupError, "explicitly disabled"):
             _ = gradients.gradients(out, inp)
Example #47
# g = tf.GradientTape().__enter__()
# model.variables
# model.trainable_weights[0]
# g.watch(model.variables)
# tf.GradientTape().__exit__()

# model.evaluate()
# model.layers[0].layers[6].updates
model.optimizer.get_gradients
# thing.updates

from tensorflow.python.ops import gradients

# Alternatively, try PyTorch and cross-validate against the TF results.
# There appears to be no direct way to do this with tf.gradients alone;
# GradientTape.jacobian does support it, but it does not support tf.cond.
grads = [gradients.gradients(model2.output[i], model2.variables) for i in range(25)]
grads = [gradients.gradients(model(train_images[i:i + 1]), model.variables) for i in range(25)]

sess = tf.Session()
sess.run(grads, feed_dict={model2.input: train_images[:25]})

grads = gradients.gradients(
    tf.expand_dims(tf.tile(model(train_images[:25]), tf.constant([1, 25], tf.int32)), 0),
    model.variables, grad_ys=tf.eye(25))

grads

grads[0].shape

tf.app.flags.DEFINE_string('f', '', 'kernel')

model.build()
Example #48
 def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
     self.assertEqual(1, len(variables))
     grads = gradients.gradients(out, [x, variables[0]],
                                 grad_ys=out_grad)
     return grads[0], [array_ops.ones((4, 3))]
Example #49
 def testStopGradient(self):
     with ops.Graph().as_default():
         inp = constant(1.0, shape=[100, 32], name="in")
         out = array_ops.stop_gradient(inp)
         igrad = gradients.gradients(out, inp)[0]
     assert igrad is None
Example #50
 def loop_fn(i):
   x1 = array_ops.gather(x, i)
   output = nn.max_pool(
       x1, ksize, strides=[1, 2, 2, 1], padding="VALID", data_format="NHWC")
   loss = nn.l2_loss(output)
   return output, gradient_ops.gradients(loss, x1)
Example #51
 def loop_fn(i):
   logits_i = array_ops.gather(logits, i)
   labels_i = array_ops.gather(labels, i)
   loss = nn.softmax_cross_entropy_with_logits(
       labels=labels_i, logits=logits_i)
   return loss, gradient_ops.gradients(math_ops.reduce_sum(loss), logits_i)
Example #52
 def testUnconnectedGradientsNoneUnconnectedGradients(self):
     with ops.Graph().as_default():
         x = constant(1.0, shape=[2, 2])
         y = constant(3.0, shape=[3, 1])
         grad = gradients.gradients([y], [x], unconnected_gradients="none")
     self.assertIsNone(grad[0])
Example #53
 def _xlogy_gradients(self, x, y):
     xlogy_xgrad = self.evaluate(
         gradients.gradients(math_ops.xlogy(x, y), x)[0])
     xlogy_ygrad = self.evaluate(
         gradients.gradients(math_ops.xlogy(x, y), y)[0])
     return xlogy_xgrad, xlogy_ygrad
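The analytic values behind this helper: xlogy(x, y) = x * log(y), so for x != 0 the gradients are d/dx = log(y) and d/dy = x / y, for example:

import math
x, y = 3.0, 2.0
dx, dy = math.log(y), x / y
print(dx, dy)  # 0.6931..., 1.5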
Example #54
def _compute_gradients(tensor, var_list):
    grads = gradients.gradients(tensor, var_list)
    return [
        grad if grad is not None else array_ops.zeros_like(var)
        for var, grad in zip(var_list, grads)
    ]
Example #55
 def loop_fn(i):
     y = array_ops.gather(output, i, axis=1)
     return gradient_ops.gradients(y, inp)[0]
Example #56
 def _xdivy_gradients(self, x, y):
     xdivy_xgrad = self.evaluate(
         gradients.gradients(math_ops.xdivy(x, y), x)[0])
     xdivy_ygrad = self.evaluate(
         gradients.gradients(math_ops.xdivy(x, y), y)[0])
     return xdivy_xgrad, xdivy_ygrad
Example #57
 def testVariableRefGradient(self):
     with ops.Graph().as_default():
         init = constant_op.constant(100.0)
         var = variables.Variable(init)
         gradient = gradients.gradients(var._ref(), var)
         self.assertIsNotNone(gradient)
Example #58
 def loop_fn(i):
     y = array_ops.gather(output, i)
     return gradient_ops.gradients(y, flat_inputs)
Example #59
 def _Gradients(ys, xs, **kwargs):
     dydxs = gradients.gradients(ys, xs, **kwargs)
     dydxs = [
         0. * x if dydx is None else dydx for x, dydx in zip(xs, dydxs)
     ]
     return dydxs
Example #60
 def loop_fn(i):
   x1 = array_ops.gather(x, i)
   y = op(x1)
   loss = math_ops.reduce_sum(y * y)
   return op(x), y, gradient_ops.gradients(loss, x1)