Example #1
  def testMap_Scoped(self):
    with self.cached_session() as sess:

      def double_scoped(x):
        """2x with a dummy 2 that is scoped."""
        with variable_scope.variable_scope("body"):
          # Dummy variable, just to check that scoping works as intended.
          two = variable_scope.get_variable(
              "two", [],
              dtype=dtypes.int32,
              initializer=init_ops.constant_initializer(2))
          return math_ops.multiply(x, two)

      with variable_scope.variable_scope("root") as varscope:
        elems = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
        doubles = np.array([2 * x for x in [1, 2, 3, 4, 5, 6]])

        r = functional_ops.map_fn(double_scoped, elems)
        # Check that we have the one variable we asked for here.
        self.assertEqual(len(variables.trainable_variables()), 1)
        self.assertEqual(variables.trainable_variables()[0].name,
                         "root/body/two:0")
        sess.run([variables.global_variables_initializer()])
        self.assertAllEqual(doubles, self.evaluate(r))

        # Now let's reuse our single variable.
        varscope.reuse_variables()
        r = functional_ops.map_fn(double_scoped, elems)
        self.assertEqual(len(variables.trainable_variables()), 1)
        self.assertAllEqual(doubles, self.evaluate(r))
Example #2
 def testFunctionalDenseTwiceReuse(self):
   inputs = random_ops.random_uniform((5, 3), seed=1)
   core_layers.dense(inputs, 2, name='my_dense')
   vars1 = variables.trainable_variables()
   core_layers.dense(inputs, 2, name='my_dense', reuse=True)
   vars2 = variables.trainable_variables()
   self.assertEqual(vars1, vars2)
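Example #2 above, and several of the dense/conv examples below, count trainable_variables() to check whether a layer call created new variables or bound to existing ones. A minimal sketch of the same check using the public TF 1.x compat API (the layer name and input shape here are illustrative, not taken from the snippet):

import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # graph mode, as in the snippets here

inputs = tf.compat.v1.random_uniform((5, 3), seed=1)

# First call creates the kernel and bias for 'my_dense'.
tf.compat.v1.layers.dense(inputs, 2, name='my_dense')
print(len(tf.compat.v1.trainable_variables()))  # 2

# reuse=True binds to the existing variables instead of creating new ones,
# so the count stays at 2; omitting name/reuse would create a second pair
# and the count would grow to 4.
tf.compat.v1.layers.dense(inputs, 2, name='my_dense', reuse=True)
print(len(tf.compat.v1.trainable_variables()))  # 2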
Example #3
 def testFunctionalConv2DReuse(self):
   height, width = 7, 9
   images = random_ops.random_uniform((5, height, width, 3), seed=1)
   conv_layers.conv2d(images, 32, [3, 3], name='conv1')
   self.assertEqual(len(variables.trainable_variables()), 2)
   conv_layers.conv2d(images, 32, [3, 3], name='conv1', reuse=True)
   self.assertEqual(len(variables.trainable_variables()), 2)
Example #4
 def testFunctionalConv3DTransposeNoReuse(self):
   depth, height, width = 5, 7, 9
   volumes = random_ops.random_uniform((5, depth, height, width, 32), seed=1)
   conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3])
   self.assertEqual(len(variables.trainable_variables()), 2)
   conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3])
   self.assertEqual(len(variables.trainable_variables()), 4)
Example #5
  def testTensorLearningRateAndMomentum(self):
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.cached_session():
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
        mom_opt = momentum_lib.MomentumOptimizer(
            learning_rate=constant_op.constant(2.0),
            momentum=constant_op.constant(0.9))
        mom_update = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()
        # Check we have slots
        self.assertEqual(["momentum"], mom_opt.get_slot_names())
        slot0 = mom_opt.get_slot(var0, "momentum")
        self.assertEquals(slot0.get_shape(), var0.get_shape())
        self.assertFalse(slot0 in variables.trainable_variables())
        slot1 = mom_opt.get_slot(var1, "momentum")
        self.assertEquals(slot1.get_shape(), var1.get_shape())
        self.assertFalse(slot1 in variables.trainable_variables())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Step 1: the momentum accumulators were 0. So we should see a normal
        # update: v -= grad * learning_rate
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([0.1, 0.1]), self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([0.01, 0.01]), self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
            self.evaluate(var1))
        # Step 2: the momentum accumulators contain the previous update.
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
            self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
            self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([
                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
            ]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([
                2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
            ]), self.evaluate(var1))
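The expected values asserted in Example #5 follow the plain momentum recurrence: the slot accumulates accum = momentum * accum + grad, and the parameter moves by lr * accum. A small NumPy check with the constants copied from the test reproduces the numbers for var0:

import numpy as np

lr, momentum = 2.0, 0.9
var0 = np.array([1.0, 2.0])
grad0 = np.array([0.1, 0.1])
accum0 = np.zeros_like(var0)

for step in (1, 2):
    accum0 = momentum * accum0 + grad0   # momentum slot update
    var0 = var0 - lr * accum0            # parameter update
    print(step, accum0, var0)

# step 1: accum0 == [0.1, 0.1],   var0 == [0.8, 1.8]
# step 2: accum0 == [0.19, 0.19], var0 == [0.42, 1.42]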
Example #6
 def testFunctionalConv1DNoReuse(self):
   length = 10
   data = random_ops.random_uniform((5, length, 3), seed=1)
   conv_layers.separable_conv1d(data, 32, 3)
   self.assertEqual(len(variables.trainable_variables()), 3)
   conv_layers.separable_conv1d(data, 32, 3)
   self.assertEqual(len(variables.trainable_variables()), 6)
Example #7
 def testFunctionalConv2DTransposeNoReuse(self):
   height, width = 7, 9
   images = random_ops.random_uniform((5, height, width, 3), seed=1)
   conv_layers.conv2d_transpose(images, 32, [3, 3])
   self.assertEqual(len(variables.trainable_variables()), 2)
   conv_layers.conv2d_transpose(images, 32, [3, 3])
   self.assertEqual(len(variables.trainable_variables()), 4)
Example #8
 def testFunctionalDenseTwice(self):
   inputs = random_ops.random_uniform((5, 3), seed=1)
   core_layers.dense(inputs, 2)
   vars1 = variables.trainable_variables()
   core_layers.dense(inputs, 2)
   vars2 = variables.trainable_variables()
   self.assertEqual(len(vars1), 2)
   self.assertEqual(len(vars2), 4)
Example #9
  def _CheckDecay(self, ema, actual_decay, dim):
    tens = _Repeat(10.0, dim)
    thirties = _Repeat(30.0, dim)
    var0 = variables.Variable(tens, name="v0")
    var1 = variables.Variable(thirties, name="v1")
    variables.initialize_all_variables().run()
    # Note that tensor2 is not a Variable but just a plain Tensor resulting
    # from the sum operation.
    tensor2 = var0 + var1
    update = ema.apply([var0, var1, tensor2])
    avg0 = ema.average(var0)
    avg1 = ema.average(var1)
    avg2 = ema.average(tensor2)

    self.assertFalse(avg0 in variables.trainable_variables())
    self.assertFalse(avg1 in variables.trainable_variables())
    self.assertFalse(avg2 in variables.trainable_variables())
    variables.initialize_all_variables().run()

    self.assertEqual("v0/ExponentialMovingAverage:0", avg0.name)
    self.assertEqual("v1/ExponentialMovingAverage:0", avg1.name)
    self.assertEqual("add/ExponentialMovingAverage:0", avg2.name)

    # Check initial values.
    self.assertAllClose(tens, var0.eval())
    self.assertAllClose(thirties, var1.eval())
    self.assertAllClose(_Repeat(10.0 + 30.0, dim), tensor2.eval())

    # Check that averages are initialized correctly.
    self.assertAllClose(tens, avg0.eval())
    self.assertAllClose(thirties, avg1.eval())
    # Note that averages of Tensors initialize to zeros_like since no value
    # of the Tensor is known because the Op has not been run (yet).
    self.assertAllClose(_Repeat(0.0, dim), avg2.eval())

    # Update the averages and check.
    update.run()
    dk = actual_decay

    expected = _Repeat(10.0 * dk + 10.0 * (1 - dk), dim)
    self.assertAllClose(expected, avg0.eval())
    expected = _Repeat(30.0 * dk + 30.0 * (1 - dk), dim)
    self.assertAllClose(expected, avg1.eval())
    expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk), dim)
    self.assertAllClose(expected, avg2.eval())

    # Again, update the averages and check.
    update.run()
    expected = _Repeat((10.0 * dk + 10.0 * (1 - dk)) * dk + 10.0 * (1 - dk),
                       dim)
    self.assertAllClose(expected, avg0.eval())
    expected = _Repeat((30.0 * dk + 30.0 * (1 - dk)) * dk + 30.0 * (1 - dk),
                       dim)
    self.assertAllClose(expected, avg1.eval())
    expected = _Repeat(((0.0 * dk + (10.0 + 30.0) * (1 - dk)) * dk +
                        (10.0 + 30.0) * (1 - dk)),
                       dim)
    self.assertAllClose(expected, avg2.eval())
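_CheckDecay asserts the standard exponential-moving-average recurrence avg = decay * avg + (1 - decay) * value, applied twice. A NumPy sketch with an illustrative decay of 0.25 (the test passes actual_decay in) traces the three averaged values, including the plain tensor whose average starts at zero:

import numpy as np

decay = 0.25                               # stand-in for actual_decay
values = np.array([10.0, 30.0, 40.0])      # var0, var1, tensor2 = var0 + var1
avgs = np.array([10.0, 30.0, 0.0])         # tensor averages start as zeros_like

for step in (1, 2):
    avgs = avgs * decay + values * (1.0 - decay)
    print(step, avgs)
# Each line matches expected = prev_avg * dk + value * (1 - dk) above.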
Example #10
 def testFunctionalConv2DTransposeReuseFromScope(self):
   with variable_scope.variable_scope('scope'):
     height, width = 7, 9
     images = random_ops.random_uniform((5, height, width, 3), seed=1)
     conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
     self.assertEqual(len(variables.trainable_variables()), 2)
   with variable_scope.variable_scope('scope', reuse=True):
     conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1')
     self.assertEqual(len(variables.trainable_variables()), 2)
Example #11
 def testFunctionalConv1DReuseFromScope(self):
   with variable_scope.variable_scope('scope'):
     length = 10
     data = random_ops.random_uniform((5, length, 3), seed=1)
     conv_layers.separable_conv1d(data, 32, 3, name='sepconv1')
     self.assertEqual(len(variables.trainable_variables()), 3)
   with variable_scope.variable_scope('scope', reuse=True):
     conv_layers.separable_conv1d(data, 32, 3, name='sepconv1')
     self.assertEqual(len(variables.trainable_variables()), 3)
Example #12
 def testFunctionalConv3DTransposeReuseFromScope(self):
   with variable_scope.variable_scope('scope'):
     depth, height, width = 5, 7, 9
     volumes = random_ops.random_uniform((5, depth, height, width, 32), seed=1)
     conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1')
     self.assertEqual(len(variables.trainable_variables()), 2)
   with variable_scope.variable_scope('scope', reuse=True):
     conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1')
     self.assertEqual(len(variables.trainable_variables()), 2)
Example #13
  def testTimeReversedFusedRNN(self):
    with self.test_session() as sess:
      initializer = init_ops.random_uniform_initializer(
          -0.01, 0.01, seed=19890213)
      fw_cell = core_rnn_cell_impl.BasicRNNCell(10)
      bw_cell = core_rnn_cell_impl.BasicRNNCell(10)
      batch_size = 5
      input_size = 20
      timelen = 15
      inputs = constant_op.constant(
          np.random.randn(timelen, batch_size, input_size))

      # test bi-directional rnn
      with variable_scope.variable_scope("basic", initializer=initializer):
        unpacked_inputs = array_ops.unstack(inputs)
        outputs, fw_state, bw_state = core_rnn.static_bidirectional_rnn(
            fw_cell, bw_cell, unpacked_inputs, dtype=dtypes.float64)
        packed_outputs = array_ops.stack(outputs)
        basic_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("basic/")
        ]
        sess.run([variables.global_variables_initializer()])
        basic_outputs, basic_fw_state, basic_bw_state = sess.run(
            [packed_outputs, fw_state, bw_state])
        basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
        basic_wgrads = sess.run(
            gradients_impl.gradients(packed_outputs, basic_vars))

      with variable_scope.variable_scope("fused", initializer=initializer):
        fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
            core_rnn_cell_impl.BasicRNNCell(10))
        fused_bw_cell = fused_rnn_cell.TimeReversedFusedRNN(
            fused_rnn_cell.FusedRNNCellAdaptor(
                core_rnn_cell_impl.BasicRNNCell(10)))
        fw_outputs, fw_state = fused_cell(
            inputs, dtype=dtypes.float64, scope="fw")
        bw_outputs, bw_state = fused_bw_cell(
            inputs, dtype=dtypes.float64, scope="bw")
        outputs = array_ops.concat([fw_outputs, bw_outputs], 2)
        fused_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("fused/")
        ]
        sess.run([variables.global_variables_initializer()])
        fused_outputs, fused_fw_state, fused_bw_state = sess.run(
            [outputs, fw_state, bw_state])
        fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

      self.assertAllClose(basic_outputs, fused_outputs)
      self.assertAllClose(basic_fw_state, fused_fw_state)
      self.assertAllClose(basic_bw_state, fused_bw_state)
      self.assertAllClose(basic_grads, fused_grads)
      for basic, fused in zip(basic_wgrads, fused_wgrads):
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
Example #14
 def _rnn_get_variable(self, getter, *args, **kwargs):
   variable = getter(*args, **kwargs)
   trainable = (variable in tf_variables.trainable_variables() or
                (isinstance(variable, tf_variables.PartitionedVariable) and
                 list(variable)[0] in tf_variables.trainable_variables()))
   if trainable and variable not in self._trainable_weights:
     self._trainable_weights.append(variable)
   elif not trainable and variable not in self._non_trainable_weights:
     self._non_trainable_weights.append(variable)
   return variable
Example #15
 def testFunctionalDenseTwiceReuseFromScope(self):
   with self.test_session():
     with variable_scope.variable_scope('scope'):
       inputs = random_ops.random_uniform((5, 3), seed=1)
       core_layers.dense(inputs, 2, name='my_dense')
       vars1 = variables.trainable_variables()
     with variable_scope.variable_scope('scope', reuse=True):
       core_layers.dense(inputs, 2, name='my_dense')
       vars2 = variables.trainable_variables()
     self.assertEqual(vars1, vars2)
Example #16
  def compute_gradients(self,
                        loss,
                        var_list=None,
                        gate_gradients=optimizer.Optimizer.GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    Adds rho*elastic_difference to the loss to control exploration.
    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph under
        the key `GraphKeys.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with the
        corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything other than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
    if not var_list:
      var_list = variables.trainable_variables()

    elastic_difference = [
        math_ops.subtract(v, lv)
        for v, lv in zip(variables.trainable_variables(),
                         [self._local_map[var] for var in var_list])
    ]

    distance_loss = self._rho * math_ops.add_n(
        [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])

    total_loss = loss + distance_loss
    return self._opt.compute_gradients(total_loss, var_list, gate_gradients,
                                       aggregation_method,
                                       colocate_gradients_with_ops, grad_loss)
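The elastic-averaging variant above adds rho * sum(l2_loss(v - center)) to the loss before delegating to the wrapped optimizer. Since l2_loss(x) is sum(x**2) / 2, the penalty is easy to sanity-check with NumPy (rho and the variable values below are made up for illustration):

import numpy as np

rho = 0.01
local_vars = [np.array([1.0, 2.0]), np.array([0.5])]
center_vars = [np.array([0.8, 1.5]), np.array([0.0])]

def l2_loss(x):
    return np.sum(x ** 2) / 2.0   # same definition as gen_nn_ops.l2_loss

distance_loss = rho * sum(l2_loss(v - c)
                          for v, c in zip(local_vars, center_vars))
print(distance_loss)   # 0.01 * ((0.2**2 + 0.5**2 + 0.5**2) / 2) ≈ 0.0027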
Example #17
 def _rnn_get_variable(self, getter, *args, **kwargs):
   variable = getter(*args, **kwargs)
   if context.in_graph_mode():
     trainable = (variable in tf_variables.trainable_variables() or
                  (isinstance(variable, tf_variables.PartitionedVariable) and
                   list(variable)[0] in tf_variables.trainable_variables()))
   else:
     trainable = variable._trainable  # pylint: disable=protected-access
   if trainable and variable not in self._trainable_weights:
     self._trainable_weights.append(variable)
   elif not trainable and variable not in self._non_trainable_weights:
     self._non_trainable_weights.append(variable)
   return variable
Example #18
  def testLayerBasic(self):
    num_layers = 4
    num_units = 2
    batch_size = 8
    direction = CUDNN_RNN_UNIDIRECTION
    dir_count = 1

    with vs.variable_scope("main"):
      kernel_initializer = init_ops.constant_initializer(0.)
      bias_initializer = init_ops.constant_initializer(0.)
      inputs = random_ops.random_uniform([
          num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32)

      lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                 direction=direction,
                                 kernel_initializer=kernel_initializer,
                                 bias_initializer=bias_initializer,
                                 name="awesome_lstm")

      # Build the layer
      outputs1, _ = lstm(inputs)
      # Reuse the layer
      outputs2, _ = lstm(inputs)

      total_sum1 = math_ops.reduce_sum(outputs1)
      total_sum2 = math_ops.reduce_sum(outputs2)

    with vs.variable_scope("main", reuse=True):
      lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units,
                                 direction=direction,
                                 kernel_initializer=kernel_initializer,
                                 bias_initializer=bias_initializer,
                                 name="awesome_lstm")

      # Reuse the layer
      outputs3, _ = lstm(inputs)
      total_sum3 = math_ops.reduce_sum(outputs3)

    self.assertEqual(1, len(variables.trainable_variables()))
    self.assertEqual(1, len(ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS)))
    self.assertEqual("main/awesome_lstm/opaque_kernel",
                     variables.trainable_variables()[0].op.name)

    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      (total_sum1_v, total_sum2_v, total_sum3_v) = sess.run(
          [total_sum1, total_sum2, total_sum3])
      self.assertEqual(0, total_sum1_v)
      self.assertEqual(0, total_sum2_v)
      self.assertEqual(0, total_sum3_v)
Example #19
  def testCollectionsWithScope(self):
    with self.cached_session():
      with ops.name_scope("scope_1"):
        var_x = variables.VariableV1(2.0)
      with ops.name_scope("scope_2"):
        var_y = variables.VariableV1(2.0)

      self.assertEqual([var_x, var_y], variables.global_variables())
      self.assertEqual([var_x], variables.global_variables("scope_1"))
      self.assertEqual([var_y], variables.global_variables("scope_2"))

      self.assertEqual([var_x, var_y], variables.trainable_variables())
      self.assertEqual([var_x], variables.trainable_variables("scope_1"))
      self.assertEqual([var_y], variables.trainable_variables("scope_2"))
Example #20
  def compute_gradients(self, loss, var_list=None,
                        gate_gradients=GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph
        under the key `GraphKeys.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with
        the corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything other than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
    if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                              Optimizer.GATE_GRAPH]:
      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                       gate_gradients)
    self._assert_valid_dtypes([loss])
    if grad_loss is not None:
      self._assert_valid_dtypes([grad_loss])
    if var_list is None:
      var_list = (
          variables.trainable_variables() +
          ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
    processors = [_get_processor(v) for v in var_list]
    if not var_list:
      raise ValueError("No variables to optimize.")
    var_refs = [p.target() for p in processors]
    grads = gradients.gradients(
        loss, var_refs, grad_ys=grad_loss,
        gate_gradients=(gate_gradients == Optimizer.GATE_OP),
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops)
    if gate_gradients == Optimizer.GATE_GRAPH:
      grads = control_flow_ops.tuple(grads)
    grads_and_vars = list(zip(grads, var_list))
    self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
    return grads_and_vars
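Examples #20 and #37 show the first half of minimize(); the second half is apply_gradients. A minimal end-to-end sketch with the public TF 1.x compat API (the variable, loss, and learning rate are illustrative) wires the two halves together:

import tensorflow as tf

tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.get_variable('x', initializer=3.0)
loss = tf.square(x)

opt = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.1)
# First half of minimize(): a list of (gradient, variable) pairs.
grads_and_vars = opt.compute_gradients(loss)
# Second half: apply the (possibly modified) gradients.
train_op = opt.apply_gradients(grads_and_vars)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    sess.run(train_op)
    print(sess.run(x))   # 3.0 - 0.1 * 2 * 3.0 = 2.4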
Example #21
  def test_gradients_are_computed_with_mean_reduction(self):
    with self.test_session() as session:
      tower_specs = replicate_model_fn._get_loss_towers(
          self.model_fn,
          mode=model_fn_lib.ModeKeys.EVAL,
          features=[[0.6], [1.6]],
          labels=[[0.6], [0.6]],
          params=None,
          loss_reduction=losses.Reduction.MEAN,
          config=None,
          devices=['/gpu:0', '/gpu:1'],
          local_ps_devices=['/gpu:0'],
          name_scope_pattern='test_tower_{}')
      session.run(variables.global_variables_initializer())

      self.assertEqual(len(tower_specs), 2)

      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
      self.assertEqual(0.5, session.run(tower_specs[0].loss))

      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
      # The input batch for the second tower had a loss that is 1.0
      # bigger: 0.6 vs 1.6.
      self.assertEqual(1.0, session.run(tower_specs[1].loss))

      self.assertEqual(1, len(variables.global_variables()))
      self.assertEqual(1, len(variables.trainable_variables()))

      with variable_scope.variable_scope('', reuse=True):
        c = variable_scope.get_variable('c', dtype=dtypes.float64)
        self.assertEqual(0.25, session.run(c))
Example #22
  def testWithIsRecomputeKwarg(self):

    kwarg_values = []

    @rev_block_lib.recompute_grad
    def layer_with_recompute(inputs, is_recomputing=False):
      kwarg_values.append(is_recomputing)
      out = core_layers.dense(inputs, 2)
      out = normalization_layers.batch_normalization(out, training=True)
      if is_recomputing:
        # Ensure that the updates are not duplicated by popping off the latest
        # 2 additions.
        update_ops = ops.get_collection_ref(ops.GraphKeys.UPDATE_OPS)
        update_ops.pop()
        update_ops.pop()
      return out

    x = array_ops.ones((2, 4), dtypes.float32)
    with variable_scope.variable_scope("layer1", use_resource=True):
      y = layer_with_recompute(x)
    loss = math_ops.reduce_sum(y)
    tvars = variables.trainable_variables()
    gradients_impl.gradients(loss, [x] + tvars)

    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    self.assertEqual(2, len(update_ops))
    self.assertEqual([False, True], kwarg_values)
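rev_block_lib.recompute_grad used in Example #22 is the contrib forerunner of the public tf.recompute_grad. A small TF 2.x sketch (eager mode, made-up shapes) shows the core idea, trading memory for recomputation of the wrapped block during the backward pass:

import tensorflow as tf

w = tf.Variable(tf.random.normal([4, 2]))

@tf.recompute_grad
def block(x):
    # Intermediate activations of this block are not kept; they are
    # recomputed when gradients are taken.
    return tf.nn.relu(tf.matmul(x, w))

x = tf.ones((2, 4))
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(block(x))
print(tape.gradient(loss, [w])[0].shape)   # (4, 2)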
Example #23
 def loop_fn(i):
   image = array_ops.gather(images, i)
   label = array_ops.gather(labels, i)
   logits = array_ops.reshape(model(image, training=training), [-1])
   loss = losses.softmax_cross_entropy(
       logits=logits, onehot_labels=label, reduction=losses.Reduction.NONE)
   return gradient_ops.gradients(loss, variables.trainable_variables())
Example #24
 def testFunctionalDenseInScope(self):
   with variable_scope.variable_scope('test'):
     inputs = random_ops.random_uniform((5, 3), seed=1)
     core_layers.dense(inputs, 2, name='my_dense')
     var = variables.trainable_variables()[0]
     self.assertEqual(var.name, 'test/my_dense/weights:0')
   with variable_scope.variable_scope('test1') as scope:
     inputs = random_ops.random_uniform((5, 3), seed=1)
     core_layers.dense(inputs, 2, name=scope)
     var = variables.trainable_variables()[2]
     self.assertEqual(var.name, 'test1/weights:0')
   with variable_scope.variable_scope('test2'):
     inputs = random_ops.random_uniform((5, 3), seed=1)
     core_layers.dense(inputs, 2)
     var = variables.trainable_variables()[4]
     self.assertEqual(var.name, 'test2/dense/weights:0')
Example #25
def create_lstm_per_eg_grad(batch_size, state_size, steps):
  inputs = [
      random_ops.random_normal([batch_size, state_size]) for _ in range(steps)
  ]
  cell = rnn_cell.BasicLSTMCell(state_size)
  init_state = cell.zero_state(batch_size, dtypes.float32)

  def model_fn(inps, init_state):
    state = init_state
    for inp in inps:
      _, state = cell(inp, state)
    output = nn.l2_loss(state.c)
    return gradient_ops.gradients(output, variables.trainable_variables())

  def loop_fn(i):
    loop_inputs = [
        array_ops.expand_dims(array_ops.gather(x, i), 0) for x in inputs
    ]
    loop_init_state = rnn_cell.LSTMStateTuple(
        *[array_ops.expand_dims(array_ops.gather(x, i), 0) for x in init_state])
    return model_fn(loop_inputs, loop_init_state)

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  loop_fn_dtypes = [x.dtype for x in variables.trainable_variables()]
  while_outputs = control_flow_ops.for_loop(loop_fn, loop_fn_dtypes, batch_size)
  return pfor_outputs, while_outputs
Example #26
    def apply(self, var_list=None):
        # TODO(touts): op_scope
        if var_list is None:
            var_list = variables.trainable_variables()
        for var in var_list:
            if var.dtype.base_dtype not in [dtypes.float32, dtypes.float64]:
                raise TypeError(
                    "The variables must be float or double: %s" % var)
            if var in self._averages:
                raise ValueError(
                    "Moving average already computed for: %s" % var)

            # For variables: to lower communication bandwidth across devices we keep
            # the moving averages on the same device as the variables. For other
            # tensors, we rely on the existing device allocation mechanism.
            if isinstance(var, variables.Variable):
                avg = slot_creator.create_slot(
                    var, var.initialized_value(), self._name,
                    colocate_with_primary=True)
            else:
                avg = slot_creator.create_zeros_slot(
                    var, self._name, colocate_with_primary=(var.op.type == "Variable"))
            self._averages[var] = avg

        with ops.name_scope(self._name) as scope:
            decay = self._num_updates / (self._num_updates + 1)
            updates = []
            updates.append(self._num_updates_op)
            for var in var_list:
                updates.append(assign_moving_average(
                    self._averages[var], var, decay))
            return control_flow_ops.group(*updates, name=scope)
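The apply() variant above derives its decay from an update counter, decay = num_updates / (num_updates + 1), so the average tracks the raw values closely at first and stabilizes as training proceeds. A rough NumPy sketch of that schedule (constant input value, counter incremented per step, purely illustrative):

import numpy as np

value = 5.0
avg = 0.0
for num_updates in range(1, 6):
    decay = num_updates / (num_updates + 1.0)
    avg = decay * avg + (1.0 - decay) * value
    print(num_updates, round(decay, 3), round(avg, 3))
# decay grows 0.5, 0.667, 0.75, 0.8, 0.833, so later values move the
# average less and less.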
Example #27
def _create_multi_lstm_cell_ops(batch_size, num_units, input_depth,
                                num_layers, max_time, compiled):
  with variable_scope.variable_scope(
      "root",
      initializer=init_ops.random_uniform_initializer(-0.1, 0.1, seed=2)):
    inputs = variable_scope.get_variable(
        "inputs", initializer=random_ops.random_uniform(
            (max_time, batch_size, input_depth), seed=1))
    maybe_xla = lambda c: rnn_cell.CompiledWrapper(c) if compiled else c
    cell = core_rnn_cell_impl.MultiRNNCell(
        [maybe_xla(core_rnn_cell_impl.LSTMCell(num_units))
         for _ in range(num_layers)])
    initial_state = cell.zero_state(
        batch_size=batch_size, dtype=dtypes.float32)
    outputs, final_state = rnn.dynamic_rnn(
        cell=cell, inputs=inputs, initial_state=initial_state,
        time_major=True)
    flat_final_state = nest.flatten(final_state)
    trainable_variables = variables.trainable_variables()
    outputs_grad = gradients_impl.gradients(
        [outputs],
        trainable_variables + [inputs] + nest.flatten(initial_state))
    final_state_grad = gradients_impl.gradients(
        flat_final_state,
        trainable_variables + [inputs] + nest.flatten(initial_state))

    return {"outputs": outputs,
            "final_state": flat_final_state,
            "outputs_grad": outputs_grad,
            "final_state_grad": final_state_grad}
Example #28
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)

    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    # variables_to_restore = variable_averages.variables_to_restore()
    variables_to_restore = variable_averages.variables_to_restore(variables.trainable_variables())
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
Example #29
  def testCustomGrad(self):

    def fn(a, b, c):
      return core_layers.dense(a, 10, use_bias=False) + math_ops.matmul(b, c)

    def grad_fn(inputs, trainable_variables, unused_outputs,
                unused_grad_outputs):
      grad_inputs = [
          array_ops.ones_like(t) * (i + 1.) for i, t in enumerate(inputs)
      ]
      grad_vars = [
          array_ops.ones_like(t) * (i + len(inputs) + 1.)
          for i, t in enumerate(trainable_variables)
      ]
      return grad_inputs, grad_vars

    a = random_ops.random_uniform([11, 6])
    b = random_ops.random_uniform([11, 7])
    c = random_ops.random_uniform([7, 10])
    w = random_ops.random_uniform([6, 10])
    out = rev_block_lib._fn_with_custom_grad(grad_fn)(fn)(a, b, c)
    loss = math_ops.reduce_mean(out)
    grads = gradients_impl.gradients(
        loss, [a, b, c, variables.trainable_variables()[0]])
    expected_grads = [
        array_ops.ones_like(t) * (i + 1.) for i, t in enumerate([a, b, c, w])
    ]
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      g_val, eg_val = sess.run([grads, expected_grads])
      for g1, g2 in zip(g_val, eg_val):
        self.assertAllClose(g1, g2)
Example #30
  def testReuse(self):

    def f(x):
      return core_layers.dense(x, self.CHANNELS // 2)

    def g(x):
      return core_layers.dense(x, self.CHANNELS // 2)

    x = random_ops.random_uniform(
        [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
    x1, x2 = array_ops.split(x, 2, axis=-1)

    with variable_scope.variable_scope("test"):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_before = len(variables.global_variables())

    with variable_scope.variable_scope("test", reuse=True):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_after = len(variables.global_variables())
    self.assertEqual(num_vars_before, num_vars_after)

    loss = math_ops.reduce_mean(y1 + y2)
    _ = gradients_impl.gradients(loss,
                                 [x] + variables.trainable_variables())

    with variable_scope.variable_scope("test", reuse=True):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_after = len(variables.global_variables())
    self.assertEqual(num_vars_before, num_vars_after)
Example #31
 def testFunctionalConv2DInitializerFromScope(self):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         'scope', initializer=init_ops.ones_initializer()):
       height, width = 7, 9
       images = random_ops.random_uniform((5, height, width, 3), seed=1)
       conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1')
       weights = variables.trainable_variables()
       # Check the names of weights in order.
       self.assertTrue('depthwise_kernel' in weights[0].name)
       self.assertTrue('pointwise_kernel' in weights[1].name)
       self.assertTrue('bias' in weights[2].name)
       sess.run(variables.global_variables_initializer())
       weights = sess.run(weights)
       # Check that the kernel weights got initialized to ones (from scope)
       self.assertAllClose(weights[0], np.ones((3, 3, 3, 1)))
       self.assertAllClose(weights[1], np.ones((1, 1, 3, 32)))
       # Check that the bias still got initialized to zeros.
       self.assertAllClose(weights[2], np.zeros((32)))
Example #32
def debug_grads(sess, feed_dict):
    var_list = (variables.trainable_variables() +
                ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
    print('variables')
    for v in var_list:
        print('  ', v.name)
    # get all gradients
    grads_and_vars = optimizer.compute_gradients(loss)
    # train_op = optimizer.apply_gradients(grads_and_vars)

    zipped_val = sess.run(grads_and_vars, feed_dict=feed_dict)

    for rsl, tensor in zip(zipped_val, grads_and_vars):
        print('-----------------------------------------')
        print(
            'name', tensor[0].name.replace('/tuple/control_dependency_1:0',
                                           '').replace('gradients/', ''))
        print('gradient', rsl[0])
        print('value', rsl[1])
Example #33
    def test_run_inception_multiple_outputs(self):
        """Test `run_inception` graph construction with multiple outputs."""
        batch_size = 3
        img = array_ops.ones([batch_size, 299, 299, 3])
        logits, pool = _run_with_mock(
            classifier_metrics.run_inception,
            img,
            output_tensor=[
                classifier_metrics.INCEPTION_OUTPUT,
                classifier_metrics.INCEPTION_FINAL_POOL
            ])

        self.assertTrue(isinstance(logits, ops.Tensor))
        self.assertTrue(isinstance(pool, ops.Tensor))
        logits.shape.assert_is_compatible_with([batch_size, 1001])
        pool.shape.assert_is_compatible_with([batch_size, 2048])

        # Check that none of the model variables are trainable.
        self.assertListEqual([], variables.trainable_variables())
Example #34
    def testReuse(self):
        def f(x):
            return core_layers.dense(x, self.CHANNELS // 2)

        def g(x):
            return core_layers.dense(x, self.CHANNELS // 2)

        x = random_ops.random_uniform([self.BATCH_SIZE, self.CHANNELS],
                                      dtype=dtypes.float32)
        x1, x2 = array_ops.split(x, 2, axis=-1)

        with variable_scope.variable_scope("test"):
            y1, y2 = rev_block_lib.rev_block(x1,
                                             x2,
                                             f,
                                             g,
                                             num_layers=self.NUM_LAYERS)

        num_vars_before = len(variables.global_variables())

        with variable_scope.variable_scope("test", reuse=True):
            y1, y2 = rev_block_lib.rev_block(x1,
                                             x2,
                                             f,
                                             g,
                                             num_layers=self.NUM_LAYERS)

        num_vars_after = len(variables.global_variables())
        self.assertEqual(num_vars_before, num_vars_after)

        loss = math_ops.reduce_mean(y1 + y2)
        _ = gradients_impl.gradients(loss,
                                     [x] + variables.trainable_variables())

        with variable_scope.variable_scope("test", reuse=True):
            y1, y2 = rev_block_lib.rev_block(x1,
                                             x2,
                                             f,
                                             g,
                                             num_layers=self.NUM_LAYERS)

        num_vars_after = len(variables.global_variables())
        self.assertEqual(num_vars_before, num_vars_after)
Example #35
    def get_init_op(self, task_index):
        """Returns the op to let all the local variables and local center

    variables equal to the global center variables before the training begins
    """
        init_ops = []
        local_vars = variables.trainable_variables()
        global_center_vars = [self._global_map[var] for var in local_vars]
        grad_vars = [self._grad_map[var] for var in local_vars]
        if not (local_vars and global_center_vars and grad_vars):
            raise ValueError(
                'The lists of local_variables, global_center_variables, '
                'grad_center_variables should not be empty')
        for lvar, gc_var in zip(local_vars, global_center_vars):
            init_ops.append(state_ops.assign(lvar, gc_var))
        for g in grad_vars:
            init_ops.append(state_ops.assign(g, array_ops.zeros_like(g)))
        init_op = control_flow_ops.group(*(init_ops))
        return init_op
Example #36
  def loss(self, data, labels):
    """The loss to minimize while training."""

    if self.is_regression:
      diff = self.training_inference_graph(data) - math_ops.cast(
          labels, dtypes.float32)
      mean_squared_error = math_ops.reduce_mean(diff * diff)
      root_mean_squared_error = math_ops.sqrt(mean_squared_error, name="loss")
      loss = root_mean_squared_error
    else:
      loss = math_ops.reduce_mean(
          nn_ops.sparse_softmax_cross_entropy_with_logits(
              labels=array_ops.squeeze(math_ops.cast(labels, dtypes.int32)),
              logits=self.training_inference_graph(data)),
          name="loss")
    if self.regularizer:
      loss += layers.apply_regularization(self.regularizer,
                                          variables.trainable_variables())
    return loss
Example #37
  def compute_gradients(self, loss, var_list=None, gate_gradients=GATE_OP):
    """Compute gradients of "loss" for the variables in "var_list".

    This is the first part of minimize().  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a Tensor, a
    IndexedSlices, or None if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list of variables.Variable to update to minimize
        "loss".  Defaults to the list of variables collected in the graph
        under the key GraphKeys.TRAINABLE_VARIABLES.
      gate_gradients: How to gate the computation of gradients.  Can be
        GATE_NONE, GATE_OP, or  GATE_GRAPH.

    Returns:
      A list of (gradient, variable) pairs.

    Raises:
      TypeError: If var_list contains anything other than variables.Variable.
      ValueError: If some arguments are invalid.
    """
    if gate_gradients not in [Optimizer.GATE_NONE, Optimizer.GATE_OP,
                              Optimizer.GATE_GRAPH]:
      raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                       "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                       gate_gradients)
    self._assert_valid_dtypes([loss])
    if var_list is None:
      var_list = variables.trainable_variables()
    for var in var_list:
      if not isinstance(var, variables.Variable):
        raise TypeError("Argument is not a variables.Variable: %s" % var)
    grads = gradients.gradients(
        loss, var_list, gate_gradients=(gate_gradients == Optimizer.GATE_OP))
    if gate_gradients == Optimizer.GATE_GRAPH:
      grads = control_flow_ops.tuple(grads)
    grads_and_vars = zip(grads, var_list)
    self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
    return grads_and_vars
Example #38
    def testLinear(self):
        with self.test_session() as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(1.0)):
                x = array_ops.zeros([1, 2])
                l = linear([x], 2, False)
                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run([l], {x.name: np.array([[1., 2.]])})
                self.assertAllClose(res[0], [[3.0, 3.0]])

                # Checks prevent you from accidentally creating a shared function.
                with self.assertRaises(ValueError):
                    l1 = linear([x], 2, False)

                # But you can create a new one in a new scope and share the variables.
                with variable_scope.variable_scope("l1") as new_scope:
                    l1 = linear([x], 2, False)
                with variable_scope.variable_scope(new_scope, reuse=True):
                    linear([l1], 2, False)
                self.assertEqual(len(variables_lib.trainable_variables()), 2)
Example #39
  def test_run_inception_graph_pool_output(self, use_default_graph_def):
    """Test `run_inception` graph construction with pool output."""
    batch_size = 3
    img = array_ops.ones([batch_size, 299, 299, 3])

    if use_default_graph_def:
      pool = _run_with_mock(
          classifier_metrics.run_inception,
          img,
          output_tensor=classifier_metrics.INCEPTION_FINAL_POOL)
    else:
      pool = classifier_metrics.run_inception(
          img, _get_dummy_graphdef(),
          output_tensor=classifier_metrics.INCEPTION_FINAL_POOL)

    self.assertTrue(isinstance(pool, ops.Tensor))
    pool.shape.assert_is_compatible_with([batch_size, 2048])

    # Check that none of the model variables are trainable.
    self.assertListEqual([], variables.trainable_variables())
Example #40
 def testFunctionalConv3DTransposeInitializerFromScope(self):
     with self.test_session() as sess:
         with variable_scope.variable_scope(
                 'scope', initializer=init_ops.ones_initializer()):
             depth, height, width = 5, 7, 9
             volumes = random_ops.random_uniform(
                 (5, depth, height, width, 32), seed=1)
             conv_layers.conv3d_transpose(volumes,
                                          4, [3, 3, 3],
                                          name='deconv1')
             weights = variables.trainable_variables()
             # Check the names of weights in order.
             self.assertTrue('kernel' in weights[0].name)
             self.assertTrue('bias' in weights[1].name)
             sess.run(variables.global_variables_initializer())
             weights = sess.run(weights)
             # Check that the kernel weights got initialized to ones (from scope)
             self.assertAllClose(weights[0], np.ones((3, 3, 3, 4, 32)))
             # Check that the bias still got initialized to zeros.
             self.assertAllClose(weights[1], np.zeros((4)))
Example #41
    def testDoubleCallInUniqueScope(self):
        @rev_block_lib.recompute_grad
        def layer_with_recompute(inputs):
            with variable_scope.variable_scope("inner", use_resource=True):
                return core_layers.dense(inputs, 2)

        with variable_scope.variable_scope("layer", use_resource=True):
            inputs = array_ops.ones((2, 4), dtypes.float32)

            with variable_scope.variable_scope("layer1", use_resource=True):
                out1 = layer_with_recompute(inputs)
            with variable_scope.variable_scope("layer2", use_resource=True):
                out2 = layer_with_recompute(inputs) + out1
            out = math_ops.reduce_sum(out2)

        tvars = variables.trainable_variables()
        assert len(tvars) == 4
        grads = gradients_impl.gradients(out, [inputs] + tvars)
        for grad in grads:
            self.assertIsNotNone(grad)
Example #42
 def variables_to_restore(self, moving_avg_variables=None):
     """Returns a map of names to `Variables` to restore.
     If a variable has a moving average, use the moving average variable name as
     the restore name; otherwise, use the variable name.
     For example,
     ```python
       variables_to_restore = ema.variables_to_restore()
       saver = tf.train.Saver(variables_to_restore)
     ```
     Below is an example of such mapping:
     ```
       conv/batchnorm/gamma/ExponentialMovingAverage: conv/batchnorm/gamma,
       conv_4/conv2d_params/ExponentialMovingAverage: conv_4/conv2d_params,
       global_step: global_step
     ```
     Args:
       moving_avg_variables: a list of variables for which the moving average
         variable name should be used when restoring. If None, it will default to
         variables.moving_average_variables() + variables.trainable_variables()
     Returns:
       A map from restore_names to variables. The restore_name can be the
       moving_average version of the variable name if it exists, or the original
       variable name.
     """
     name_map = {}
     if moving_avg_variables is None:
         # Include trainable variables and variables which have been explicitly
         # added to the moving_average_variables collection.
         moving_avg_variables = variables.trainable_variables()
         moving_avg_variables += variables.moving_average_variables()
     # Remove duplicates
     moving_avg_variables = set(moving_avg_variables)
     # Collect all the variables with moving average,
     for v in moving_avg_variables:
         name_map[self.average_name(v)] = v
     # Make sure we restore variables without moving averages as well.
     moving_avg_variable_names = set([v.name for v in moving_avg_variables])
     for v in list(set(variables.global_variables())):
         if v.name not in moving_avg_variable_names and v.op.name not in name_map:
             name_map[v.op.name] = v
     return name_map
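Both variables_to_restore variants here feed a name map into a Saver, which is how Example #28 restores averaged weights for evaluation. A compact sketch with the public TF 1.x compat API (the variable name, decay, and checkpoint path are placeholders):

import tensorflow as tf

tf.compat.v1.disable_eager_execution()

w = tf.compat.v1.get_variable('w', shape=[3])
ema = tf.compat.v1.train.ExponentialMovingAverage(decay=0.999)
maintain_op = ema.apply([w])   # run during training to update the shadow copy

# At eval time the map sends 'w/ExponentialMovingAverage' -> w, so restoring
# loads the averaged values into the live variable.
variables_to_restore = ema.variables_to_restore()
saver = tf.compat.v1.train.Saver(variables_to_restore)
# saver.restore(sess, '/path/to/checkpoint')   # placeholder path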
Example #43
    def variables_to_restore(self):
        """Returns a map of names to `Variables` to restore.

    If a variable has a moving average, use the moving average variable name as
    the restore name; otherwise, use the variable name.

    For example,

    ```python
      variables_to_restore = ema.variables_to_restore()
      saver = tf.train.Saver(variables_to_restore)
    ```

    Below is an example of such mapping:

    ```
      conv/batchnorm/gamma/ExponentialMovingAverage: conv/batchnorm/gamma,
      conv_4/conv2d_params/ExponentialMovingAverage: conv_4/conv2d_params,
      global_step: global_step
    ```

    Returns:
      A map from restore_names to variables. The restore_name can be the
      moving_average version of the variable name if it exists, or the original
      variable name.
    """
        name_map = {}
        # Collect all the variables with moving average, including all
        # the trainable variables and variables which have been explicitly
        # added to the collection.
        moving_avg_variables = list(
            set(variables.moving_average_variables() +
                variables.trainable_variables()))
        for v in moving_avg_variables:
            name_map[self.average_name(v)] = v
        # Make sure we restore variables without moving average as well.
        for v in list(
                set(variables.all_variables()) - set(moving_avg_variables)):
            if v.op.name not in name_map:
                name_map[v.op.name] = v
        return name_map
Example #44
    def _model_fn(features, labels, mode, config, params):
        """A Estimator `model_fn` for TPUEstimator."""
        model_fn_wrapper = _ModelFnWrapper(model_fn, config, params, mode,
                                           train_batch_size)

        # TODO(jhseu): Move to EVAL and PREDICT to TPU.
        if not use_tpu or mode != model_fn_lib.ModeKeys.TRAIN:
            return model_fn_wrapper.call_without_tpu(features, labels)

        inputs = _InputsHolder(features=features,
                               labels=labels,
                               num_shards=config.tpu_config.num_shards)

        dequeue_fn, enqueue_fn = _create_infeed_enqueue_ops_and_dequeue_fn(
            inputs, config)

        loss = _train_on_tpu_system(model_fn_wrapper, dequeue_fn)

        # Gets the variables back from TPU nodes. This means the variables updated
        # by TPU will now be *synced* to host memory.
        update_ops = [
            array_ops.check_numerics(v.read_value(),
                                     'Gradient for %s is NaN' % v.name).op
            for v in variables.trainable_variables()
        ]

        hooks = [
            TPUInfeedSessionHook(config, enqueue_fn),
            training.LoggingTensorHook(
                {
                    'loss': array_ops.identity(loss),
                    'step': training.get_global_step()
                },
                every_n_secs=30)
        ]

        return model_fn_lib.EstimatorSpec(
            mode,
            loss=array_ops.identity(loss),
            training_hooks=hooks,
            train_op=control_flow_ops.group(*update_ops))
Example #45
    def get_init_op(self, task_index):
        """Returns the op to let all the local variables and local center
    variables equal to the global center variables before the training begins"""
        def _Add_sync_queues_and_barrier(enqueue_after_list):
            """Adds ops to enqueu on all worker queues"""
            sync_queues = [
                data_flow_ops.FIFOQueue(self._num_worker, [dtypes.bool],
                                        shapes=[[]],
                                        shared_name='%s%s' %
                                        ('variable_init_sync_queue', i))
                for i in range(self._num_worker)
            ]
            queue_ops = []
            # For each other worker, add an entry in a queue
            token = constant_op.constant(False)
            with ops.control_dependencies(enqueue_after_list):
                for i, q in enumerate(sync_queues):
                    if i == task_index:
                        queue_ops.append(control_flow_ops.no_op())
                    else:
                        queue_ops.append(q.enqueue(token))
            queue_ops.append(
                sync_queues[task_index].dequeue_many(len(sync_queues) - 1))
            return control_flow_ops.group(*queue_ops)

        init_ops = []
        local_vars = variables.trainable_variables()
        global_center_vars = [self._global_map[var] for var in local_vars]
        local_center_vars = [self._local_map[var] for var in local_vars]
        if not (local_vars and global_center_vars and local_center_vars):
            raise ValueError(
                'The lists of local_variables, global_center_variables, '
                'local_center_variables should not be empty')
        for lvar, gc_var, lc_var in zip(local_vars, global_center_vars,
                                        local_center_vars):
            init_ops.append(state_ops.assign(lvar, gc_var))
            init_ops.append(state_ops.assign(lc_var, gc_var))

        init_op = control_flow_ops.group(*(init_ops))
        sync_queue_op = _Add_sync_queues_and_barrier([init_op])
        return sync_queue_op
Example #46
def create_mnist_per_eg_grad(batch_size, data_format, training):
  images = random_ops.random_uniform([batch_size, 28, 28])
  sparse_labels = np.random.randint(
      low=0, high=10, size=[batch_size]).astype(np.int32)
  labels = np.zeros((batch_size, 10)).astype(np.float32)
  labels[np.arange(batch_size), sparse_labels] = 1.
  model = Mnist(data_format)

  def loop_fn(i):
    image = array_ops.gather(images, i)
    label = array_ops.gather(labels, i)
    logits = array_ops.reshape(model(image, training=training), [-1])
    loss = losses.softmax_cross_entropy(
        logits=logits, onehot_labels=label, reduction=losses.Reduction.NONE)
    return gradient_ops.gradients(loss, variables.trainable_variables())

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  while_outputs = control_flow_ops.for_loop(
      loop_fn, [dtypes.float32] * len(variables.trainable_variables()),
      batch_size)
  return pfor_outputs, while_outputs
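Examples #23, #46, and #48 build per-example gradients by running loop_fn once per batch element through pfor. The public TF 2.x analogue is tf.vectorized_map around a GradientTape, sketched below with a tiny made-up linear model:

import tensorflow as tf

w = tf.Variable(tf.random.normal([4, 1]))
x = tf.random.normal([8, 4])     # batch of 8 examples
y = tf.random.normal([8, 1])

def per_example_grad(args):
    xi, yi = args
    with tf.GradientTape() as tape:
        loss = tf.reduce_sum(tf.square(tf.matmul(xi[None, :], w) - yi))
    return tape.gradient(loss, w)

# One gradient per example, stacked into shape [8, 4, 1].
grads = tf.vectorized_map(per_example_grad, (x, y))
print(grads.shape)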
Example #47
    def testGetterThatCreatesTwoVariablesAndSumsThem(self):
        def custom_getter(getter, name, *args, **kwargs):
            g_0 = getter("%s/0" % name, *args, **kwargs)
            g_1 = getter("%s/1" % name, *args, **kwargs)
            with ops.name_scope("custom_getter"):
                return g_0 + g_1

        with variable_scope.variable_scope("scope",
                                           custom_getter=custom_getter):
            v = variable_scope.get_variable("v", [1, 2, 3])

        self.assertEqual([1, 2, 3], v.get_shape())
        true_vars = variables_lib.trainable_variables()
        self.assertEqual(2, len(true_vars))
        self.assertEqual("scope/v/0:0", true_vars[0].name)
        self.assertEqual("scope/v/1:0", true_vars[1].name)
        self.assertEqual("custom_getter/add:0", v.name)
        with self.test_session() as sess:
            variables_lib.global_variables_initializer().run()
            np_vars, np_v = sess.run([true_vars, v])
            self.assertAllClose(np_v, sum(np_vars))
Example #48
def create_fc_per_eg_grad(batch_size, activation_size, num_layers):
  inp = random_ops.random_normal([batch_size, activation_size])
  layers = [
      tf_layers.Dense(activation_size, activation=nn.relu)
      for _ in range(num_layers)
  ]
  projection = tf_layers.Dense(1)

  def model_fn(activation):
    for layer in layers:
      activation = layer(activation)
    activation = projection(activation)
    activation = nn.l2_loss(activation)
    return gradient_ops.gradients(activation, variables.trainable_variables())

  def loop_fn(i):
    return model_fn(array_ops.expand_dims(array_ops.gather(inp, i), 0))

  pfor_outputs = control_flow_ops.pfor(loop_fn, batch_size)
  loop_fn_dtypes = [x.dtype for x in variables.trainable_variables()]
  while_outputs = control_flow_ops.for_loop(loop_fn, loop_fn_dtypes, batch_size)
  return pfor_outputs, while_outputs
Example #49
def _minimize_towers(tower_specs, optimizer):
    """Aggregate and apply gradients for computed losses."""
    grad_lists = {}
    for tower_spec in tower_specs:
        with ops_lib.device(tower_spec.loss.device):
            variables = variables_lib.trainable_variables()
            gradients = gradients_lib.gradients(tower_spec.loss, variables)

            for var, grad in zip(variables, gradients):
                if grad is not None:
                    grad_lists.setdefault(var, []).append(grad)

    aggregated_grads = []
    with ops_lib.name_scope('gradient_aggregating'):
        for var, grads in six.iteritems(grad_lists):
            grad = _compute_sum_on_device(grads, var.device)
            aggregated_grads.append((grad, var))

    train_op = optimizer.apply_gradients(
        aggregated_grads, global_step=training_util.get_global_step())

    return train_op
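
The helper _compute_sum_on_device used above is not reproduced in this listing. Assuming math_ops is imported from tensorflow.python.ops, a minimal sketch of what such a helper could look like (an illustrative assumption, not the library's actual implementation, which also has to handle IndexedSlices gradients):

def _compute_sum_on_device(values, device, name=None):
    # Sketch only: pin the summation to the target device so the aggregated
    # gradient is produced next to the variable it will eventually update.
    with ops_lib.device(device):
        return math_ops.add_n(values, name=name)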
Example #50
0
    def _RunRnn(self,
                numpy_inputs,
                numpy_slen,
                cell_name,
                variable_cache,
                is_dynamic,
                time_major=None,
                is_bidirectional=False):
        with ops.Graph().as_default() as graph:
            tf_inputs = array_ops.placeholder(dtypes.float32,
                                              shape=numpy_inputs.shape)
            tf_slen = array_ops.placeholder(dtypes.int32)
            feeds = {tf_inputs: numpy_inputs, tf_slen: numpy_slen}
            cell = self._CreateCell(cell_name)
            if is_dynamic:
                if is_bidirectional:
                    fn = rnn_lib.bidirectional_dynamic_rnn
                else:
                    fn = rnn_lib.dynamic_rnn
            else:
                if is_bidirectional:
                    fn = functional_rnn.bidirectional_functional_rnn
                else:
                    fn = functional_rnn.functional_rnn

            fetches = self._CreateRnnGraph(fn,
                                           cell,
                                           tf_inputs,
                                           tf_slen,
                                           is_bidirectional,
                                           time_major=time_major)
            with self.session(graph=graph) as sess:
                sess.run(variables.global_variables_initializer())
                # Note that cell.trainable_variables is not always set.
                self._MaybeResetVariables(variable_cache, sess,
                                          variables.trainable_variables())
                val = sess.run(fetches, feed_dict=feeds)
            graph_def = graph.as_graph_def()
            return graph_def, val
Example #51
0
    def compute_gradients(self,
                          loss,
                          var_list=None,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False):
        """"""

        # Error checking
        if gate_gradients not in [
                Optimizer.GATE_NONE, Optimizer.GATE_OP, Optimizer.GATE_GRAPH
        ]:
            raise ValueError(
                "gate_gradients must be one of: Optimizer.GATE_NONE, " +
                "Optimizer.GATE_OP, Optimizer.GATE_GRAPH. Not %s" %
                gate_gradients)
        self._assert_valid_dtypes([loss])
        if var_list is None:
            var_list = variables.trainable_variables()
        for x_tm1 in var_list:
            if not isinstance(x_tm1, variables.Variable):
                raise TypeError("Argument is not a tf.Variable: %s" % x_tm1)
        if not var_list:
            raise ValueError("No variables to optimize")

        # Compute the gradients.
        var_refs = [x_tm1.ref() for x_tm1 in var_list]
        grads = gradients.gradients(
            loss,
            var_refs,
            gate_gradients=(gate_gradients == Optimizer.GATE_OP),
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops)
        if gate_gradients == Optimizer.GATE_GRAPH:
            grads = control_flow_ops.tuple(grads)
        grads_and_vars = list(zip(grads, var_list))
        self._assert_valid_dtypes(
            [x_tm1 for g_t, x_tm1 in grads_and_vars if g_t is not None])
        return grads_and_vars
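
A typical consumer of the (gradient, variable) pairs returned by compute_gradients is apply_gradients; a hedged usage sketch, where the optimizer instance opt and the loss tensor are assumed rather than shown above, and training_util is imported as in Example #49:

# Illustrative only: pair compute_gradients with apply_gradients.
grads_and_vars = opt.compute_gradients(loss)
train_op = opt.apply_gradients(grads_and_vars,
                               global_step=training_util.get_global_step())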
Example #52
0
    def create_train_op(self, learning_rate=1.0, gradient_multiplier=1.0):
        tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
        tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

        tf_predictions = LogisticClassifier(tf_inputs)
        loss_ops.log_loss(tf_predictions, tf_labels)
        total_loss = loss_ops.get_total_loss()

        optimizer = gradient_descent.GradientDescentOptimizer(
            learning_rate=learning_rate)

        if gradient_multiplier != 1.0:
            variables = variables_lib.trainable_variables()
            gradient_multipliers = {
                var: gradient_multiplier
                for var in variables
            }
        else:
            gradient_multipliers = None

        return learning.create_train_op(
            total_loss, optimizer, gradient_multipliers=gradient_multipliers)
Example #53
0
    def __init__(self,
                 learning_rate=0.001,
                 decay=0.9,
                 epsilon=1e-10,
                 damping=0.001,
                 cov_ema_decay=0.95,
                 lrdecay=0.96,
                 decay_interval=50,
                 layer_collection=None,
                 estimation_mode='gradients',
                 colocate_gradient_with_ops=True,
                 use_locking=False,
                 name="kSGLDOpt"):
        super(kSGLDOpt, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._decay = decay
        self._epsilon = epsilon
        self._lrdecay = lrdecay
        self._decay_interval = decay_interval

        self._variables = tf_variables.trainable_variables()
        self.damping_fn = lambda: damping
        self.cov_ema_decay = cov_ema_decay
        self.layer_collection = layer_collection
        self.estimation_mode = estimation_mode
        self.colocate_gradient_with_ops = colocate_gradient_with_ops

        # Tensor versions of the constructor arguments, created in _prepare().
        self._lr_t = None
        self._decay_t = None
        self._epsilon_t = None

        self._fisher_est = est.FisherEstimator(self.damping_fn,
                                               self._variables,
                                               self.cov_ema_decay,
                                               self.layer_collection,
                                               self.estimation_mode,
                                               self.colocate_gradient_with_ops)
Example #54
0
    def testMultivariateNormalDiagNegLogLikelihood(self):
        num_draws = 50
        dims = 3
        with self.cached_session() as sess:
            x_pl = array_ops.placeholder(dtype=dtypes.float32,
                                         shape=[None, dims],
                                         name="x")
            mu_var = variable_scope.get_variable(
                name="mu",
                shape=[dims],
                dtype=dtypes.float32,
                initializer=init_ops.constant_initializer(1.))
            sess.run([variables.global_variables_initializer()])

            mvn = ds.MultivariateNormalDiag(loc=mu_var,
                                            scale_diag=array_ops.ones(
                                                shape=[dims],
                                                dtype=dtypes.float32))

            # Typically you'd use `mvn.log_prob(x_pl)` which is always at least as
            # numerically stable as `tf.log(mvn.prob(x_pl))`. However in this test
            # we're testing a bug specific to `prob` and not `log_prob`;
            # http://stackoverflow.com/q/45109305. (The underlying issue was not
            # related to `Distributions` but that `reduce_prod` didn't correctly
            # handle negative indexes.)
            neg_log_likelihood = -math_ops.reduce_sum(
                math_ops.log(mvn.prob(x_pl)))
            grad_neg_log_likelihood = gradients_impl.gradients(
                neg_log_likelihood, variables.trainable_variables())

            x = np.zeros([num_draws, dims], dtype=np.float32)
            grad_neg_log_likelihood_ = sess.run(grad_neg_log_likelihood,
                                                feed_dict={x_pl: x})
            self.assertEqual(1, len(grad_neg_log_likelihood_))
            self.assertAllClose(grad_neg_log_likelihood_[0],
                                np.tile(num_draws, dims),
                                rtol=1e-6,
                                atol=0.)
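
The expected value in the assertion above can be verified by hand: for a unit-diagonal Gaussian, the gradient of -sum_i log p(x_i) with respect to mu is sum_i (mu - x_i), which at mu = 1 and x_i = 0 equals num_draws in every dimension, i.e. np.tile(num_draws, dims). A standalone NumPy check of that arithmetic (not part of the test):

import numpy as np

num_draws, dims = 50, 3
mu = np.ones(dims, dtype=np.float32)
x = np.zeros([num_draws, dims], dtype=np.float32)
# d/dmu of -sum_i log N(x_i; mu, I) is sum_i (mu - x_i).
expected_grad = (mu - x).sum(axis=0)
assert np.allclose(expected_grad, np.tile(num_draws, dims))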
Example #55
0
    def testStochasticVariables(self):
        shape = (10, 20)
        with variable_scope.variable_scope(
                "stochastic_variables",
                custom_getter=sv.make_stochastic_variable_getter(
                    dist_cls=dist.NormalWithSoftplusScale)):
            v = variable_scope.get_variable("sv", shape)

        self.assertTrue(isinstance(v, st.StochasticTensor))
        self.assertTrue(
            isinstance(v.distribution, dist.NormalWithSoftplusScale))

        self.assertEqual(
            {"stochastic_variables/sv_loc", "stochastic_variables/sv_scale"},
            set([v.op.name for v in variables.global_variables()]))
        self.assertEqual(set(variables.trainable_variables()),
                         set(variables.global_variables()))

        v = ops.convert_to_tensor(v)
        self.assertEqual(list(shape), v.get_shape().as_list())
        with self.test_session() as sess:
            sess.run(variables.global_variables_initializer())
            self.assertEqual(shape, sess.run(v).shape)
Example #56
0
  def _generate_shared_variables(self):
    """Generate a global variable placed on ps for each trainable variable.

    This creates a new copy of each user-defined trainable variable and places
    them on ps_device. These variables store the averaged parameters.
    """
    # Only the chief should initialize the variables
    if self._is_chief:
      collections = [ops.GraphKeys.GLOBAL_VARIABLES, "global_model"]
    else:
      collections = ["global_model"]

    # Generate new global variables dependent on trainable variables.
    with ops.device(self._device_setter):
      for v in variables.trainable_variables():
        _ = variable_scope.variable(
            name="%s/%s" % (self._name, v.op.name),
            initial_value=v.initialized_value(), trainable=False,
            collections=collections)

      # Place the global step in the ps so that all the workers can see it
      self._global_step = variables.Variable(0, name="%s_global_step" %
          self._name, trainable=False)
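
The global copies created above are discoverable only through the "global_model" collection or by name. A hedged sketch of a hypothetical helper (not part of the optimizer shown here) that pairs each local trainable variable with its global copy, assuming the "%s/%s" naming scheme used above and that no extra scoping alters the op names:

  def _global_copy_map(name_prefix):
    # Hypothetical helper: match each local trainable variable to the global
    # copy generated above, keyed by op name rather than collection order.
    global_by_name = {g.op.name: g for g in ops.get_collection("global_model")}
    return {v: global_by_name["%s/%s" % (name_prefix, v.op.name)]
            for v in variables.trainable_variables()}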
Example #57
0
    def _model_fn(features, labels, mode):
        """model_fn."""
        # TODO(jhseu): Move EVAL and PREDICT to TPU.
        if mode != model_fn_lib.ModeKeys.TRAIN:
            return model_fn(features, labels, mode)

        dequeue_fn, enqueue_fn = (_create_infeed_enqueue_ops_and_dequeue_fn(
            run_config, features, labels))

        loss = _train_on_tpu_shards(run_config,
                                    train_step=_convert_model_fn_to_train_step(
                                        model_fn, dequeue_fn, mode,
                                        run_config))

        # Gets the variables back from TPU nodes. This means the variables updated
        # by TPU will now be *synced* to host memory.
        update_ops = [
            array_ops.check_numerics(v.read_value(),
                                     'Gradient for %s is NaN' % v.name).op
            for v in variables.trainable_variables()
        ]

        hooks = [
            TpuInfeedSessionHook(run_config, enqueue_fn),
            training.LoggingTensorHook(
                {
                    'loss': array_ops.identity(loss),
                    'step': training.get_global_step()
                },
                every_n_secs=30)
        ]

        return model_fn_lib.EstimatorSpec(
            mode,
            loss=array_ops.identity(loss),
            training_hooks=hooks,
            train_op=control_flow_ops.group(*update_ops))
Example #58
0
def compute_gradients_with_injected_short_circuiting(loss, var_list=None,
                                                     gate_gradients=optimizer.Optimizer.GATE_OP,
                                                     aggregation_method=None,
                                                     colocate_gradients_with_ops=False,
                                                     should_stop_queue=None,
                                                     global_step=None,
                                                     grad_loss=None):
    assert should_stop_queue is not None
    assert global_step is not None
    if gate_gradients not in [optimizer.Optimizer.GATE_NONE, optimizer.Optimizer.GATE_OP,
                              optimizer.Optimizer.GATE_GRAPH]:
        raise ValueError("gate_gradients must be one of: Optimizer.GATE_NONE, "
                         "Optimizer.GATE_OP, Optimizer.GATE_GRAPH.  Not %s" %
                         gate_gradients)
    assert_valid_dtypes([loss])
    if grad_loss is not None:
        assert_valid_dtypes([grad_loss])
    if var_list is None:
        var_list = variables.trainable_variables()
    for var in var_list:
        if not isinstance(var, variables.Variable):
            raise TypeError("Argument is not a tf.Variable: %s" % var)
    if not var_list:
        raise ValueError("No variables to optimize")
    var_refs = [v._ref() for v in var_list]
    grads = gradients.gradients_short_circuited(
        loss, var_refs, grad_ys=grad_loss,
        gate_gradients=(gate_gradients == optimizer.Optimizer.GATE_OP),
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        should_stop_queue=should_stop_queue,
        global_step=global_step)
    if gate_gradients == optimizer.Optimizer.GATE_GRAPH:
        grads = control_flow_ops.tuple(grads)
    grads_and_vars = list(zip(grads, var_list))
    assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
    return grads_and_vars
Example #59
0
    def begin(self):
        self._fed_avg_optimizer._generate_shared_variables()
        local_vars = variables.trainable_variables()
        global_vars = ops.get_collection_ref("global_model")
        self._refresh_local_vars_op = self._fed_avg_optimizer._assign_vars(
            local_vars, global_vars)
        local_and_init_vars = list(zip(local_vars, global_vars))

        self._apply_ma_op = self._fed_avg_optimizer._apply_model_average(
            local_and_init_vars, global_vars)

        if self._is_chief:
            self._local_init_op = self._fed_avg_optimizer.chief_init_op
            self._ready_for_local_init_op = (
                self._fed_avg_optimizer.ready_for_local_init_op)
            self._q_runner = self._fed_avg_optimizer.get_chief_queue_runner()
            self._init_tokens_op = self._fed_avg_optimizer.get_init_tokens_op(
                self._num_tokens)
        else:
            self._local_init_op = self._fed_avg_optimizer.local_step_init_op
            self._ready_for_local_init_op = (
                self._fed_avg_optimizer.ready_for_local_init_op)
            self._q_runner = None
            self._init_tokens_op = None
Example #60
0
    def _build_network(self, scope=None):
        with vs.variable_scope(scope, "ActorNetwork") as s:
            inputs = array_ops.placeholder(
                shape=[None, self._num_units],
                dtype=dtypes.float32, name="inputs")
            kernel = vs.get_variable(
                name="network_kernel",
                shape=[self._num_units, self._num_actions])
            bias = vs.get_variable(
                name="network_bias",
                shape=[self._num_actions])

            if self._batch_norm:
                normalized_inputs = contrib_layers.batch_norm(
                    inputs=inputs,
                    is_training=True,
                    # force the updates in place
                    # but have a speed penalty
                    updates_collections=None)
            else:
                normalized_inputs = inputs

            # for easier fetching
            normalized_inputs = array_ops.identity(
                normalized_inputs, name="normalized_inputs")

            # one layer without linearity
            outputs = math_ops.matmul(normalized_inputs, kernel)
            outputs = nn_ops.bias_add(outputs, bias, name="outputs")
            
            if self._activation is not None:
                outputs = self._activation(outputs, name="outputs_activated")

        parameters = variables.trainable_variables(s.name)
        
        return inputs, outputs, parameters, normalized_inputs