Example #1
  def testReuse(self):

    def f(x):
      return core_layers.dense(x, self.CHANNELS // 2)

    def g(x):
      return core_layers.dense(x, self.CHANNELS // 2)

    x = random_ops.random_uniform(
        [self.BATCH_SIZE, self.CHANNELS], dtype=dtypes.float32)
    x1, x2 = array_ops.split(x, 2, axis=-1)

    with variable_scope.variable_scope("test"):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_before = len(variables.global_variables())

    with variable_scope.variable_scope("test", reuse=True):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_after = len(variables.global_variables())
    self.assertEqual(num_vars_before, num_vars_after)

    loss = math_ops.reduce_mean(y1 + y2)
    _ = gradients_impl.gradients(loss,
                                 [x] + variables.trainable_variables())

    with variable_scope.variable_scope("test", reuse=True):
      y1, y2 = rev_block_lib.rev_block(x1, x2, f, g, num_layers=self.NUM_LAYERS)

    num_vars_after = len(variables.global_variables())
    self.assertEqual(num_vars_before, num_vars_after)
Example #2
 def testFunctionalReuseFromScope(self):
   inputs = variables.Variable(
       np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
   epsilon = 1e-3
   training = array_ops.placeholder(dtype='bool')
   with variable_scope.variable_scope('scope'):
     _ = normalization_layers.batch_norm(
         inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training)
     self.assertEqual(len(variables.global_variables()), 5)
   with variable_scope.variable_scope('scope', reuse=True):
     _ = normalization_layers.batch_norm(
         inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training)
     self.assertEqual(len(variables.global_variables()), 5)
Example #3
  def testCollectionsWithScope(self):
    with self.cached_session():
      with ops.name_scope("scope_1"):
        var_x = variables.VariableV1(2.0)
      with ops.name_scope("scope_2"):
        var_y = variables.VariableV1(2.0)

      self.assertEqual([var_x, var_y], variables.global_variables())
      self.assertEqual([var_x], variables.global_variables("scope_1"))
      self.assertEqual([var_y], variables.global_variables("scope_2"))

      self.assertEqual([var_x, var_y], variables.trainable_variables())
      self.assertEqual([var_x], variables.trainable_variables("scope_1"))
      self.assertEqual([var_y], variables.trainable_variables("scope_2"))
Example #4
  def testStochasticVariablesWithConstantInitializer(self):
    shape = (10, 20)
    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusSigma,
            dist_kwargs={"validate_args": True},
            param_initializers={
                "mu": np.ones(shape) * 4.,
                "sigma": np.ones(shape) * 2.
            })):
      v = variable_scope.get_variable("sv")

    for var in variables.global_variables():
      if "mu" in var.name:
        mu_var = var
      if "sigma" in var.name:
        sigma_var = var

    v = ops.convert_to_tensor(v)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertAllEqual(np.ones(shape) * 4., sess.run(mu_var))
      self.assertAllEqual(np.ones(shape) * 2., sess.run(sigma_var))
      self.assertEqual(shape, sess.run(v).shape)
Example #5
  def testWaitForSessionLocalInit(self):
    server = server_lib.Server.create_local_server()
    with ops.Graph().as_default() as graph:
      v = variables.Variable(1, name="v")
      w = variables.Variable(
          v,
          trainable=False,
          collections=[ops.GraphKeys.LOCAL_VARIABLES],
          name="w")
      sm = session_manager.SessionManager(
          graph=graph,
          ready_op=variables.report_uninitialized_variables(),
          ready_for_local_init_op=variables.report_uninitialized_variables(
              variables.global_variables()),
          local_init_op=w.initializer)

      # Initialize v but not w
      s = session_lib.Session(server.target, graph=graph)
      s.run(v.initializer)

      sess = sm.wait_for_session(server.target, max_wait_secs=3)
      self.assertEqual(
          True,
          variables.is_variable_initialized(
              sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
      self.assertEqual(
          True,
          variables.is_variable_initialized(
              sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
      self.assertEqual(1, sess.run(v))
      self.assertEqual(1, sess.run(w))
Example #6
  def testVariableReuse(self):

    def LinearWithReuse(input_tensor, reuse=None):
      size = input_tensor.shape.dims[1]
      with variable_scope.variable_scope("linear", reuse=reuse):
        w = variable_scope.get_variable(
            "w", shape=[size, size], dtype=input_tensor.dtype)
      return math_ops.matmul(input_tensor, w)

    @function.Defun(dtypes.float32)
    def Foo(inputs):
      inputs = array_ops.reshape(inputs, [32, 100])
      hidden = LinearWithReuse(inputs)
      return LinearWithReuse(hidden, reuse=True)

    input_op = array_ops.placeholder(shape=[32, 100], dtype=dtypes.float32)
    output_op = Foo(input_op)

    global_vars = variables.global_variables()
    self.assertEqual(len(global_vars), 1)
    self.assertEqual(global_vars[0].name, "linear/w:0")

    with session.Session() as sess:
      sess.run(variables.global_variables_initializer())
      output_val = sess.run(
          output_op, feed_dict={input_op: np.random.rand(32, 100)})
      self.assertEqual(output_val.shape, (32, 100))
Example #7
 def testBasicLSTMCell(self):
   for dtype in [dtypes.float16, dtypes.float32]:
     np_dtype = dtype.as_numpy_dtype
     with self.test_session(graph=ops.Graph()) as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 2], dtype=dtype)
         m = array_ops.zeros([1, 8], dtype=dtype)
         cell = rnn_cell_impl.MultiRNNCell(
             [
                 rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
                 for _ in range(2)
             ],
             state_is_tuple=False)
         self.assertEqual(cell.dtype, None)
         g, out_m = cell(x, m)
         # Layer infers the input type.
         self.assertEqual(cell.dtype, dtype.name)
         expected_variable_names = [
             "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME
         ]
         self.assertEqual(expected_variable_names,
                          [v.name for v in cell.trainable_variables])
         self.assertFalse(cell.non_trainable_variables)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run([g, out_m], {
             x.name: np.array([[1., 1.]]),
             m.name: 0.1 * np.ones([1, 8])
         })
         self.assertEqual(len(res), 2)
         variables = variables_lib.global_variables()
         self.assertEqual(expected_variable_names, [v.name for v in variables])
          # The numbers in results were not calculated; this is just a
          # smoke test.
         self.assertAllClose(res[0], np.array(
             [[0.240, 0.240]], dtype=np_dtype), 1e-2)
         expected_mem = np.array(
             [[0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240]],
             dtype=np_dtype)
         self.assertAllClose(res[1], expected_mem, 1e-2)
       with variable_scope.variable_scope(
           "other", initializer=init_ops.constant_initializer(0.5)):
         # Test BasicLSTMCell with input_size != num_units.
         x = array_ops.zeros([1, 3], dtype=dtype)
         m = array_ops.zeros([1, 4], dtype=dtype)
         g, out_m = rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)(x, m)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run(
             [g, out_m], {
                 x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
                 m.name: 0.1 * np.ones([1, 4], dtype=np_dtype)
             })
         self.assertEqual(len(res), 2)
Example #8
 def testPrepareSessionWithReadyForLocalInitOp(self):
   with ops.Graph().as_default():
     v = variables.Variable(1, name="v")
     w = variables.Variable(
         v,
         trainable=False,
         collections=[ops.GraphKeys.LOCAL_VARIABLES],
         name="w")
     with self.test_session():
       self.assertEqual(False, variables.is_variable_initialized(v).eval())
       self.assertEqual(False, variables.is_variable_initialized(w).eval())
     sm2 = session_manager.SessionManager(
         ready_op=variables.report_uninitialized_variables(),
         ready_for_local_init_op=variables.report_uninitialized_variables(
             variables.global_variables()),
         local_init_op=w.initializer)
     sess = sm2.prepare_session("", init_op=v.initializer)
     self.assertEqual(
         True,
         variables.is_variable_initialized(
             sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
     self.assertEqual(
         True,
         variables.is_variable_initialized(
             sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
      self.assertEqual(1, sess.run(v))
      self.assertEqual(1, sess.run(w))
Example #9
 def testNotInLocalVariables(self):
   with self.test_session():
     with variable_scope.variable_scope('A'):
       a = variables_lib2.model_variable('a', [5])
       self.assertTrue(a in variables_lib.global_variables())
       self.assertTrue(a in ops.get_collection(ops.GraphKeys.MODEL_VARIABLES))
       self.assertFalse(a in variables_lib.local_variables())
Example #10
  def test_gradients_are_computed_with_mean_reduction(self):
    with self.test_session() as session:
      tower_specs = replicate_model_fn._get_loss_towers(
          self.model_fn,
          mode=model_fn_lib.ModeKeys.EVAL,
          features=[[0.6], [1.6]],
          labels=[[0.6], [0.6]],
          params=None,
          loss_reduction=losses.Reduction.MEAN,
          config=None,
          devices=['/gpu:0', '/gpu:1'],
          local_ps_devices=['/gpu:0'],
          name_scope_pattern='test_tower_{}')
      session.run(variables.global_variables_initializer())

      self.assertEqual(len(tower_specs), 2)

      self.assertEqual('/device:GPU:0', tower_specs[0].loss.device)
      self.assertEqual('averaged_loss:0', tower_specs[0].loss.name)
      self.assertEqual(0.5, session.run(tower_specs[0].loss))

      self.assertEqual('/device:GPU:1', tower_specs[1].loss.device)
      self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name)
      # The input batch for the second tower had a loss that is 1.0
      # bigger: 0.6 vs 1.6.
      self.assertEqual(1.0, session.run(tower_specs[1].loss))

      self.assertEqual(1, len(variables.global_variables()))
      self.assertEqual(1, len(variables.trainable_variables()))

      with variable_scope.variable_scope('', reuse=True):
        c = variable_scope.get_variable('c', dtype=dtypes.float64)
        self.assertEqual(0.25, session.run(c))
Example #11
  def testAverages(self):
    with self.test_session() as session:
      scale = 2.
      grad = array_ops.ones([3, 4]) * scale
      log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements()))
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.5)(grads_and_vars)

      var_dict = {}
      for var in variables.global_variables():
        if var.name.startswith("AdaptiveMaxNorm"):
          var_dict[var.name.split(":")[0]] = var
      self.assertEqual(2, len(var_dict))
      moving_mean = var_dict["AdaptiveMaxNorm/mean"]
      moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"]
      variables.global_variables_initializer().run()
      mean, sq_mean = session.run([moving_mean, moving_sq_mean])
      self.assertEqual([0], mean)
      self.assertEqual([0], sq_mean)
      for i in range(20):
        mean, sq_mean, _ = session.run(
            [moving_mean, moving_sq_mean, grads_and_vars[0][0]])
        if i == 0:
          self.assertLess(mean, 0.9 * log_norm)
          self.assertLess(sq_mean, 0.9 * log_norm**2)

      self.assertAlmostEqual(float(mean), log_norm, places=4)
      self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4)
Example #12
  def testFunctionCallInDifferentVariableScopes(self):

    @function.Defun(dtypes.float32)
    def Foo(inputs):
      var = variable_scope.get_variable(
          "var",
          shape=[10],
          dtype=dtypes.float32,
          initializer=init_ops.ones_initializer())
      return inputs + var

    input_op = array_ops.placeholder(shape=[10], dtype=dtypes.float32)
    with variable_scope.variable_scope("vs1"):
      out1_op = Foo(input_op)

    with variable_scope.variable_scope("vs2"):
      out2_op = Foo(input_op)

    global_vars = variables.global_variables()
    self.assertEqual(len(global_vars), 1)
    self.assertEqual(global_vars[0].name, "vs1/var:0")

    with session.Session() as sess:
      sess.run(variables.global_variables_initializer())
      out1, out2 = sess.run(
          [out1_op, out2_op], feed_dict={input_op: np.linspace(1, 10, 10)})
      self.assertAllEqual(out1, np.linspace(2, 11, 10))
      self.assertAllEqual(out2, np.linspace(2, 11, 10))
Example #13
def _get_saver():
  """Lazy init and return saver."""
  saver = _get_first_op_from_collection(ops.GraphKeys.SAVERS)
  if saver is None and variables.global_variables():
    saver = tf_saver.Saver()
    ops.add_to_collection(ops.GraphKeys.SAVERS, saver)
  return saver
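
As a usage note, the same lazy-saver pattern can be written against the public TF1 API. The sketch below is illustrative (tf.compat.v1 names, graph mode assumed) and is not part of the original snippet:

import tensorflow.compat.v1 as tf

def get_or_create_saver():
  # Reuse a saver already cached in the SAVERS collection, if any.
  cached = tf.get_collection(tf.GraphKeys.SAVERS)
  if cached:
    return cached[0]
  # Only build a saver once the graph actually has global variables.
  if tf.global_variables():
    saver = tf.train.Saver()
    tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
    return saver
  return None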
Example #14
  def testStochasticVariablesWithCallableInitializer(self):
    shape = (10, 20)

    def sigma_init(shape, dtype, partition_info):
      _ = partition_info
      return array_ops.ones(shape, dtype=dtype) * 2.

    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusScale,
            dist_kwargs={"validate_args": True},
            param_initializers={
                "loc": np.ones(
                    shape, dtype=np.float32) * 4.,
                "scale": sigma_init
            })):
      v = variable_scope.get_variable("sv", shape)

    for var in variables.global_variables():
      if "loc" in var.name:
        mu_var = var
      if "scale" in var.name:
        sigma_var = var

    v = ops.convert_to_tensor(v)
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertAllEqual(np.ones(shape) * 4., sess.run(mu_var))
      self.assertAllEqual(np.ones(shape) * 2., sess.run(sigma_var))
      self.assertEqual(shape, sess.run(v).shape)
Example #15
  def add_meta_graph(self,
                     tags,
                     signature_def_map=None,
                     assets_collection=None,
                     legacy_init_op=None,
                     clear_devices=False,
                     main_op=None):
    """Adds the current meta graph to the SavedModel.

    Creates a Saver in the current scope and uses the Saver to export the meta
    graph def. Invoking this API requires the `add_meta_graph_and_variables()`
    API to have been invoked before.

    Args:
      tags: The set of tags to annotate the meta graph def with.
      signature_def_map: The map of signature defs to be added to the meta graph
          def.
      assets_collection: Assets collection to be saved with SavedModel. Note
          that this collection should be a subset of the assets saved as part of
          the first meta graph in the SavedModel.
      legacy_init_op: Legacy support for op or group of ops to execute after the
          restore op upon a load.
      clear_devices: Set to true if the device info on the default graph should
          be cleared.
      main_op: Op or group of ops to execute when the graph is loaded.

    Raises:
      AssertionError: If the variables for the SavedModel have not been saved
          yet.
    """
    if not self._has_saved_variables:
      raise AssertionError(
          "Graph state including variables and assets has not been saved yet. "
          "Please invoke `add_meta_graph_and_variables()` first.")

    # Validate the signature def map to ensure all included TensorInfos are
    # properly populated.
    self._validate_signature_def_map(signature_def_map)

    # Save asset files and write them to disk, if any.
    self._save_and_write_assets(assets_collection)

    if main_op is None:
      # Add legacy init op to the SavedModel.
      self._maybe_add_legacy_init_op(legacy_init_op)
    else:
      self._add_main_op(main_op)

    # Initialize a saver to generate a sharded output for all variables in the
    # current scope.
    saver = tf_saver.Saver(
        variables.global_variables(),
        sharded=True,
        write_version=saver_pb2.SaverDef.V2,
        allow_empty=True)

    meta_graph_def = saver.export_meta_graph(clear_devices=clear_devices)

    # Tag the meta graph def and add it to the SavedModel.
    self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)
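
For context, `add_meta_graph()` only works after `add_meta_graph_and_variables()` has written the variables. A minimal sketch of that pairing with the public `SavedModelBuilder` API (TF1 graph mode; the export path is a placeholder):

import tensorflow.compat.v1 as tf

graph = tf.Graph()
with graph.as_default(), tf.Session() as sess:
  v = tf.get_variable("v", shape=[], initializer=tf.zeros_initializer())
  sess.run(tf.global_variables_initializer())

  builder = tf.saved_model.builder.SavedModelBuilder("/tmp/saved_model_example")
  # First call: saves the variables and the "train"-tagged meta graph.
  builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.TRAINING])
  # Later calls may only add meta graphs; this raises if variables were not saved.
  builder.add_meta_graph([tf.saved_model.tag_constants.SERVING])
  builder.save()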
Example #16
  def add_variable(self, name, shape, dtype=None,
                   initializer=None, regularizer=None, trainable=True):
    """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).

    Returns:
      The created variable.
    """
    if dtype is None:
      dtype = self.dtype
    existing_variables = set(tf_variables.global_variables())

    self._set_scope(None)

    with vs.variable_scope(self._scope,
                           reuse=self.built or self._reuse) as scope:
      with ops.name_scope(scope.original_name_scope):
        variable = vs.get_variable(name,
                                   shape=shape,
                                   initializer=initializer,
                                   dtype=dtypes.as_dtype(dtype),
                                   trainable=trainable and self.trainable)
        if variable in existing_variables:
          return variable
        if regularizer:
          # To match the behavior of tf.get_variable(), we only
          # apply regularization if the variable is newly created.
          if isinstance(variable, tf_variables.PartitionedVariable):
            for v in variable:
              with ops.colocate_with(v.op):
                with ops.name_scope(name + '/Regularizer'):
                  regularization = regularizer(v)
              if regularization is not None:
                self.add_loss(regularization)
                _add_elements_to_collection(
                    regularization, ops.GraphKeys.REGULARIZATION_LOSSES)
          else:
            with ops.colocate_with(variable.op):
              with ops.name_scope(name + '/Regularizer'):
                regularization = regularizer(variable)
            if regularization is not None:
              self.add_loss(regularization)
              _add_elements_to_collection(
                  regularization, ops.GraphKeys.REGULARIZATION_LOSSES)
    if trainable:
      self._trainable_weights.append(variable)
    else:
      self._non_trainable_weights.append(variable)
    return variable
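
As a usage sketch, a custom layer built on this TF1 layers base class would typically call `add_variable` from its `build()` method. The class below is illustrative and not taken from the snippet:

import tensorflow.compat.v1 as tf

class MyDense(tf.layers.Layer):
  """Toy dense layer that registers its kernel via add_variable."""

  def __init__(self, units, **kwargs):
    super(MyDense, self).__init__(**kwargs)
    self.units = units

  def build(self, input_shape):
    # Registered through add_variable so it lands in trainable_variables.
    self.kernel = self.add_variable(
        "kernel",
        shape=[int(input_shape[-1]), self.units],
        initializer=tf.glorot_uniform_initializer())

  def call(self, inputs):
    return tf.matmul(inputs, self.kernel)

# Example use in graph mode:
#   y = MyDense(4)(tf.placeholder(tf.float32, [None, 3]))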
Example #17
 def DISABLED_testShared(self):
   with self.test_session():
     with specs.ops:
       # pylint: disable=undefined-variable
       f = Shared(Fr(100))
       g = f | f | f | f
     inputs = constant_op.constant(_rand(10, 100))
     _ = g.funcall(inputs)
     self.assertEqual(len(variables.global_variables()), 2)
Example #18
    def _add_variable(
        self,
        name,
        shape,
        dtype=None,
        initializer=None,
        regularizer=None,
        trainable=True,
        variable_getter=vs.get_variable,
    ):
        """Adds a new variable to the layer.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
      variable_getter: The getter to use for TensorFlow variables.

    Returns:
      The created variable.
    """
        if dtype is None:
            dtype = self.dtype
        existing_variables = set(tf_variables.global_variables())
        variable = variable_getter(
            name, shape=shape, initializer=initializer, dtype=dtype, trainable=trainable and self.trainable
        )
        # TODO(sguada) fix name = variable.op.name
        if regularizer:
            if not self._reuse and variable not in existing_variables:
                # To match the behavior of tf.get_variable(), we only
                # apply regularization if the variable is newly created.
                if isinstance(variable, tf_variables.PartitionedVariable):
                    for v in variable:
                        with ops.colocate_with(v.op):
                            with ops.name_scope(name + "/Regularizer"):
                                regularization = regularizer(v)
                        if regularization is not None:
                            self._losses.append(regularization)
                            _add_elements_to_collection(regularization, ops.GraphKeys.REGULARIZATION_LOSSES)
                else:
                    with ops.colocate_with(variable.op):
                        with ops.name_scope(name + "/Regularizer"):
                            regularization = regularizer(variable)
                    if regularization is not None:
                        self._losses.append(regularization)
                        _add_elements_to_collection(regularization, ops.GraphKeys.REGULARIZATION_LOSSES)
        if trainable:
            self._trainable_variables.append(variable)
        else:
            self._non_trainable_variables.append(variable)
        return variable
Example #19
def _get_variable_for(v):
  """Returns the ResourceVariable responsible for v, or v if not necessary."""
  if v.op.type == "ResourceGather":
    for var in variables.global_variables() + variables.local_variables():
      if (isinstance(var, resource_variable_ops.ResourceVariable)
          and var.handle is v.op.inputs[0]):
        return var
    raise ValueError("Got embedding lookup %s but"
                     " could not locate source variable." % (str(v)))
  return v
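
The case this helper targets is a sparse read from a ResourceVariable (for example an embedding lookup), which shows up in the graph as a `ResourceGather` op rather than the variable itself. A hedged illustration in TF1 graph mode (the names here are made up):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

emb = tf.get_variable("emb", shape=[10, 4], use_resource=True)
looked_up = tf.nn.embedding_lookup(emb, [1, 2, 3])
# For a resource variable the lookup is expected to be a ResourceGather op,
# so a helper like _get_variable_for(looked_up) can match
# looked_up.op.inputs[0] against emb.handle and return `emb`.
print(looked_up.op.type)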
Example #20
 def _get_variable(var_name, part_name, ema):
   """Returns variable of it's moving average by name."""
   matches = [
       v for v in variables.global_variables()
       if ((var_name in v.op.name)
           and (part_name in v.op.name)
           and (('ExponentialMovingAverage' in v.op.name) == ema))
   ]
   self.assertEqual(len(matches), 1)
   return matches[0]
Example #21
 def testInitWithNoneLocalInitOpError(self):
   # Creating a SessionManager with a None local_init_op but
   # non-None ready_for_local_init_op raises ValueError
   with self.assertRaisesRegexp(ValueError,
                                "If you pass a ready_for_local_init_op "
                                "you must also pass a local_init_op "):
     session_manager.SessionManager(
         ready_for_local_init_op=variables.report_uninitialized_variables(
             variables.global_variables()),
         local_init_op=None)
Example #22
def _any_variable_initialized():
  """Check if any variable has been initialized in the Keras model.

  Returns:
    boolean, True if at least one variable has been initialized, else False.
  """
  variables = variables_module.global_variables()
  for v in variables:
    if getattr(v, '_keras_initialized', False):
      return True
  return False
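
The check above keys off a `_keras_initialized` attribute that the Keras TF1 backend sets on variables it has initialized. A rough illustration (graph mode; setting the attribute by hand here is only to demonstrate the flag, not something user code normally does):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

v = tf.Variable(0.0, name="v")
print(_any_variable_initialized())   # False: nothing carries the marker yet
v._keras_initialized = True          # normally done by the Keras backend
print(_any_variable_initialized())   # True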
Example #23
def _assert_variables(test_case,
                      expected_global=None,
                      expected_model=None,
                      expected_trainable=None):
  test_case.assertItemsEqual([] if expected_global is None else expected_global,
                             [k.name for k in variables.global_variables()])
  test_case.assertItemsEqual([] if expected_model is None else expected_model,
                             [k.name for k in variables.model_variables()])
  test_case.assertItemsEqual([] if expected_trainable is None else
                             expected_trainable,
                             [k.name for k in variables.trainable_variables()])
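
A hypothetical call from inside a `tf.test.TestCase`, showing the kind of name lists the helper compares (the scope and variable names are made up for the illustration):

with variable_scope.variable_scope("layer"):
  variables.VariableV1(1.0, name="w")   # global + trainable, not a model variable
_assert_variables(
    self,
    expected_global=["layer/w:0"],
    expected_model=None,                # i.e. expect MODEL_VARIABLES to be empty
    expected_trainable=["layer/w:0"])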
Example #24
 def testBasicLSTMCell(self):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([1, 2])
       m = array_ops.zeros([1, 8])
       cell = rnn_cell_impl.MultiRNNCell(
           [
               rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
               for _ in range(2)
           ],
           state_is_tuple=False)
       g, out_m = cell(x, m)
       expected_variable_names = [
           "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
           rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
           "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
           rnn_cell_impl._BIAS_VARIABLE_NAME,
           "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
           rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
           "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
           rnn_cell_impl._BIAS_VARIABLE_NAME
       ]
       self.assertEqual(
           expected_variable_names, [v.name for v in cell.trainable_variables])
       self.assertFalse(cell.non_trainable_variables)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run(
           [g, out_m],
           {x.name: np.array([[1., 1.]]),
            m.name: 0.1 * np.ones([1, 8])})
       self.assertEqual(len(res), 2)
       variables = variables_lib.global_variables()
       self.assertEqual(expected_variable_names, [v.name for v in variables])
        # The numbers in results were not calculated; this is just a smoke test.
       self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
       expected_mem = np.array([[
           0.68967271, 0.68967271, 0.44848421, 0.44848421, 0.39897051,
           0.39897051, 0.24024698, 0.24024698
       ]])
       self.assertAllClose(res[1], expected_mem)
     with variable_scope.variable_scope(
         "other", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros(
           [1, 3])  # Test BasicLSTMCell with input_size != num_units.
       m = array_ops.zeros([1, 4])
       g, out_m = rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)(x, m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run(
           [g, out_m],
           {x.name: np.array([[1., 1., 1.]]),
            m.name: 0.1 * np.ones([1, 4])})
       self.assertEqual(len(res), 2)
Example #25
def _get_saver():
  """Lazy init and return saver."""
  saver = _get_first_op_from_collection(ops.GraphKeys.SAVERS)
  if saver is not None:
    if saver:
      saver = saver[0]
    else:
      saver = None
  if saver is None and variables.global_variables():
    saver = tf_saver.Saver(write_version=saver_pb2.SaverDef.V1)
    ops.add_to_collection(ops.GraphKeys.SAVERS, saver)
  return saver
Example #26
  def run(self,
          num_batches=None,
          graph=None,
          session=None,
          start_queues=True,
          initialize_variables=True,
          **kwargs):
    """Builds and runs the columns of the `DataFrame` and yields batches.

    This is a generator that yields a dictionary mapping column names to
    evaluated columns.

    Args:
      num_batches: the maximum number of batches to produce. If not specified,
        the generator yields batches indefinitely.
      graph: the `Graph` in which the `DataFrame` should be built.
      session: the `Session` in which to run the columns of the `DataFrame`.
      start_queues: if true, queues will be started before running and halted
        after producing `n` batches.
      initialize_variables: if true, variables will be initialized.
      **kwargs: Additional keyword arguments e.g. `num_epochs`.

    Yields:
      A dictionary, mapping column names to the values resulting from running
      each column for a single batch.
    """
    if graph is None:
      graph = ops.get_default_graph()
    with graph.as_default():
      if session is None:
        session = sess.Session()
      self_built = self.build(**kwargs)
      keys = list(self_built.keys())
      cols = list(self_built.values())
      if initialize_variables:
        if variables.local_variables():
          session.run(variables.local_variables_initializer())
        if variables.global_variables():
          session.run(variables.global_variables_initializer())
      if start_queues:
        coord = coordinator.Coordinator()
        threads = qr.start_queue_runners(sess=session, coord=coord)
      i = 0
      while num_batches is None or i < num_batches:
        i += 1
        try:
          values = session.run(cols)
          yield collections.OrderedDict(zip(keys, values))
        except errors.OutOfRangeError:
          break
      if start_queues:
        coord.request_stop()
        coord.join(threads)
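
Given the docstring, a caller would consume the generator roughly as follows (a sketch; `df` stands for an already-constructed `DataFrame` from this contrib module):

for batch in df.run(num_batches=2, start_queues=True, initialize_variables=True):
  # Each `batch` maps column names to the values produced for one batch.
  for name, value in batch.items():
    print(name, getattr(value, "shape", None))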
Example #27
  def testRecoverSessionWithReadyForLocalInitOp(self):
    # Create a checkpoint.
    checkpoint_dir = os.path.join(self.get_temp_dir(),
                                  "recover_session_ready_for_local_init")
    try:
      gfile.DeleteRecursively(checkpoint_dir)
    except errors.OpError:
      pass  # Ignore
    gfile.MakeDirs(checkpoint_dir)

    with ops.Graph().as_default():
      v = variables.Variable(1, name="v")
      sm = session_manager.SessionManager(
          ready_op=variables.report_uninitialized_variables())
      saver = saver_lib.Saver({"v": v})
      sess, initialized = sm.recover_session(
          "", saver=saver, checkpoint_dir=checkpoint_dir)
      self.assertFalse(initialized)
      sess.run(v.initializer)
      self.assertEqual(1, sess.run(v))
      saver.save(sess,
                 os.path.join(checkpoint_dir, "recover_session_checkpoint"))
    # Create a new Graph and SessionManager and recover.
    with ops.Graph().as_default():
      v = variables.Variable(2, name="v")
      w = variables.Variable(
          v,
          trainable=False,
          collections=[ops.GraphKeys.LOCAL_VARIABLES],
          name="w")
      with self.cached_session():
        self.assertEqual(False, variables.is_variable_initialized(v).eval())
        self.assertEqual(False, variables.is_variable_initialized(w).eval())
      sm2 = session_manager.SessionManager(
          ready_op=variables.report_uninitialized_variables(),
          ready_for_local_init_op=variables.report_uninitialized_variables(
              variables.global_variables()),
          local_init_op=w.initializer)
      saver = saver_lib.Saver({"v": v})
      sess, initialized = sm2.recover_session(
          "", saver=saver, checkpoint_dir=checkpoint_dir)
      self.assertTrue(initialized)
      self.assertEqual(
          True,
          variables.is_variable_initialized(
              sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
      self.assertEqual(
          True,
          variables.is_variable_initialized(
              sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
      self.assertEqual(1, sess.run(v))
      self.assertEqual(1, sess.run(w))
Example #28
  def testStochasticVariables(self):
    shape = (10, 20)
    with variable_scope.variable_scope(
        "stochastic_variables",
        custom_getter=sv.make_stochastic_variable_getter(
            dist_cls=dist.NormalWithSoftplusScale)):
      v = variable_scope.get_variable("sv", shape)

    self.assertTrue(isinstance(v, st.StochasticTensor))
    self.assertTrue(isinstance(v.distribution, dist.NormalWithSoftplusScale))

    self.assertEqual(
        {"stochastic_variables/sv_loc", "stochastic_variables/sv_scale"},
        set([v.op.name for v in variables.global_variables()]))
    self.assertEqual(
        set(variables.trainable_variables()), set(variables.global_variables()))

    v = ops.convert_to_tensor(v)
    self.assertEqual(list(shape), v.get_shape().as_list())
    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      self.assertEqual(shape, sess.run(v).shape)
Example #29
  def _init_saver(self, saver=USE_DEFAULT):
    """Initializes saver.

    Args:
      saver: A `Saver` object. If set to USE_DEFAULT, create one that
        saves all the variables.
    """
    if saver is Supervisor.USE_DEFAULT:
      saver = self._get_first_op_from_collection(ops.GraphKeys.SAVERS)
      if saver is None and variables.global_variables():
        saver = saver_mod.Saver()
        ops.add_to_collection(ops.GraphKeys.SAVERS, saver)
    self._saver = saver
Example #30
  def testFunctionalNoReuse(self):
    inputs = variables.Variable(
        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
    epsilon = 1e-3
    training = array_ops.placeholder(dtype='bool')
    outputs = normalization_layers.batch_norm(
        inputs,
        axis=-1,
        momentum=0.9,
        epsilon=epsilon,
        training=training,
        name='bn')

    updates = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    all_vars = dict([(v.name, v) for v in variables.global_variables()])
    moving_mean = all_vars['bn/moving_mean:0']
    moving_variance = all_vars['bn/moving_variance:0']
    beta = all_vars['bn/beta:0']
    gamma = all_vars['bn/gamma:0']

    with self.test_session() as sess:
      # Test training with placeholder learning phase.
      sess.run(variables.global_variables_initializer())
      for _ in range(100):
        np_output, _, _ = sess.run([outputs] + updates,
                                   feed_dict={training: True})

      # Verify that the statistics are updated during training.
      np_moving_mean, np_moving_var = sess.run([moving_mean, moving_variance])
      np_inputs = sess.run(inputs)
      np_mean = np.mean(np_inputs, axis=(0, 1, 2))
      np_std = np.std(np_inputs, axis=(0, 1, 2))
      np_variance = np.square(np_std)
      self.assertAllClose(np_mean, np_moving_mean, atol=1e-2)
      self.assertAllClose(np_variance, np_moving_var, atol=1e-2)

      # Verify that the axis is normalized during training.
      np_gamma, np_beta = sess.run([gamma, beta])
      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

      # Test inference with placeholder learning phase.
      np_output = sess.run(outputs, feed_dict={training: False})

      # Verify that the axis is normalized during inference.
      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
Example #31
    def _add_variable(self,
                      name,
                      shape,
                      dtype=None,
                      initializer=None,
                      regularizer=None,
                      trainable=True,
                      variable_getter=vs.get_variable):
        """Adds a new variable to the layer.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self._dtype`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
      variable_getter: The getter to use for TensorFlow variables.

    Returns:
      The created variable.
    """
        if dtype is None:
            dtype = self._dtype
        existing_variables = set(tf_variables.global_variables())
        variable = variable_getter(name,
                                   shape=shape,
                                   initializer=initializer,
                                   dtype=dtype,
                                   trainable=trainable and self.trainable)
        # TODO(sguada) fix name = variable.op.name
        if regularizer:
            if not self._reuse and variable not in existing_variables:
                # To match the behavior of tf.get_variable(), we only
                # apply regularization if the variable is newly created.
                if isinstance(variable, tf_variables.PartitionedVariable):
                    for v in variable:
                        with ops.colocate_with(v.op):
                            with ops.name_scope(name + '/Regularizer'):
                                regularization = regularizer(v)
                        if regularization is not None:
                            self._losses.append(regularization)
                            _add_elements_to_collection(
                                regularization,
                                ops.GraphKeys.REGULARIZATION_LOSSES)
                else:
                    with ops.colocate_with(variable.op):
                        with ops.name_scope(name + '/Regularizer'):
                            regularization = regularizer(variable)
                    if regularization is not None:
                        self._losses.append(regularization)
                        _add_elements_to_collection(
                            regularization,
                            ops.GraphKeys.REGULARIZATION_LOSSES)
        if trainable:
            self._trainable_variables.append(variable)
        else:
            self._non_trainable_variables.append(variable)
        return variable
Example #32
    def __init__(self, model_export_spec):
        """创建一个模型导出器

        参数:
          - model_export_spec:ModelExportSpec 对象。
        """
        self._export_dir = model_export_spec.export_dir
        saved_model_builder.SavedModelBuilder.__init__(
            self, model_export_spec.export_dir)

        # Create the variables sub-directory, if it does not exist.
        variables_dir = os.path.join(
            compat.as_text(model_export_spec.export_dir),
            compat.as_text(constants.VARIABLES_DIRECTORY))
        if not file_io.file_exists(variables_dir):
            file_io.recursive_create_dir(variables_dir)

        self._variables_path = os.path.join(
            compat.as_text(variables_dir),
            compat.as_text(constants.VARIABLES_FILENAME))

        # Set up the signature's input and output tensor specifications.
        signature_inputs = {}
        for (alias_name,
             input_tensor) in model_export_spec.input_tensors.items():
            input_tensor_info = meta_graph_pb2.TensorInfo()
            input_tensor_info.name = input_tensor.name
            signature_inputs[alias_name] = input_tensor_info

        signature_outputs = {}
        for (alias_name,
             output_tensor) in model_export_spec.output_tensors.items():
            output_tensor_info = meta_graph_pb2.TensorInfo()
            output_tensor_info.name = output_tensor.name
            signature_outputs[alias_name] = output_tensor_info

        signature_def = signature_def_utils.build_signature_def(
            signature_inputs, signature_outputs,
            caicloud_constants.MODEL_METHOD_NAME)
        signature_def_map = {
            caicloud_constants.MODEL_METHOD_NAME: signature_def
        }

        # Save asset files and write them to disk, if any.
        self._save_and_write_assets(model_export_spec.assets_collection)

        if model_export_spec.main_op is None:
            # Add legacy init op to the SavedModel.
            self._maybe_add_legacy_init_op(model_export_spec.legacy_init_op)
        else:
            self._add_main_op(model_export_spec.main_op)

        # Initialize a saver for all variables in the current scope
        # (unsharded output).
        self._saver = tf_saver.Saver(variables.global_variables(),
                                     sharded=False,
                                     write_version=saver_pb2.SaverDef.V2)

        # Export the meta graph def.
        meta_graph_def = self._saver.export_meta_graph(clear_devices=True)

        # Tag the meta graph def and add it to the SavedModel.
        self._tag_and_add_meta_graph(meta_graph_def,
                                     [caicloud_constants.MODEL_TAG],
                                     signature_def_map)
Example #33
    def export_fn(estimator,
                  export_dir_base,
                  checkpoint_path=None,
                  eval_result=None):
        with ops.Graph().as_default() as g:
            contrib_variables.create_global_step(g)

            input_ops = serving_from_csv_input(train_config, args, keep_target)
            model_fn_ops = estimator._call_model_fn(
                input_ops.features, None, model_fn_lib.ModeKeys.INFER)
            output_fetch_tensors = make_output_tensors(
                train_config=train_config,
                args=args,
                input_ops=input_ops,
                model_fn_ops=model_fn_ops,
                keep_target=keep_target)

            signature_def_map = {
                'serving_default':
                signature_def_utils.predict_signature_def(
                    input_ops.default_inputs, output_fetch_tensors)
            }

            if not checkpoint_path:
                # Locate the latest checkpoint
                checkpoint_path = saver.latest_checkpoint(estimator._model_dir)
            if not checkpoint_path:
                raise NotFittedError("Couldn't find trained model at %s." %
                                     estimator._model_dir)

            export_dir = saved_model_export_utils.get_timestamped_export_dir(
                export_dir_base)

            with tf_session.Session('') as session:
                # variables.initialize_local_variables()
                variables.local_variables_initializer()
                data_flow_ops.tables_initializer()
                saver_for_restore = saver.Saver(variables.global_variables(),
                                                sharded=True)
                saver_for_restore.restore(session, checkpoint_path)

                init_op = control_flow_ops.group(
                    variables.local_variables_initializer(),
                    data_flow_ops.tables_initializer())

                # Perform the export
                builder = saved_model_builder.SavedModelBuilder(export_dir)
                builder.add_meta_graph_and_variables(
                    session, [tag_constants.SERVING],
                    signature_def_map=signature_def_map,
                    assets_collection=ops.get_collection(
                        ops.GraphKeys.ASSET_FILEPATHS),
                    legacy_init_op=init_op)
                builder.save(False)

            # Add the extra assets
            if assets_extra:
                assets_extra_path = os.path.join(
                    compat.as_bytes(export_dir),
                    compat.as_bytes('assets.extra'))
                for dest_relative, source in assets_extra.items():
                    dest_absolute = os.path.join(
                        compat.as_bytes(assets_extra_path),
                        compat.as_bytes(dest_relative))
                    dest_path = os.path.dirname(dest_absolute)
                    gfile.MakeDirs(dest_path)
                    gfile.Copy(source, dest_absolute)

        # only keep the last 3 models
        saved_model_export_utils.garbage_collect_exports(
            python_portable_string(export_dir_base), exports_to_keep=3)

        # save the last model to the model folder.
        # export_dir_base = A/B/intermediate_models/
        if keep_target:
            final_dir = os.path.join(args.job_dir, 'evaluation_model')
        else:
            final_dir = os.path.join(args.job_dir, 'model')
        if file_io.is_directory(final_dir):
            file_io.delete_recursively(final_dir)
        file_io.recursive_create_dir(final_dir)
        _recursive_copy(export_dir, final_dir)

        return export_dir
Example #34
  def add_weight(self,
                 name,
                 shape,
                 dtype=None,
                 initializer=None,
                 regularizer=None,
                 trainable=None,
                 constraint=None,
                 use_resource=None,
                 synchronization=vs.VariableSynchronization.AUTO,
                 aggregation=vs.VariableAggregation.NONE,
                 partitioner=None,
                 **kwargs):
    """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable. `trainable` defaults to `True` unless
        `synchronization` is set to `ON_READ`.
      constraint: constraint instance (callable).
      use_resource: Whether to use `ResourceVariable`.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses
        when to synchronize. If `synchronization` is set to `ON_READ`,
        `trainable` must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: (optional) partitioner instance (callable).  If
        provided, when the requested variable is created it will be split
        into multiple partitions according to `partitioner`.  In this case,
        an instance of `PartitionedVariable` is returned.  Available
        partitioners include `tf.compat.v1.fixed_size_partitioner` and
        `tf.compat.v1.variable_axis_size_partitioner`.  For more details, see
        the documentation of `tf.compat.v1.get_variable` and the  "Variable
        Partitioners and Sharding" section of the API guide.
      **kwargs: Additional keyword arguments.

    Returns:
      The created variable.  Usually either a `Variable` or `ResourceVariable`
      instance.  If `partitioner` is not `None`, a `PartitionedVariable`
      instance is returned.

    Raises:
      RuntimeError: If called with partitioned variable regularization and
        eager execution is enabled.
      ValueError: When trainable has been set to True with synchronization
        set as `ON_READ`.
    """
    for kwarg in kwargs:
      if kwarg != 'experimental_autocast':
        raise TypeError('Unknown keyword argument:', kwarg)
    if self._keras_style:
      return super(Layer, self).add_weight(
          name=name,
          shape=shape,
          dtype=dtype,
          initializer=initializer,
          regularizer=regularizer,
          trainable=trainable and self.trainable,
          constraint=constraint,
          use_resource=use_resource,
          synchronization=vs.VariableSynchronization.AUTO,
          aggregation=vs.VariableAggregation.NONE,
          partitioner=partitioner,
          **kwargs)

    if synchronization == vs.VariableSynchronization.ON_READ:
      if trainable:
        raise ValueError(
            'Synchronization value can be set to '
            'VariableSynchronization.ON_READ only for non-trainable variables. '
            'You have specified trainable=True and '
            'synchronization=VariableSynchronization.ON_READ.')
      else:
        # Set trainable to be false when variable is to be synced on read.
        trainable = False
    elif trainable is None:
      trainable = True

    def _should_add_regularizer(variable, existing_variable_set):
      if base_layer_utils.is_split_variable(variable):
        for var in variable:
          if var in existing_variable_set:
            return False
        return True
      else:
        return variable not in existing_variable_set

    init_graph = None
    if not context.executing_eagerly():
      default_graph = ops.get_default_graph()
      if default_graph.building_function:
        with ops.init_scope():
          # Retrieve the variables from the graph into which variables
          # will be lifted; if initialization ops will be lifted into
          # the eager context, then there is nothing to retrieve, since variable
          # collections are not supported when eager execution is enabled.
          if not context.executing_eagerly():
            init_graph = ops.get_default_graph()
            existing_variables = set(tf_variables.global_variables())
      else:
        # Initialization ops will not be lifted out of the default graph.
        init_graph = default_graph
        existing_variables = set(tf_variables.global_variables())

    if dtype is None:
      dtype = self.dtype or dtypes.float32

    self._set_scope(None)
    reuse = self.built or self._reuse
    prev_len_trainable = len(self._trainable_weights)
    with vs.variable_scope(
        self._scope, reuse=reuse, auxiliary_name_scope=False) as scope:
      self._current_scope = scope
      with backend.name_scope(self._name_scope()):
        use_resource = (use_resource or
                        self._use_resource_variables or
                        scope.use_resource)
        if initializer is None:
          initializer = scope.initializer
        variable = super(Layer, self).add_weight(
            name,
            shape,
            dtype=dtypes.as_dtype(dtype),
            initializer=initializer,
            trainable=trainable and self.trainable,
            constraint=constraint,
            partitioner=partitioner,
            use_resource=use_resource,
            synchronization=synchronization,
            aggregation=aggregation,
            getter=vs.get_variable,
            **kwargs)

        if regularizer:
          if (ops.executing_eagerly_outside_functions()
              or _should_add_regularizer(variable, existing_variables)):
            self._handle_weight_regularization(name, variable, regularizer)

        if init_graph is not None:
          # Handle edge case where a custom getter has overridden `trainable`.
          # There is one known occurrence of this, in unit test
          # testBasicRNNCellNotTrainable in
          # contrib.rnn.python.kernel_tests.core_rnn_cell_test
          with init_graph.as_default():
            trainable_variables = tf_variables.trainable_variables()
          if (trainable and self.trainable and
              variable not in trainable_variables):
            # A custom getter / variable scope overrode the trainable flag.
            extra_trainable_vars = self._trainable_weights[prev_len_trainable:]
            self._trainable_weights = self._trainable_weights[
                :prev_len_trainable]
            self._non_trainable_weights += extra_trainable_vars
    return variable
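
One constraint worth noting from the code above: asking for a trainable variable with `ON_READ` synchronization is rejected. A quick hypothetical call against a layer instance (`layer` is made up) would surface it like this:

try:
  layer.add_weight(
      "acc", shape=[], trainable=True,
      synchronization=vs.VariableSynchronization.ON_READ)
except ValueError as e:
  print(e)  # trainable=True is incompatible with ON_READ synchronization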
Example #35
  def testDerivativeOfBlockGRUToGRUCellSingleStep(self):
    with self.session(use_gpu=True, graph=ops.Graph()) as sess:
      batch_size = 2
      cell_size = 3
      input_size = 4

      seed = 1994
      initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed)
      np.random.seed(seed)

      # Inputs
      x = array_ops.zeros([batch_size, input_size])
      h = array_ops.zeros([batch_size, cell_size])

      # Values for the inputs.
      x_value = np.random.rand(batch_size, input_size)
      h_value = np.random.rand(batch_size, cell_size)

      # Gradients from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        output = gru_ops.GRUBlockCell(cell_size)(x, h)
        sess.run([variables.global_variables_initializer()])

        all_variables = variables.global_variables()[0:4]
        [w_ru, b_ru, w_c, b_c] = all_variables

        d_new_h_wrt_x = gradients_impl.gradients([output], x)
        d_new_h_wrt_h = gradients_impl.gradients([output], h)
        d_new_h_wrt_w_ru = gradients_impl.gradients([output], w_ru)
        d_new_h_wrt_w_c = gradients_impl.gradients([output], w_c)
        d_new_h_wrt_b_ru = gradients_impl.gradients([output], b_ru)
        d_new_h_wrt_b_c = gradients_impl.gradients([output], b_c)

        d_block_res = sess.run([
            d_new_h_wrt_x, d_new_h_wrt_h, d_new_h_wrt_w_ru, d_new_h_wrt_w_c,
            d_new_h_wrt_b_ru, d_new_h_wrt_b_c
        ], {x: x_value,
            h: h_value})

      # Gradients from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        output = rnn_cell.GRUCell(cell_size)(x, h)
        sess.run([variables.global_variables_initializer()])

        all_variables = variables.global_variables()[4:8]
        [w_ru, b_ru, w_c, b_c] = all_variables

        d_new_h_wrt_x = gradients_impl.gradients([output], x)
        d_new_h_wrt_h = gradients_impl.gradients([output], h)
        d_new_h_wrt_w_ru = gradients_impl.gradients([output], w_ru)
        d_new_h_wrt_w_c = gradients_impl.gradients([output], w_c)
        d_new_h_wrt_b_ru = gradients_impl.gradients([output], b_ru)
        d_new_h_wrt_b_c = gradients_impl.gradients([output], b_c)

        d_basic_res = sess.run([
            d_new_h_wrt_x, d_new_h_wrt_h, d_new_h_wrt_w_ru, d_new_h_wrt_w_c,
            d_new_h_wrt_b_ru, d_new_h_wrt_b_c
        ], {x: x_value,
            h: h_value})

      # Check lengths of derivative results.
      self.assertEqual(len(d_block_res), len(d_basic_res))
      # Check the value of every derivative result.
      for block, basic in zip(d_block_res, d_basic_res):
        self.assertAllClose(block, basic)
Example #36
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to global variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        global_old = set(n.op.name for n in variables.global_variables())
        apply_updates = self._opt.apply_gradients(grads_and_vars)
        global_new = set(n.op.name for n in variables.global_variables())
        with ops.control_dependencies([apply_updates]):
            local_update = state_ops.assign_add(self._local_step,
                                                1,
                                                name='local_step_update').op

        # Place the variables created by the wrapped optimizer into the local
        # collection; e.g. AdamOptimizer creates its beta accumulators as
        # global variables.
        def _adjust_optimizer_variable_collection(opt_vars):
            g = ops.get_default_graph()
            idx = 0
            for _ in range(len(
                    g._collections[ops.GraphKeys.GLOBAL_VARIABLES])):
                var = g.get_collection_ref(ops.GraphKeys.GLOBAL_VARIABLES)[idx]
                name = var.op.name
                if name in opt_vars:
                    ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var)
                    del g.get_collection_ref(
                        ops.GraphKeys.GLOBAL_VARIABLES)[idx]
                else:
                    idx += 1

        _adjust_optimizer_variable_collection(global_new - global_old)

        # Elastic-averaging update: pull the local variables toward the global
        # center variables and push the center variables toward the local ones.
        def _Update_global_variables():
            local_vars = [v for g, v in grads_and_vars if g is not None]
            global_center_vars = [self._global_map[var] for var in local_vars]
            local_center_vars = [self._local_map[var] for var in local_vars]
            local_center_vars_update = []
            for lvar, var in zip(local_center_vars, global_center_vars):
                local_center_vars_update.append(lvar.assign(var))
            update_ops = []
            differences = []
            with ops.control_dependencies(local_center_vars_update):
                for v, lv in zip(local_vars, local_center_vars):
                    with ops.device(v.device):
                        differences.append(math_ops.subtract(v, lv))
                for lvar, diff in zip(local_vars, differences):
                    with ops.device(lvar.device):
                        update_ops.append(
                            state_ops.assign_sub(
                                lvar,
                                math_ops.multiply(self._moving_rate, diff)))
                for var, diff in zip(global_center_vars, differences):
                    with ops.device(var.device):
                        update_ops.append(
                            state_ops.assign_add(
                                var, math_ops.multiply(self._moving_rate,
                                                       diff)))
                if global_step:
                    with ops.colocate_with(global_step):
                        update_ops.append(state_ops.assign_add(global_step, 1))
            variable_update = control_flow_ops.group(*(update_ops))
            return variable_update

        with ops.control_dependencies([local_update]):
            condition = math_ops.equal(
                math_ops.mod(self._local_step, self._period), 0)
            conditional_update = control_flow_ops.cond(
                condition, _Update_global_variables, control_flow_ops.no_op)
        return conditional_update
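
The set difference of variables.global_variables() taken before and after the inner apply_gradients() is how the wrapper above discovers the slot variables its wrapped optimizer creates, so that they can be demoted to the local collection. A standalone sketch of that diffing trick, using a plain AdamOptimizer and an invented variable name:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.get_variable("x", initializer=1.0)
loss = tf.square(x)
opt = tf.train.AdamOptimizer(0.1)

before = set(v.op.name for v in tf.global_variables())
train_op = opt.apply_gradients(opt.compute_gradients(loss))
after = set(v.op.name for v in tf.global_variables())

# Adam adds beta1_power/beta2_power plus per-variable m/v slots, all of which
# land in the global collection by default.
print(sorted(after - before))
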
Example #37
    def add_variable(self,
                     name,
                     shape,
                     dtype=None,
                     initializer=None,
                     regularizer=None,
                     trainable=True):
        """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).

    Returns:
      The created variable.
    """
        if dtype is None:
            dtype = self.dtype
        existing_variables = set(tf_variables.global_variables())

        self._set_scope(None)

        with vs.variable_scope(self._scope, reuse=self.built
                               or self._reuse) as scope:
            with ops.name_scope(scope.original_name_scope):
                variable = vs.get_variable(name,
                                           shape=shape,
                                           initializer=initializer,
                                           dtype=dtypes.as_dtype(dtype),
                                           trainable=trainable
                                           and self.trainable)
                if variable in existing_variables:
                    return variable
                if regularizer:
                    # To match the behavior of tf.get_variable(), we only
                    # apply regularization if the variable is newly created.
                    if isinstance(variable, tf_variables.PartitionedVariable):
                        for v in variable:
                            with ops.colocate_with(v.op):
                                with ops.name_scope(name + '/Regularizer'):
                                    regularization = regularizer(v)
                            if regularization is not None:
                                self.add_loss(regularization)
                                _add_elements_to_collection(
                                    regularization,
                                    ops.GraphKeys.REGULARIZATION_LOSSES)
                    else:
                        with ops.colocate_with(variable.op):
                            with ops.name_scope(name + '/Regularizer'):
                                regularization = regularizer(variable)
                        if regularization is not None:
                            self.add_loss(regularization)
                            _add_elements_to_collection(
                                regularization,
                                ops.GraphKeys.REGULARIZATION_LOSSES)
        if trainable:
            self._trainable_weights.append(variable)
        else:
            self._non_trainable_weights.append(variable)
        return variable
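
add_variable() above is the hook that tf.layers-style custom layers call from build(); the existing_variables snapshot lets it skip re-registering a regularizer when the variable was merely reused. The sketch below shows a minimal custom layer driving that code path; the Scale layer and its alpha weight are invented for illustration, and the exact collections touched can differ between TF 1.x releases.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

class Scale(tf.layers.Layer):
    """Multiplies its input by a single trainable scalar."""

    def build(self, input_shape):
        # Registers the weight with the layer; because a regularizer is given
        # and the variable is new, an L2 penalty is recorded as a layer loss.
        self.alpha = self.add_variable(
            "alpha", shape=[], initializer=tf.ones_initializer(),
            regularizer=tf.keras.regularizers.l2(1e-4), trainable=True)
        super(Scale, self).build(input_shape)

    def call(self, inputs):
        return self.alpha * inputs

x = tf.placeholder(tf.float32, [None, 3])
scale = Scale()
y = scale(x)
print(scale.losses)  # contains the L2 penalty created by add_variable()
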
Example #38
  def testFunctionalReuse(self):
    inputs1 = variables.Variable(
        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
    inputs2 = variables.Variable(
        np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
    epsilon = 1e-3
    training = array_ops.placeholder(dtype='bool')
    _ = normalization_layers.batch_norm(
        inputs1,
        axis=-1,
        momentum=0.9,
        epsilon=epsilon,
        training=training,
        name='bn')
    outputs2 = normalization_layers.batch_norm(
        inputs2,
        axis=-1,
        momentum=0.9,
        epsilon=epsilon,
        training=training,
        name='bn',
        reuse=True)

    # Last 2 update ops
    updates = ops.get_collection(ops.GraphKeys.UPDATE_OPS)[-2:]
    all_vars = dict([(v.name, v) for v in variables.global_variables()])
    moving_mean = all_vars['bn/moving_mean:0']
    moving_variance = all_vars['bn/moving_variance:0']
    beta = all_vars['bn/beta:0']
    gamma = all_vars['bn/gamma:0']

    with self.test_session() as sess:
      # Test training with placeholder learning phase.
      sess.run(variables.global_variables_initializer())
      for _ in range(100):
        np_output, _, _ = sess.run([outputs2] + updates,
                                   feed_dict={training: True})

      # Verify that the statistics are updated during training.
      np_moving_mean, np_moving_var = sess.run([moving_mean, moving_variance])
      np_inputs = sess.run(inputs2)
      np_mean = np.mean(np_inputs, axis=(0, 1, 2))
      np_std = np.std(np_inputs, axis=(0, 1, 2))
      np_variance = np.square(np_std)
      self.assertAllClose(np_mean, np_moving_mean, atol=1e-2)
      self.assertAllClose(np_variance, np_moving_var, atol=1e-2)

      # Verify that the axis is normalized during training.
      np_gamma, np_beta = sess.run([gamma, beta])
      np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
      np_beta = np.reshape(np_beta, (1, 1, 1, 6))
      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2)
      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)

      # Test inference with placeholder learning phase.
      np_output = sess.run(outputs2, feed_dict={training: False})

      # Verify that the axis is normalized during inference.
      normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
      self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2)
      self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
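
The tolerance of 1e-2 in the moving-statistics assertions above is not arbitrary: with momentum 0.9, one hundred update steps shrink the initial error by a factor of 0.9**100, far below 1e-2. A small NumPy sketch of the same exponential-moving-average rule (constants chosen only for illustration):

import numpy as np

momentum = 0.9
batch_mean = 0.5     # mean of the mini-batch along the normalized axis
moving_mean = 0.0    # initial value of the bn/moving_mean variable

for _ in range(100):
    # The update applied by the ops collected in GraphKeys.UPDATE_OPS.
    moving_mean = momentum * moving_mean + (1.0 - momentum) * batch_mean

# After 100 steps the gap to the batch mean is ~0.5 * 0.9**100, which is
# comfortably inside the atol=1e-2 used by assertAllClose above.
print(abs(moving_mean - batch_mean))
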
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []

        # local_anchor op will be placed on this worker task by default.
        local_anchor = control_flow_ops.no_op()
        # Colocating local_step variable prevents it being placed on the PS.
        with ops.colocate_with(local_anchor):
            self._local_step = variable_scope.variable(
                initial_value=0,
                trainable=False,
                collections=[ops.GraphKeys.LOCAL_VARIABLES],
                dtype=global_step.dtype.base_dtype,
                name="local_step")

        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   global_step)
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.global_variables())

        var_list = [v for g, v in grads_and_vars]
        velocity_list = [self._var_2_velocity[v] for v in var_list]
        residual_list = [self._var_2_residual[v] for v in var_list]

        density = 0.01

        with ops.name_scope(None, self._name):
            for velocity, residual, (grad, var) in zip(
                    velocity_list, residual_list, grads_and_vars):
                if grad is not None:
                    if self._use_nesterov:
                        update_velocity = self._momentum * (velocity + grad)
                        update_residual = residual + update_velocity + grad
                    else:
                        update_velocity = self._momentum * velocity + grad
                        update_residual = residual + update_velocity
                else:
                    update_velocity = velocity
                    update_residual = residual

                # select threshold according to abs(update_residual)
                top_k_values, top_k_indices = nn_ops.top_k(
                    math_ops.abs(update_residual),
                    math_ops.to_int32(
                        array_ops.shape(update_residual)[-1] * density))
                threshold = top_k_values[-1]
                mask = math_ops.abs(update_residual) > threshold
                # Cast the mask to the residual's dtype so that it can be
                # multiplied with the float residual/velocity tensors below.
                mask = math_ops.cast(mask, dtype=update_residual.dtype)
                mask_h = math_ops.abs(mask - 1)

                with ops.device(grad.device):
                    dense_grad = mask * update_residual
                    indices = array_ops.where(math_ops.not_equal(
                        dense_grad, 0))
                    values = array_ops.gather_nd(dense_grad, indices)
                    sparse_grad = ops.IndexedSlices(values, indices,
                                                    dense_grad.get_shape())
                    #grad_update = state_ops.assign(grad, mask * update_residual)

                #with ops.control_dependencies([grad_update]), ops.device(var.device):
                #grad_accum = data_flow_ops.ConditionalAccumulator(
                #grad.dtype, shape=var.get_shape(),
                #shared_name=var.name + "/grad_accum")
                #train_ops.append(grad_accum.apply_grad(grad, local_step=self._local_step))
                #aggregated_grad.append(grad_accum.take_grad(self._replicas_to_aggregate))

                with ops.device(var.device):
                    grad_accum = data_flow_ops.SparseConditionalAccumulator(
                        sparse_grad.dtype,
                        shape=(),
                        shared_name=var.name + "/grad_accum")
                    train_ops.append(
                        grad_accum.apply_indexed_slices_grad(
                            sparse_grad, local_step=self._local_step))
                    aggregated_grad.append(
                        grad_accum.take_indexed_slices_grad(
                            self._replicas_to_aggregate))

                    self._accumulator_list.append((grad_accum, var.device))

                with ops.device(residual.device):
                    train_ops.append(
                        state_ops.assign(residual, mask_h * update_residual))
                with ops.device(velocity.device):
                    train_ops.append(
                        state_ops.assign(velocity, mask_h * update_velocity))

            aggregated_grads_and_vars = zip(aggregated_grad, var_list)

            with ops.device(global_step.device), ops.name_scope(""):
                update_op = self._opt.apply_gradients(
                    aggregated_grads_and_vars, global_step)

            with ops.device(global_step.device), ops.name_scope(""):
                sync_token_queue = (data_flow_ops.FIFOQueue(
                    -1,
                    global_step.dtype.base_dtype,
                    shapes=(),
                    name="sync_token_q",
                    shared_name="sync_token_q"))
                self._sync_token_queue = sync_token_queue

                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    name="dummy_queue",
                    shared_name="dummy_queue"))

                with ops.control_dependencies(train_ops):
                    token = sync_token_queue.dequeue()
                train_op = state_ops.assign(self._local_step, token)

                with ops.control_dependencies([update_op]):
                    tokens = array_ops.fill([self._tokens_per_step],
                                            global_step)
                    sync_op = sync_token_queue.enqueue_many((tokens, ))

                if self._variable_averages is not None:
                    with ops.control_dependencies([sync_op
                                                   ]), ops.name_scope(""):
                        sync_op = self._variable_averages.apply(
                            self._variables_to_average)

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])

            for accum, dev in self._accumulator_list:
                with ops.device(dev):
                    chief_init_ops.append(
                        accum.set_global_step(global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._gradients_applied = True

            return train_op
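
The heart of the snippet above is gradient sparsification: only the largest-magnitude entries of the residual (a density fraction, here 1%) are pushed to the accumulator, and the rest stay in the local residual for later steps. A NumPy sketch of just that select-and-mask step, with an invented tensor size:

import numpy as np

density = 0.01
residual = np.random.randn(1000).astype(np.float32)

k = max(1, int(residual.shape[-1] * density))
threshold = np.sort(np.abs(residual))[-k]           # k-th largest magnitude
mask = (np.abs(residual) > threshold).astype(residual.dtype)

sparse_update = mask * residual          # the few entries sent this step
residual = (1.0 - mask) * residual       # everything else keeps accumulating

print(int(mask.sum()), "of", residual.size, "entries selected")
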
Example #40
    def test_pipelining(self):
        gradient_accumulation_count = 4
        local_batch_size = 2

        features = np.ones((1, 20), dtype=np.float32) * hvd.rank()
        labels = np.ones(1, dtype=np.int32) * hvd.rank()
        dataset = dataset_ops.Dataset.from_tensor_slices((features, labels))
        dataset = dataset.repeat().batch(local_batch_size, drop_remainder=True)

        loss_vals = []

        strategy = IPUHorovodStrategy()

        with strategy.scope():

            infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, "infeed")
            outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue("outfeed")

            def stage1(lr, images, labels):
                partial = keras.layers.Dense(32, activation="relu")(images)
                partial = keras.layers.Dense(16, activation="relu")(partial)
                return lr, partial, labels

            def stage2(lr, partial, labels):
                logits = keras.layers.Dense(10)(partial)
                per_example_loss = keras.losses.sparse_categorical_crossentropy(
                    y_true=labels, y_pred=logits, from_logits=True)
                # In a custom training loop, the optimiser does an allreduce *sum*, not
                # average, of the gradients across the distributed workers. Therefore
                # we want to divide the loss here by the *global* batch size, which is
                # done by the `tf.nn.compute_average_loss()` function.
                loss = nn.compute_average_loss(per_example_loss)
                return lr, loss

            def optimizer_function(lr, loss):
                optimizer = GradientDescentOptimizer(lr)
                return pipelining_ops.OptimizerFunctionOutput(optimizer, loss)

            def model(lr):
                pipeline_op = pipelining_ops.pipeline(
                    computational_stages=[stage1, stage2],
                    device_mapping=[0, 0],
                    gradient_accumulation_count=gradient_accumulation_count,
                    inputs=[lr],
                    infeed_queue=infeed_queue,
                    repeat_count=2,
                    outfeed_queue=outfeed_queue,
                    optimizer_function=optimizer_function,
                    name="Pipeline")
                return pipeline_op

            def compiled_model(lr):
                with ipu_scope("/device:IPU:0"):
                    return ipu_compiler.compile(model, inputs=[lr])

            with ops.device("cpu"):
                lr = array_ops.placeholder(np.float32, [])

            train_op = strategy.experimental_run_v2(compiled_model, args=[lr])

            _, per_worker_losses = outfeed_queue.dequeue()

            # Mean across the local `gradient_accumulation_count` batches:
            per_worker_loss = math_ops.reduce_mean(per_worker_losses)

            # Global mean across the distributed workers (since it is already
            # divided by the global batch size above, we do a sum here):
            global_loss = strategy.reduce(ReduceOp.SUM, per_worker_loss)

            config = ipu_utils.create_ipu_config()
            config = ipu_utils.auto_select_ipus(config, num_ipus=1)
            ipu_utils.configure_ipu_system(config)
            ipu_utils.move_variable_initialization_to_cpu()

            with session.Session() as sess:
                sess.run(infeed_queue.initializer)
                sess.run(variables.global_variables_initializer())

                for _ in range(10):
                    sess.run(train_op, {lr: 0.01})
                    global_loss_val = sess.run(global_loss)

                    if loss_vals:
                        # Check that the loss decreases monotonically.
                        self.assertLess(global_loss_val, loss_vals[-1])
                    loss_vals.append(global_loss_val)

                sess.run(infeed_queue.deleter)
                sess.run(outfeed_queue.deleter)

                # Check all variables are equal across workers.
                for variable in variables.global_variables():
                    self.assertAllRanksEqual(variable.eval(), variable.name)
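
The comment inside stage2 carries the key bookkeeping: if every per-example loss is divided by the global batch size, a cross-replica SUM of the per-worker results equals the true mean over all examples. A tiny NumPy check of that identity, with made-up losses for two workers:

import numpy as np

per_worker_losses = [np.array([1.0, 3.0]), np.array([2.0, 6.0])]
global_batch = sum(len(l) for l in per_worker_losses)

# What compute_average_loss does on each worker: sum / global batch size.
per_worker_avg = [l.sum() / global_batch for l in per_worker_losses]

# The distributed reduction then SUMS the per-worker values ...
global_loss = sum(per_worker_avg)

# ... which equals the plain mean over every example on every worker.
assert np.isclose(global_loss, np.concatenate(per_worker_losses).mean())
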
Example #41
 def testIndyLSTMCell(self):
   for dtype in [dtypes.float16, dtypes.float32]:
     np_dtype = dtype.as_numpy_dtype
     with self.session(graph=ops.Graph()) as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 2], dtype=dtype)
         state_0 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
         state_1 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
         cell = rnn_cell_impl.MultiRNNCell(
             [contrib_rnn_cell.IndyLSTMCell(2) for _ in range(2)])
         self.assertEqual(cell.dtype, None)
         self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name)
         self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name)
         cell.get_config()  # Should not throw an error
         g, (out_state_0, out_state_1) = cell(x, (state_0, state_1))
         # Layer infers the input type.
         self.assertEqual(cell.dtype, dtype.name)
         expected_variable_names = [
             "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_w:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_u:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_w:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_u:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME
         ]
         self.assertEqual(expected_variable_names,
                          [v.name for v in cell.trainable_variables])
         self.assertFalse(cell.non_trainable_variables)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run(
             [g, out_state_0, out_state_1], {
                 x.name: np.array([[1., 1.]]),
                 state_0[0].name: 0.1 * np.ones([1, 2]),
                 state_0[1].name: 0.1 * np.ones([1, 2]),
                 state_1[0].name: 0.1 * np.ones([1, 2]),
                 state_1[1].name: 0.1 * np.ones([1, 2]),
             })
         self.assertEqual(len(res), 3)
         variables = variables_lib.global_variables()
         self.assertEqual(expected_variable_names, [v.name for v in variables])
         # Only check the range of outputs as this is just a smoke test.
         self.assertAllInRange(res[0], -1.0, 1.0)
         self.assertAllInRange(res[1], -1.0, 1.0)
         self.assertAllInRange(res[2], -1.0, 1.0)
       with variable_scope.variable_scope(
           "other", initializer=init_ops.constant_initializer(0.5)):
         # Test IndyLSTMCell with input_size != num_units.
         x = array_ops.zeros([1, 3], dtype=dtype)
         state = (array_ops.zeros([1, 2], dtype=dtype),) * 2
         g, out_state = contrib_rnn_cell.IndyLSTMCell(2)(x, state)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run(
             [g, out_state], {
                 x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
                 state[0].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
                 state[1].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
             })
         self.assertEqual(len(res), 2)
Example #42
    def export_savedmodel(self,
                          export_dir_base,
                          serving_input_receiver_fn,
                          assets_extra=None,
                          as_text=False,
                          checkpoint_path=None):
        """Exports inference graph as a SavedModel into given dir.

    This method builds a new graph by first calling the
    serving_input_receiver_fn to obtain feature `Tensor`s, and then calling
    this `Estimator`'s model_fn to generate the model graph based on those
    features. It restores the given checkpoint (or, lacking that, the most
    recent checkpoint) into this graph in a fresh session.  Finally it creates
    a timestamped export directory below the given export_dir_base, and writes
    a `SavedModel` into it containing a single `MetaGraphDef` saved from this
    session.

    The exported `MetaGraphDef` will provide one `SignatureDef` for each
    element of the export_outputs dict returned from the model_fn, named using
    the same keys.  One of these keys is always
    signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, indicating which
    signature will be served when a serving request does not specify one.
    For each signature, the outputs are provided by the corresponding
    `ExportOutput`s, and the inputs are always the input receivers provided by
    the serving_input_receiver_fn.

    Extra assets may be written into the SavedModel via the extra_assets
    argument.  This should be a dict, where each key gives a destination path
    (including the filename) relative to the assets.extra directory.  The
    corresponding value gives the full path of the source file to be copied.
    For example, the simple case of copying a single file without renaming it
    is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.

    Args:
      export_dir_base: A string containing a directory in which to create
        timestamped subdirectories containing exported SavedModels.
      serving_input_receiver_fn: A function that takes no argument and
        returns a `ServingInputReceiver`.
      assets_extra: A dict specifying how to populate the assets.extra directory
        within the exported SavedModel, or `None` if no extra assets are needed.
      as_text: whether to write the SavedModel proto in text format.
      checkpoint_path: The checkpoint path to export.  If `None` (the default),
        the most recent checkpoint found within the model directory is chosen.

    Returns:
      The string path to the exported directory.

    Raises:
      ValueError: if no serving_input_receiver_fn is provided, no export_outputs
          are provided, or no checkpoint can be found.
    """
        if serving_input_receiver_fn is None:
            raise ValueError('serving_input_receiver_fn must be defined.')

        with ops.Graph().as_default() as g:
            training.create_global_step(g)
            serving_input_receiver = serving_input_receiver_fn()

            # Call the model_fn and collect the export_outputs.
            estimator_spec = self._call_model_fn(
                features=serving_input_receiver.features,
                labels=None,
                mode=model_fn_lib.ModeKeys.PREDICT)

            # Build the SignatureDefs from receivers and all outputs
            signature_def_map = export.build_all_signature_defs(
                serving_input_receiver.receiver_tensors,
                estimator_spec.export_outputs)

            if not checkpoint_path:
                # Locate the latest checkpoint
                checkpoint_path = saver.latest_checkpoint(self._model_dir)
            if not checkpoint_path:
                raise ValueError("Couldn't find trained model at %s." %
                                 self._model_dir)

            export_dir = export.get_timestamped_export_dir(export_dir_base)

            # TODO(soergel): Consider whether MonitoredSession makes sense here
            with tf_session.Session() as session:

                saver_for_restore = estimator_spec.scaffold.saver or saver.Saver(
                    variables.global_variables(), sharded=True)
                saver_for_restore.restore(session, checkpoint_path)

                # TODO(b/36111876): replace legacy_init_op with main_op mechanism
                # pylint: disable=protected-access
                local_init_op = (
                    estimator_spec.scaffold.local_init_op
                    or monitored_session.Scaffold._default_local_init_op())
                # pylint: enable=protected-access

                # Perform the export
                builder = saved_model_builder.SavedModelBuilder(export_dir)
                builder.add_meta_graph_and_variables(
                    session, [tag_constants.SERVING],
                    signature_def_map=signature_def_map,
                    assets_collection=ops.get_collection(
                        ops.GraphKeys.ASSET_FILEPATHS),
                    legacy_init_op=local_init_op)
                builder.save(as_text)

            # Add the extra assets
            if assets_extra:
                assets_extra_path = os.path.join(
                    compat.as_bytes(export_dir),
                    compat.as_bytes('assets.extra'))
                for dest_relative, source in assets_extra.items():
                    dest_absolute = os.path.join(
                        compat.as_bytes(assets_extra_path),
                        compat.as_bytes(dest_relative))
                    dest_path = os.path.dirname(dest_absolute)
                    gfile.MakeDirs(dest_path)
                    gfile.Copy(source, dest_absolute)

            return export_dir
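
To use an export method like the one above, the only piece the caller has to supply is a serving_input_receiver_fn that maps raw request tensors to the model's features. A minimal sketch of such a function (the feature name "x" and the placeholder shape are invented; the estimator in the commented call is assumed to be already trained):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def serving_input_receiver_fn():
    # Raw tensor received by the serving system.
    inputs = tf.placeholder(tf.float32, shape=[None, 4], name="inputs")
    return tf.estimator.export.ServingInputReceiver(
        features={"x": inputs}, receiver_tensors={"inputs": inputs})

# export_dir = estimator.export_savedmodel("/tmp/exports",
#                                          serving_input_receiver_fn)
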
Example #43
    def add_weight(self,
                   name,
                   shape,
                   dtype=None,
                   initializer=None,
                   regularizer=None,
                   trainable=True,
                   constraint=None,
                   use_resource=None,
                   partitioner=None):
        """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable.
      constraint: constraint instance (callable).
      use_resource: Whether to use `ResourceVariable`.
      partitioner: (optional) partitioner instance (callable).  If
        provided, when the requested variable is created it will be split
        into multiple partitions according to `partitioner`.  In this case,
        an instance of `PartitionedVariable` is returned.  Available
        partitioners include `tf.fixed_size_partitioner` and
        `tf.variable_axis_size_partitioner`.  For more details, see the
        documentation of `tf.get_variable` and the  "Variable Partitioners
        and Sharding" section of the API guide.

    Returns:
      The created variable.  Usually either a `Variable` or `ResourceVariable`
      instance.  If `partitioner` is not `None`, a `PartitionedVariable`
      instance is returned.

    Raises:
      RuntimeError: If called with partitioned variable regularization and
        eager execution is enabled.
    """
        def _should_add_regularizer(variable, existing_variable_set):
            if isinstance(variable, tf_variables.PartitionedVariable):
                for var in variable:
                    if var in existing_variable_set:
                        return False
                return True
            else:
                return variable not in existing_variable_set

        init_graph = None
        if not context.executing_eagerly():
            default_graph = ops.get_default_graph()
            if default_graph.building_function:
                with ops.init_scope():
                    # Retrieve the variables from the graph into which variables
                    # will be lifted; if initialization ops will be lifted into
                    # the eager context, then there is nothing to retrieve, since variable
                    # collections are not supported when eager execution is enabled.
                    if not context.executing_eagerly():
                        init_graph = ops.get_default_graph()
                        existing_variables = set(
                            tf_variables.global_variables())
            else:
                # Initialization ops will not be lifted out of the default graph.
                init_graph = default_graph
                existing_variables = set(tf_variables.global_variables())

        if dtype is None:
            dtype = self.dtype or dtypes.float32

        self._set_scope(None)
        reuse = self.built or self._reuse
        prev_len_trainable = len(self._trainable_weights)
        with vs.variable_scope(self._scope,
                               reuse=reuse,
                               auxiliary_name_scope=False) as scope:
            self._current_scope = scope
            with ops.name_scope(self._name_scope()):
                use_resource = (use_resource or self._use_resource_variables
                                or scope.use_resource)
                variable = super(Layer,
                                 self).add_weight(name,
                                                  shape,
                                                  dtype=dtypes.as_dtype(dtype),
                                                  initializer=initializer
                                                  or scope.initializer,
                                                  trainable=trainable,
                                                  constraint=constraint,
                                                  partitioner=partitioner,
                                                  use_resource=use_resource,
                                                  getter=vs.get_variable)

                if regularizer:
                    if context.executing_eagerly() or _should_add_regularizer(
                            variable, existing_variables):
                        self._handle_weight_regularization(
                            name, variable, regularizer)

                if init_graph is not None:
                    # Handle edge case where a custom getter has overridden `trainable`.
                    # There is one known occurrence of this, in unit test
                    # testBasicRNNCellNotTrainable in
                    # contrib.rnn.python.kernel_tests.core_rnn_cell_test
                    with init_graph.as_default():
                        trainable_variables = tf_variables.trainable_variables(
                        )
                    if (trainable and self.trainable
                            and variable not in trainable_variables):
                        # A custom getter / variable scope overrode the trainable flag.
                        extra_trainable_vars = self._trainable_weights[
                            prev_len_trainable:]
                        self._trainable_weights = self._trainable_weights[:
                                                                          prev_len_trainable]
                        self._non_trainable_weights += extra_trainable_vars
        return variable
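
The partitioner branch above is worth seeing in isolation: when a partitioner is passed, get_variable returns a PartitionedVariable, which is why the regularizer check iterates over its parts. A minimal sketch using the public tf.compat.v1 API (shapes and the shard count are arbitrary):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

with tf.variable_scope("demo",
                       partitioner=tf.fixed_size_partitioner(num_shards=2)):
    w = tf.get_variable("w", shape=[10, 4])

# `w` is a PartitionedVariable made of two [5, 4] shards; each shard is a
# separate entry in the global-variable collection.
print(type(w).__name__)
for part in w:
    print(part.name, part.shape)
print(len(tf.global_variables("demo")))
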
Example #44
 def default_ready_for_local_init_op():
     return variables.report_uninitialized_variables(
         variables.global_variables())
    def test1Workers2Period(self):
        num_workers = 1
        communication_period = 4
        num_ps = 1
        _, workers, _ = create_local_cluster(num_workers=num_workers,
                                             num_ps=num_ps)

        sessions, graphs, train_ops = _get_workers(num_workers,
                                                   communication_period,
                                                   workers)

        var_0 = graphs[0].get_tensor_by_name("v0:0")
        var_1 = graphs[0].get_tensor_by_name("v1:0")
        global_step = training_util.get_global_step(graphs[0])
        var_0_g = graphs[0].get_tensor_by_name(
            agn_optimizer.GLOBAL_VARIABLE_NAME + "/v0:0")
        var_1_g = graphs[0].get_tensor_by_name(
            agn_optimizer.GLOBAL_VARIABLE_NAME + "/v1:0")

        # Verify that the Adam beta variables are not in the global collection.
        with graphs[0].as_default():
            for ele in variables.global_variables():
                self.assertTrue(ele.op.name.find("beta") < 0)
                if ele.op.name.find("global_center_variable") < 0:
                    self.assertTrue(ele.op.name.find("Adam") < 0)

        # Verify the initialized value.
        self.assertAllEqual(0.0, sessions[0].run(var_0))
        self.assertAllEqual(0.5, sessions[0].run(var_1))
        self.assertAllEqual(0.0, sessions[0].run(var_0_g))
        self.assertAllEqual(0.5, sessions[0].run(var_1_g))
        self.assertAllEqual(0, sessions[0].run(global_step))
        # step 0
        sessions[0].run(train_ops[0])
        self.assertNear(0.1, sessions[0].run(var_0), 1e-6)
        self.assertNear(0.6, sessions[0].run(var_1), 1e-6)
        self.assertAllEqual(0.0, sessions[0].run(var_0_g))
        self.assertAllEqual(0.5, sessions[0].run(var_1_g))
        self.assertAllEqual(0, sessions[0].run(global_step))

        # 2 & 3
        sessions[0].run(train_ops[0])
        sessions[0].run(train_ops[0])
        self.assertNear(0.3, sessions[0].run(var_0), 1e-6)
        self.assertNear(0.8, sessions[0].run(var_1), 1e-6)

        # 4
        sessions[0].run(train_ops[0])
        # pull
        self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g))
        self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g))
        self.assertNear(0.1, sessions[0].run(var_0), 1e-6)
        self.assertNear(0.6, sessions[0].run(var_1), 1e-6)

        sessions[0].run(train_ops[0])
        sessions[0].run(train_ops[0])
        sessions[0].run(train_ops[0])
        sessions[0].run(train_ops[0])
        self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g))
        self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g))
        self.assertNear(0.2, sessions[0].run(var_0), 1e-6)
        self.assertNear(0.7, sessions[0].run(var_1), 1e-6)
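
The schedule asserted above (three purely local steps, then a synchronised pull on every fourth step) falls out of the mod(local_step, period) == 0 condition in apply_gradients. A toy reproduction of just that trigger, in plain Python with invented step counts:

period = 4
local_step = 0
pulled_at = []

for step in range(1, 9):
    local_step += 1                  # the local_step_update op
    if local_step % period == 0:     # the tf.cond condition
        pulled_at.append(step)       # _Update_global_variables() would run here

print(pulled_at)  # [4, 8] -- the two pulls checked by the test
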
Example #46
    def _testRevBlock(self,
                      x=None,
                      f=None,
                      g=None,
                      f_side_input=None,
                      g_side_input=None):
        random_seed.set_random_seed(1234)

        if f is None:

            def f(x):  # pylint: disable=function-redefined
                return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)

        if g is None:

            def g(x):  # pylint: disable=function-redefined
                return core_layers.dense(x, self.CHANNELS // 2, use_bias=True)

        if f_side_input is None:
            f_side_input = []

        if g_side_input is None:
            g_side_input = []

        if x is None:
            x = random_ops.random_uniform([self.BATCH_SIZE, self.CHANNELS],
                                          dtype=dtypes.float32)
        x1, x2 = array_ops.split(x, 2, axis=-1)

        with variable_scope.variable_scope("rev_test") as vs:
            y1_rev, y2_rev = rev_block_lib.rev_block(
                x1,
                x2,
                f,
                g,
                f_side_input=f_side_input,
                g_side_input=g_side_input,
                num_layers=self.NUM_LAYERS)
            y_rev = array_ops.concat([y1_rev, y2_rev], axis=1)
            fg_vars = vs.trainable_variables()

        num_vars = len(variables.global_variables())
        with variable_scope.variable_scope(vs, reuse=True):
            y1, y2 = rev_block_lib.rev_block(x1,
                                             x2,
                                             f,
                                             g,
                                             f_side_input=f_side_input,
                                             g_side_input=g_side_input,
                                             num_layers=self.NUM_LAYERS,
                                             is_training=False)
            y = array_ops.concat([y1, y2], axis=1)
        # Ensure no new vars were created - full reuse
        assert len(variables.global_variables()) == num_vars

        loss_rev = math_ops.reduce_mean(y_rev + 10.)
        loss = math_ops.reduce_mean(y + 10.)

        wrt = [x] + f_side_input + g_side_input + fg_vars
        grads_rev = gradients_impl.gradients(loss_rev, wrt)
        grads = gradients_impl.gradients(loss, wrt)

        with self.cached_session() as sess:
            sess.run(variables.global_variables_initializer())
            y_val, yd_val, gd_val, g_val = sess.run(
                [y, y_rev, grads_rev, grads])
            self.assertAllClose(y_val, yd_val, rtol=1e-5)
            for g1, g2 in zip(gd_val, g_val):
                self.assertAllClose(g1, g2, rtol=1e-5)
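
What the comparison above ultimately verifies is that a reversible residual block reproduces the outputs and gradients of its non-reversible counterpart. The coupling that makes the block invertible, and hence lets the backward pass recompute activations instead of storing them, is easy to check numerically; the sketch below uses the standard RevNet equations rather than the library's internals, with arbitrary weights:

import numpy as np

rng = np.random.RandomState(0)
W_f, W_g = rng.randn(8, 8), rng.randn(8, 8)
f = lambda t: np.tanh(t @ W_f)
g = lambda t: np.tanh(t @ W_g)

x1, x2 = rng.randn(4, 8), rng.randn(4, 8)

# Forward pass of one reversible layer.
y1 = x1 + f(x2)
y2 = x2 + g(y1)

# The inputs are reconstructed exactly from the outputs, so intermediate
# activations never need to be kept for backprop.
x2_rec = y2 - g(y1)
x1_rec = y1 - f(x2_rec)
assert np.allclose(x1, x1_rec) and np.allclose(x2, x2_rec)
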
    def _helpTestRun(self, use_resource=False):
        for sequential_update in [True, False]:
            for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
                with self.test_session(graph=ops.Graph()) as sess:
                    orig_val0 = [1.0, 2.0]
                    orig_val1 = [3.0, 4.0]
                    var0 = variable_scope.get_variable(
                        'var0',
                        initializer=constant_op.constant(orig_val0,
                                                         dtype=dtype),
                        use_resource=use_resource)
                    var1 = variable_scope.get_variable(
                        'var1',
                        initializer=constant_op.constant(orig_val1,
                                                         dtype=dtype),
                        use_resource=use_resource)
                    grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
                    grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)

                    opt = moving_average_optimizer.MovingAverageOptimizer(
                        gradient_descent.GradientDescentOptimizer(
                            learning_rate=2.0),
                        average_decay=0.5,
                        sequential_update=sequential_update)
                    save_dir = tempfile.mkdtemp(
                        prefix=os.path.join(self.get_temp_dir(), 'run_1'))
                    save_path = os.path.join(save_dir, 'model')
                    update = opt.apply_gradients(
                        list(six.moves.zip([grads0, grads1], [var0, var1])))
                    global_vars = variables.global_variables()
                    ema_var0 = [
                        v for v in global_vars
                        if v.op.name == 'var0/ExponentialMovingAverage'
                    ][0]
                    ema_var1 = [
                        v for v in global_vars
                        if v.op.name == 'var1/ExponentialMovingAverage'
                    ][0]
                    perturb = control_flow_ops.group([
                        state_ops.assign_add(var0, [1.0, 1.0]),
                        state_ops.assign_add(var1, [2.0, 2.0]),
                        state_ops.assign_add(ema_var0, [3.0, 3.0]),
                        state_ops.assign_add(ema_var1, [4.0, 4.0])
                    ])

                    # Test that saver with missing ema variables will fail.
                    with self.assertRaisesRegexp(ValueError,
                                                 r'Variable to swap'):
                        opt.swapping_saver(var_list=[var0])

                    train_saver = opt.swapping_saver()
                    train_saver_subset = opt.swapping_saver(
                        var_list=[var0, ema_var0])
                    inference_saver = saver.Saver()
                    variables.global_variables_initializer().run()
                    # Step 1.
                    update.run()
                    self.assertAllCloseAccordingToType([0.8, 1.8], var0.eval())
                    self.assertAllCloseAccordingToType([2.98, 3.98],
                                                       var1.eval())
                    if sequential_update:
                        self.assertAllCloseAccordingToType([0.9, 1.9],
                                                           ema_var0.eval())
                        self.assertAllCloseAccordingToType([2.99, 3.99],
                                                           ema_var1.eval())
                    # Test that the swapping saver save/restore operation is identity.
                    train_saver.save(sess, save_path)
                    train_saver.restore(sess, save_path)
                    self.assertAllCloseAccordingToType([0.8, 1.8], var0.eval())
                    self.assertAllCloseAccordingToType([2.98, 3.98],
                                                       var1.eval())
                    if sequential_update:
                        self.assertAllCloseAccordingToType([0.9, 1.9],
                                                           ema_var0.eval())
                        self.assertAllCloseAccordingToType([2.99, 3.99],
                                                           ema_var1.eval())
                    # Test that the subset saver saves the EMA variable as well.
                    if sequential_update:
                        subset_save_path = save_path + '_subset'
                        train_saver_subset.save(sess, subset_save_path)
                        perturb.run()
                        self.assertAllCloseAccordingToType([1.8, 2.8],
                                                           var0.eval())
                        self.assertAllCloseAccordingToType([3.9, 4.9],
                                                           ema_var0.eval())
                        self.assertAllCloseAccordingToType([4.98, 5.98],
                                                           var1.eval())
                        self.assertAllCloseAccordingToType([6.99, 7.99],
                                                           ema_var1.eval())
                        # Restoring should only restore var0 and ema_var0.
                        train_saver_subset.restore(sess, subset_save_path)
                        self.assertAllCloseAccordingToType([0.8, 1.8],
                                                           var0.eval())
                        self.assertAllCloseAccordingToType([0.9, 1.9],
                                                           ema_var0.eval())
                        self.assertAllCloseAccordingToType([4.98, 5.98],
                                                           var1.eval())
                        self.assertAllCloseAccordingToType([6.99, 7.99],
                                                           ema_var1.eval())
                        # Restore back to previous state.
                        train_saver.restore(sess, save_path)

                    # If updates are parallel, this is not always true after the 1st step.
                    if sequential_update:
                        # Test that the normal saver will have the averaged variables.
                        # We test that the average values are between the original value
                        # and the most recent variable values (since they are an average
                        # of the two).
                        val0 = var0.eval()
                        val1 = var1.eval()
                        train_saver.save(sess, save_path)
                        inference_saver.restore(sess, save_path)
                        avg_val0 = var0.eval()
                        avg_val1 = var1.eval()
                        for i in six.moves.range(len(val0)):
                            self.assertLess(val0[i], avg_val0[i])
                            self.assertLess(avg_val0[i], orig_val0[i])
                            self.assertLess(val1[i], avg_val1[i])
                            self.assertLess(avg_val1[i], orig_val1[i])
                        train_saver.restore(sess, save_path)
                    # Step 2.
                    update.run()
                    # Test that the normal saver will have the averaged variables.
                    # We test that the average values are between the original value and
                    # the most recent variable values (since they are an average of the
                    # two).
                    val0 = var0.eval()
                    val1 = var1.eval()
                    self.assertAllCloseAccordingToType([0.6, 1.6], val0)
                    self.assertAllCloseAccordingToType([2.96, 3.96], val1)
                    train_saver.save(sess, save_path)
                    inference_saver.restore(sess, save_path)
                    avg_val0 = var0.eval()
                    avg_val1 = var1.eval()
                    for i in six.moves.range(len(val0)):
                        self.assertLess(val0[i], avg_val0[i])
                        self.assertLess(avg_val0[i], orig_val0[i])
                        self.assertLess(val1[i], avg_val1[i])
                        self.assertLess(avg_val1[i], orig_val1[i])
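
The expected values in the first-step assertions follow directly from one SGD update followed by one exponential-moving-average update with average_decay=0.5. Reproducing the arithmetic for var0 by hand:

decay = 0.5
lr = 2.0

orig_val0 = [1.0, 2.0]
grads0 = [0.1, 0.1]

# One gradient-descent step: var <- var - lr * grad.
val0 = [v - lr * g for v, g in zip(orig_val0, grads0)]          # [0.8, 1.8]

# One moving-average step: ema <- decay * ema + (1 - decay) * var,
# with the EMA initialised to the variable's original value.
ema0 = [decay * o + (1 - decay) * n for o, n in zip(orig_val0, val0)]  # [0.9, 1.9]

print(val0, ema0)
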
Example #48
 def testBasicLSTMCell(self):
   for dtype in [dtypes.float16, dtypes.float32]:
     np_dtype = dtype.as_numpy_dtype
     with self.test_session(graph=ops.Graph()) as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 2], dtype=dtype)
         m = array_ops.zeros([1, 8], dtype=dtype)
         cell = rnn_cell_impl.MultiRNNCell(
             [
                 rnn_cell_impl.BasicLSTMCell(
                     2, state_is_tuple=False)
                 for _ in range(2)
             ],
             state_is_tuple=False)
         self.assertEqual(cell.dtype, None)
         g, out_m = cell(x, m)
         # Layer infers the input type.
         self.assertEqual(cell.dtype, dtype.name)
         expected_variable_names = [
             "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME
         ]
         self.assertEqual(
             expected_variable_names,
             [v.name for v in cell.trainable_variables])
         self.assertFalse(cell.non_trainable_variables)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run(
             [g, out_m],
             {x.name: np.array([[1., 1.]]),
              m.name: 0.1 * np.ones([1, 8])})
         self.assertEqual(len(res), 2)
         variables = variables_lib.global_variables()
         self.assertEqual(expected_variable_names, [v.name for v in variables])
         # The numbers in results were not calculated, this is just a
         # smoke test.
         self.assertAllClose(
             res[0], np.array([[0.240, 0.240]], dtype=np_dtype), 1e-2)
         expected_mem = np.array(
             [[0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240]],
             dtype=np_dtype)
         self.assertAllClose(res[1], expected_mem, 1e-2)
       with variable_scope.variable_scope(
           "other", initializer=init_ops.constant_initializer(0.5)):
         # Test BasicLSTMCell with input_size != num_units.
         x = array_ops.zeros([1, 3], dtype=dtype)
         m = array_ops.zeros([1, 4], dtype=dtype)
         g, out_m = rnn_cell_impl.BasicLSTMCell(
             2, state_is_tuple=False)(x, m)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run(
             [g, out_m],
             {x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
              m.name: 0.1 * np.ones([1, 4], dtype=np_dtype)})
         self.assertEqual(len(res), 2)
Example #49
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []
        var_list = []

        # local_anchor op will be placed on this worker task by default.
        local_anchor = control_flow_ops.no_op()
        # Colocating local_step variable prevents it being placed on the PS.
        with ops.colocate_with(local_anchor):
            self._local_step = variable_scope.variable(
                initial_value=0,
                trainable=False,
                collections=[ops.GraphKeys.LOCAL_VARIABLES],
                dtype=global_step.dtype.base_dtype,
                name="sync_rep_local_step")

        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   global_step)
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.global_variables())

        with ops.name_scope(None, self._name):
            for grad, var in grads_and_vars:
                var_list.append(var)
                with ops.device(var.device):
                    # Dense gradients.
                    if grad is None:
                        aggregated_grad.append(None)  # pass-through.
                        continue
                    elif isinstance(grad, ops.Tensor):
                        grad_accum = data_flow_ops.ConditionalAccumulator(
                            grad.dtype,
                            shape=var.get_shape(),
                            shared_name=var.name + "/grad_accum")
                        train_ops.append(
                            grad_accum.apply_grad(grad,
                                                  local_step=self._local_step))
                        aggregated_grad.append(
                            grad_accum.take_grad(self._replicas_to_aggregate))
                    else:
                        if not isinstance(grad, ops.IndexedSlices):
                            raise ValueError("Unknown grad type!")
                        grad_accum = data_flow_ops.SparseConditionalAccumulator(
                            grad.dtype,
                            shape=(),
                            shared_name=var.name + "/grad_accum")
                        train_ops.append(
                            grad_accum.apply_indexed_slices_grad(
                                grad, local_step=self._local_step))
                        aggregated_grad.append(
                            grad_accum.take_indexed_slices_grad(
                                self._replicas_to_aggregate))

                    self._accumulator_list.append((grad_accum, var.device))

            aggregated_grads_and_vars = zip(aggregated_grad, var_list)

            # sync_op will be assigned to the same device as the global step.
            with ops.device(global_step.device), ops.name_scope(""):
                update_op = self._opt.apply_gradients(
                    aggregated_grads_and_vars, global_step)

            # Create token queue.
            with ops.device(global_step.device), ops.name_scope(""):
                sync_token_queue = (data_flow_ops.FIFOQueue(
                    -1,
                    global_step.dtype.base_dtype,
                    shapes=(),
                    name="sync_token_q",
                    shared_name="sync_token_q"))
                self._sync_token_queue = sync_token_queue

                # dummy_queue is passed to the queue runner. Don't use the real
                # queues, because the queue runner doesn't automatically reopen
                # a queue once it has been closed on the PS devices.
                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    name="dummy_queue",
                    shared_name="dummy_queue"))

            with ops.device(global_step.device), ops.name_scope(""):
                # Replicas have to wait until they can get a token from the token queue.
                with ops.control_dependencies(train_ops):
                    token = sync_token_queue.dequeue()
                train_op = state_ops.assign(self._local_step, token)

                with ops.control_dependencies([update_op]):
                    # Sync_op needs to insert tokens to the token queue at the end of the
                    # step so the replicas can fetch them to start the next step.
                    tokens = array_ops.fill([self._tokens_per_step],
                                            global_step)
                    sync_op = sync_token_queue.enqueue_many((tokens, ))

                if self._variable_averages is not None:
                    with ops.control_dependencies([sync_op]), ops.name_scope(""):
                        sync_op = self._variable_averages.apply(
                            self._variables_to_average)

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])
            for accum, dev in self._accumulator_list:
                with ops.device(dev):
                    chief_init_ops.append(
                        accum.set_global_step(global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._gradients_applied = True
            return train_op
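The block above hand-builds the gradient accumulators, sync token queue, and chief queue runner. For comparison, here is a minimal sketch, assuming TF 1.x graph mode plus an existing `loss` tensor and `num_workers`/`is_chief` values supplied by the caller, of getting the same behaviour from the public tf.compat.v1.train.SyncReplicasOptimizer wrapper rather than re-implementing it:

import tensorflow.compat.v1 as tf

def build_sync_train_op(loss, num_workers, is_chief):
    # Hypothetical helper; `loss`, `num_workers` and `is_chief` are assumed inputs.
    global_step = tf.train.get_or_create_global_step()
    base_opt = tf.train.GradientDescentOptimizer(0.1)
    # SyncReplicasOptimizer internally builds the ConditionalAccumulators,
    # the sync token queue and the chief queue runner shown above.
    opt = tf.train.SyncReplicasOptimizer(
        base_opt,
        replicas_to_aggregate=num_workers,
        total_num_replicas=num_workers)
    train_op = opt.minimize(loss, global_step=global_step)
    # The hook runs the chief init ops and starts the chief queue runner.
    sync_hook = opt.make_session_run_hook(is_chief)
    return train_op, sync_hook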
Example #50
    def swapping_saver(self, var_list=None, name='swapping_saver', **kwargs):
        """Create a saver swapping moving averages and variables.

    You should use this saver during training.  It will save the moving averages
    of the trained parameters under the original parameter names.  For
    evaluations or inference you should use a regular saver, and it will
    automatically use the moving averages for the trained variables.

    You must call this function after all variables have been created and after
    you have called Optimizer.minimize().

    Args:
      var_list: List of variables to save, as per `Saver()`.
                If set to None, will save all the variables that have been
                created before this call.
      name: The name of the saver.
      **kwargs: Keyword arguments of `Saver()`.

    Returns:
      A `tf.compat.v1.train.Saver` object.

    Raises:
      RuntimeError: If apply_gradients or minimize has not been called before.
      ValueError: If var_list is provided and contains some variables but not
        their moving average counterpart.
    """

        if self._swapped_variable_name_map is None:
            raise RuntimeError('Must call apply_gradients or minimize before '
                               'creating the swapping_saver')
        if var_list is None:
            var_list = variables.global_variables()
        if not isinstance(var_list, dict):
            var_list = saveable_object_util.op_list_to_dict(var_list)

        v_name_to_tensor = {}
        for k, tensor_or_list in six.iteritems(var_list):
            # For each partitioned variable, OpListToDict returns a list of
            # constituent parts instead of a single tensor.
            if (isinstance(tensor_or_list, list) or isinstance(
                    tensor_or_list, variables.PartitionedVariable)):
                for tensor in tensor_or_list:
                    v_name = tensor.op.name
                    v_name_to_tensor[v_name] = tensor
            else:
                v_name_to_tensor[k] = tensor_or_list

        # Now swap variables and moving averages
        swapped_var_list = {}
        for k, tensor_or_list in six.iteritems(var_list):
            if isinstance(tensor_or_list, list):
                tensor_list_to_save = []
                for tensor in tensor_or_list:
                    v_name = tensor.op.name
                    swapped_variable = self._find_swapped_variable(
                        v_name_to_tensor, v_name, tensor)
                    tensor_list_to_save.append(swapped_variable)
                swapped_var_list[k] = tensor_list_to_save
            else:
                swapped_var_list[k] = self._find_swapped_variable(
                    v_name_to_tensor, k, tensor_or_list)

        # Build the swapping saver.
        return saver.Saver(swapped_var_list, name=name, **kwargs)
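As a usage note, here is a minimal sketch, assuming TF 1.x with tf.contrib available and an existing `loss` tensor, of the call order the docstring requires: minimize() first, then swapping_saver():

import tensorflow as tf
from tensorflow.contrib.opt import MovingAverageOptimizer

def train_with_swapping_saver(loss):
    # Hypothetical helper around the documented call order.
    global_step = tf.train.get_or_create_global_step()
    opt = MovingAverageOptimizer(
        tf.train.GradientDescentOptimizer(0.1), average_decay=0.999)
    train_op = opt.minimize(loss, global_step=global_step)
    # Must come after minimize(): the saver writes the moving averages under
    # the original variable names, so a plain tf.train.Saver used at inference
    # time restores the averaged weights transparently.
    saver = opt.swapping_saver()
    return train_op, saver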
Example #51
    def swapping_saver(self, var_list=None, name='swapping_saver', **kwargs):
        """Create a saver swapping moving averages and variables.

    You should use this saver during training.  It will save the moving averages
    of the trained parameters under the original parameter names.  For
    evaluations or inference you should use a regular saver, and it will
    automatically use the moving averages for the trained variables.

    You must call this function after all variables have been created and after
    you have called Optimizer.minimize().

    Args:
      var_list: List of variables to save, as per `Saver()`.
                If set to None, will save all the variables that have been
                created before this call.
      name: The name of the saver.
      **kwargs: Keyword arguments of `Saver()`.

    Returns:
      A `tf.train.Saver` object.

    Raises:
      RuntimeError: If apply_gradients or minimize has not been called before.
      ValueError: If var_list is provided and contains some variables but not
        their moving average counterpart.
    """

        if self._swapped_variable_name_map is None:
            raise RuntimeError('Must call apply_gradients or minimize before '
                               'creating the swapping_saver')
        if var_list is None:
            var_list = variables.global_variables()
        if not isinstance(var_list, dict):
            var_list = saver.BaseSaverBuilder.OpListToDict(var_list)

        # OpListToDict converts variables to tensors. We make sure we can get
        # the unique variable name for both normal and resource variables.
        def get_v_name(tensor):
            if tensor.op.type == 'ReadVariableOp':
                return tensor.op.inputs[0].op.name
            else:
                return tensor.op.name

        v_name_to_tensor = {}
        for tensor in six.itervalues(var_list):
            v_name = get_v_name(tensor)
            v_name_to_tensor[v_name] = tensor

        # Now swap variables and moving averages
        swapped_var_list = {}
        for k, tensor in six.iteritems(var_list):
            v_name = get_v_name(tensor)
            swapped_v_name = self._swapped_variable_name_map.get(v_name, None)
            tensor_to_save = tensor
            if swapped_v_name is not None:
                if swapped_v_name in v_name_to_tensor:
                    tensor_to_save = v_name_to_tensor[swapped_v_name]
                else:
                    raise ValueError((
                        'Variable to swap %s is not part of variables to save. '
                        'This breaks MovingAverageOptimizer.') %
                                     swapped_v_name)
            swapped_var_list[k] = tensor_to_save

        # Build the swapping saver.
        return saver.Saver(swapped_var_list, name=name, **kwargs)
Example #52
  def testModelWithBuckets(self):
    """Larger tests that does full sequence-to-sequence model training."""
    # We learn to copy 10 symbols in 2 buckets: length 4 and length 8.
    classes = 10
    buckets = [(4, 4), (8, 8)]
    perplexities = [[], []]  # Results for each bucket.
    random_seed.set_random_seed(111)
    random.seed(111)
    np.random.seed(111)

    with self.test_session() as sess:
      # We use sampled softmax so we keep output projection separate.
      w = variable_scope.get_variable("proj_w", [24, classes])
      w_t = array_ops.transpose(w)
      b = variable_scope.get_variable("proj_b", [classes])

      # Here comes a sample Seq2Seq model using GRU cells.
      def SampleGRUSeq2Seq(enc_inp, dec_inp, weights):
        """Example sequence-to-sequence model that uses GRU cells."""

        def GRUSeq2Seq(enc_inp, dec_inp):
          cell = rnn_cell.MultiRNNCell(
              [rnn_cell.GRUCell(24) for _ in range(2)], state_is_tuple=True)
          return seq2seq_lib.embedding_attention_seq2seq(
              enc_inp,
              dec_inp,
              cell,
              num_encoder_symbols=classes,
              num_decoder_symbols=classes,
              embedding_size=24,
              output_projection=(w, b))

        targets = [dec_inp[i + 1] for i in range(len(dec_inp) - 1)] + [0]

        def SampledLoss(labels, logits):
          labels = array_ops.reshape(labels, [-1, 1])
          return nn_impl.sampled_softmax_loss(
              weights=w_t,
              biases=b,
              labels=labels,
              inputs=logits,
              num_sampled=8,
              num_classes=classes)

        return seq2seq_lib.model_with_buckets(
            enc_inp,
            dec_inp,
            targets,
            weights,
            buckets,
            GRUSeq2Seq,
            softmax_loss_function=SampledLoss)

      # Now we construct the copy model.
      batch_size = 8
      inp = [
          array_ops.placeholder(
              dtypes.int32, shape=[None]) for _ in range(8)
      ]
      out = [
          array_ops.placeholder(
              dtypes.int32, shape=[None]) for _ in range(8)
      ]
      weights = [
          array_ops.ones_like(
              inp[0], dtype=dtypes.float32) for _ in range(8)
      ]
      with variable_scope.variable_scope("root"):
        _, losses = SampleGRUSeq2Seq(inp, out, weights)
        updates = []
        params = variables.global_variables()
        optimizer = adam.AdamOptimizer(0.03, epsilon=1e-5)
        for i in range(len(buckets)):
          full_grads = gradients_impl.gradients(losses[i], params)
          grads, _ = clip_ops.clip_by_global_norm(full_grads, 30.0)
          update = optimizer.apply_gradients(zip(grads, params))
          updates.append(update)
        sess.run([variables.global_variables_initializer()])
      steps = 6
      for _ in range(steps):
        bucket = random.choice(np.arange(len(buckets)))
        length = buckets[bucket][0]
        i = [
            np.array(
                [np.random.randint(9) + 1 for _ in range(batch_size)],
                dtype=np.int32) for _ in range(length)
        ]
        # 0 is our "GO" symbol here.
        o = [np.array([0] * batch_size, dtype=np.int32)] + i
        feed = {}
        for i1, i2, o1, o2 in zip(inp[:length], i[:length], out[:length],
                                  o[:length]):
          feed[i1.name] = i2
          feed[o1.name] = o2
        if length < 8:  # For the 4-bucket, we need the 5th as target.
          feed[out[length].name] = o[length]
        res = sess.run([updates[bucket], losses[bucket]], feed)
        perplexities[bucket].append(math.exp(float(res[1])))
      for bucket in range(len(buckets)):
        if len(perplexities[bucket]) > 1:  # Assert that perplexity went down.
          self.assertLess(perplexities[bucket][-1],  # 20% margin of error.
                          1.2 * perplexities[bucket][0])
Example #53
    def add_meta_graph_and_variables(self,
                                     sess,
                                     tags,
                                     signature_def_map=None,
                                     assets_collection=None,
                                     legacy_init_op=None,
                                     clear_devices=False,
                                     main_op=None):
        """Adds the current meta graph to the SavedModel and saves variables.

    Creates a Saver to save the variables from the provided session. Exports the
    corresponding meta graph def. This function assumes that the variables to be
    saved have been initialized. For a given `SavedModelBuilder`, this API must
    be called exactly once and for the first meta graph to save. For subsequent
    meta graph defs to be added, the `add_meta_graph()` API must be used.

    Args:
      sess: The TensorFlow session from which to save the meta graph and
        variables.
      tags: The set of tags with which to save the meta graph.
      signature_def_map: The map of signature defs to add to the meta graph
        def.
      assets_collection: Assets collection to be saved with SavedModel.
      legacy_init_op: Legacy support for op or group of ops to execute after the
          restore op upon a load.
      clear_devices: Set to true if the device info on the default graph should
          be cleared.
      main_op: Op or group of ops to execute when the graph is loaded.
    """
        if self._has_saved_variables:
            raise AssertionError(
                "Variables and assets have already been saved. "
                "Please invoke `add_meta_graph()` instead.")

        # Save asset files and write them to disk, if any.
        self._save_and_write_assets(assets_collection)

        # Create the variables sub-directory, if it does not exist.
        variables_dir = os.path.join(
            compat.as_text(self._export_dir),
            compat.as_text(constants.VARIABLES_DIRECTORY))
        if not file_io.file_exists(variables_dir):
            file_io.recursive_create_dir(variables_dir)

        variables_path = os.path.join(
            compat.as_text(variables_dir),
            compat.as_text(constants.VARIABLES_FILENAME))

        if main_op is None:
            # Add legacy init op to the SavedModel.
            self._maybe_add_legacy_init_op(legacy_init_op)
        else:
            self._add_main_op(main_op)

        # Initialize a saver to generate a sharded output for all variables in the
        # current scope.
        saver = tf_saver.Saver(variables.global_variables(),
                               sharded=True,
                               write_version=saver_pb2.SaverDef.V2)

        # Save the variables. Also, disable writing the checkpoint state proto. The
        # file is not used during SavedModel loading. In addition, since a
        # SavedModel can be copied or moved, this avoids the checkpoint state
        # becoming outdated.
        saver.save(sess,
                   variables_path,
                   write_meta_graph=False,
                   write_state=False)

        # Export the meta graph def.
        meta_graph_def = saver.export_meta_graph(clear_devices=clear_devices)

        # Tag the meta graph def and add it to the SavedModel.
        self._tag_and_add_meta_graph(meta_graph_def, tags, signature_def_map)

        # Mark this instance of SavedModel as having saved variables, such that
        # subsequent attempts to save variables will fail.
        self._has_saved_variables = True
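Seen from the caller's side, here is a minimal sketch, assuming TF 1.x, an already-initialized session `sess`, and input/output tensors `x` and `y`, of the single permitted call to add_meta_graph_and_variables followed by save():

import tensorflow as tf

def export_saved_model(sess, export_dir, x, y):
    # Hypothetical helper; `export_dir`, `x` and `y` are assumed inputs.
    builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={'x': x}, outputs={'y': y})
    # Called exactly once, for the first (and here only) meta graph.
    builder.add_meta_graph_and_variables(
        sess,
        [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                signature
        })
    builder.save()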
Example #54
    def add_graph_and_variables(self,
                                input_tensors,
                                output_tensors,
                                assets_collection=None,
                                legacy_init_op=None,
                                main_op=None):
        """添加当前训练模型的 meta graph 和参数。

        Args:
          input_tensors: 导出模型的输入的别名和 Tensors 之间的字典。
          output_tensors: 导出模型的输出的别名和 Tensors 之间的字。,
          assets_collection: 附加资产文件列表,可选。
            资产文件会在模型导出和导入时被当作模型的一部分进行处理。
            资产文件主要应用场景:训练模型的某些操作需要外部附加文件进行初始化等。
            在导出模型的时候,资产文件会被拷贝到模型导出路径的 assets 目录下。
          legacy_init_op: 在导出模型被加载要被执行的初始化操作,可选。
          main_op: 导出模型在被加载时执行的操作,可选。
        """
        # Set up the signature for input and output tensorflow specification.
        signature_inputs = {}
        for (alias_name, input_tensor) in input_tensors.items():
            input_tensor_info = meta_graph_pb2.TensorInfo()
            input_tensor_info.name = input_tensor.name
            signature_inputs[alias_name] = input_tensor_info

        signature_outputs = {}
        for (alias_name, output_tensor) in output_tensors.items():
            output_tensor_info = meta_graph_pb2.TensorInfo()
            output_tensor_info.name = output_tensor.name
            signature_outputs[alias_name] = output_tensor_info

        signature_def = utils.build_signature_def(
            signature_inputs, signature_outputs,
            caicloud_constants.MODEL_METHOD_NAME)
        signature_def_map = {
            caicloud_constants.MODEL_METHOD_NAME: signature_def
        }

        # Save asset files and write them to disk, if any.
        self._save_and_write_assets(assets_collection)

        if main_op is None:
            # Add legacy init op to the SavedModel.
            self._maybe_add_legacy_init_op(legacy_init_op)
        else:
            self._add_main_op(main_op)

        # Initialize a saver to generate a sharded output for all variables in the
        # current scope.
        self._saver = tf_saver.Saver(variables.global_variables(),
                                     sharded=True,
                                     write_version=saver_pb2.SaverDef.V2)

        # Export the meta graph def.
        meta_graph_def = self._saver.export_meta_graph(clear_devices=True)

        # Tag the meta graph def and add it to the SavedModel.
        self._tag_and_add_meta_graph(meta_graph_def,
                                     [caicloud_constants.MODEL_TAG],
                                     signature_def_map)

        self._has_added_graph_and_variables = True
Example #55
    def _get_data_dict(self):
        data_dict = dict()
        for var in variables.global_variables():
            name = var.name.split(':')[0]
            data_dict[name] = var.eval()
        return data_dict
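Note that var.eval() above only works while a default session is active. A minimal equivalent sketch, assuming an initialized tf.compat.v1.Session `sess` supplied by the caller, that makes the session explicit:

from tensorflow.python.ops import variables

def snapshot_global_variables(sess):
    # Same pattern as _get_data_dict: keys are variable names without the ':0' suffix.
    return {v.name.split(':')[0]: sess.run(v)
            for v in variables.global_variables()}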
Example #56
File: _default.py  Project: wujinke/MDNT
def load_loss_weights_from_hdf5_group(f, fh_dict):
    """Implements loss weight loading.
    This is the extension for implementing the loading session for loss
    weights. It will enable the load_model to load loss weights if they
    are compiled as variables before saving. The variable names would
    be extracted from the config dictionary and the values would be
    extracted from the hdf5 dataset by indexing the variable names.
    Arguments:
        f:       a pointer to a HDF5 loss weights group.
        fh_dict: JSON config dictionary.
    Returns:
        loss_weights: a list, or dictionary of loss weights.
    Raises:
        ValueError: if the loss_weights is not list, tuple or dict.
        ValueError: if a saved variable has a wrong tag.
    """
    cfg_entry = fh_dict['training_config']
    loss_serialized = cfg_entry['loss_weights']
    if loss_serialized is None:  # No loss weights were saved; nothing to load.
        return None

    # Retain constant weights, and retrieve variables according to name tag.
    if isinstance(loss_serialized, (list, tuple)):
        loss_weights = []
        for value in loss_serialized:
            if isinstance(value, str):
                var = list(
                    filter(
                        lambda x: x.name == value,
                        [op for op in variables.global_variables(scope=None)]))
                if var:
                    var = var[0]
                    K.set_value(var, value=np.asarray(f[value]))
                else:
                    if value[-2:] == ':0':
                        var = K.variable(value=np.asarray(f[value]),
                                         name=value[:-2])
                    else:
                        raise ValueError(
                            'The name of a variable in loss_weights should end with :0, because it is produced by K.variable.'
                        )
            else:
                var = value
            loss_weights.append(var)
    elif isinstance(loss_serialized, dict):
        loss_weights = {}
        for key, value in loss_serialized.items():
            if isinstance(value, str):
                var = list(
                    filter(
                        lambda x: x.name == value,
                        [op for op in variables.global_variables(scope=None)]))
                if var:
                    var = var[0]
                    K.set_value(var, value=np.asarray(f[value]))
                else:
                    if value[-2:] == ':0':
                        var = K.variable(value=np.asarray(f[value]),
                                         name=value[:-2])
                    else:
                        raise ValueError(
                            'The name of a variable in loss_weights should end with :0, because it is produced by K.variable.'
                        )
            else:
                var = value
            loss_weights[key] = var
    else:
        raise ValueError(
            'The parameter loss_weights needs to be a list or a dictionary, maybe you need to recompile your model.'
        )

    return loss_weights
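The loader above expects every variable loss weight to be stored in the HDF5 group under its tensor name (ending in ':0'). A hypothetical sketch of the matching save step, not MDNT's actual code, where the group handle `f` and the compiled `loss_weights` container are assumed:

import numpy as np
from tensorflow.keras import backend as K

def save_loss_weights_to_hdf5_group(f, loss_weights):
    # Hypothetical counterpart to the loader: variables are dumped under their
    # tensor names; plain numbers are left to the serialized JSON config.
    values = loss_weights.values() if isinstance(loss_weights, dict) else loss_weights
    for value in values:
        if not isinstance(value, (int, float)):
            f.create_dataset(value.name, data=np.asarray(K.get_value(value)))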
Example #57
    def testTrainWithInitFromCheckpoint(self):
        logdir1 = os.path.join(self.get_temp_dir(), 'tmp_logs1/')
        logdir2 = os.path.join(self.get_temp_dir(), 'tmp_logs2/')

        if gfile.Exists(logdir1):  # For running on jenkins.
            gfile.DeleteRecursively(logdir1)
        if gfile.Exists(logdir2):  # For running on jenkins.
            gfile.DeleteRecursively(logdir2)

        # First, train the model one step (make sure the error is high).
        with ops.Graph().as_default():
            random_seed.set_random_seed(0)
            train_op = self.create_train_op()
            saver = saver_lib.Saver()
            loss = training.train(
                train_op,
                logdir1,
                hooks=[
                    basic_session_run_hooks.CheckpointSaverHook(logdir1,
                                                                save_steps=1,
                                                                saver=saver),
                    basic_session_run_hooks.StopAtStepHook(num_steps=1),
                ],
                save_checkpoint_secs=None,
                save_summaries_steps=None)
            self.assertGreater(loss, .5)

        # Next, train the model to convergence.
        with ops.Graph().as_default():
            random_seed.set_random_seed(1)
            train_op = self.create_train_op()
            saver = saver_lib.Saver()
            loss = training.train(
                train_op,
                logdir1,
                hooks=[
                    basic_session_run_hooks.CheckpointSaverHook(logdir1,
                                                                save_steps=300,
                                                                saver=saver),
                    basic_session_run_hooks.StopAtStepHook(num_steps=300),
                ],
                save_checkpoint_secs=None,
                save_summaries_steps=None)
            self.assertIsNotNone(loss)
            self.assertLess(loss, .02)

        # Finally, advance the model a single step and validate that the loss is
        # still low.
        with ops.Graph().as_default():
            random_seed.set_random_seed(2)
            train_op = self.create_train_op()

            model_variables = variables_lib2.global_variables()
            model_path = saver_lib.latest_checkpoint(logdir1)

            assign_fn = variables_lib.assign_from_checkpoint_fn(
                model_path, model_variables)

            def init_fn(_, session):
                assign_fn(session)

            loss = training.train(
                train_op,
                None,
                scaffold=monitored_session.Scaffold(init_fn=init_fn),
                hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=1)],
                save_checkpoint_secs=None,
                save_summaries_steps=None)

            self.assertIsNotNone(loss)
            self.assertLess(loss, .02)
Example #58
    def export_fn(estimator,
                  export_dir_base,
                  checkpoint_path=None,
                  eval_result=None):
        with ops.Graph().as_default() as g:
            contrib_variables.create_global_step(g)

            input_ops = feature_transforms.build_csv_serving_tensors(
                args.output_dir_from_analysis_step, features, schema, stats,
                keep_target)
            model_fn_ops = estimator._call_model_fn(
                input_ops.features, None, model_fn_lib.ModeKeys.INFER)
            output_fetch_tensors = make_prediction_output_tensors(
                args=args,
                features=features,
                input_ops=input_ops,
                model_fn_ops=model_fn_ops,
                keep_target=keep_target)

            # Don't use signature_def_utils.predict_signature_def as that renames
            # tensor names if there is only 1 input/output tensor!
            signature_inputs = {
                key: tf.saved_model.utils.build_tensor_info(tensor)
                for key, tensor in six.iteritems(input_ops.default_inputs)
            }
            signature_outputs = {
                key: tf.saved_model.utils.build_tensor_info(tensor)
                for key, tensor in six.iteritems(output_fetch_tensors)
            }
            signature_def_map = {
                'serving_default':
                signature_def_utils.build_signature_def(
                    signature_inputs, signature_outputs,
                    tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
            }

            if not checkpoint_path:
                # Locate the latest checkpoint
                checkpoint_path = saver.latest_checkpoint(estimator._model_dir)
            if not checkpoint_path:
                raise ValueError("Couldn't find trained model at %s." %
                                 estimator._model_dir)

            export_dir = saved_model_export_utils.get_timestamped_export_dir(
                export_dir_base)

            with tf_session.Session('') as session:
                variables.local_variables_initializer()
                data_flow_ops.tables_initializer()
                saver_for_restore = saver.Saver(variables.global_variables(),
                                                sharded=True)
                saver_for_restore.restore(session, checkpoint_path)

                init_op = control_flow_ops.group(
                    variables.local_variables_initializer(),
                    data_flow_ops.tables_initializer())

                # Perform the export
                builder = saved_model_builder.SavedModelBuilder(export_dir)
                builder.add_meta_graph_and_variables(
                    session, [tag_constants.SERVING],
                    signature_def_map=signature_def_map,
                    assets_collection=ops.get_collection(
                        ops.GraphKeys.ASSET_FILEPATHS),
                    legacy_init_op=init_op)
                builder.save(False)

            # Add the extra assets
            if assets_extra:
                assets_extra_path = os.path.join(
                    compat.as_bytes(export_dir),
                    compat.as_bytes('assets.extra'))
                for dest_relative, source in assets_extra.items():
                    dest_absolute = os.path.join(
                        compat.as_bytes(assets_extra_path),
                        compat.as_bytes(dest_relative))
                    dest_path = os.path.dirname(dest_absolute)
                    file_io.recursive_create_dir(dest_path)
                    file_io.copy(source, dest_absolute)

        # only keep the last 3 models
        saved_model_export_utils.garbage_collect_exports(export_dir_base,
                                                         exports_to_keep=3)

        # save the last model to the model folder.
        # export_dir_base = A/B/intermediate_models/
        if keep_target:
            final_dir = os.path.join(args.job_dir, 'evaluation_model')
        else:
            final_dir = os.path.join(args.job_dir, 'model')
        if file_io.is_directory(final_dir):
            file_io.delete_recursively(final_dir)
        file_io.recursive_create_dir(final_dir)
        recursive_copy(export_dir, final_dir)

        return export_dir
Example #59
def main(_):
    start = time.time()
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default(), tf.device('/device:CPU:0'):
        tf.set_random_seed(1234)

        test_filenames = [
            os.path.join(FLAGS.dataset_dir, FLAGS.test_dir, name)
            for name in os.listdir(
                os.path.join(FLAGS.dataset_dir, FLAGS.test_dir))
        ]

        test_imgs, test_labels, test_basenames, test_iterator = _get_data(
            test_filenames)
        split_test_imgs = tf.split(value=test_imgs,
                                   num_or_size_splits=FLAGS.num_gpus,
                                   axis=0)
        split_test_labels = tf.split(value=test_labels,
                                     num_or_size_splits=FLAGS.num_gpus,
                                     axis=0)
        split_test_basenames = tf.split(value=test_basenames,
                                        num_or_size_splits=FLAGS.num_gpus,
                                        axis=0)

        loss_list_test = []
        labels_list_test = []
        softmax_list_test = []
        basename_list_test = []

        # get the logits for validation set
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i in range(FLAGS.num_gpus):
                with tf.device('/device:GPU:%d' % i):
                    logits_test, ages_test, _ = ResNetGN().get_model(
                        split_test_imgs[i],
                        split_test_labels[i],
                        training=False)
                    if 'sigmoid' not in exp_name:
                        softmax_test = tf.nn.softmax(logits_test)[:, :, 1]
                        cls_loss_test = tf.losses.softmax_cross_entropy(
                            tf.one_hot(split_test_labels[i], 2), logits_test)
                    else:
                        softmax_test = tf.nn.sigmoid(logits_test)
                        cls_loss_test = tf.losses.sigmoid_cross_entropy(
                            split_test_labels[i], logits_test)

                    loss_list_test.append(cls_loss_test)
                    labels_list_test.append(split_test_labels[i])
                    softmax_list_test.append(softmax_test)
                    basename_list_test.append(split_test_basenames[i])

        labels_list_test = tf.reshape(tf.stack(labels_list_test), [-1])
        softmax_list_test = tf.reshape(tf.stack(softmax_list_test), [-1])
        basename_list_test = tf.reshape(tf.stack(basename_list_test), [-1])

        test_loss = tf.math.reduce_mean(loss_list_test)

        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True)

        with tf.Session(config=config) as sess:
            sess.run(test_iterator.initializer)

            init = tf.global_variables_initializer()
            sess.run(init)
            saver = tf.train.Saver()
            saver.restore(sess, FLAGS.checkpoint_path)
            var_list = variables.global_variables()
            num_imgs = FLAGS.test_samples

            num_batches_test = int(math.ceil(num_imgs / FLAGS.batch_size)) + 1
            print('Number of batches of test: ' + str(num_batches_test))

            loss_list = []
            y_scores = []
            y_trues = []
            basenames = []

            for eval_iter in range(num_batches_test):
                print(eval_iter)
                softmax, labels, loss, basename = sess.run([
                    softmax_list_test, labels_list_test, test_loss,
                    basename_list_test
                ])
                loss_list.append(loss)
                y_scores.extend(softmax)
                y_trues.extend(labels)
                basenames.extend(basename)

            output_file_path = os.path.join(
                os.path.dirname(FLAGS.checkpoint_path),
                'preds_test_' + str(best_iter) + '.txt')

            of = open(output_file_path, 'w')
            for i in range(len(basenames)):
                of.write('%s %.12f %d\n' %
                         (basenames[i], y_scores[i], y_trues[i]))

            avg_loss = sum(loss_list) / len(loss_list)