def test_get_variable(self):
        with self.session(
                config=default_config,
                graph=ops.Graph(),
                use_gpu=test_util.is_gpu_available(),
        ):
            default_val = -1
            with variable_scope.variable_scope("embedding", reuse=True):
                table1 = de.get_variable("t1",
                                         dtypes.int64,
                                         dtypes.int32,
                                         initializer=default_val,
                                         dim=2)
                table2 = de.get_variable("t1",
                                         dtypes.int64,
                                         dtypes.int32,
                                         initializer=default_val,
                                         dim=2)
                table3 = de.get_variable("t2",
                                         dtypes.int64,
                                         dtypes.int32,
                                         initializer=default_val,
                                         dim=2)

            self.assertAllEqual(table1, table2)
            self.assertNotEqual(table1, table3)
Example #2
 def test_GraphKeys(self):
   v0 = de.Variable(key_dtype=dtypes.int64,
                    value_dtype=dtypes.float32,
                    initializer=0.0,
                    name="v0")
   v1 = de.Variable(key_dtype=dtypes.int64,
                    value_dtype=dtypes.float32,
                    initializer=0.0,
                    name="v1",
                    trainable=False)
   v2 = de.get_variable(
       "v2",
       key_dtype=dtypes.int64,
       value_dtype=dtypes.float32,
       initializer=init_ops.zeros_initializer,
       dim=10,
   )
   v3 = de.get_variable("v3",
                        key_dtype=dtypes.int64,
                        value_dtype=dtypes.float32,
                        initializer=init_ops.zeros_initializer,
                        dim=10,
                        trainable=False)
   de_vars = ops.get_collection(de.GraphKeys.DYNAMIC_EMBEDDING_VARIABLES)
   self.assertSetEqual(set([v0, v1, v2, v3]), set(de_vars))
   de_trainable_vars = ops.get_collection(
       de.GraphKeys.TRAINABLE_DYNAMIC_EMBEDDING_VARIABLES)
   self.assertSetEqual(set([v0, v2]), set(de_trainable_vars))
    def test_scope_reuse_embedding_lookup(self):
        ids = constant_op.constant([0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                                   dtype=dtypes.int64)
        with variable_scope.variable_scope("test",
                                           reuse=variable_scope.AUTO_REUSE):
            p1 = de.get_variable(name="p1")
            with variable_scope.variable_scope("q"):
                _, t1 = de.embedding_lookup(p1,
                                            ids,
                                            name="emb",
                                            return_trainable=True)

        with variable_scope.variable_scope("test",
                                           reuse=variable_scope.AUTO_REUSE):
            p1_reuse = de.get_variable(name="p1")
            p2 = de.get_variable(name="p2")
            with variable_scope.variable_scope("q"):
                _, t2 = de.embedding_lookup(p2,
                                            ids,
                                            name="emb",
                                            return_trainable=True)

        self.assertAllEqual(p1.name, "test/p1")
        self.assertAllEqual(p2.name, "test/p2")
        self.assertAllEqual(p1, p1_reuse)
        self.assertEqual(t1.name, "test/q/emb/TrainableWrapper:0")
        self.assertEqual(t2.name, "test/q/emb/TrainableWrapper_1:0")
        self.assertAllEqual(p1._tables[0].name, "test_p1_mht_1of1")
        self.assertAllEqual(p1_reuse._tables[0].name, "test_p1_mht_1of1")
        self.assertAllEqual(p2._tables[0].name, "test_p2_mht_1of1")
    def test_scope_reuse_safe_sparse_embedding_lookup(self):
        indices = [
            [0, 0, 0],
            [0, 0, 1],
            [0, 0, 2],
            [0, 1, 0],
            [1, 0, 0],
            [1, 1, 0],
            [1, 1, 1],
        ]
        ids = [0, 1, -1, -1, 2, 0, 1]
        shape = [2, 3, 4]

        sparse_ids = sparse_tensor.SparseTensor(
            constant_op.constant(indices, dtypes.int64),
            constant_op.constant(ids, dtypes.int64),
            constant_op.constant(shape, dtypes.int64),
        )

        with variable_scope.variable_scope("test",
                                           reuse=variable_scope.AUTO_REUSE):
            p1 = de.get_variable(name="p1")
            with variable_scope.variable_scope("q"):
                _, t1 = de.safe_embedding_lookup_sparse(p1,
                                                        sparse_ids,
                                                        None,
                                                        name="safe_sp_emb",
                                                        return_trainable=True)

        with variable_scope.variable_scope("test",
                                           reuse=variable_scope.AUTO_REUSE):
            p1_reuse = de.get_variable(name="p1")
            p2 = de.get_variable(name="p2")
            with variable_scope.variable_scope("q"):
                _, t2 = de.safe_embedding_lookup_sparse(p2,
                                                        sparse_ids,
                                                        None,
                                                        name="safe_sp_emb",
                                                        return_trainable=True)

        self.assertAllEqual(p1.name, "test/p1")
        self.assertAllEqual(p2.name, "test/p2")
        self.assertAllEqual(p1, p1_reuse)
        self.assertEqual(
            t1.name,
            "test/q/safe_sp_emb/embedding_lookup_sparse/embedding_lookup/TrainableWrapper:0",
        )
        self.assertEqual(
            t2.name,
            "test/q/safe_sp_emb/embedding_lookup_sparse/embedding_lookup/TrainableWrapper_1:0",
        )
        self.assertAllEqual(p1._tables[0].name, "test_p1_mht_1of1")
        self.assertAllEqual(p1_reuse._tables[0].name, "test_p1_mht_1of1")
        self.assertAllEqual(p2._tables[0].name, "test_p2_mht_1of1")
 def test_get_variable_reuse_error(self):
     ops.disable_eager_execution()
     with self.session(
             config=default_config,
             graph=ops.Graph(),
             use_gpu=test_util.is_gpu_available(),
     ):
         with variable_scope.variable_scope("embedding", reuse=False):
             _ = de.get_variable("t900", initializer=-1, dim=2)
             with self.assertRaisesRegex(
                     ValueError, "Variable embedding/t900 already exists"):
                 _ = de.get_variable("t900", initializer=-1, dim=2)
    def test_higher_rank(self):
        np.random.seed(8)
        with self.session(use_gpu=test_util.is_gpu_available(),
                          config=default_config):
            for dim in [1, 10]:
                for ids_shape in [[3, 2], [4, 3], [4, 3, 10]]:
                    with variable_scope.variable_scope("test_higher_rank",
                                                       reuse=True):
                        params = de.get_variable(
                            "t350-" + str(dim),
                            dtypes.int64,
                            dtypes.float32,
                            initializer=2.0,
                            dim=dim,
                        )
                        ids = np.random.randint(
                            2**31, size=np.prod(ids_shape),
                            dtype=np.int64).reshape(ids_shape)
                        ids = constant_op.constant(ids, dtype=dtypes.int64)
                        simple = params.lookup(ids)
                        self.evaluate(params.upsert(ids, simple))

                        embedding = de.embedding_lookup(params, ids)
                        self.assertAllEqual(simple.eval(), embedding.eval())
                        self.assertAllEqual(ids_shape + [dim],
                                            embedding.eval().shape)
  def test_simple_sharded(self):
    embeddings = de.get_variable(
        "t300",
        dtypes.int64,
        dtypes.float32,
        dim=1,
        devices=_get_devices() * 2,
        initializer=2.0,
    )

    ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
    embedding, trainable = de.embedding_lookup(embeddings,
                                               ids,
                                               max_norm=1.0,
                                               return_trainable=True)
    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config):
      self.assertAllClose(embedding.eval(), [[1.0]] * 5)
      self.evaluate(trainable.update_op())
      self.assertAllEqual(embeddings.size().eval(), 5)
      self.assertAllEqual(embeddings.size(0).eval(), 3)
      self.assertAllEqual(embeddings.size(1).eval(), 2)
            def test_fn():
                embeddings = de.get_variable(
                    "t2020-v2-" + name + str(id),
                    key_dtype=k_dtype,
                    value_dtype=d_dtype,
                    devices=_get_devices() * num_shards,
                    initializer=1.0,
                    dim=dim,
                )
                self.device_check(embeddings)
                trainables = []
                init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
                init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
                self.evaluate(embeddings.upsert(init_ids, init_vals))

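                # Note: de.embedding_lookup creates the TrainableWrapper
                # lazily inside loss_fn, so var_fn hands the optimizer a
                # list that is populated on the first loss evaluation.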
                def var_fn():
                    return trainables

                def loss_fn(x, trainables):
                    ids = constant_op.constant(raw_ids, dtype=k_dtype)
                    pred, trainable = de.embedding_lookup(
                        [x], ids, return_trainable=True)
                    trainables.clear()
                    trainables.append(trainable)
                    return pred * pred

                test_opt_op = test_opt.minimize(
                    lambda: loss_fn(embeddings, trainables), var_fn)
                self.evaluate(variables.global_variables_initializer())
                for _ in range(run_step):
                    self.evaluate(test_opt_op)
                return embeddings.lookup(init_ids)
    def test_dynamic_embedding_variable_remove_high_rank(self):
        with self.session(use_gpu=test_util.is_gpu_available(),
                          config=default_config):
            default_val = constant_op.constant([-1, -1, -1], dtypes.int32)
            keys = constant_op.constant([0, 1, 2], dtypes.int64)
            values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]],
                                          dtypes.int32)
            table = de.get_variable("t180",
                                    dtypes.int64,
                                    dtypes.int32,
                                    initializer=default_val,
                                    dim=3)

            self.evaluate(table.upsert(keys, values))
            self.assertAllEqual(3, self.evaluate(table.size()))

            remove_keys = constant_op.constant([[0, 3]], dtypes.int64)
            self.evaluate(table.remove(remove_keys))
            self.assertAllEqual(2, self.evaluate(table.size()))

            remove_keys = constant_op.constant([[0, 1], [2, 3]], dtypes.int64)
            output = table.lookup(remove_keys)
            self.assertAllEqual([2, 2, 3], output.get_shape())

            result = self.evaluate(output)
            self.assertAllEqual(
                [[[-1, -1, -1], [2, 3, 4]], [[4, 5, 6], [-1, -1, -1]]], result)
 def test_check_ops_number(self):
   self.assertEqual(de.get_model_mode(), "train")
   de.enable_inference_mode()
   self.assertEqual(de.get_model_mode(), "inference")
   de.enable_train_mode()
   self.assertEqual(de.get_model_mode(), "train")
   for fn, assign_num, read_num in [(de.enable_train_mode, 1, 2),
                                    (de.enable_inference_mode, 0, 1)]:
     fn()
     embeddings = de.get_variable('ModeModeTest' + str(assign_num),
                                  key_dtype=dtypes.int64,
                                  value_dtype=dtypes.float32,
                                  devices=_get_devices(),
                                  initializer=1.,
                                  dim=8)
     ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
     test_var, trainable = de.embedding_lookup([embeddings],
                                               ids,
                                               return_trainable=True)
     _ = math_ops.add(test_var, 1)
     op_list = ops.get_default_graph().get_operations()
     op_list_assign = [
         op.name for op in op_list if "AssignBeforeReadVariable" in op.name
     ]
     op_list_read = [op.name for op in op_list if "ReadVariableOp" in op.name]
      self.assertEqual(len(op_list_assign), assign_num)
      self.assertEqual(len(op_list_read), read_num)
     de.enable_train_mode()
     ops.reset_default_graph()
Example #11
  def __init__(self, var):
    """
    A timestamp status sparse variable is created. The timestamp status
    has same key_dtype as the target variable and value_dtype in int32,
    which indicates the timestamp value. The timestamp means a digital
    record of time. The later the time, the larger the timestamp.

    Args:
      var: A `dynamic_embedding.Variable` object to be restricted.
    """
    super(TimestampRestrictPolicy, self).__init__(var)
    scope = variable_scope.get_variable_scope()
    if scope.name:
      tstp_scope = scope.name + '/status'
    else:
      tstp_scope = 'status'
    tstp_name = self.var.name + '/timestamp'

    with ops.name_scope(tstp_scope, 'status', []) as unique_scope:
      if unique_scope:
        full_name = unique_scope + tstp_name
      else:
        full_name = tstp_name

      self.tstp_var = de.get_variable(
          full_name,
          key_dtype=self.var.key_dtype,
          value_dtype=dtypes.int32,
          dim=1,
          devices=self.var.devices,
          partitioner=self.var.partition_fn,
          trainable=False,
          init_size=self.var.init_size,
      )
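A minimal usage sketch (not from the original tests) of attaching this policy via `de.get_variable`; the variable name, dim, and reserve sizes below are illustrative, while `restrict_policy`, `apply_update`, and `apply_restriction` follow the usage shown in the tests elsewhere in this collection.

  import tensorflow as tf
  from tensorflow_recommenders_addons import dynamic_embedding as de

  # Hypothetical names and sizes; a sketch, assuming eager execution.
  var = de.get_variable(
      'tstp_sketch',
      key_dtype=tf.int64,
      value_dtype=tf.float32,
      initializer=0.0,
      dim=2,
      restrict_policy=de.TimestampRestrictPolicy,
  )
  ids = tf.constant([1, 2, 3], dtype=tf.int64)
  var.restrict_policy.apply_update(ids)  # record access timestamps
  # Keep the 2 most recently seen keys once the size exceeds the trigger.
  var.restrict_policy.apply_restriction(2, trigger=2)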
Example #12
  def __init__(self, var):
    """
    A frequency status sparse variable is created. The frequency status has
    same key_dtype as the target variable and value_dtype in `int32`, which
    indicates the occurrence times of the feature.

    Args:
      var: A `dynamic_embedding.Variable` object to be restricted.
    """
    super(FrequencyRestrictPolicy, self).__init__(var)
    self.init_count = constant_op.constant(0, dtypes.int32)

    scope = variable_scope.get_variable_scope()
    if scope.name:
      freq_scope = scope.name + '/status'
    else:
      freq_scope = 'status'
    freq_name = self.var.name + '/frequency'

    with ops.name_scope(freq_scope, 'status', []) as unique_scope:
      if unique_scope:
        full_name = unique_scope + freq_name
      else:
        full_name = freq_name

      self.freq_var = de.get_variable(
          full_name,
          key_dtype=self.var.key_dtype,
          value_dtype=dtypes.int32,
          dim=1,
          devices=self.var.devices,
          partitioner=self.var.partition_fn,
          trainable=False,
          init_size=self.var.init_size,
      )
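Similarly, a frequency policy can be constructed directly from an existing variable, as the tests below do; this short sketch (variable name illustrative, assuming eager execution) exercises only the status-table bookkeeping.

  import tensorflow as tf
  from tensorflow_recommenders_addons import dynamic_embedding as de

  var = de.get_variable('freq_sketch',
                        key_dtype=tf.int64,
                        value_dtype=tf.float32,
                        initializer=-0.1,
                        dim=2)
  policy = de.FrequencyRestrictPolicy(var)
  ids = tf.constant([0, 1, 2], dtype=tf.int64)
  policy.apply_update(ids)  # bump the occurrence count of each key
  keys, freq = policy.status.export()  # per-key frequency records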
  def test_embedding_lookup(self):
    if not context.executing_eagerly():
      self.skipTest('Only test in eager mode.')

    params = de.get_variable('pn012', dim=2, initializer=0.1)
    params.upsert(
        constant_op.constant([1, 2, 3], dtype=dtypes.int64),
        constant_op.constant([[1., 1.], [2., 2.], [3., 3.]],
                             dtype=dtypes.float32))
    shadow = de.shadow_ops.ShadowVariable(params)

    ids = constant_op.constant([2, 3, 4], dtype=dtypes.int64)
    val = de.shadow_ops.embedding_lookup(shadow, ids)
    self.assertAllEqual(
        val,
        constant_op.constant([[2., 2.], [3., 3.], [0.1, 0.1]],
                             dtype=dtypes.float32))

    params.upsert(
        constant_op.constant([1, 2, 3], dtype=dtypes.int64),
        constant_op.constant([[1.1, 1.1], [2.2, 2.2], [3.3, 3.3]],
                             dtype=dtypes.float32))
    val = de.shadow_ops.embedding_lookup(shadow, ids)
    self.assertAllEqual(
        val,
        constant_op.constant([[2.2, 2.2], [3.3, 3.3], [0.1, 0.1]],
                             dtype=dtypes.float32))
  def test_create(self):
    if not context.executing_eagerly():
      self.skipTest('Only test in eager mode.')

    key_dtypes = [dtypes.int32, dtypes.int64]
    value_dtypes = [dtypes.int32, dtypes.float32, dtypes.float64]
    dims = [1, 4]
    trainable_options = [True, False]
    devices = ['/CPU:0']
    var_list = []
    rnd = 0
    for key_dtype, value_dtype, dim, trainable in itertools.product(
        key_dtypes, value_dtypes, dims, trainable_options):
      devar = de.get_variable('sparse_domain-' + str(rnd),
                              key_dtype=key_dtype,
                              value_dtype=value_dtype,
                              dim=dim,
                              initializer=0.1,
                              devices=devices,
                              init_size=1)
      name = 'shadow-' + str(rnd)
      var = de.shadow_ops.ShadowVariable(devar, name=name, trainable=trainable)
      self.assertEqual(var.dtype, devar.value_dtype)
      self.assertEqual(var.ids.dtype, devar.key_dtype)
      rnd += 1
Example #15
  def test_sharded_custom_partitioner_int32_ids(self):

    def _partition_fn(keys, shard_num):
      return math_ops.cast(keys % 2, dtype=dtypes.int32)

    embeddings = de.get_variable(
        "t330",
        dtypes.int64,
        dtypes.float32,
        partitioner=_partition_fn,
        devices=_get_devices() * 3,
        initializer=2.0,
    )

    ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
    vals = constant_op.constant([[0.0], [1.0], [2.0], [3.0], [4.0]],
                                dtype=dtypes.float32)
    ids_test = constant_op.constant([1, 3, 2, 3, 0], dtype=dtypes.int64)
    embedding = de.embedding_lookup(embeddings, ids_test)
    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config):
      self.evaluate(embeddings.upsert(ids, vals))
      self.assertAllClose(embedding.eval(), [[1.0], [3.0], [2.0], [3.0], [0.0]])
      self.assertAllEqual([5, 1], embedding.eval().shape)
      self.assertAllEqual(3, embeddings.size(0).eval())
      self.assertAllEqual(2, embeddings.size(1).eval())
      self.assertAllEqual(0, embeddings.size(2).eval())
    def commonly_apply_update_verify(self):
        first_inputs = np.array(range(3), dtype=np.int64)
        second_inputs = np.array(range(1, 4), dtype=np.int64)
        overdue_features = np.array([0, 3], dtype=np.int64)
        updated_features = np.array(range(1, 3), dtype=np.int64)
        with session.Session(config=default_config) as sess:
            ids = array_ops.placeholder(dtypes.int64)
            var = de.get_variable('sp_var',
                                  key_dtype=ids.dtype,
                                  value_dtype=dtypes.float32,
                                  initializer=-0.1,
                                  dim=2)
            embed_w, trainable = de.embedding_lookup(var,
                                                     ids,
                                                     return_trainable=True,
                                                     name='pl3201')
            policy = de.FrequencyRestrictPolicy(var)
            update_op = policy.apply_update(ids)

            self.assertAllEqual(sess.run(policy.status.size()), 0)
            sess.run(update_op, feed_dict={ids: first_inputs})
            self.assertAllEqual(sess.run(policy.status.size()), 3)
            time.sleep(1)
            sess.run(update_op, feed_dict={ids: second_inputs})
            self.assertAllEqual(sess.run(policy.status.size()), 4)

            keys, freq = sess.run(policy.status.export())
            kvs = sorted(dict(zip(keys, freq)).items())
            freq = np.array([x[1] for x in kvs])
            for x in freq[overdue_features]:
                for y in freq[updated_features]:
                    self.assertLess(x, y)
Example #17
  def test_dynamic_embedding_variable_invalid_shape(self):
    with self.session(config=default_config,
                      use_gpu=test_util.is_gpu_available()):
      default_val = constant_op.constant([-1, -1], dtypes.int64)
      keys = constant_op.constant([0, 1, 2], dtypes.int64)
      table = de.get_variable("t110",
                              dtypes.int64,
                              dtypes.int32,
                              initializer=default_val,
                              dim=2)

      # Shape [6] instead of [3, 2]
      values = constant_op.constant([0, 1, 2, 3, 4, 5], dtypes.int32)
      with self.assertRaisesOpError("Expected shape"):
        self.evaluate(table.upsert(keys, values))

      # Shape [2, 3] instead of [3, 2]
      values = constant_op.constant([[0, 1, 2], [3, 4, 5]], dtypes.int32)
      with self.assertRaisesOpError("Expected shape"):
        self.evaluate(table.upsert(keys, values))

      # Shape [2, 2] instead of [3, 2]
      values = constant_op.constant([[0, 1], [2, 3]], dtypes.int32)
      with self.assertRaisesOpError("Expected shape"):
        self.evaluate(table.upsert(keys, values))

      # Shape [3, 1] instead of [3, 2]
      values = constant_op.constant([[0], [2], [4]], dtypes.int32)
      with self.assertRaisesOpError("Expected shape"):
        self.evaluate(table.upsert(keys, values))

      # Valid Insert
      values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int32)
      self.evaluate(table.upsert(keys, values))
      self.assertAllEqual(3, self.evaluate(table.size()))
    def commonly_apply_update_verify_v2(self):
        if not context.executing_eagerly():
            self.skipTest('Skip graph mode test.')

        first_inputs = np.array(range(6), dtype=np.int64)
        second_inputs = np.array(range(3, 9), dtype=np.int64)
        overdue_features = np.array([0, 1, 2, 6, 7, 8], dtype=np.int64)
        updated_features = np.array(range(3, 6), dtype=np.int64)
        all_features = np.array(range(9), dtype=np.int64)

        with self.session(config=default_config):
            var = de.get_variable('sp_var',
                                  key_dtype=dtypes.int64,
                                  value_dtype=dtypes.float32,
                                  initializer=-0.1,
                                  dim=2)
            embed_w, trainable = de.embedding_lookup(var,
                                                     first_inputs,
                                                     return_trainable=True,
                                                     name='vc3939')
            policy = de.FrequencyRestrictPolicy(var)

            self.assertAllEqual(policy.status.size(), 0)
            policy.apply_update(first_inputs)
            self.assertAllEqual(policy.status.size(), len(first_inputs))
            time.sleep(1)
            policy.apply_update(second_inputs)
            self.assertAllEqual(policy.status.size(), len(all_features))

            keys, freq = policy.status.export()
            kvs = sorted(dict(zip(keys.numpy(), freq.numpy())).items())
            freq = np.array([x[1] for x in kvs])
            for x in freq[overdue_features]:
                for y in freq[updated_features]:
                    self.assertLess(x, y)
def _random_weights(
    key_dtype=dtypes.int64,
    value_dtype=dtypes.float32,
    vocab_size=4,
    embed_dim=4,
    num_shards=1,
):
  assert vocab_size > 0
  assert embed_dim > 0
  assert num_shards > 0
  assert num_shards <= vocab_size

  initializer = init_ops.truncated_normal_initializer(
      mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)
  embedding_weights = de.get_variable(
      key_dtype=key_dtype,
      value_dtype=value_dtype,
      devices=_get_devices() * num_shards,
      name="embedding_weights",
      initializer=initializer,
      dim=embed_dim,
  )
  return embedding_weights
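A hedged usage sketch of the helper above (id values illustrative): the returned variable is sharded across `num_shards` devices, and the assertions guard the precondition that there are at least as many vocabulary entries as shards.

  weights = _random_weights(vocab_size=8, embed_dim=4, num_shards=2)
  ids = constant_op.constant([0, 3, 7], dtype=dtypes.int64)
  emb = de.embedding_lookup(weights, ids)  # dense tensor of shape [3, 4]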
Example #20
  def test_static_shape_checking(self):
    np.random.seed(8)
    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config):
      for dim in [1, 10]:
        for ids_shape in [[3, 2], [4, 3], [4, 3, 10]]:
          with variable_scope.variable_scope(
              "test_static_shape_checking" + str(dim),
              reuse=variable_scope.AUTO_REUSE,
          ):
            params = de.get_variable(
                "test_static_shape_checking-" + str(dim),
                dtypes.int64,
                dtypes.float32,
                initializer=2.0,
                dim=dim,
            )
            params_nn = variable_scope.get_variable("n",
                                                    shape=[100, dim],
                                                    use_resource=False)
            ids = np.random.randint(2**31,
                                    size=np.prod(ids_shape),
                                    dtype=np.int64).reshape(ids_shape)
            ids = constant_op.constant(ids, dtype=dtypes.int64)

            embedding_test = de.embedding_lookup(params, ids)
            embedding_base = embedding_ops.embedding_lookup(params_nn, ids)
            self.assertAllEqual(embedding_test.shape, embedding_base.shape)
Example #21
  def test_sharded_multi_lookup_on_one_variable(self):
    embeddings = de.get_variable(
        "t340",
        dtypes.int64,
        dtypes.float32,
        devices=_get_devices() * 3,
        initializer=2.0,
    )

    ids = constant_op.constant([0, 1, 2, 3, 4], dtype=dtypes.int64)
    vals = constant_op.constant([[0.0], [1.0], [2.0], [3.0], [4.0]],
                                dtype=dtypes.float32)
    new_vals = constant_op.constant([[10.0], [11.0], [12.0], [13.0], [14.0]],
                                    dtype=dtypes.float32)

    ids0 = constant_op.constant([1, 3, 2], dtype=dtypes.int64)
    ids1 = constant_op.constant([3, 4], dtype=dtypes.int64)

    embedding0 = de.embedding_lookup(embeddings, ids0)
    embedding1 = de.embedding_lookup(embeddings, ids1)

    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config):
      self.evaluate(embeddings.upsert(ids, vals))
      self.assertAllClose(embedding0.eval(), [[1.0], [3.0], [2.0]])
      self.assertAllEqual([3, 1], embedding0.eval().shape)
      self.assertAllClose(embedding1.eval(), [[3.0], [4.0]])
      self.assertAllEqual([2, 1], embedding1.eval().shape)
      self.evaluate(embeddings.upsert(ids, new_vals))
      self.assertAllClose(embedding1.eval(), [[13.0], [14.0]])
      self.assertAllEqual([2, 1], embedding1.eval().shape)
    def commonly_apply_restriction_verify(self, optimizer):
        first_inputs = np.array(range(6), dtype=np.int64)
        second_inputs = np.array(range(4, 9), dtype=np.int64)
        overdue_features = np.array(range(4), dtype=np.int64)
        updated_features = np.array(range(4, 9), dtype=np.int64)
        all_input_features = np.array(range(9), dtype=np.int64)
        embedding_dim = 2
        oversize_trigger = 100
        optimizer = de.DynamicEmbeddingOptimizer(optimizer)

        with session.Session(config=default_config) as sess:
            ids = array_ops.placeholder(dtypes.int64)
            var = de.get_variable('sp_var',
                                  key_dtype=ids.dtype,
                                  value_dtype=dtypes.float32,
                                  initializer=-0.1,
                                  dim=embedding_dim,
                                  restrict_policy=de.TimestampRestrictPolicy)
            embed_w, trainable = de.embedding_lookup(var,
                                                     ids,
                                                     return_trainable=True,
                                                     name='ut8900')
            loss = _simple_loss(embed_w)
            train_op = optimizer.minimize(loss, var_list=[trainable])

            slot_params = [
                optimizer.get_slot(trainable, name).params
                for name in optimizer.get_slot_names()
            ]
            all_vars = [var] + slot_params + [var.restrict_policy.status]

            sess.run(variables.global_variables_initializer())

            sess.run([train_op], feed_dict={ids: first_inputs})
            time.sleep(1)
            sess.run([train_op], feed_dict={ids: second_inputs})
            for v in all_vars:
                self.assertAllEqual(sess.run(v.size()), 9)
            keys, tstp = sess.run(var.restrict_policy.status.export())
            kvs = sorted(dict(zip(keys, tstp)).items())
            tstp = np.array([x[1] for x in kvs])
            for x in tstp[overdue_features]:
                for y in tstp[updated_features]:
                    self.assertLess(x, y)

            sess.run(
                var.restrict_policy.apply_restriction(
                    len(updated_features), trigger=oversize_trigger))
            for v in all_vars:
                self.assertAllEqual(sess.run(v.size()),
                                    len(all_input_features))

            sess.run(
                var.restrict_policy.apply_restriction(
                    len(updated_features), trigger=len(updated_features)))
            for v in all_vars:
                self.assertAllEqual(sess.run(v.size()), len(updated_features))
            keys, _ = sess.run(var.export())
            keys_sorted = np.sort(keys)
            self.assertAllEqual(keys_sorted, updated_features)
  def test_training_with_distributed_strategy(self):
    # TODO(Lifann) The servers stay alive after this test and can make
    # other test cases fail, so this case is kept only for demonstration.
    self.skipTest('Only for demonstration.')

    if not context.executing_eagerly():
      self.skipTest('Only test in eager mode.')

    cluster_spec = tf.train.ClusterSpec({
        'ps': ['localhost:2220', 'localhost:2221'],
        'worker': ['localhost:2222', 'localhost:2223']
    })
    ps_list, worker_list = _create_ps_and_worker_servers(cluster_spec)

    resolver = tf.distribute.cluster_resolver.SimpleClusterResolver(
        cluster_spec)
    strategy = tf.distribute.experimental.ParameterServerStrategy(resolver)
    coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator(
        strategy)
    with strategy.scope():
      var = de.get_variable('pf988',
                            dim=2,
                            initializer=0.1,
                            devices=['/job:ps/task:0', '/job:ps/task:1'])
      shadow_var = de.shadow_ops.ShadowVariable(var,
                                                name='pf988-shadow',
                                                distribute_strategy=strategy)
      optimizer = optimizer_v2.adam.Adam(1E-4)
      optimizer = de.DynamicEmbeddingOptimizer(optimizer)

    def dist_dataset_fn():
      dataset_values = np.arange(0, 10, dtype=np.int64)
      fn = lambda x: tf.data.Dataset.from_tensor_slices(dataset_values).batch(
          4).repeat(None)
      return strategy.distribute_datasets_from_function(fn)

    dataset = coordinator.create_per_worker_dataset(dist_dataset_fn)

    @tf.function
    def step_fn(iterator):

      def replica_fn(ids):

        def loss_fn(ids):
          batch_size = tf.shape(ids)[0]
          emb = de.shadow_ops.embedding_lookup(shadow_var, ids)
          loss = tf.reduce_mean(emb)
          return loss

        optimizer.minimize(lambda: loss_fn(ids), [shadow_var])

      return strategy.run(replica_fn, args=(next(iterator),))

    iterator = iter(dataset)
    for i in range(5):
      coordinator.schedule(step_fn, args=(iterator,))
    coordinator.join()
    self.assertAllEqual(var.size(), 10)
    def _test_warm_start_estimator(self, num_shards, use_regex):
        devices = ["/cpu:0" for _ in range(num_shards)]
        ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt")
        id_list = [x for x in range(100)]
        val_list = [[x] for x in range(100)]

        emb_name = "t300_{}_{}".format(num_shards, use_regex)
        with self.session(graph=ops.Graph()) as sess:
            embeddings = de.get_variable(emb_name,
                                         dtypes.int64,
                                         dtypes.float32,
                                         devices=devices,
                                         initializer=0.0)
            ids = constant_op.constant(id_list, dtype=dtypes.int64)
            vals = constant_op.constant(val_list, dtype=dtypes.float32)
            self.evaluate(embeddings.upsert(ids, vals))
            save = saver.Saver(var_list=[embeddings])
            save.save(sess, ckpt_prefix)

        def _input_fn():
            dataset = tf.data.Dataset.from_tensor_slices({
                'ids':
                constant_op.constant([[x] for x in id_list],
                                     dtype=dtypes.int64)
            })
            return dataset

        def _model_fn(features, labels, mode, params):
            ids = features['ids']
            embeddings = de.get_variable(emb_name,
                                         dtypes.int64,
                                         dtypes.float32,
                                         devices=devices,
                                         initializer=0.0)
            emb = de.embedding_lookup(embeddings, ids, name="lookup")
            emb.graph.add_to_collection(
                de.GraphKeys.DYNAMIC_EMBEDDING_VARIABLES, embeddings)
            vars_to_warm_start = [embeddings]
            if use_regex:
                vars_to_warm_start = [".*t300.*"]

            warm_start_hook = de.WarmStartHook(
                ckpt_to_initialize_from=ckpt_prefix,
                vars_to_warm_start=vars_to_warm_start)
            return tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.PREDICT,
                predictions=emb,
                prediction_hooks=[warm_start_hook])

        predictor = tf.estimator.Estimator(model_fn=_model_fn)
        predictions = predictor.predict(_input_fn)
        pred_vals = []
        for pred in predictions:
            pred_vals.append(pred)
        self.assertAllEqual(pred_vals, val_list)
  def test_get_size(self):
    if not context.executing_eagerly():
      self.skipTest('Only test in eager mode.')

    params = de.get_variable('pn012', dim=2, initializer=0.1)
    params.upsert(
        constant_op.constant([1, 2, 3], dtype=dtypes.int64),
        constant_op.constant([[1., 1.], [2., 2.], [3., 3.]],
                             dtype=dtypes.float32))
    shadow = de.shadow_ops.ShadowVariable(params)
    self.assertEqual(shadow.size(), 3)
Example #26
  def test_max_norm(self):
    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config):
      embeddings = de.get_variable("t310",
                                   dtypes.int64,
                                   dtypes.float32,
                                   initializer=2.0)

      ids = constant_op.constant([0], dtype=dtypes.int64)
      embedding = de.embedding_lookup(embeddings, ids, max_norm=1.0)
      self.assertAllEqual(embedding.eval(), [[1.0]])
    def _test_warm_start_rename(self, num_shards, use_regex):
        devices = ["/cpu:0" for _ in range(num_shards)]
        ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt")
        id_list = [x for x in range(100)]
        val_list = [[x] for x in range(100)]

        emb_name = "t200_{}_{}".format(num_shards, use_regex)
        with self.session(graph=ops.Graph()) as sess:
            embeddings = de.get_variable("save_{}".format(emb_name),
                                         dtypes.int64,
                                         dtypes.float32,
                                         devices=devices,
                                         initializer=0.0)
            ids = constant_op.constant(id_list, dtype=dtypes.int64)
            vals = constant_op.constant(val_list, dtype=dtypes.float32)
            self.evaluate(embeddings.upsert(ids, vals))
            save = saver.Saver(var_list=[embeddings])
            save.save(sess, ckpt_prefix)

        with self.session(graph=ops.Graph()) as sess:
            embeddings = de.get_variable("restore_{}".format(emb_name),
                                         dtypes.int64,
                                         dtypes.float32,
                                         devices=devices,
                                         initializer=0.0)
            ids = constant_op.constant(id_list, dtype=dtypes.int64)
            emb = de.embedding_lookup(embeddings, ids, name="lookup")
            sess.graph.add_to_collection(
                de.GraphKeys.DYNAMIC_EMBEDDING_VARIABLES, embeddings)
            vars_to_warm_start = [embeddings]
            if use_regex:
                vars_to_warm_start = [".*t200.*"]

            restore_op = de.warm_start(ckpt_to_initialize_from=ckpt_prefix,
                                       vars_to_warm_start=vars_to_warm_start,
                                       var_name_to_prev_var_name={
                                           "restore_{}".format(emb_name):
                                           "save_{}".format(emb_name)
                                       })
            self.evaluate(restore_op)
            self.assertAllEqual(emb, val_list)
    def test_dynamic_shape_checking(self):
        np.random.seed(8)
        with self.session(use_gpu=test_util.is_gpu_available(),
                          config=default_config):
            for dim in [1, 10]:
                for ids_shape in [None, [-1, 1], [1, -1, 1], [-1, 1, 1]]:
                    with variable_scope.variable_scope(
                            "test_static_shape_checking" + str(dim),
                            reuse=variable_scope.AUTO_REUSE,
                    ):
                        params = de.get_variable(
                            "test_static_shape_checking-" + str(dim),
                            dtypes.int64,
                            dtypes.float32,
                            initializer=2.0,
                            dim=dim,
                        )
                        params_nn = variable_scope.get_variable(
                            "n", shape=[100, dim], use_resource=False)
                        ids = script_ops.py_func(
                            _create_dynamic_shape_tensor(min_val=0,
                                                         max_val=100),
                            inp=[],
                            Tout=dtypes.int64,
                            stateful=True,
                        )
                        if ids_shape is not None:
                            ids = array_ops.reshape(ids, ids_shape)

                        embedding_test = de.embedding_lookup(params, ids)
                        embedding_base = embedding_ops.embedding_lookup(
                            params_nn, ids)

                        # check static shapes
                        if ids_shape is None:
                            # ids with unknown shape
                            self.assertEqual(embedding_test.shape,
                                             embedding_base.shape)
                        else:
                            # ids with a partially defined shape
                            self.assertAllEqual(
                                embedding_test.shape.as_list(),
                                embedding_base.shape.as_list(),
                            )

                        self.evaluate(variables.global_variables_initializer())

                        # check dynamic shapes at runtime
                        for _ in range(10):
                            embedding_test_val, embedding_base_val = self.evaluate(
                                [embedding_test, embedding_base])
                            self.assertAllEqual(embedding_test_val.shape,
                                                embedding_base_val.shape)
Example #29
  def test_embedding_lookup_shape(self):

    def _evaluate(tensors, feed_dict):
      sess = ops.get_default_session()
      if sess is None:
        with self.test_session() as sess:
          return sess.run(tensors, feed_dict=feed_dict)
      else:
        return sess.run(tensors, feed_dict=feed_dict)

    with self.session(use_gpu=test_util.is_gpu_available(),
                      config=default_config):
      default_val = -1

      keys = constant_op.constant([0, 1, 2], dtypes.int64)
      values = constant_op.constant([[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                                    dtypes.int32)
      table = de.get_variable("t140",
                              dtypes.int64,
                              dtypes.int32,
                              dim=3,
                              initializer=default_val)
      self.evaluate(table.upsert(keys, values))
      self.assertAllEqual(3, self.evaluate(table.size()))

      # shape of ids is fully defined
      ids = constant_op.constant([[0, 1], [2, 4]], dtypes.int64)
      embeddings = de.embedding_lookup(table, ids)
      self.assertAllEqual([2, 2, 3], embeddings.get_shape())
      re = self.evaluate(embeddings)
      self.assertAllEqual([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [-1, -1, -1]]],
                          re)

      # shape of ids is partially defined
      ids = gen_array_ops.placeholder(shape=(2, None), dtype=dtypes.int64)
      embeddings = de.embedding_lookup(table, ids)
      self.assertFalse(embeddings.get_shape().is_fully_defined())
      re = _evaluate(
          embeddings,
          feed_dict={ids: np.asarray([[0, 1], [2, 4]], dtype=np.int64)})
      self.assertAllEqual([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [-1, -1, -1]]],
                          re)

      # shape of ids is unknown
      ids = gen_array_ops.placeholder(dtype=dtypes.int64)
      embeddings = de.embedding_lookup(table, ids)
      self.assertEqual(embeddings.get_shape(), tensor_shape.unknown_shape())
      re = _evaluate(
          embeddings,
          feed_dict={ids: np.asarray([[0, 1], [2, 4]], dtype=np.int64)})
      self.assertAllEqual([[[0, 0, 0], [1, 1, 1]], [[2, 2, 2], [-1, -1, -1]]],
                          re)
    def test_dynamic_embedding_variable_export_insert(self):
        with self.session(config=default_config,
                          use_gpu=test_util.is_gpu_available()):
            default_val = constant_op.constant([-1, -1], dtypes.int64)
            keys = constant_op.constant([0, 1, 2], dtypes.int64)
            values = constant_op.constant([[0, 1], [2, 3], [4, 5]],
                                          dtypes.int32)
            table1 = de.get_variable("t101",
                                     dtypes.int64,
                                     dtypes.int32,
                                     initializer=default_val,
                                     dim=2)
            self.assertAllEqual(0, self.evaluate(table1.size()))
            self.evaluate(table1.upsert(keys, values))
            self.assertAllEqual(3, self.evaluate(table1.size()))

            input_keys = constant_op.constant([0, 1, 3], dtypes.int64)
            expected_output = [[0, 1], [2, 3], [-1, -1]]
            output1 = table1.lookup(input_keys)
            self.assertAllEqual(expected_output, self.evaluate(output1))

            exported_keys, exported_values = table1.export()
            self.assertAllEqual(3, self.evaluate(exported_keys).size)
            self.assertAllEqual(6, self.evaluate(exported_values).size)

            # Populate a second table from the exported data
            table2 = de.get_variable("t102",
                                     dtypes.int64,
                                     dtypes.int32,
                                     initializer=default_val,
                                     dim=2)
            self.assertAllEqual(0, self.evaluate(table2.size()))
            self.evaluate(table2.upsert(exported_keys, exported_values))
            self.assertAllEqual(3, self.evaluate(table2.size()))

            # Verify lookup result is still the same
            output2 = table2.lookup(input_keys)
            self.assertAllEqual(expected_output, self.evaluate(output2))