Example 1
  def testIncompatibleShapes(self):
    with self.test_session():
      x, _, _ = _EmbeddingParams(1, 10, dtype=dtypes.float32)
      sp_ids = sparse_tensor.SparseTensor(
          constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64),
          constant_op.constant([0, 1, 2], dtypes.int32),
          constant_op.constant([2, 2], dtypes.int64))
      sp_weights = sparse_tensor.SparseTensor(
          constant_op.constant([[0, 0], [0, 1]], dtypes.int64),
          constant_op.constant([12.0, 5.0], dtypes.float32),
          constant_op.constant([1, 2], dtypes.int64))

      with self.assertRaises(ValueError):
        embedding_ops.embedding_lookup_sparse(
            x, sp_ids, sp_weights, combiner="mean")
Example 2
  def testIncompatibleShapes(self):
    with self.cached_session():
      x, _, _ = _EmbeddingParams(1, 10, dtype=dtypes.float32)
      sp_ids = sparse_tensor.SparseTensor(
          constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64),
          constant_op.constant([0, 1, 2], dtypes.int32),
          constant_op.constant([2, 2], dtypes.int64))
      sp_weights = sparse_tensor.SparseTensor(
          constant_op.constant([[0, 0], [0, 1]], dtypes.int64),
          constant_op.constant([12.0, 5.0], dtypes.float32),
          constant_op.constant([1, 2], dtypes.int64))

      with self.assertRaises(ValueError):
        embedding_ops.embedding_lookup_sparse(
            x, sp_ids, sp_weights, combiner="mean")
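For contrast with the two tests above, the ValueError comes from `sp_weights` having fewer entries and a different `dense_shape` than `sp_ids`. A minimal sketch (reusing the same imports; values are illustrative) of a weights tensor that does match, which `embedding_lookup_sparse` accepts:

# Sketch only: sp_weights must mirror sp_ids entry for entry.
sp_ids = sparse_tensor.SparseTensor(
    constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64),
    constant_op.constant([0, 1, 2], dtypes.int32),
    constant_op.constant([2, 2], dtypes.int64))
sp_weights = sparse_tensor.SparseTensor(
    constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64),  # same indices
    constant_op.constant([12.0, 5.0, 3.0], dtypes.float32),  # one weight per id
    constant_op.constant([2, 2], dtypes.int64))  # same dense_shape
# With matching shapes the op builds without raising:
# embedding_ops.embedding_lookup_sparse(x, sp_ids, sp_weights, combiner="mean")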
Example 3
  def testGradientsEmbeddingLookupSparse(self):
    vocab_size = 12
    batch_size = 4
    param_shape = [2, 3]
    sp_ids, sp_weights, _, _, _ = (self._RandomIdsAndWeights(
        batch_size, vocab_size))

    for num_shards, combiner, dtype, ignore_weights in itertools.product(
        [1, 3], ["sum", "mean", "sqrtn"], [dtypes.float32,
                                           dtypes.float64], [True, False]):
      with self.test_session():
        x, params, _ = _EmbeddingParams(
            num_shards, vocab_size, shape=param_shape, dtype=dtype)

        y = embedding_ops.embedding_lookup_sparse(
            x,
            sp_ids,
            None if ignore_weights else sp_weights,
            combiner=combiner)
        x_name = [_PName(i) for i in range(num_shards)]
        x_init_value = [params[x_n + ":0"] for x_n in x_name]
        x_shape = [i.shape for i in x_init_value]
        y_shape = [batch_size] + list(params[_PName(0) + ":0"].shape[1:])
        err = gradient_checker.compute_gradient_error(
            x, x_shape, y, y_shape, x_init_value=x_init_value)
      self.assertLess(err, 1e-5 if dtype == dtypes.float64 else 2e-3)
Example 4
  def loss_fn(emb):
    embedding = embedding_ops.embedding_lookup_sparse(
        emb, sp_ids, None, combiner='sum')
    pred = math_ops.matmul(embedding, x)
    return pred * pred
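The fragment above only defines the loss. A sketch of one way to exercise it (assuming eager execution and that `emb`, `sp_ids`, and `x` are built as in the neighbouring gradient tests) is to compare analytic and numeric gradients with `gradient_checker_v2`:

# Sketch: numerically verify gradients of loss_fn w.r.t. the embedding matrix.
from tensorflow.python.ops import gradient_checker_v2

theoretical, numerical = gradient_checker_v2.compute_gradient(loss_fn, [emb])
err = gradient_checker_v2.max_error(theoretical, numerical)
assert err < 2e-3  # tolerance is illustrative, not taken from the original test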
Example 5
  def testGradientsEmbeddingLookupSparse(self):
    vocab_size = 12
    batch_size = 4
    param_shape = [2, 3]
    sp_ids, sp_weights, _, _, _ = (self._RandomIdsAndWeights(
        batch_size, vocab_size))

    for num_shards, combiner, dtype, ignore_weights in itertools.product(
        [1, 3], ["sum", "mean", "sqrtn"], [dtypes.float32,
                                           dtypes.float64], [True, False]):
      with self.cached_session():
        x, params, _ = _EmbeddingParams(
            num_shards, vocab_size, shape=param_shape, dtype=dtype)

        y = embedding_ops.embedding_lookup_sparse(
            x,
            sp_ids,
            None if ignore_weights else sp_weights,
            combiner=combiner)
        x_name = [_PName(i) for i in range(num_shards)]
        x_init_value = [params[x_n + ":0"] for x_n in x_name]
        x_shape = [i.shape for i in x_init_value]
        y_shape = [batch_size] + list(params[_PName(0) + ":0"].shape[1:])
        err = gradient_checker.compute_gradient_error(
            x, x_shape, y, y_shape, x_init_value=x_init_value)
      self.assertLess(err, 1e-5 if dtype == dtypes.float64 else 2e-3)
Example 6
  def testEmbeddingLookupSparse(self):
    vocab_size = 13
    batch_size = 10
    param_shape = [2, 5]
    expected_lookup_result_shape = [None] + param_shape

    sp_ids, sp_weights, ids, weights, vals_per_batch_entry = (
        self._RandomIdsAndWeights(batch_size, vocab_size))

    grouped_ids = self._GroupByBatchEntry(ids, vals_per_batch_entry)
    grouped_weights = self._GroupByBatchEntry(weights, vals_per_batch_entry)
    grouped_ignored_weights = self._GroupByBatchEntry(
        np.ones(np.sum(vals_per_batch_entry)), vals_per_batch_entry)

    for num_shards, combiner, dtype, ignore_weights in itertools.product(
        [1, 5], ["sum", "mean", "sqrtn"],
        [dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64],
        [True, False]):

      with self.cached_session():
        p, params, feed_dict = _EmbeddingParams(
            num_shards, vocab_size, shape=param_shape, dtype=dtype)
        embedding_sum = embedding_ops.embedding_lookup_sparse(
            p,
            sp_ids,
            None if ignore_weights else sp_weights,
            combiner=combiner)

        self.assertEqual(embedding_sum.get_shape().as_list(),
                         expected_lookup_result_shape)
        if dtype in (dtypes.float16, dtypes.bfloat16):
          self.assertEqual(embedding_sum.dtype, dtypes.float32)
        else:
          self.assertEqual(embedding_sum.dtype, dtype)

        tf_embedding_sum = embedding_sum.eval(feed_dict=feed_dict)

        np_embedding_sum, np_weight_sum, np_weight_sq_sum = _EmbeddingResult(
            params,
            grouped_ids,
            num_shards,
            vocab_size,
            weight_vals=grouped_ignored_weights
            if ignore_weights else grouped_weights)
        if combiner == "mean":
          np_embedding_sum /= np.reshape(np_weight_sum, (batch_size, 1, 1))
        if combiner == "sqrtn":
          np_embedding_sum /= np.reshape(
              np.sqrt(np_weight_sq_sum), (batch_size, 1, 1))

        rtol = 1e-6
        if dtype == dtypes.bfloat16:
          rtol = 1e-2
        elif dtype == dtypes.float16:
          rtol = 1e-3
        atol = rtol
        self.assertAllClose(np_embedding_sum, tf_embedding_sum, rtol, atol)
Example 7
    def test_embedding_lookup_sparse_shape_checking(self):
        with self.session(use_gpu=test_util.is_gpu_available(),
                          config=default_config):
            embed_dim = 4
            embedding_weights_nn = variable_scope.get_variable(
                "n", shape=[100, embed_dim], use_resource=False)
            embedding_weights_de = _random_weights(embed_dim=4)
            sparse_ids, _ = ids_and_weights_3d(embed_dim=embed_dim)

            embedding_lookup_base = embedding_ops.embedding_lookup_sparse(
                embedding_weights_nn, sparse_ids, None)
            embedding_lookup_test = de.embedding_lookup_sparse(
                embedding_weights_de, sparse_ids, None)
            self.assertTrue(embedding_lookup_base.get_shape().as_list() ==
                            embedding_lookup_test.get_shape().as_list())
Example 8
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.  Alternatively, a `PartitionedVariable`,
        created by partitioning along dimension 0.  The total unpartitioned
        shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
        vocab size and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
        ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not None, all embeddings are l2-normalized to max_norm before
        combining.


  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights is not None else None
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = sparse_ids.dense_shape.get_shape()[0]
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None
        else original_rank_dim.value)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(
          sparse_ids.indices,
          sparse_weights.values, sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids,
                                                                 default_id or
                                                                 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result = embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    # Reshape back from linear ids into the higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(
                math_ops.cast(original_shape, dtypes.int32), [0],
                [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(tensor_shape.unknown_shape(
        (original_rank_dim - 1).value).concatenate(result.get_shape()[1:]))
    return final_result
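A usage sketch of the function above (names and values are illustrative; it assumes the same TensorFlow imports used elsewhere on this page). Invalid ids are pruned, and rows left empty fall back to `default_id`, or to the zero vector when `default_id` is None:

# Sketch: 3 rows of ids; row 1 holds only an invalid id (-1), row 2 is empty.
embedding_weights = variables.Variable(
    np.random.rand(10, 4).astype(np.float32))
sparse_ids = sparse_tensor.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0]],
    values=constant_op.constant([3, 7, -1], dtypes.int64),
    dense_shape=[3, 2])

# Row 0 -> mean of embeddings 3 and 7; rows 1 and 2 -> zero vectors,
# because no default_id was supplied.
result = safe_embedding_lookup_sparse(
    embedding_weights, sparse_ids, combiner="mean")

# With default_id=0, empty rows return the embedding of id 0 instead.
result_with_default = safe_embedding_lookup_sparse(
    embedding_weights, sparse_ids, combiner="mean", default_id=0)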
Example 9
    def common_minimize_trainable(self, base_opt, test_opt, name):
        base_opt = de.DynamicEmbeddingOptimizer(base_opt)
        test_opt = de.DynamicEmbeddingOptimizer(test_opt)
        id = 0
        config = config_pb2.ConfigProto()
        config.allow_soft_placement = False
        for (
                num_shards,
                k_dtype,
                d_dtype,
                initial_mode,
                dim,
                run_step,
        ) in itertools.product(
            [1, 2],
            [dtypes.int64],
            [
                dtypes.float32,
            ],
            [
                "constant",
            ],
            [1, 10],
            [10],
        ):
            id += 1
            raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
            raw_init_vals = [
                [
                    x,
                ] * dim for x in
                [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
            ]

            raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
            sp_ids = sparse_tensor.SparseTensor(
                indices=[
                    [0, 0],
                    [0, 1],
                    [1, 0],
                    [2, 1],
                ],
                values=raw_ids,
                dense_shape=[3, 2],
            )
            x = constant_op.constant([[_x * dim]
                                      for _x in [[0.4], [0.5], [0.6]]],
                                     dtype=d_dtype)

            x = array_ops.reshape(x, shape=(3 * dim, 1))
            # base branch
            with self.session(use_gpu=test_util.is_gpu_available(),
                              config=default_config) as sess:
                base_var = variables.Variable(
                    np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
                    dtype=d_dtype,
                    shape=[len(raw_init_ids), dim],
                )
                base_embedding = embedding_ops.embedding_lookup_sparse(
                    base_var, sp_ids, None, combiner="sum")
                base_embedding = array_ops.reshape(base_embedding,
                                                   shape=[1, 3 * dim])
                pred0 = math_ops.matmul(base_embedding, x)
                loss0 = pred0 * pred0

                base_opt_op = base_opt.minimize(loss0, var_list=[base_var])
                # run base
                self.evaluate(variables.global_variables_initializer())
                for _ in range(run_step):
                    sess.run(base_opt_op)

                base_var_val = self.evaluate(base_var)

            # test branch
            with self.session(config=default_config,
                              use_gpu=test_util.is_gpu_available()) as sess:
                # test var prepare
                embeddings = de.get_variable(
                    "t1030-" + name + str(id),
                    key_dtype=k_dtype,
                    value_dtype=d_dtype,
                    devices=_get_devices() * num_shards,
                    initializer=1.0,
                    dim=dim,
                )
                self.device_check(embeddings)

                init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
                init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
                init_op = embeddings.upsert(init_ids, init_vals)
                self.evaluate(init_op)

                test_var, trainable = de.embedding_lookup_sparse(
                    embeddings,
                    sp_ids,
                    sp_weights=None,
                    combiner="sum",
                    return_trainable=True,
                )

                pred1 = math_ops.matmul(
                    array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
                loss1 = pred1 * pred1
                test_opt_op = test_opt.minimize(loss1, var_list=[trainable])

                self.evaluate(variables.global_variables_initializer())

                self.assertAllCloseAccordingToType(
                    np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
                    self.evaluate(base_var),
                )

                # Run `run_step` step of sgd
                for _ in range(run_step):
                    sess.run(test_opt_op)
                if test_util.is_gpu_available():
                    self.assertTrue(
                        _check_device(embeddings.tables[0].resource_handle,
                                      "GPU"))

                table_var_val = self.evaluate(
                    array_ops.reshape(embeddings.lookup(init_ids),
                                      shape=[10, dim]))
            # Validate updated params
            self.assertAllCloseAccordingToType(
                base_var_val,
                table_var_val,
                msg="Cond:{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                                 dim, run_step),
            )
Example 10
    def common_minimize_trainable(self, base_opt, test_opt, name):
        base_opt = de.DynamicEmbeddingOptimizer(base_opt)
        test_opt = de.DynamicEmbeddingOptimizer(test_opt)
        id = 0
        for (
                num_shards,
                k_dtype,
                d_dtype,
                initial_mode,
                dim,
                run_step,
        ) in itertools.product(
            [3],
            [dtypes.int64],
            [
                dtypes.float32,
            ],
            [
                "constant",
            ],
            [1, 10],
            [10],
        ):
            with ops.Graph().as_default():
                id += 1
                raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                raw_init_vals = [
                    [
                        x,
                    ] * dim for x in
                    [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
                ]
                raw_ids = constant_op.constant([1, 3, 3, 9], dtype=k_dtype)
                sp_ids = sparse_tensor.SparseTensor(
                    indices=[
                        [0, 0],
                        [0, 1],
                        [1, 0],
                        [2, 1],
                    ],
                    values=raw_ids,
                    dense_shape=[3, 2],
                )
                x = constant_op.constant([[_x * dim]
                                          for _x in [[0.4], [0.5], [0.6]]],
                                         dtype=d_dtype)
                x = array_ops.reshape(x, shape=(3 * dim, 1))
                # base var prepare
                base_var = variables.Variable(
                    np.array(raw_init_vals).reshape([len(raw_init_ids), dim]),
                    dtype=d_dtype,
                    shape=[len(raw_init_ids), dim],
                )

                # test var prepare
                embeddings = de.get_variable(
                    "t1030-" + name + str(id),
                    key_dtype=k_dtype,
                    value_dtype=d_dtype,
                    devices=_get_devices() * num_shards,
                    initializer=1.0,
                    dim=dim,
                )

                init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
                init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
                init_op = embeddings.upsert(init_ids, init_vals)

                # base branch
                base_embedding = embedding_ops.embedding_lookup_sparse(
                    base_var, sp_ids, None, combiner="sum")
                base_embedding = array_ops.reshape(base_embedding,
                                                   shape=[1, 3 * dim])
                pred0 = math_ops.matmul(base_embedding, x)
                loss0 = pred0 * pred0

                base_opt_op = base_opt.minimize(loss0, var_list=[base_var])

                # test branch
                test_var, trainable = de.embedding_lookup_sparse(
                    embeddings,
                    sp_ids,
                    sp_weights=None,
                    combiner="sum",
                    return_trainable=True,
                )

                pred1 = math_ops.matmul(
                    array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
                loss1 = pred1 * pred1

                gstep = training_util.create_global_step()
                test_opt_op = test_opt.minimize(loss1,
                                                var_list=[trainable],
                                                global_step=gstep)

                table_var = array_ops.reshape(embeddings.lookup(init_ids),
                                              shape=[10, dim])

                with monitored_session.MonitoredTrainingSession(
                        is_chief=True, config=default_config) as sess:
                    sess.run(init_op)
                    self.assertAllCloseAccordingToType(
                        np.array(raw_init_vals).reshape(
                            [len(raw_init_ids), dim]),
                        sess.run(base_var),
                    )

                    # run base
                    for _ in range(run_step):
                        sess.run(base_opt_op)
                        sess.run(test_opt_op)

                    # Validate global_step
                    self.assertEqual(run_step, sess.run(gstep))

                    # Validate updated params
                    self.assertAllCloseAccordingToType(
                        sess.run(base_var),
                        sess.run(table_var),
                        msg="Cond:{},{},{},{},{}".format(
                            num_shards, k_dtype, d_dtype, dim, run_step),
                    )
                    self.device_check(embeddings)
Example 11
def safe_embedding_lookup_sparse(
    embedding_weights, sparse_ids, sparse_weights=None, combiner="mean",
    default_id=None, name=None, partition_strategy="div"):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.
    sparse_ids: `SparseTensor` of shape `[batch_size, ?]` containing the ids.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.


  Returns:
    Dense tensor of shape `[batch_size, embed_dim]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None or len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights]

  contrib_tensor_util.assert_same_float_dtype(
      embedding_weights + [sparse_weights])

  with ops.op_scope(
      embedding_weights + [sparse_ids, sparse_weights], name,
      "embedding_lookup") as scope:
    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
          sparse_weights, 1.0)

    result = tf_embedding_ops.embedding_lookup_sparse(
        embedding_weights, sparse_ids, sparse_weights, combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.pack([1, array_ops.shape(result)[1]]))

      result = math_ops.select(
          is_row_empty, array_ops.zeros_like(result), result, name=scope)

    return result
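The three combiners named in these docstrings differ only in how the weighted sum for a row is normalized. A small numpy sketch of the arithmetic (values are illustrative):

# Sketch of combiner arithmetic for one batch row with two ids.
import numpy as np

e = np.array([[1.0, 2.0], [3.0, 4.0]])  # embeddings for the row's two ids
w = np.array([2.0, 0.5])  # their weights

weighted_sum = (w[:, None] * e).sum(axis=0)
sum_combined = weighted_sum  # combiner="sum"
mean_combined = weighted_sum / w.sum()  # combiner="mean"
sqrtn_combined = weighted_sum / np.sqrt((w ** 2).sum())  # combiner="sqrtn"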
Example 12
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner="mean",
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div"):
    """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
        partitioned embedding tensors.
    sparse_ids: `SparseTensor` of shape `[batch_size, ?]` containing the ids.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
        float weights corresponding to `sparse_ids`, or `None` if all weights
        are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
        entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
        the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
        Currently `"div"` and `"mod"` are supported. Default is `"div"`.


  Returns:
    Dense tensor of shape `[batch_size, embed_dim]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
    if embedding_weights is None or len(embedding_weights) < 1:
        raise ValueError("Missing embedding_weights %s." % embedding_weights)

    dtype = sparse_weights.dtype if sparse_weights else None
    embedding_weights = [
        ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
    ]

    contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                                [sparse_weights])

    with ops.op_scope(embedding_weights + [sparse_ids, sparse_weights], name,
                      "embedding_lookup") as scope:
        # Prune invalid ids and weights.
        sparse_ids, sparse_weights = _prune_invalid_ids(
            sparse_ids, sparse_weights)

        # Fill in dummy values for empty features, if necessary.
        sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
            sparse_ids, default_id or 0)
        if sparse_weights:
            sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(
                sparse_weights, 1.0)

        result = tf_embedding_ops.embedding_lookup_sparse(
            embedding_weights,
            sparse_ids,
            sparse_weights,
            combiner=combiner,
            partition_strategy=partition_strategy,
            name=None if default_id is None else scope)

        if default_id is None:
            # Broadcast is_row_empty to the same shape as embedding_lookup_result,
            # for use in Select.
            is_row_empty = array_ops.tile(
                array_ops.reshape(is_row_empty, [-1, 1]),
                array_ops.pack([1, array_ops.shape(result)[1]]))

            result = math_ops.select(is_row_empty,
                                     array_ops.zeros_like(result),
                                     result,
                                     name=scope)

        return result
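Both variants above also take a `partition_strategy` argument. A rough numpy sketch of how `"div"` and `"mod"` assign ids to `P` partitions, mirroring the behaviour documented for `tf.nn.embedding_lookup` (values are illustrative):

# Sketch: mapping ids 0..9 onto P = 3 partitions.
import numpy as np

ids = np.arange(10)
P = 3

mod_partition = ids % P  # "mod": id i lives in partition i % P
# "div": contiguous blocks of ids; the first (n % P) partitions get one
# extra id when n is not divisible by P.
sizes = np.full(P, len(ids) // P)
sizes[:len(ids) % P] += 1
div_partition = np.repeat(np.arange(P), sizes)  # partition index for each id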
Example 13
    def common_minimize_trainable(self, base_opt, test_opt, name):
        if test_util.is_gpu_available():
            keys_type_list = [dtypes.int64]
        else:
            keys_type_list = [dtypes.int64, dtypes.string]
        deo.enable_train_mode()
        for run_id, num_shards, k_dtype, d_dtype, initial_mode, dim, run_step \
            in _next_run_step_config(keys_type_list):
            with ops.Graph().as_default():
                raw_init_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                if k_dtype == dtypes.string:
                    raw_init_ids = [str(i) for i in raw_init_ids]
                raw_init_vals = [
                    [
                        x,
                    ] * dim for x in
                    [0.0, 0.1, 0.3, 0.8, 0.16, 0.25, 0.36, 0.49, 0.64, 0.81]
                ]
                raw_ids_py = [1, 3, 3, 9]
                raw_ids_nn = constant_op.constant(raw_ids_py,
                                                  dtype=dtypes.int64)
                raw_ids_de = raw_ids_nn
                if k_dtype == dtypes.string:
                    raw_ids_de = constant_op.constant(
                        [str(i) for i in raw_ids_py], dtype=k_dtype)
                sp_ids_nn = sparse_tensor.SparseTensor(indices=[
                    [0, 0],
                    [0, 1],
                    [1, 0],
                    [2, 1],
                ],
                                                       values=raw_ids_nn,
                                                       dense_shape=[3, 2])
                sp_ids_de = sparse_tensor.SparseTensor(indices=[
                    [0, 0],
                    [0, 1],
                    [1, 0],
                    [2, 1],
                ],
                                                       values=raw_ids_de,
                                                       dense_shape=[3, 2])
                x = constant_op.constant([[_x * dim]
                                          for _x in [[0.4], [0.5], [0.6]]],
                                         dtype=d_dtype)
                x = array_ops.reshape(x, shape=(3 * dim, 1))
                # base var prepare
                base_var = variables.Variable(np.array(raw_init_vals).reshape(
                    [len(raw_init_ids), dim]),
                                              dtype=d_dtype,
                                              shape=[len(raw_init_ids), dim])

                # test var prepare
                embeddings = deo.get_variable('t1030-' + name + str(run_id),
                                              key_dtype=k_dtype,
                                              value_dtype=d_dtype,
                                              devices=_get_devices() *
                                              num_shards,
                                              initializer=1.,
                                              dim=dim)

                init_ids = constant_op.constant(raw_init_ids, dtype=k_dtype)
                init_vals = constant_op.constant(raw_init_vals, dtype=d_dtype)
                init_op = embeddings.upsert(init_ids, init_vals)

                # base branch
                base_embedding = embedding_ops.embedding_lookup_sparse(
                    base_var, sp_ids_nn, None, combiner='sum')
                base_embedding = array_ops.reshape(base_embedding,
                                                   shape=[1, 3 * dim])
                pred0 = math_ops.matmul(base_embedding, x)
                loss0 = pred0 * pred0

                base_opt_op = base_opt.minimize(loss0, var_list=[base_var])

                # test branch
                test_var, trainable = deo.embedding_lookup_sparse(
                    embeddings,
                    sp_ids_de,
                    sp_weights=None,
                    combiner="sum",
                    return_trainable=True)

                pred1 = math_ops.matmul(
                    array_ops.reshape(test_var, shape=[1, 3 * dim]), x)
                loss1 = pred1 * pred1

                gstep = training_util.create_global_step()
                test_opt_op = test_opt.minimize(loss1,
                                                var_list=[trainable],
                                                global_step=gstep)

                table_var = array_ops.reshape(embeddings.lookup(init_ids),
                                              shape=[10, dim])

                with monitored_session.MonitoredTrainingSession(
                        is_chief=True, config=default_config) as sess:
                    sess.run(init_op)
                    self.assertAllCloseAccordingToType(
                        np.array(raw_init_vals).reshape(
                            [len(raw_init_ids), dim]), sess.run(base_var))

                    # run base
                    for _ in range(run_step):
                        sess.run(base_opt_op)
                        sess.run(test_opt_op)

                    # Validate global_step
                    self.assertEqual(run_step, sess.run(gstep))

                    # Validate updated params
                    self.assertAllCloseAccordingToType(
                        sess.run(base_var),
                        sess.run(table_var),
                        msg="Cond:{},{},{},{},{}".format(
                            num_shards, k_dtype, d_dtype, dim, run_step))
                    self.device_check(embeddings)