def testFillFloat(self):
  with self.test_session(use_gpu=False) as sess:
    values = constant_op.constant(
        [0.0, 10.0, 13.0, 14.0, 32.0, 33.0], dtype=dtypes.float64)
    default_value = constant_op.constant(-1.0, dtype=dtypes.float64)
    sp_input = sparse_tensor.SparseTensorValue(
        indices=np.array([[0, 0], [1, 0], [1, 3], [1, 4], [3, 2], [3, 3]]),
        values=values,
        dense_shape=np.array([5, 6]))
    sp_output, empty_row_indicator = (sparse_ops.sparse_fill_empty_rows(
        sp_input, default_value))
    output, empty_row_indicator_out = sess.run(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices,
                        [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2],
                         [3, 3], [4, 0]])
    self.assertAllClose(output.values, [0, 10, 13, 14, -1, 32, 33, -1])
    self.assertAllEqual(output.dense_shape, [5, 6])
    self.assertAllEqual(empty_row_indicator_out,
                        np.array([0, 0, 1, 0, 1]).astype(np.bool_))
    values_grad_err = gradient_checker.compute_gradient_error(
        values, values.shape.as_list(), sp_output.values, [8], delta=1e-8)
    self.assertGreater(values_grad_err, 0)
    self.assertLess(values_grad_err, 1e-8)
    default_value_grad_err = gradient_checker.compute_gradient_error(
        default_value, default_value.shape.as_list(), sp_output.values, [8],
        delta=1e-8)
    self.assertGreater(default_value_grad_err, 0)
    self.assertLess(default_value_grad_err, 1e-8)
def testFillFloat(self):
  with self.session():
    values = constant_op.constant(
        [0.0, 10.0, 13.0, 14.0, 32.0, 33.0], dtype=dtypes.float64)
    default_value = constant_op.constant(-1.0, dtype=dtypes.float64)
    sp_input = sparse_tensor.SparseTensorValue(
        indices=np.array([[0, 0], [1, 0], [1, 3], [1, 4], [3, 2], [3, 3]]),
        values=values,
        dense_shape=np.array([5, 6]))
    sp_output, empty_row_indicator = (sparse_ops.sparse_fill_empty_rows(
        sp_input, default_value))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices,
                        [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2],
                         [3, 3], [4, 0]])
    self.assertAllClose(output.values, [0, 10, 13, 14, -1, 32, 33, -1])
    self.assertAllEqual(output.dense_shape, [5, 6])
    self.assertAllEqual(empty_row_indicator_out,
                        np.array([0, 0, 1, 0, 1]).astype(np.bool_))
    values_grad_err = gradient_checker.compute_gradient_error(
        values, values.shape.as_list(), sp_output.values, [8], delta=1e-8)
    self.assertGreater(values_grad_err, 0)
    self.assertLess(values_grad_err, 1e-8)
    default_value_grad_err = gradient_checker.compute_gradient_error(
        default_value, default_value.shape.as_list(), sp_output.values, [8],
        delta=1e-8)
    self.assertGreater(default_value_grad_err, 0)
    self.assertLess(default_value_grad_err, 1e-8)
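# A minimal eager-mode sketch (not from the test file above; the values are
# made up) of the behavior these tests exercise, using the public
# tf.sparse.fill_empty_rows endpoint.
import tensorflow as tf

sp = tf.sparse.SparseTensor(
    indices=[[0, 0], [2, 1]], values=[10.0, 20.0], dense_shape=[4, 3])
filled, empty_row_indicator = tf.sparse.fill_empty_rows(sp, -1.0)
# filled.indices      -> [[0, 0], [1, 0], [2, 1], [3, 0]]
# filled.values       -> [10., -1., 20., -1.]
# empty_row_indicator -> [False, True, False, True]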
def testInvalidIndices(self):
  with test_util.use_gpu():
    sp_input = sparse_tensor.SparseTensor(
        indices=np.array([[1, 2], [1, 3], [99, 1], [99, 3]]),
        values=np.array([1, 3, 2, 4]),
        dense_shape=np.array([2, 5]))
    with self.assertRaisesRegex(errors.InvalidArgumentError,
                                r"indices\(2, 0\) is invalid"):
      self.evaluate(sparse_ops.sparse_fill_empty_rows(sp_input, -1))
def backward_compute(self, sp_input, default_value):
  with backprop.GradientTape(persistent=True) as tape:
    tape.watch(sp_input.values)
    result_output, result_indicator = de_math.sparse_fill_empty_rows(
        sp_input, default_value)
    expected_output, expected_indicator = sparse_ops.sparse_fill_empty_rows(
        sp_input, default_value)
  result = tape.gradient(result_output.values, sp_input.values)
  expected = tape.gradient(expected_output.values, sp_input.values)
  return result, expected
def testFillNumber(self):
  with self.test_session(use_gpu=False) as sess:
    sp_input = self._SparseTensor_5x6()
    sp_output, empty_row_indicator = sparse_ops.sparse_fill_empty_rows(
        sp_input, -1)
    output, empty_row_indicator_out = sess.run(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices,
                        [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2],
                         [3, 3], [4, 0]])
    self.assertAllEqual(output.values, [0, 10, 13, 14, -1, 32, 33, -1])
    self.assertAllEqual(output.shape, [5, 6])
    self.assertAllEqual(empty_row_indicator_out,
                        np.array([0, 0, 1, 0, 1]).astype(np.bool_))
def testNoEmptyRows(self):
  with self.test_session(use_gpu=False) as sess:
    sp_input = self._SparseTensor_2x6()
    sp_output, empty_row_indicator = sparse_ops.sparse_fill_empty_rows(
        sp_input, -1)
    output, empty_row_indicator_out = sess.run(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices, [[0, 0], [1, 0], [1, 3], [1, 4]])
    self.assertAllEqual(output.values, [0, 10, 13, 14])
    self.assertAllEqual(output.shape, [2, 6])
    self.assertAllEqual(empty_row_indicator_out, np.zeros(2).astype(np.bool_))
def testFillString(self):
  with self.test_session(use_gpu=False) as sess:
    sp_input = self._SparseTensor_String5x6()
    sp_output, empty_row_indicator = sparse_ops.sparse_fill_empty_rows(
        sp_input, "")
    output, empty_row_indicator_out = sess.run(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices,
                        [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2],
                         [3, 3], [4, 0]])
    self.assertAllEqual(output.values,
                        ["a", "b", "c", "d", "", "e", "f", ""])
    self.assertAllEqual(output.shape, [5, 6])
    self.assertAllEqual(empty_row_indicator_out,
                        np.array([0, 0, 1, 0, 1]).astype(np.bool_))
def testNoEmptyRows(self):
  with test_util.use_gpu():
    sp_input = self._SparseTensor_2x6()
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, -1))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices, [[0, 0], [1, 0], [1, 3], [1, 4]])
    self.assertAllEqual(output.values, [0, 10, 13, 14])
    self.assertAllEqual(output.dense_shape, [2, 6])
    self.assertAllEqual(empty_row_indicator_out, np.zeros(2).astype(np.bool_))
def testNoEmptyRows(self):
  with test_util.force_cpu():
    sp_input = self._SparseTensor_2x6()
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, -1))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices, [[0, 0], [1, 0], [1, 3], [1, 4]])
    self.assertAllEqual(output.values, [0, 10, 13, 14])
    self.assertAllEqual(output.dense_shape, [2, 6])
    self.assertAllEqual(empty_row_indicator_out, np.zeros(2).astype(np.bool_))
def call(self, inputs):
  if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype:
    inputs = math_ops.cast(inputs, dtype=self._compute_dtype_object)

  rank = inputs.shape.rank
  if rank == 2 or rank is None:
    # We use embedding_lookup_sparse as a more efficient matmul operation for
    # large sparse input tensors. The op will result in a sparse gradient, as
    # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense
    # gradients. This can lead to significant speedups, see b/171762937.
    if isinstance(inputs, sparse_tensor.SparseTensor):
      # We need to fill empty rows, as the op assumes at least one id per row.
      inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0)
      # We need to do some munging of our input to use the embedding lookup as
      # a matrix multiply. We split our input matrix into separate ids and
      # weights tensors. The values of the ids tensor should be the column
      # indices of our input matrix and the values of the weights tensor can
      # continue to be the actual matrix weights. The column arrangement of
      # ids and weights will be summed over and does not matter. See the
      # documentation for sparse_ops.sparse_tensor_dense_matmul for a more
      # detailed explanation of the inputs to both ops.
      ids = sparse_tensor.SparseTensor(
          indices=inputs.indices,
          values=inputs.indices[:, 1],
          dense_shape=inputs.dense_shape)
      weights = inputs
      outputs = embedding_ops.embedding_lookup_sparse_v2(
          self.kernel * self.window, ids, weights, combiner='sum')
    else:
      outputs = gen_math_ops.MatMul(a=inputs, b=self.kernel * self.window)
  # Broadcast kernel to inputs.
  else:
    outputs = standard_ops.tensordot(inputs, self.kernel * self.window,
                                     [[rank - 1], [0]])
    # Reshape the output back to the original ndim of the input.
    if not context.executing_eagerly():
      shape = inputs.shape.as_list()
      output_shape = shape[:-1] + [self.kernel.shape[-1]]
      outputs.set_shape(output_shape)

  if self.use_bias:
    outputs = nn_ops.bias_add(outputs, self.bias)
  if self.activation is not None:
    outputs = self.activation(outputs)
  return outputs
def testEmptyIndicesTensor(self):
  with test_util.use_gpu():
    sp_input = sparse_tensor.SparseTensor(
        indices=np.ones([0, 2]),
        values=np.ones([0]),
        dense_shape=np.array([2, 5]))
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, -1))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices, [[0, 0], [1, 0]])
    self.assertAllEqual(output.values, [-1, -1])
    self.assertAllEqual(output.dense_shape, [2, 5])
    self.assertAllEqual(empty_row_indicator_out, np.ones(2).astype(np.bool_))
def testFillNumber(self):
  with test_util.use_gpu():
    for sp_input in (self._SparseTensorValue_5x6(), self._SparseTensor_5x6()):
      sp_output, empty_row_indicator = (
          sparse_ops.sparse_fill_empty_rows(sp_input, -1))
      output, empty_row_indicator_out = self.evaluate(
          [sp_output, empty_row_indicator])
      self.assertAllEqual(
          output.indices,
          [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2], [3, 3], [4, 0]])
      self.assertAllEqual(output.values, [0, 10, 13, 14, -1, 32, 33, -1])
      self.assertAllEqual(output.dense_shape, [5, 6])
      self.assertAllEqual(empty_row_indicator_out,
                          np.array([0, 0, 1, 0, 1]).astype(np.bool_))
def testFillNumber(self):
  with test_util.force_cpu():
    for sp_input in (self._SparseTensorValue_5x6(), self._SparseTensor_5x6()):
      sp_output, empty_row_indicator = (
          sparse_ops.sparse_fill_empty_rows(sp_input, -1))
      output, empty_row_indicator_out = self.evaluate(
          [sp_output, empty_row_indicator])
      self.assertAllEqual(
          output.indices,
          [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2], [3, 3], [4, 0]])
      self.assertAllEqual(output.values, [0, 10, 13, 14, -1, 32, 33, -1])
      self.assertAllEqual(output.dense_shape, [5, 6])
      self.assertAllEqual(empty_row_indicator_out,
                          np.array([0, 0, 1, 0, 1]).astype(np.bool_))
def testNoEmptyRowsAndUnordered(self):
  with test_util.use_gpu():
    sp_input = sparse_tensor.SparseTensor(
        indices=np.array([[1, 2], [1, 3], [0, 1], [0, 3]]),
        values=np.array([1, 3, 2, 4]),
        dense_shape=np.array([2, 5]))
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, -1))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices, [[0, 1], [0, 3], [1, 2], [1, 3]])
    self.assertAllEqual(output.values, [2, 4, 1, 3])
    self.assertAllEqual(output.dense_shape, [2, 5])
    self.assertAllEqual(empty_row_indicator_out, np.zeros(2).astype(np.bool_))
def testEmptyOutput(self):
  with test_util.use_gpu():
    sp_input = sparse_tensor.SparseTensor(
        indices=np.ones([0, 2]),
        values=np.ones([0]),
        dense_shape=np.array([0, 3]))
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, -1))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices, np.ones([0, 2]))
    self.assertAllEqual(output.values, np.ones([0]))
    self.assertAllEqual(output.dense_shape, [0, 3])
    self.assertAllEqual(empty_row_indicator_out, [])
def testFillString(self):
  with test_util.force_cpu():
    sp_input = self._SparseTensor_String5x6()
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, ""))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(
        output.indices,
        [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2], [3, 3], [4, 0]])
    self.assertAllEqual(output.values,
                        [b"a", b"b", b"c", b"d", b"", b"e", b"f", b""])
    self.assertAllEqual(output.dense_shape, [5, 6])
    self.assertAllEqual(empty_row_indicator_out,
                        np.array([0, 0, 1, 0, 1]).astype(np.bool_))
def testFillString(self):
  with self.test_session(use_gpu=False) as sess:
    sp_input = self._SparseTensor_String5x6()
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, ""))
    output, empty_row_indicator_out = sess.run(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices,
                        [[0, 0], [1, 0], [1, 3], [1, 4], [2, 0], [3, 2],
                         [3, 3], [4, 0]])
    self.assertAllEqual(output.values,
                        [b"a", b"b", b"c", b"d", b"", b"e", b"f", b""])
    self.assertAllEqual(output.shape, [5, 6])
    self.assertAllEqual(empty_row_indicator_out,
                        np.array([0, 0, 1, 0, 1]).astype(np.bool_))
def testUnordered(self):
  with test_util.use_gpu():
    sp_input = sparse_tensor.SparseTensor(
        indices=np.array([[2, 3], [2, 2], [0, 1], [0, 3]]),
        values=np.array([1, 3, 2, 4]),
        dense_shape=np.array([3, 5]))
    sp_output, empty_row_indicator = (
        sparse_ops.sparse_fill_empty_rows(sp_input, -1))
    output, empty_row_indicator_out = self.evaluate(
        [sp_output, empty_row_indicator])
    self.assertAllEqual(output.indices,
                        [[0, 1], [0, 3], [1, 0], [2, 3], [2, 2]])
    self.assertAllEqual(output.values, [2, 4, -1, 1, 3])
    self.assertAllEqual(output.dense_shape, [3, 5])
    self.assertAllEqual(empty_row_indicator_out, [False, True, False])
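# A rough pure-NumPy reference (an assumption based on the expected outputs in
# the tests above, not the op's actual kernel): rows are emitted in ascending
# order, entries within a row keep their input order, and each empty row gets
# a single (row, 0) entry holding the default value. `indices` is an
# [nnz, 2] integer array, `values` an [nnz] array.
import numpy as np

def fill_empty_rows_reference(indices, values, dense_shape, default):
  num_rows = int(dense_shape[0])
  empty_row_indicator = np.ones(num_rows, dtype=bool)
  empty_row_indicator[indices[:, 0]] = False

  out_indices, out_values = [], []
  order = np.argsort(indices[:, 0], kind="stable")  # group by row, keep order
  grouped = {}
  for i in order:
    grouped.setdefault(int(indices[i, 0]), []).append(i)
  for row in range(num_rows):
    if empty_row_indicator[row]:
      out_indices.append([row, 0])
      out_values.append(default)
    else:
      for i in grouped[row]:
        out_indices.append(list(indices[i]))
        out_values.append(values[i])
  return (np.array(out_indices), np.array(out_values), dense_shape,
          empty_row_indicator)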
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as
  the vocabulary size is not necessarily a multiple of `P`.

  `embedding_weights` may be a `PartitionedVariable` as returned by using
  `tf.get_variable()` with a partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A list of `P` float tensors or values representing
      partitioned embedding tensors. Alternatively, a `PartitionedVariable`,
      created by partitioning along dimension 0. The total unpartitioned shape
      should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size
      and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not None, all embeddings are l2-normalized to max_norm before
      combining.

  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights is not None else None
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = sparse_ids.dense_shape.get_shape()[0]
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None else original_rank_dim.value)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result = embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    # Reshape back from linear ids into the higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(
                math_ops.cast(original_shape, dtypes.int32), [0],
                [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (original_rank_dim - 1).value).concatenate(result.get_shape()[1:]))
    return final_result
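# A standalone sketch of the rank-flattening step used above, written against
# the public TF2 API (tf.sparse.reshape); the helper name is illustrative and
# not part of the original code.
import tensorflow as tf

def flatten_ids_to_2d(sp_ids):
  # Collapse [d_0, ..., d_{n-1}, d_n] to [d_0 * ... * d_{n-1}, d_n] so that
  # embeddings are aggregated along the last dimension only.
  original_shape = sp_ids.dense_shape
  rank = tf.size(original_shape)
  leading = tf.reduce_prod(tf.slice(original_shape, [0], [rank - 1]))
  last = tf.gather(original_shape, rank - 1)
  return tf.sparse.reshape(sp_ids, tf.stack([leading, last]))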
def scattered_embedding_lookup_sparse(params,
                                      sparse_values,
                                      dimension,
                                      combiner=None,
                                      default_value=None,
                                      name=None,
                                      hash_key=None):
  """Looks up embeddings of a sparse feature using parameter hashing.

  See `tf.contrib.layers.scattered_embedding_lookup` for embedding with
  hashing.

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`.
      Each tensor must be of rank 1 with fully-defined shape.
    sparse_values: A 2-D `SparseTensor` containing the values to be embedded.
      Some rows may be empty.
    dimension: Embedding dimension.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_value: The value to use for an entry with no features.
    name: An optional name for this op.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the cross fingerprints in SparseFeatureCrossOp
      (optional).

  Returns:
    Dense tensor with shape [N, dimension] with N the number of rows in
    sparse_values.

  Raises:
    TypeError: If sparse_values is not a SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sparse_values, sparse_tensor.SparseTensor):
    raise TypeError("sparse_values must be SparseTensor")

  with ops.name_scope(name, "scattered_embedding_lookup_sparse",
                      params + [sparse_values]) as scope:
    # Fill in the empty rows.
    if default_value is None:
      # Random default values to reduce the risk of collision.
      if sparse_values.dtype == dtypes.string:
        default_value = "6ZxWzWOHxZ"
      else:
        default_value = 1288896567

    sparse_values, _ = sparse_ops.sparse_fill_empty_rows(
        sparse_values, default_value)

    segment_ids = sparse_values.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    values = sparse_values.values
    values, idx = array_ops.unique(values)

    embeddings = scattered_embedding_lookup(
        params, values, dimension, hash_key=hash_key)

    if combiner == "sum":
      embeddings = math_ops.sparse_segment_sum(
          embeddings, idx, segment_ids, name=scope)
    elif combiner == "mean":
      embeddings = math_ops.sparse_segment_mean(
          embeddings, idx, segment_ids, name=scope)
    elif combiner == "sqrtn":
      embeddings = math_ops.sparse_segment_sqrt_n(
          embeddings, idx, segment_ids, name=scope)
    else:
      raise ValueError("Combiner must be one of 'mean', 'sqrtn' or 'sum'.")

    return embeddings
def hashed_embedding_lookup_sparse(params,
                                   sparse_values,
                                   dimension,
                                   combiner="mean",
                                   default_value=None,
                                   name=None):
  """Looks up embeddings of a sparse feature using parameter hashing.

  See `tf.contrib.layers.hashed_embedding_lookup` for embedding with hashing.

  Args:
    params: A `Tensor` or `list` of `Tensors`. Each tensor must be of rank 1
      with fully-defined shape.
    sparse_values: A 2-D `SparseTensor` containing the values to be embedded.
      Some rows may be empty.
    dimension: Embedding dimension.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_value: The value to use for an entry with no features.
    name: An optional name for this op.

  Returns:
    Dense tensor with shape [N, dimension] with N the number of rows in
    sparse_values.

  Raises:
    TypeError: If sparse_values is not a SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sparse_values, ops.SparseTensor):
    raise TypeError("sparse_values must be SparseTensor")

  with ops.name_scope(name, "hashed_sparse_embedding_lookup",
                      params + [sparse_values]) as scope:
    # Fill in the empty rows.
    if default_value is None:
      # Random default values to reduce the risk of collision.
      if sparse_values.dtype == dtypes.string:
        default_value = "6ZxWzWOHxZ"
      else:
        default_value = 1288896567

    sparse_values, _ = sparse_ops.sparse_fill_empty_rows(
        sparse_values, default_value)

    segment_ids = sparse_values.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    values = sparse_values.values
    values, idx = array_ops.unique(values)

    embeddings = hashed_embedding_lookup(params, values, dimension)

    if combiner == "sum":
      embeddings = math_ops.sparse_segment_sum(
          embeddings, idx, segment_ids, name=scope)
    elif combiner == "mean":
      embeddings = math_ops.sparse_segment_mean(
          embeddings, idx, segment_ids, name=scope)
    elif combiner == "sqrtn":
      embeddings = math_ops.sparse_segment_sqrt_n(
          embeddings, idx, segment_ids, name=scope)
    else:
      raise ValueError("Combiner must be one of 'mean', 'sqrtn' or 'sum'.")

    return embeddings
def safe_embedding_lookup_sparse(
    self, sparse_ids, sparse_weights=None, combiner="mean", default_id=None
):
    """Lookup embedding results, accounting for invalid IDs and empty features.

    The result of this function is the same as
    `tf.nn.safe_embedding_lookup_sparse`, but this function is implemented to
    support embedding lookup with the ParameterServer distribution strategy.
    """
    self._init_for_graph_mode_if_necessary()

    sparse_ids = _prune_invalid_ids(sparse_ids)
    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, 0
    )

    segment_ids = sparse_ids.indices[:, 0]
    if segment_ids.dtype != tf.int32:
        segment_ids = tf.cast(segment_ids, tf.int32)

    ids = sparse_ids.values
    unique_ids, idx = tf.unique(ids)
    batch_embedding = self._get_embeddings_by_id(unique_ids)

    if sparse_weights is not None:
        if self.tape:
            batch_embedding = self._record_gradients(
                batch_embedding=batch_embedding, ids=ids
            )

        weights = sparse_weights.values
        if weights.dtype != batch_embedding.dtype:
            weights = math_ops.cast(weights, batch_embedding.dtype)

        batch_embedding = array_ops.gather(batch_embedding, idx)

        # Reshape weights to allow broadcast
        ones = array_ops.fill(
            array_ops.expand_dims(array_ops.rank(batch_embedding) - 1, 0), 1
        )
        bcast_weights_shape = array_ops.concat(
            [array_ops.shape(weights), ones], 0
        )

        orig_weights_shape = weights.get_shape()
        weights = array_ops.reshape(weights, bcast_weights_shape)

        # Set the weight shape, since after reshaping to
        # bcast_weights_shape, the shape becomes None.
        if batch_embedding.get_shape().ndims is not None:
            weights.set_shape(
                orig_weights_shape.concatenate(
                    [
                        1
                        for _ in range(
                            batch_embedding.get_shape().ndims - 1
                        )
                    ]
                )
            )

        batch_embedding *= weights

        if combiner == "sum":
            batch_embedding = math_ops.segment_sum(
                batch_embedding, segment_ids
            )
        elif combiner == "mean":
            batch_embedding = math_ops.segment_sum(
                batch_embedding, segment_ids
            )
            weight_sum = math_ops.segment_sum(weights, segment_ids)
            batch_embedding = math_ops.div(batch_embedding, weight_sum)
        elif combiner == "sqrtn":
            batch_embedding = math_ops.segment_sum(
                batch_embedding, segment_ids
            )
            weights_squared = math_ops.pow(weights, 2)
            weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
            weight_sum_sqrt = math_ops.sqrt(weight_sum)
            batch_embedding = math_ops.div(
                batch_embedding, weight_sum_sqrt
            )
        else:
            assert False, "Unrecognized combiner"
    else:
        if self.tape:
            batch_embedding = self._record_gradients(
                batch_embedding=batch_embedding, ids=unique_ids
            )

        assert idx is not None
        if combiner == "sum":
            batch_embedding = math_ops.sparse_segment_sum(
                batch_embedding, idx, segment_ids
            )
        elif combiner == "mean":
            batch_embedding = math_ops.sparse_segment_mean(
                batch_embedding, idx, segment_ids
            )
        elif combiner == "sqrtn":
            batch_embedding = math_ops.sparse_segment_sqrt_n(
                batch_embedding, idx, segment_ids
            )
        else:
            assert False, "Unrecognized combiner"

    # Broadcast is_row_empty to the same shape as embedding_lookup_result,
    # for use in Select.
    is_row_empty = array_ops.tile(
        array_ops.reshape(is_row_empty, [-1, 1]),
        array_ops.stack([1, array_ops.shape(batch_embedding)[1]]),
    )
    batch_embedding = array_ops.where(
        is_row_empty,
        array_ops.zeros_like(batch_embedding),
        batch_embedding,
        name=self.name,
    )
    batch_embedding.set_shape((None, self.output_dim))
    return batch_embedding
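# A minimal sketch (not the implementation above) of the weighted "mean"
# combiner, expressed with public TF ops. `emb` is the per-value embedding
# matrix [num_values, dim]; `weights` and `segment_ids` are aligned with the
# sparse values, and segment_ids must be sorted (they come from the row
# coordinates of an ordered SparseTensor).
import tensorflow as tf

def weighted_mean_combiner(emb, weights, segment_ids):
  w = tf.reshape(tf.cast(weights, emb.dtype), [-1, 1])   # broadcast over dim
  weighted_sum = tf.math.segment_sum(emb * w, segment_ids)
  weight_sum = tf.math.segment_sum(w, segment_ids)
  return weighted_sum / weight_sum                        # "mean" combiner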
def safe_embedding_lookup_sparse(
    embedding_weights,
    sparse_ids,
    sparse_weights=None,
    combiner="mean",
    default_id=None,
    name="safe_embedding_lookup_sparse",
    partition_strategy=None,  # not used
    max_norm=None,
    return_trainable=False):
  """Provides a dynamic version of `tf.nn.safe_embedding_lookup_sparse`.

  Lookup embedding results, accounting for empty features and invalid weights.

  Any IDs will be treated as valid, including non-positive IDs.
  Invalid weights (<= 0) are pruned from input weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A single `dynamic_embedding.Variable` instance
      representing the complete embedding tensor.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm
      before combining.

  Returns:
    combined_embeddings: A dense `Tensor` of shape
      `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.
    trainable_wrap: A TrainableWrapper object used to fill the Optimizers
      `var_list`. Only provided if `return_trainable` is True.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  if embedding_weights.key_dtype != sparse_ids.dtype:
    raise TypeError(
        "embedding_weights.key_dtype should be same with sparse_ids.dtype: "
        "{} vs. {}".format(embedding_weights.key_dtype, sparse_ids.dtype))

  weights_dtype = sparse_weights.dtype if sparse_weights is not None else None
  if weights_dtype and embedding_weights.value_dtype != weights_dtype:
    raise TypeError(
        "embedding_weights.value_dtype should be same with sparse_weights.dtype"
        ": {} vs. {}".format(embedding_weights.value_dtype, weights_dtype))

  scope = variable_scope.get_variable_scope()
  full_name = scope.name + "/" + name if scope.name else name
  with ops.name_scope(full_name + "/"):
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.dimension_value(
        sparse_ids.dense_shape.get_shape()[0])
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim is None else original_rank_dim)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid weights.
    if combiner != "sum":
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result, trainable_ = embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=name + "/embedding_lookup_sparse",
        max_norm=max_norm,
        return_trainable=True)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name="where")

    # Reshape back from linear ids into the higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(math_ops.cast(original_shape, dtypes.int32), [0],
                            [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (tensor_shape.Dimension(original_rank_dim) -
             1).value).concatenate(result.get_shape()[1:]))
    return (final_result, trainable_) if return_trainable else final_result
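# A small sketch of the empty-row masking step above, in isolation: when no
# default_id is supplied, rows that were originally empty are zeroed out after
# the lookup. Names are illustrative, not from the library.
import tensorflow as tf

def zero_out_empty_rows(result, is_row_empty):
  mask = tf.tile(tf.reshape(is_row_empty, [-1, 1]),
                 tf.stack([1, tf.shape(result)[1]]))
  return tf.where(mask, tf.zeros_like(result), result)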
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner="mean",
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div"):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as
  the vocabulary size is not necessarily a multiple of `P`.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  Args:
    embedding_weights: A list of `P` float tensors or values representing
      partitioned embedding tensors.
    sparse_ids: `SparseTensor` of shape `[batch_size, ?]` containing the ids.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.

  Returns:
    Dense tensor of shape `[batch_size, embed_dim]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None or len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.op_scope(embedding_weights + [sparse_ids, sparse_weights], name,
                    "embedding_lookup") as scope:
    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result = tf_embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.pack([1, array_ops.shape(result)[1]]))

      result = math_ops.select(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    return result
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner="mean",
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as
  the vocabulary size is not necessarily a multiple of `P`.

  `embedding_weights` may be a `PartitionedVariable` as returned by using
  `tf.compat.v1.get_variable()` with a partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A single tensor representing the complete embedding
      tensor, or a list of tensors all of same shape except for the first
      dimension, representing sharded embedding tensors. Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm
      before combining.

  Returns:
    A dense tensor representing the combined embeddings for the sparse ids.
    For each row in the dense tensor represented by `sp_ids`, the op looks up
    the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if `shape(combined embedding_weights) = [p0, p1, ..., pm]`
    and `shape(sparse_ids) = shape(sparse_weights) = [d0, d1, ..., dn]`, then
    `shape(output) = [d0, d1, ... dn-1, p1, ..., pm]`.

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

    ```python
    [0, 0]: id 1, weight 2.0
    [0, 1]: id 3, weight 0.5
    [1, 0]: id -1, weight 1.0
    [2, 3]: id 1, weight 3.0
    ```

    `default_id` is 0, and `combiner` is "mean", then the output will be a
    3x20 matrix where

    ```python
    output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
    output[1, :] = (params[0, :] * 1.0) / 1.0
    output[2, :] = (params[1, :] * 3.0) / 3.0
    ```

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights is not None else None
  embedding_weights = [
      w if (isinstance(w, resource_variable_ops.ResourceVariable) and
            dtype in (None, w.dtype)) else ops.convert_to_tensor(w, dtype=dtype)
      for w in embedding_weights
  ]

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.dimension_value(
        sparse_ids.dense_shape.get_shape()[0])
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim is None else original_rank_dim)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)
    if combiner != "sum":
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result = embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(is_row_empty,
                               array_ops.zeros_like(result),
                               result,
                               name=scope)

    # Reshape back from linear ids into the higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(math_ops.cast(original_shape, dtypes.int32), [0],
                            [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (tensor_shape.Dimension(original_rank_dim) -
             1).value).concatenate(result.get_shape()[1:]))
    return final_result
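# A hedged usage example for the function above via its public endpoint,
# tf.nn.safe_embedding_lookup_sparse; the shapes and ids are made up.
import tensorflow as tf

params = tf.random.normal([10, 4])                 # vocab size 10, dim 4
sparse_ids = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [2, 0]],              # row 1 has no features
    values=tf.constant([1, 3, 7], dtype=tf.int64),
    dense_shape=[3, 2])

combined = tf.nn.safe_embedding_lookup_sparse(params, sparse_ids,
                                              combiner="mean")
# combined has shape [3, 4]; combined[1] is the zero vector because row 1 was
# empty and no default_id was given.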
def forward_compute(self, sp_input, default_value):
  result_output, result_indicator = de_math.sparse_fill_empty_rows(
      sp_input, default_value)
  expected_output, expected_indicator = sparse_ops.sparse_fill_empty_rows(
      sp_input, default_value)
  return result_output, result_indicator, expected_output, expected_indicator
def dense(inputs, kernel, bias=None, activation=None, dtype=None):
  """Densely connected NN layer op.

  Args:
    inputs: `tf.Tensor` or `tf.SparseTensor`. Inputs to operation.
    kernel: `tf.Variable`. Matrix kernel.
    bias: (Optional) `tf.Variable`. Bias to add to outputs.
    activation: (Optional) 1-argument callable. Activation function to apply
      to outputs.
    dtype: (Optional) `tf.DType`. Dtype to cast `inputs` to.

  Returns:
    `tf.Tensor`. Output of dense connection.
  """
  if dtype:
    if inputs.dtype.base_dtype != dtype.base_dtype:
      inputs = math_ops.cast(inputs, dtype=dtype)

  rank = inputs.shape.rank
  if rank == 2 or rank is None:
    # We use embedding_lookup_sparse as a more efficient matmul operation for
    # large sparse input tensors. The op will result in a sparse gradient, as
    # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense
    # gradients. This can lead to significant speedups, see b/171762937.
    if isinstance(inputs, sparse_tensor.SparseTensor):
      # We need to fill empty rows, as the op assumes at least one id per row.
      inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0)
      # We need to do some munging of our input to use the embedding lookup as
      # a matrix multiply. We split our input matrix into separate ids and
      # weights tensors. The values of the ids tensor should be the column
      # indices of our input matrix and the values of the weights tensor can
      # continue to be the actual matrix weights. The column arrangement of
      # ids and weights will be summed over and does not matter. See the
      # documentation for sparse_ops.sparse_tensor_dense_matmul for a more
      # detailed explanation of the inputs to both ops.
      ids = sparse_tensor.SparseTensor(
          indices=inputs.indices,
          values=inputs.indices[:, 1],
          dense_shape=inputs.dense_shape)
      weights = inputs
      outputs = embedding_ops.embedding_lookup_sparse_v2(
          kernel, ids, weights, combiner="sum")
    else:
      outputs = gen_math_ops.MatMul(a=inputs, b=kernel)
  # Broadcast kernel to inputs.
  else:
    outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]])
    # Reshape the output back to the original ndim of the input.
    if not context.executing_eagerly():
      shape = inputs.shape.as_list()
      output_shape = shape[:-1] + [kernel.shape[-1]]
      outputs.set_shape(output_shape)

  if bias is not None:
    outputs = nn_ops.bias_add(outputs, bias)
  if activation is not None:
    outputs = activation(outputs)
  return outputs
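# A quick sanity-check sketch (assumed, not part of the source) that the
# embedding-lookup path used above for 2-D sparse inputs matches an ordinary
# sparse-dense matmul once empty rows are filled with zero-weight entries.
import tensorflow as tf

kernel = tf.random.normal([5, 3])
sp = tf.sparse.SparseTensor(
    indices=[[0, 1], [2, 4]], values=[2.0, 0.5],
    dense_shape=[3, 5])                                  # row 1 is empty

reference = tf.sparse.sparse_dense_matmul(sp, kernel)

sp_filled, _ = tf.sparse.fill_empty_rows(sp, 0.0)
ids = tf.sparse.SparseTensor(
    indices=sp_filled.indices,
    values=sp_filled.indices[:, 1],
    dense_shape=sp_filled.dense_shape)
lookup = tf.nn.embedding_lookup_sparse(kernel, ids, sp_filled, combiner="sum")
# Both `reference` and `lookup` are [3, 3]; the filled row contributes id 0
# with weight 0.0, so row 1 stays all zeros in both.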