def test():
    """Build sparse model inputs and invoke `model_fn` in TRAIN mode.

    Reads `feature`, `label`, `pos`, `dis`, `params` and `config` from the
    enclosing scope; none of them are defined inside this function.
    """
    feature_tensor = tf.constant(feature)
    label_tensor = tf.constant(label)
    position_tensor = tf.constant(pos)

    # `dis` is handed to the converter as-is (no tf.constant wrapper).
    features = {
        params.feature_name: dense_to_sparse_tensor(feature_tensor),
        params.pos_name: dense_to_sparse_tensor(position_tensor),
        params.distance_name: dense_to_sparse_tensor(dis),
    }

    # NOTE(review): the original layout was collapsed onto one line; both
    # prints are assumed to belong to the eager-only branch -- confirm.
    if tf.executing_eagerly():
        sparse_feature = features[params.feature_name]
        print("feature is \r\n {}".format(sparse_feature))
        sparse_position = features[params.pos_name]
        print("pos is \r\n {}".format(sparse_position))

    model_fn(features, label_tensor, tf.estimator.ModeKeys.TRAIN, config,
             params)
def testBowEncoderSparseTensor(self):
    """Asserts bow_encoder output shape is [2, 3] for two sparse documents."""
    with self.cached_session() as session:
        dense_docs = [[0, 1], [2, 3]]
        sparse_input = sparse_ops.dense_to_sparse_tensor(dense_docs)
        # vocab_size=4, embed_dim=3.
        encoded = encoders.bow_encoder(sparse_input, 4, 3)
        session.run(variables.global_variables_initializer())
        encoded_value = encoded.eval()
        self.assertAllEqual([2, 3], encoded_value.shape)
def testBowEncoderSparseTensor(self):
    """Asserts bow_encoder output shape is [2, 3] for two sparse documents."""
    with self.test_session() as session:
        dense_docs = [[0, 1], [2, 3]]
        sparse_input = sparse_ops.dense_to_sparse_tensor(dense_docs)
        # vocab_size=4, embed_dim=3.
        encoded = encoders.bow_encoder(sparse_input, 4, 3)
        session.run(variables.global_variables_initializer())
        encoded_value = encoded.eval()
        self.assertAllEqual([2, 3], encoded_value.shape)
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
    """Maps a sequence of symbols to a vector per example by averaging embeddings.

    Args:
      ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
        `int32` or `int64` with symbol ids.
      vocab_size: Integer number of symbols in vocabulary.
      embed_dim: Integer number of dimensions for embedding matrix.
      sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
        and performs a sparse embedding lookup. This is usually faster,
        but not desirable if padding tokens should have an embedding. Empty rows
        are assigned a special embedding.
      initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
      regularizer: Optional regularizer for the embeddings.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
      reuse: If `True`, variables inside the op will be reused.

    Returns:
      Encoding `Tensor` `[batch_size, embed_dim]` produced by
      averaging embeddings.

    Raises:
      ValueError: If `embed_dim` or `vocab_size` are not specified.
      TypeError: If `sparse_lookup` is `False` and `ids` is a `SparseTensor`.
    """
    if not vocab_size or not embed_dim:
        raise ValueError('Must specify vocab size and embedding dimension')

    with variable_scope.variable_scope(
            scope, 'bow_encoder', [ids], reuse=reuse):
        embeddings = variables.model_variable(
            'embeddings',
            shape=[vocab_size, embed_dim],
            initializer=initializer,
            regularizer=regularizer,
            trainable=trainable)
        ids_are_sparse = isinstance(ids, ops.SparseTensor)

        if sparse_lookup:
            # Dense ids are sparsified first; already-sparse ids pass through.
            if ids_are_sparse:
                sparse_ids = ids
            else:
                sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
            return contrib_embedding_ops.safe_embedding_lookup_sparse(
                [embeddings], sparse_ids, combiner='mean', default_id=0)

        if ids_are_sparse:
            # Interpolate explicitly: passing `ids` as a second positional
            # argument to TypeError leaves the '%s' placeholder unformatted.
            raise TypeError(
                'ids are expected to be dense Tensor, got: %s' % (ids,))
        return math_ops.reduce_mean(
            embedding_ops.embedding_lookup(embeddings, ids),
            reduction_indices=1)
def test_dense_to_sparse_tensor_1d_no_shape(self):
    """Converts a 1-D int32 placeholder of unknown length and checks the result.

    Feeding `[0, 100, 0, 3]` is expected to keep only the entries at
    positions 1 and 3.
    """
    with self.test_session() as session:
        dense_input = array_ops.placeholder(shape=[None], dtype=dtypes.int32)
        sparse = sparse_ops.dense_to_sparse_tensor(dense_input)
        fetched = session.run(sparse, feed_dict={dense_input: [0, 100, 0, 3]})

        expected_indices = [[1], [3]]
        expected_values = [100, 3]
        expected_shape = [4]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual(expected_values, fetched.values)
        self.assertAllEqual(expected_shape, fetched.dense_shape)
def test_dense_to_sparse_tensor_2d(self):
    """Converts a 2x4 int matrix and checks indices, values and dense shape."""
    dense_rows = [[1, 2, 0, 0], [3, 4, 5, 0]]
    with self.test_session() as session:
        sparse = sparse_ops.dense_to_sparse_tensor(dense_rows)
        fetched = session.run(sparse)

        # The zero entries are expected to be absent from the result.
        expected_indices = [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual([1, 2, 3, 4, 5], fetched.values)
        self.assertAllEqual([2, 4], fetched.dense_shape)
def test_dense_to_sparse_tensor_1d_no_shape(self):
    """Converts a 1-D int32 placeholder of unknown length and checks the result.

    Feeding `[0, 100, 0, 3]` is expected to keep only the entries at
    positions 1 and 3.
    """
    with self.test_session() as session:
        dense_input = array_ops.placeholder(shape=[None], dtype=dtypes.int32)
        sparse = sparse_ops.dense_to_sparse_tensor(dense_input)
        fetched = session.run(sparse, feed_dict={dense_input: [0, 100, 0, 3]})

        expected_indices = [[1], [3]]
        expected_values = [100, 3]
        expected_shape = [4]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual(expected_values, fetched.values)
        self.assertAllEqual(expected_shape, fetched.dense_shape)
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
    """Maps a sequence of symbols to a vector per example by averaging embeddings.

    Args:
      ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
        `int32` or `int64` with symbol ids.
      vocab_size: Integer number of symbols in vocabulary.
      embed_dim: Integer number of dimensions for embedding matrix.
      sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
        and performs a sparse embedding lookup. This is usually faster,
        but not desirable if padding tokens should have an embedding. Empty rows
        are assigned a special embedding.
      initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
      regularizer: Optional regularizer for the embeddings.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
      reuse: If `True`, variables inside the op will be reused.

    Returns:
      Encoding `Tensor` `[batch_size, embed_dim]` produced by
      averaging embeddings.

    Raises:
      ValueError: If `embed_dim` or `vocab_size` are not specified.
      TypeError: If `sparse_lookup` is `False` and `ids` is a `SparseTensor`.
    """
    if not vocab_size or not embed_dim:
        raise ValueError('Must specify vocab size and embedding dimension')

    with variable_scope.variable_scope(
            scope, 'bow_encoder', [ids], reuse=reuse):
        embeddings = variables.model_variable(
            'embeddings',
            shape=[vocab_size, embed_dim],
            initializer=initializer,
            regularizer=regularizer,
            trainable=trainable)
        ids_are_sparse = isinstance(ids, sparse_tensor.SparseTensor)

        if sparse_lookup:
            # Dense ids are sparsified first; already-sparse ids pass through.
            if ids_are_sparse:
                sparse_ids = ids
            else:
                sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
            return contrib_embedding_ops.safe_embedding_lookup_sparse(
                [embeddings], sparse_ids, combiner='mean', default_id=0)

        if ids_are_sparse:
            # Interpolate explicitly: passing `ids` as a second positional
            # argument to TypeError leaves the '%s' placeholder unformatted.
            raise TypeError(
                'ids are expected to be dense Tensor, got: %s' % (ids,))
        return math_ops.reduce_mean(
            embedding_ops.embedding_lookup(embeddings, ids),
            reduction_indices=1)
def test_dense_to_sparse_unknown_rank(self):
    """Converts an int32 placeholder declared without a shape, fed a 2x4 matrix."""
    dense_input = array_ops.placeholder(dtype=dtypes.int32)
    fed_matrix = [[1, 2, 0, 0], [3, 4, 5, 0]]
    with self.cached_session() as session:
        sparse = sparse_ops.dense_to_sparse_tensor(dense_input)
        fetched = session.run(sparse, feed_dict={dense_input: fed_matrix})

        expected_indices = [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual([1, 2, 3, 4, 5], fetched.values)
        self.assertAllEqual([2, 4], fetched.dense_shape)
def test_dense_to_sparse_unknown_rank(self):
    """Converts an int32 placeholder declared without a shape, fed a 2x4 matrix."""
    dense_input = array_ops.placeholder(dtype=dtypes.int32)
    fed_matrix = [[1, 2, 0, 0], [3, 4, 5, 0]]
    with self.cached_session() as session:
        sparse = sparse_ops.dense_to_sparse_tensor(dense_input)
        fetched = session.run(sparse, feed_dict={dense_input: fed_matrix})

        expected_indices = [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual([1, 2, 3, 4, 5], fetched.values)
        self.assertAllEqual([2, 4], fetched.dense_shape)
def test_dense_to_sparse_tensor_1d_str(self):
    """Converts a 1-D list of byte strings and checks dtypes and contents.

    The empty strings at positions 1 and 3 are expected to be absent from
    the result.
    """
    with self.test_session() as sess:
        st = sparse_ops.dense_to_sparse_tensor([b'qwe', b'', b'ewq', b''])
        result = sess.run(st)

        self.assertEqual(result.indices.dtype, np.int64)
        # `np.object_` instead of the `np.object` alias, which newer NumPy
        # releases no longer provide; the dtype comparison is unchanged.
        self.assertEqual(result.values.dtype, np.object_)
        self.assertEqual(result.dense_shape.dtype, np.int64)

        self.assertAllEqual([[0], [2]], result.indices)
        self.assertAllEqual([b'qwe', b'ewq'], result.values)
        self.assertAllEqual([4], result.dense_shape)
def test_dense_to_sparse_tensor_1d_bool(self):
    """Converts a 1-D list of booleans and checks dtypes and contents.

    The `False` entries at positions 1 and 3 are expected to be absent from
    the result.
    """
    with self.test_session() as sess:
        st = sparse_ops.dense_to_sparse_tensor([True, False, True, False])
        result = sess.run(st)

        self.assertEqual(result.indices.dtype, np.int64)
        # `np.bool_` instead of the `np.bool` alias, which was removed from
        # NumPy in 1.24; the dtype comparison is unchanged.
        self.assertEqual(result.values.dtype, np.bool_)
        self.assertEqual(result.dense_shape.dtype, np.int64)

        self.assertAllEqual([[0], [2]], result.indices)
        self.assertAllEqual([True, True], result.values)
        self.assertAllEqual([4], result.dense_shape)
def test_dense_to_sparse_tensor_1d(self):
    """Converts a 1-D int list and checks dtypes, indices, values and shape."""
    dense_values = [1, 0, 2, 0]
    with self.test_session() as session:
        sparse = sparse_ops.dense_to_sparse_tensor(dense_values)
        fetched = session.run(sparse)

        # Dtype checks first, then content checks.
        self.assertEqual(fetched.indices.dtype, np.int64)
        self.assertEqual(fetched.values.dtype, np.int32)
        self.assertEqual(fetched.dense_shape.dtype, np.int64)

        expected_indices = [[0], [2]]
        expected_values = [1, 2]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual(expected_values, fetched.values)
        self.assertAllEqual([4], fetched.dense_shape)
def test_dense_to_sparse_tensor_1d_str(self):
    """Converts a 1-D list of byte strings and checks dtypes and contents.

    The empty strings at positions 1 and 3 are expected to be absent from
    the result.
    """
    with self.test_session() as sess:
        st = sparse_ops.dense_to_sparse_tensor([b'qwe', b'', b'ewq', b''])
        result = sess.run(st)

        self.assertEqual(result.indices.dtype, np.int64)
        # `np.object_` instead of the `np.object` alias, which newer NumPy
        # releases no longer provide; the dtype comparison is unchanged.
        self.assertEqual(result.values.dtype, np.object_)
        self.assertEqual(result.dense_shape.dtype, np.int64)

        self.assertAllEqual([[0], [2]], result.indices)
        self.assertAllEqual([b'qwe', b'ewq'], result.values)
        self.assertAllEqual([4], result.dense_shape)
def test_dense_to_sparse_tensor_1d_float(self):
    """Converts a 1-D float list and checks dtypes, indices, values and shape."""
    dense_values = [1.5, 0.0, 2.3, 0.0]
    with self.test_session() as session:
        sparse = sparse_ops.dense_to_sparse_tensor(dense_values)
        fetched = session.run(sparse)

        self.assertEqual(fetched.indices.dtype, np.int64)
        self.assertEqual(fetched.values.dtype, np.float32)
        self.assertEqual(fetched.dense_shape.dtype, np.int64)

        expected_indices = [[0], [2]]
        self.assertAllEqual(expected_indices, fetched.indices)
        # Float payload is compared with a tolerance rather than exactly.
        self.assertAllClose([1.5, 2.3], fetched.values)
        self.assertAllEqual([4], fetched.dense_shape)
def test_dense_to_sparse_tensor_1d_bool(self):
    """Converts a 1-D list of booleans and checks dtypes and contents.

    The `False` entries at positions 1 and 3 are expected to be absent from
    the result.
    """
    with self.test_session() as sess:
        st = sparse_ops.dense_to_sparse_tensor([True, False, True, False])
        result = sess.run(st)

        self.assertEqual(result.indices.dtype, np.int64)
        # `np.bool_` instead of the `np.bool` alias, which was removed from
        # NumPy in 1.24; the dtype comparison is unchanged.
        self.assertEqual(result.values.dtype, np.bool_)
        self.assertEqual(result.dense_shape.dtype, np.int64)

        self.assertAllEqual([[0], [2]], result.indices)
        self.assertAllEqual([True, True], result.values)
        self.assertAllEqual([4], result.dense_shape)
def test_dense_to_sparse_tensor_1d(self):
    """Converts a 1-D int list and checks dtypes, indices, values and shape."""
    dense_values = [1, 0, 2, 0]
    with self.test_session() as session:
        sparse = sparse_ops.dense_to_sparse_tensor(dense_values)
        fetched = session.run(sparse)

        # Dtype checks first, then content checks.
        self.assertEqual(fetched.indices.dtype, np.int64)
        self.assertEqual(fetched.values.dtype, np.int32)
        self.assertEqual(fetched.dense_shape.dtype, np.int64)

        expected_indices = [[0], [2]]
        expected_values = [1, 2]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual(expected_values, fetched.values)
        self.assertAllEqual([4], fetched.dense_shape)
def test_dense_to_sparse_tensor_1d_float(self):
    """Converts a 1-D float list and checks dtypes, indices, values and shape."""
    dense_values = [1.5, 0.0, 2.3, 0.0]
    with self.test_session() as session:
        sparse = sparse_ops.dense_to_sparse_tensor(dense_values)
        fetched = session.run(sparse)

        self.assertEqual(fetched.indices.dtype, np.int64)
        self.assertEqual(fetched.values.dtype, np.float32)
        self.assertEqual(fetched.dense_shape.dtype, np.int64)

        expected_indices = [[0], [2]]
        self.assertAllEqual(expected_indices, fetched.indices)
        # Float payload is compared with a tolerance rather than exactly.
        self.assertAllClose([1.5, 2.3], fetched.values)
        self.assertAllEqual([4], fetched.dense_shape)
def test_dense_to_sparse_tensor_1d_str_special_ignore(self):
    """Converts byte strings with `ignore_value=b'qwe'` and checks the result.

    With the custom ignore value only position 0 is expected to be dropped;
    the empty strings are kept.
    """
    with self.test_session() as sess:
        st = sparse_ops.dense_to_sparse_tensor(
            [b'qwe', b'', b'ewq', b''], ignore_value=b'qwe')
        result = sess.run(st)

        self.assertEqual(result.indices.dtype, np.int64)
        # `np.object_` instead of the `np.object` alias, which newer NumPy
        # releases no longer provide; the dtype comparison is unchanged.
        self.assertEqual(result.values.dtype, np.object_)
        # NOTE(review): this reads `.shape` on the fetched value -- confirm
        # the attribute exists in the TensorFlow version under test.
        self.assertEqual(result.shape.dtype, np.int64)

        self.assertAllEqual([[1], [2], [3]], result.indices)
        self.assertAllEqual([b'', b'ewq', b''], result.values)
        self.assertAllEqual([4], result.shape)
def test_dense_to_sparse_tensor_1d_str_special_ignore(self):
    """Converts byte strings with `ignore_value=b'qwe'` and checks the result.

    With the custom ignore value only position 0 is expected to be dropped;
    the empty strings are kept.
    """
    with self.cached_session() as sess:
        st = sparse_ops.dense_to_sparse_tensor(
            [b'qwe', b'', b'ewq', b''], ignore_value=b'qwe')
        result = sess.run(st)

        self.assertEqual(result.indices.dtype, np.int64)
        # `np.object_` instead of the `np.object` alias, which newer NumPy
        # releases no longer provide; the dtype comparison is unchanged.
        self.assertEqual(result.values.dtype, np.object_)
        self.assertEqual(result.dense_shape.dtype, np.int64)

        self.assertAllEqual([[1], [2], [3]], result.indices)
        self.assertAllEqual([b'', b'ewq', b''], result.values)
        self.assertAllEqual([4], result.dense_shape)
def test_dense_to_sparse_tensor_3d_no_shape(self):
    """Converts a rank-3 int32 placeholder with unknown dims, fed a 2x2x4 array."""
    fed_value = [
        [[1, 2, 0, 0], [3, 4, 5, 0]],
        [[7, 8, 0, 0], [9, 0, 0, 0]],
    ]
    with self.test_session() as session:
        dense_input = tf.placeholder(shape=[None, None, None], dtype=tf.int32)
        sparse = sparse_ops.dense_to_sparse_tensor(dense_input)
        fetched = session.run(sparse, feed_dict={dense_input: fed_value})

        # One index triple per non-zero entry of the fed array.
        expected_indices = [
            [0, 0, 0], [0, 0, 1],
            [0, 1, 0], [0, 1, 1], [0, 1, 2],
            [1, 0, 0], [1, 0, 1],
            [1, 1, 0],
        ]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual([1, 2, 3, 4, 5, 7, 8, 9], fetched.values)
        self.assertAllEqual([2, 2, 4], fetched.shape)
def test_dense_to_sparse_tensor_3d_no_shape(self):
    """Converts a rank-3 int32 placeholder with unknown dims, fed a 2x2x4 array."""
    fed_value = [
        [[1, 2, 0, 0], [3, 4, 5, 0]],
        [[7, 8, 0, 0], [9, 0, 0, 0]],
    ]
    with self.test_session() as session:
        dense_input = tf.placeholder(shape=[None, None, None], dtype=tf.int32)
        sparse = sparse_ops.dense_to_sparse_tensor(dense_input)
        fetched = session.run(sparse, feed_dict={dense_input: fed_value})

        # One index triple per non-zero entry of the fed array.
        expected_indices = [
            [0, 0, 0], [0, 0, 1],
            [0, 1, 0], [0, 1, 1], [0, 1, 2],
            [1, 0, 0], [1, 0, 1],
            [1, 1, 0],
        ]
        self.assertAllEqual(expected_indices, fetched.indices)
        self.assertAllEqual([1, 2, 3, 4, 5, 7, 8, 9], fetched.values)
        self.assertAllEqual([2, 2, 4], fetched.shape)
def _prepare_inputs_for_fit_sparse(self,
                                   model_matrix,
                                   response,
                                   model_coefficients_start=None,
                                   convert_to_sparse_tensor=False):
    """Prepares the three inputs of a sparse fit.

    Args:
      model_matrix: Array-like exposing `.shape`; optionally converted to a
        sparse tensor.
      response: Passed through `_adjust_dtype_and_shape_hints`.
      model_coefficients_start: Starting coefficients. When `None`, zeros of
        shape `model_matrix.shape[:-2] + model_matrix.shape[-1:]` are used.
      convert_to_sparse_tensor: If true, `model_matrix` is converted with
        `sparse_ops.dense_to_sparse_tensor` before adjustment.

    Returns:
      Tuple `(model_matrix, response, model_coefficients_start)`, each passed
      through `self._adjust_dtype_and_shape_hints`.
    """
    coefficients = model_coefficients_start
    if coefficients is None:
        # Shape is taken from the dense matrix, before any sparse conversion.
        leading_dims = model_matrix.shape[:-2]
        trailing_dim = model_matrix.shape[-1:]
        coefficients = np.zeros(leading_dims + trailing_dim)

    matrix = model_matrix
    if convert_to_sparse_tensor:
        matrix = sparse_ops.dense_to_sparse_tensor(matrix)

    matrix = self._adjust_dtype_and_shape_hints(matrix)
    adjusted_response = self._adjust_dtype_and_shape_hints(response)
    coefficients = self._adjust_dtype_and_shape_hints(coefficients)
    return matrix, adjusted_response, coefficients
def _prepare_inputs_for_fit_sparse(self,
                                   model_matrix,
                                   response,
                                   model_coefficients_start=None,
                                   convert_to_sparse_tensor=False):
    """Prepares the three inputs of a sparse fit.

    Args:
      model_matrix: Array-like exposing `.shape`; optionally converted to a
        sparse tensor.
      response: Passed through `_adjust_dtype_and_shape_hints`.
      model_coefficients_start: Starting coefficients. When `None`, zeros of
        shape `model_matrix.shape[:-2] + model_matrix.shape[-1:]` are used.
      convert_to_sparse_tensor: If true, `model_matrix` is converted with
        `sparse_ops.dense_to_sparse_tensor` before adjustment.

    Returns:
      Tuple `(model_matrix, response, model_coefficients_start)`, each passed
      through `self._adjust_dtype_and_shape_hints`.
    """
    coefficients = model_coefficients_start
    if coefficients is None:
        # Shape is taken from the dense matrix, before any sparse conversion.
        leading_dims = model_matrix.shape[:-2]
        trailing_dim = model_matrix.shape[-1:]
        coefficients = np.zeros(leading_dims + trailing_dim)

    matrix = model_matrix
    if convert_to_sparse_tensor:
        matrix = sparse_ops.dense_to_sparse_tensor(matrix)

    matrix = self._adjust_dtype_and_shape_hints(matrix)
    adjusted_response = self._adjust_dtype_and_shape_hints(response)
    coefficients = self._adjust_dtype_and_shape_hints(coefficients)
    return matrix, adjusted_response, coefficients
def verify_sparse_dense_matmul(self, x_, y_):
    """Checks `tfp.math.sparse_or_dense_matmul(x_, y_)` against `np.matmul`.

    Args:
      x_: Left operand; fed as a sparse placeholder when
        `self.use_sparse_tensor` is set, otherwise as a regular placeholder.
      y_: Right operand, always fed through `self._make_placeholder`.
    """
    if not self.use_sparse_tensor:
        lhs = self._make_placeholder(x_)
    else:
        sparse_x = sparse_ops.dense_to_sparse_tensor(x_)
        lhs = self._make_sparse_placeholder(sparse_x)
    rhs = self._make_placeholder(y_)

    product = tfp.math.sparse_or_dense_matmul(lhs, rhs)
    product_value = self.evaluate(product)

    # The shape is only asserted when the test runs with static shapes.
    if self.use_static_shape:
        expected_shape = x_.shape[:-2] + (x_.shape[-2], y_.shape[-1])
        self.assertAllEqual(product_value.shape, expected_shape)

    expected_value = np.matmul(x_, y_)
    self.assertAllClose(product_value, expected_value, atol=0., rtol=1e-3)
def to_sparse_tensor(self, input_tensor):
    """Creates a SparseTensor from the bucketized Tensor.

    Inputs whose static rank exceeds 2 are converted directly with
    `dense_to_sparse_tensor` (ignoring the value `-2**31`). Otherwise a
    `[batch_size, dimension]` SparseTensor is assembled from explicit
    (row, column) indices and the flattened bucket ids.
    """
    dimension = self.source_column.dimension
    batch_size = array_ops.shape(input_tensor, name="shape")[0]

    if len(input_tensor.get_shape()) > 2:
        return sparse_ops.dense_to_sparse_tensor(
            input_tensor, ignore_value=-2**31)

    if dimension > 1:
        batch_range = math_ops.range(0, batch_size)
        batch_column = array_ops.expand_dims(
            batch_range, 1, name="expand_dims")
        tiled_rows = array_ops.tile(
            batch_column, [1, dimension], name="tile")
        row_ids = array_ops.reshape(tiled_rows, [-1], name="reshape")
        col_ids = array_ops.tile(
            math_ops.range(0, dimension), [batch_size], name="tile")
        # Flatten the bucket indices and unique them across dimensions
        # E.g. 2nd dimension indices will range from k to 2*k-1 with k buckets
        flat_buckets = array_ops.reshape(input_tensor, [-1], name="reshape")
        bucket_indices = flat_buckets + self.length * col_ids
    else:
        # Simpler indices when dimension=1
        row_ids = math_ops.range(0, batch_size)
        col_ids = array_ops.zeros(
            [batch_size], dtype=dtypes.int32, name="zeros")
        bucket_indices = array_ops.reshape(
            input_tensor, [-1], name="reshape")

    stacked_ids = array_ops.stack((row_ids, col_ids))
    indices = math_ops.to_int64(array_ops.transpose(stacked_ids))
    dense_shape = math_ops.to_int64(array_ops.stack([batch_size, dimension]))
    return sparse_tensor_py.SparseTensor(indices, bucket_indices, dense_shape)
def testBowEncoderSparseTensorDenseLookup(self):
    """Asserts bow_encoder raises TypeError for sparse ids with sparse_lookup=False."""
    with self.cached_session():
        dense_docs = [[0, 1]]
        sparse_input = sparse_ops.dense_to_sparse_tensor(dense_docs)
        with self.assertRaises(TypeError):
            encoders.bow_encoder(sparse_input, 4, 3, sparse_lookup=False)
def test_convert_to_sparse_undef_shape(self):
    """Expects ValueError when converting a placeholder declared without a shape."""
    # Both the placeholder creation and the conversion sit inside the
    # assertRaises scope, as in the original layout.
    with self.test_session(), self.assertRaises(ValueError):
        shapeless_input = array_ops.placeholder(dtype=dtypes.int32)
        sparse_ops.dense_to_sparse_tensor(shapeless_input)
def test_convert_to_sparse_undef_shape(self):
    """Expects ValueError when converting a placeholder declared without a shape."""
    # Both the placeholder creation and the conversion sit inside the
    # assertRaises scope, as in the original layout.
    with self.test_session(), self.assertRaises(ValueError):
        shapeless_input = array_ops.placeholder(dtype=dtypes.int32)
        sparse_ops.dense_to_sparse_tensor(shapeless_input)
def testBowEncoderSparseTensorDenseLookup(self):
    """Asserts bow_encoder raises TypeError for sparse ids with sparse_lookup=False."""
    with self.test_session():
        dense_docs = [[0, 1]]
        sparse_input = sparse_ops.dense_to_sparse_tensor(dense_docs)
        with self.assertRaises(TypeError):
            encoders.bow_encoder(sparse_input, 4, 3, sparse_lookup=False)