def __init__(self, input_size, batch_size, data_generator_creator, max_steps=None):
    """Create the feed placeholders and the queue that batches are enqueued into.

    Args:
        input_size: Size of the last dimension of ``inputs``; also forwarded
            to the parent initializer.
        batch_size: Fixed leading dimension of ``inputs`` and
            ``sequence_lengths``.
        data_generator_creator: Stored unchanged on the instance; it is not
            called here.
        max_steps: Stored as ``steps_left``. Defaults to ``None``.
    """
    super().__init__(input_size)
    self.batch_size = batch_size
    self.data_generator_creator = data_generator_creator
    self.steps_left = max_steps

    # The middle dimension of the inputs is left unspecified.
    inputs_shape = [batch_size, None, input_size]
    lengths_shape = [batch_size]
    # One dtype per queue component: inputs, sequence lengths, serialized labels.
    component_dtypes = [tf.float32, tf.int32, tf.string]

    with tf.device("/cpu:0"):
        self.inputs = tf.placeholder(tf.float32, inputs_shape, name='inputs')
        self.sequence_lengths = tf.placeholder(
            tf.int32, lengths_shape, name='sequence_lengths')
        self.labels = tf.sparse_placeholder(tf.int32, name='labels')

        self.queue = tf.FIFOQueue(dtypes=component_dtypes, capacity=100)

        # The sparse labels are serialized so they can travel through the
        # queue's tf.string component.
        labels_as_strings = tf.serialize_many_sparse(self.labels)
        queue_components = [
            self.inputs,
            self.sequence_lengths,
            labels_as_strings,
        ]
        self.enqueue_op = self.queue.enqueue(queue_components)
def __init__(self, input_size, batch_size, data_generator_creator, max_steps=None):
    """Build the input/label placeholders and the enqueue op feeding the queue.

    Args:
        input_size: Feature dimension of ``inputs``; also passed to the
            parent initializer.
        batch_size: Leading dimension of ``inputs`` and ``sequence_lengths``.
        data_generator_creator: Kept on the instance for later use; not
            invoked in this method.
        max_steps: Initial value of ``steps_left`` (``None`` by default).
    """
    super().__init__(input_size)

    self.batch_size = batch_size
    self.data_generator_creator = data_generator_creator
    self.steps_left = max_steps

    with tf.device("/cpu:0"):
        # Placeholders for the inputs and the labels.
        # `inputs` has dimensions [batch_size, max_time, input_size].
        self.inputs = tf.placeholder(
            tf.float32,
            shape=[batch_size, None, input_size],
            name='inputs',
        )
        self.sequence_lengths = tf.placeholder(
            tf.int32,
            shape=[batch_size],
            name='sequence_lengths',
        )
        self.labels = tf.sparse_placeholder(tf.int32, name='labels')

        # Queue carrying the inputs, their lengths and the labels.
        self.queue = tf.FIFOQueue(
            dtypes=[tf.float32, tf.int32, tf.string],
            capacity=100,
        )

        # Queues do not support sparse tensors yet, so the labels are
        # serialized before being enqueued.
        packed_labels = tf.serialize_many_sparse(self.labels)
        self.enqueue_op = self.queue.enqueue(
            [self.inputs, self.sequence_lengths, packed_labels])
def testSerializeManySparse(self):
    """Check the static shape of a serialized one-row sparse tensor.

    A sparse tensor with dense shape [1, 2] is serialized and the result is
    expected to report shape (1, 3).
    """
    entry_indices = tf.constant([[0, 1]], dtype=tf.int64)
    entry_values = tf.constant([2], dtype=tf.int64)
    sparse_input = tf.SparseTensor(
        indices=entry_indices,
        values=entry_values,
        dense_shape=[1, 2],
    )

    with self.cached_session():
        serialized = tf.serialize_many_sparse(
            sparse_input, 'serialize_name', tf.string)
        expected_shape = (1, 3)
        self.assertEqual(expected_shape, serialized.shape)
def benchmarkVeryLarge2DFloatSparseTensor(self):
    """Benchmark the tensors-map round trip against serialize/deserialize.

    A [64, 10000] sparse tensor with 10000 entries is built on the CPU. The
    values are identical strings (despite "Float" in the method name). Both
    round-trip paths are first checked to yield equal values, indices and
    shape, then each is benchmarked for at least 2000 iterations.
    """
    np.random.seed(127)
    num_elements = 10000
    batch_size = 64

    # One entry per column, each assigned to a random row, sorted by (row, col).
    row_ids = np.random.randint(batch_size, size=num_elements, dtype=np.int64)
    col_ids = np.arange(num_elements, dtype=np.int64)
    indices_np = np.asarray(sorted(zip(row_ids, col_ids)), dtype=np.int64)
    values_list = ["feature_value_for_embedding_lookup"] * num_elements
    shape_np = np.asarray([batch_size, num_elements], dtype=np.int64)

    with tf.Session() as sess, tf.device("/cpu:0"):
        indices_var = tf.Variable(indices_np)
        values_var = tf.Variable(values_list)
        shape_var = tf.Variable(shape_np)
        sparse = tf.SparseTensor(indices_var, values_var, shape_var)

        # Path 1: store in and retrieve from the sparse tensors map.
        handles = add_many_sparse_to_tensors_map(sparse)
        via_map = take_many_sparse_from_tensors_map(
            sparse_map_op=handles.op, sparse_handles=handles)
        via_map_op = via_map.values.op

        # Path 2: serialize to strings and deserialize again.
        serialized = tf.serialize_many_sparse(sparse)
        via_serialization = tf.deserialize_many_sparse(
            serialized, dtype=values_var.dtype)
        via_serialization_op = via_serialization.values.op

        tf.global_variables_initializer().run()

        map_result = sess.run(via_map)
        serialization_result = sess.run(via_serialization)
        # Both paths must reproduce the same sparse tensor.
        for field in ("values", "indices", "shape"):
            np.testing.assert_equal(
                getattr(map_result, field),
                getattr(serialization_result, field))

        benchmarks = (
            (via_map_op, "benchmark_very_large_2d_float_st_tensor_maps"),
            (via_serialization_op,
             "benchmark_very_large_2d_float_st_serialization"),
        )
        for op, benchmark_name in benchmarks:
            self.run_op_benchmark(
                sess, op, min_iters=2000, name=benchmark_name)
def benchmarkVeryLarge2DFloatSparseTensor(self):
    """Benchmark two ways of round-tripping a large 2-D sparse tensor.

    The sparse tensor has dense shape [batch_size, num_elements] with one
    string value per column, each placed in a randomly chosen row. It is
    round-tripped (a) through the sparse tensors map and (b) through
    serialize_many_sparse / deserialize_many_sparse. The two results are
    asserted equal before each path is benchmarked.
    """
    np.random.seed(127)
    num_elements = 10000
    batch_size = 64
    indices_batch = np.random.randint(
        batch_size, size=num_elements, dtype=np.int64)
    indices_value = np.arange(num_elements, dtype=np.int64)
    # Sort the (row, column) pairs so the indices are in row-major order.
    indices = np.asarray(
        sorted(zip(indices_batch, indices_value)), dtype=np.int64)
    values = ["feature_value_for_embedding_lookup"] * num_elements
    shape = np.asarray([batch_size, num_elements], dtype=np.int64)
    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            indices = tf.Variable(indices)
            values = tf.Variable(values)
            shape = tf.Variable(shape)
            st = tf.SparseTensor(indices, values, shape)

            # Round trip through the sparse tensors map.
            st_handles = add_many_sparse_to_tensors_map(st)
            st_roundtrip = take_many_sparse_from_tensors_map(
                sparse_map_op=st_handles.op, sparse_handles=st_handles)
            st_roundtrip_op = st_roundtrip.values.op

            # Round trip through string serialization.
            st_serialized = tf.serialize_many_sparse(st)
            st_deserialized = tf.deserialize_many_sparse(
                st_serialized, dtype=values.dtype)
            st_deserialized_op = st_deserialized.values.op

            # tf.initialize_all_variables() is deprecated; use its
            # replacement, tf.global_variables_initializer().
            tf.global_variables_initializer().run()

            st_roundtrip_values = sess.run(st_roundtrip)
            st_deserialized_values = sess.run(st_deserialized)
            # Both paths must produce the same sparse tensor before timing.
            np.testing.assert_equal(
                st_roundtrip_values.values, st_deserialized_values.values)
            np.testing.assert_equal(
                st_roundtrip_values.indices, st_deserialized_values.indices)
            np.testing.assert_equal(
                st_roundtrip_values.shape, st_deserialized_values.shape)

            self.run_op_benchmark(
                sess, st_roundtrip_op, min_iters=2000,
                name="benchmark_very_large_2d_float_st_tensor_maps")
            self.run_op_benchmark(
                sess, st_deserialized_op, min_iters=2000,
                name="benchmark_very_large_2d_float_st_serialization")
def testSerializeManyDeserializeManyRoundTrip(self):
    """Serialize then deserialize a string sparse tensor and compare.

    The serialized result is expected to have shape (4, 3), and the
    deserialized indices, values and shape must equal what was fed in.
    """
    with self.test_session(use_gpu=False) as sess:
        # N == 4 because the fed dense shape is [4, 5].
        fed_indices = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
        fed_values = np.array([b"a", b"b", b"c"])
        fed_shape = np.array([4, 5], dtype=np.int64)

        sp_placeholder = self._SparseTensorPlaceholder(dtype=tf.string)
        serialized_op = tf.serialize_many_sparse(sp_placeholder)
        deserialized_op = tf.deserialize_many_sparse(
            serialized_op, dtype=tf.string)

        feeds = {
            sp_placeholder.indices: fed_indices,
            sp_placeholder.values: fed_values,
            sp_placeholder.shape: fed_shape,
        }
        fetches = [serialized_op, deserialized_op]
        serialized_out, deserialized_out = sess.run(fetches, feed_dict=feeds)

        self.assertEqual(serialized_out.shape, (4, 3))
        self.assertAllEqual(deserialized_out.indices, fed_indices)
        self.assertAllEqual(deserialized_out.values, fed_values)
        self.assertAllEqual(deserialized_out.shape, fed_shape)