def _batch_dense_window(dataset):
  """Batches a window of dense tensors."""

  def key_fn(_):
    return np.int64(0)

  def shape_init_fn(_):
    return array_ops.shape(first_element)

  def shape_reduce_fn(state, value):
    check_ops.assert_equal(state, array_ops.shape(value))
    return state

  def finalize_fn(state):
    return state

  if dataset.output_shapes.is_fully_defined():
    shape = dataset.output_shapes
  else:
    first_element = get_single_element.get_single_element(dataset.take(1))
    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
                                     finalize_fn)
    shape = get_single_element.get_single_element(
        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))

  def batch_init_fn(_):
    batch_shape = array_ops.concat([[0], shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer)))
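# The helper above folds an entire window dataset into one stacked tensor by
# reducing every element under a single constant key. Below is a minimal,
# self-contained sketch of the same idea against the public TF 1.x API;
# `stack_window` and its `shape`/`dtype` parameters are illustrative
# assumptions, not part of this module.
import numpy as np
import tensorflow as tf

def stack_window(dataset, shape, dtype):
  reducer = tf.data.experimental.Reducer(
      # Start from an empty batch with the right trailing shape.
      init_func=lambda _: tf.zeros([0] + shape, dtype=dtype),
      # Append each element along a new leading (batch) axis.
      reduce_func=lambda state, value: tf.concat([state, [value]], 0),
      finalize_func=lambda state: state)
  return tf.data.experimental.get_single_element(
      dataset.apply(
          tf.data.experimental.group_by_reducer(lambda _: np.int64(0),
                                                reducer)))

# stack_window(tf.data.Dataset.from_tensor_slices([[1, 2], [3, 4]]), [2],
#              tf.int32) evaluates to [[1, 2], [3, 4]].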
def _padded_batch_sparse_window(dataset, padded_shape):
  """Batches a window of sparse tensors with padding."""

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return convert.partial_shape_to_tensor(padded_shape)

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(value.dense_shape, padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ", value.dense_shape,
        padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, value.dense_shape)

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  def batch_init_fn(_):
    indices_shape = array_ops.concat(
        [[0], [array_ops.size(padded_shape) + 1]], 0)
    return sparse_tensor.SparseTensor(
        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
        values=constant_op.constant(
            [], shape=[0], dtype=dataset_ops.get_legacy_output_types(dataset)),
        dense_shape=array_ops.concat(
            [np.array([0], dtype=np.int64), padded_shape], 0))

  def batch_reduce_fn(state, value):
    padded_value = sparse_tensor.SparseTensor(
        indices=value.indices, values=value.values, dense_shape=padded_shape)
    reshaped_value = sparse_ops.sparse_reshape(
        padded_value,
        array_ops.concat(
            [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
    return sparse_ops.sparse_concat(0, [state, reshaped_value])

  reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
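# Worked example of the two-pass reduction above (illustrative values):
# batching two rank-1 SparseTensors with dense shapes [1] and [2] under
# padded_shape [-1] first resolves the -1 via the elementwise running
# maximum (max(-1, 1) = 1, then max(1, 2) = 2), yielding a padded shape of
# [2]; the second pass then stacks both elements into one sparse batch of
# dense shape [2, 2].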
def _batch_sparse_window(dataset):
  """Batches a window of sparse tensors."""

  def key_fn(_):
    return np.int64(0)

  def shape_init_fn(_):
    return first_element.dense_shape

  def shape_reduce_fn(state, value):
    check_ops.assert_equal(state, value.dense_shape)
    return state

  def finalize_fn(state):
    return state

  dataset_output_shapes = dataset_ops.get_legacy_output_shapes(dataset)
  if dataset_output_shapes.is_fully_defined():
    shape = dataset_output_shapes
  else:
    first_element = get_single_element.get_single_element(dataset.take(1))
    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
                                     finalize_fn)
    shape = get_single_element.get_single_element(
        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))

  def batch_init_fn(_):
    indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0)
    return sparse_tensor.SparseTensor(
        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
        values=constant_op.constant(
            [], shape=[0], dtype=dataset_ops.get_legacy_output_types(dataset)),
        dense_shape=array_ops.concat([
            np.array([0], dtype=np.int64),
            math_ops.cast(shape, dtypes.int64)
        ], 0))

  def batch_reduce_fn(state, value):
    return sparse_ops.sparse_concat(0, [state, value])

  def reshape_fn(value):
    return sparse_ops.sparse_reshape(
        value,
        array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape],
                         0))

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(reshape_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
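# The core trick in reshape_fn/batch_reduce_fn: give every SparseTensor a
# unit leading batch dimension, then concatenate along axis 0. A hedged,
# self-contained sketch with the public API (`stack_two_sparse` is an
# illustrative helper, not part of this module):
import numpy as np
import tensorflow as tf

def stack_two_sparse(a, b):
  expand = lambda sp: tf.sparse.reshape(
      sp, tf.concat([np.array([1], dtype=np.int64), sp.dense_shape], 0))
  # Both inputs must share the same dense_shape for the result to be a
  # well-formed batch, mirroring the shape check above.
  return tf.sparse.concat(0, [expand(a), expand(b)])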
def testChangingStateShape(self):

  def reduce_fn(x, _):
    # Statically known rank, but dynamic length.
    larger_dim = array_ops.concat([x[0], x[0]], 0)
    # Statically unknown rank.
    larger_rank = array_ops.expand_dims(x[1], 0)
    return larger_dim, larger_rank

  reducer = grouping.Reducer(
      init_func=lambda x: ([0], 1),
      reduce_func=reduce_fn,
      finalize_func=lambda x, y: (x, y))

  for i in range(1, 11):
    dataset = dataset_ops.Dataset.from_tensors(np.int64(0)).repeat(i).apply(
        grouping.group_by_reducer(lambda x: x, reducer))
    self.assertEqual([None], dataset.output_shapes[0].as_list())
    self.assertIs(None, dataset.output_shapes[1].ndims)
    iterator = dataset.make_one_shot_iterator()
    get_next = iterator.get_next()
    with self.cached_session() as sess:
      x, y = sess.run(get_next)
      self.assertAllEqual([0] * (2**i), x)
      self.assertAllEqual(np.array(1, ndmin=i), y)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
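# Why these assertions hold: the state starts as ([0], 1); each of the i
# reduction steps doubles the first component (length 1 -> 2 -> ... -> 2**i)
# and wraps the second in one extra dimension (rank 0 -> 1 -> ... -> i), so
# only a shape with dynamic length and unknown rank can describe the output.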
def _build_dataset(self, components):
  reducer = grouping.Reducer(
      init_func=lambda _: np.int64(0),
      reduce_func=lambda x, y: x + y,
      finalize_func=lambda x: x)
  return dataset_ops.Dataset.from_tensor_slices(components).apply(
      grouping.group_by_reducer(lambda x: x % 5, reducer))
def make_group_by_reducer_dataset(var):
  reducer = grouping.Reducer(
      init_func=lambda _: 0,
      reduce_func=lambda x, y: x,
      finalize_func=lambda _: var)
  return dataset_ops.Dataset.range(5).apply(
      grouping.group_by_reducer(lambda x: x % 2, reducer))
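# Note: this reducer ignores the grouped elements entirely; reduce_func keeps
# the state unchanged and finalize_func closes over the captured `var`, so
# the dataset emits the variable's value once per key (two keys here: 0 and
# 1, from x % 2 over range(5)).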
def testInvalidKeyType(self):
  reducer = grouping.Reducer(
      init_func=lambda x: np.int64(0),
      reduce_func=lambda x, y: x + y,
      finalize_func=lambda x: x)
  dataset = dataset_ops.Dataset.range(10)
  with self.assertRaises(ValueError):
    dataset.apply(grouping.group_by_reducer(lambda _: "wrong", reducer))
def testSum(self):
  reducer = grouping.Reducer(
      init_func=lambda _: np.int64(0),
      reduce_func=lambda x, y: x + y,
      finalize_func=lambda x: x)
  for i in range(1, 11):
    dataset = dataset_ops.Dataset.range(2 * i).apply(
        grouping.group_by_reducer(lambda x: x % 2, reducer))
    self.assertDatasetProduces(
        dataset,
        expected_shapes=tensor_shape.scalar(),
        expected_output=[(i - 1) * i, i * i])
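# A self-contained version of the pattern this test exercises, written
# against the public TF 1.x API (only the concrete range below is new):
import numpy as np
import tensorflow as tf

reducer = tf.data.experimental.Reducer(
    init_func=lambda _: np.int64(0),
    reduce_func=lambda x, y: x + y,
    finalize_func=lambda x: x)
dataset = tf.data.Dataset.range(10).apply(
    tf.data.experimental.group_by_reducer(lambda x: x % 2, reducer))
# Produces two scalars: 0 + 2 + 4 + 6 + 8 = 20 for the even key and
# 1 + 3 + 5 + 7 + 9 = 25 for the odd key, matching (i - 1) * i and i * i
# for i = 5.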
def testTypeMismatch(self):
  reducer = grouping.Reducer(
      init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32),
      reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64),
      finalize_func=lambda x: x)
  dataset = dataset_ops.Dataset.range(10)
  with self.assertRaises(TypeError):
    dataset.apply(
        grouping.group_by_reducer(lambda _: np.int64(0), reducer))
def testSum(self):
  reducer = grouping.Reducer(
      init_func=lambda _: np.int64(0),
      reduce_func=lambda x, y: x + y,
      finalize_func=lambda x: x)
  for i in range(1, 11):
    dataset = dataset_ops.Dataset.range(2 * i).apply(
        grouping.group_by_reducer(lambda x: x % 2, reducer))
    self.checkResults(
        dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
def testInvalidKeyType(self):
  reducer = grouping.Reducer(
      init_func=lambda x: np.int64(0),
      reduce_func=lambda x, y: x + y,
      finalize_func=lambda x: x)
  dataset = dataset_ops.Dataset.range(10)
  with self.assertRaisesRegexp(
      ValueError, "`key_func` must return a single tf.int64 tensor."):
    dataset.apply(grouping.group_by_reducer(lambda _: "wrong", reducer))
def testTypeMismatch(self):
  reducer = grouping.Reducer(
      init_func=lambda x: constant_op.constant(1, dtype=dtypes.int32),
      reduce_func=lambda x, y: constant_op.constant(1, dtype=dtypes.int64),
      finalize_func=lambda x: x)
  dataset = dataset_ops.Dataset.range(10)
  with self.assertRaisesRegexp(
      TypeError,
      "The element types for the new state must match the initial state."):
    dataset.apply(
        grouping.group_by_reducer(lambda _: np.int64(0), reducer))
def testConcat(self):
  components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray)
  reducer = grouping.Reducer(
      init_func=lambda x: "",
      reduce_func=lambda x, y: x + y[0],
      finalize_func=lambda x: x)
  for i in range(1, 11):
    dataset = dataset_ops.Dataset.zip(
        (dataset_ops.Dataset.from_tensor_slices(components),
         dataset_ops.Dataset.range(2 * i))).apply(
             grouping.group_by_reducer(lambda x, y: y % 2, reducer))
    self.checkResults(
        dataset,
        shapes=tensor_shape.scalar(),
        values=[b"acegikmoqs"[:i], b"bdfhjlnprt"[:i]])
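# How the expected strings arise: the key is the index parity, so group 0
# collects the letters at even positions ("a", "c", "e", ...) and group 1
# those at odd positions; with 2 * i zipped indices, each group receives
# exactly i letters, hence the [:i] slices.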
def testAverage(self):

  def reduce_fn(x, y):
    return (x[0] * x[1] + math_ops.cast(y, dtypes.float32)) / (
        x[1] + 1), x[1] + 1

  reducer = grouping.Reducer(
      init_func=lambda _: (0.0, 0.0),
      reduce_func=reduce_fn,
      finalize_func=lambda x, _: x)
  for i in range(1, 11):
    dataset = dataset_ops.Dataset.range(2 * i).apply(
        grouping.group_by_reducer(
            lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer))
    self.assertDatasetProduces(
        dataset,
        expected_shapes=tensor_shape.scalar(),
        expected_output=[i - 1, i])
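# Why [i - 1, i] is expected: the even group of range(2 * i) is
# 0, 2, ..., 2 * (i - 1) with mean i - 1, and the odd group is
# 1, 3, ..., 2 * i - 1 with mean i. The reducer carries (mean, count) as
# state and updates the running mean as (mean * count + y) / (count + 1).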
def testSparseSum(self):

  def _sparse(i):
    return sparse_tensor.SparseTensorValue(
        indices=np.array([[0, 0]]),
        values=(i * np.array([1], dtype=np.int64)),
        dense_shape=np.array([1, 1]))

  reducer = grouping.Reducer(
      init_func=lambda _: _sparse(np.int64(0)),
      reduce_func=lambda x, y: _sparse(x.values[0] + y.values[0]),
      finalize_func=lambda x: x.values[0])
  for i in range(1, 11):
    dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply(
        grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer))
    self.checkResults(
        dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
def testTuple(self):

  def init_fn(_):
    return np.array([], dtype=np.int64), np.int64(0)

  def reduce_fn(state, value):
    s1, s2 = state
    v1, v2 = value
    return array_ops.concat([s1, [v1]], 0), s2 + v2

  def finalize_fn(s1, s2):
    return s1, s2

  reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
  dataset = dataset_ops.Dataset.zip(
      (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply(
          grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
  get_next = self.getNext(dataset)
  x, y = self.evaluate(get_next())
  self.assertAllEqual(x, np.asarray([x for x in range(10)]))
  self.assertEqual(y, 45)
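# With the constant key, all ten (v1, v2) pairs land in one group: the first
# state component accumulates v1 into [0, 1, ..., 9] and the second sums v2,
# giving 0 + 1 + ... + 9 = 45.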
def testStatefulGroupByReducerNotCheckpointable(self):
  stateful_key_func = self._statefulInt64Func
  key_func = lambda _: math_ops.cast(0, dtypes.int64)
  stateful_init_func = self._statefulBoolFunc
  init_func = lambda x: True
  stateful_reduce_func = lambda _, x: self._statefulBoolFunc(x)
  reduce_func = lambda _, x: True
  stateful_finalize_func = self._statefulBoolFunc
  finalize_func = lambda x: True

  test_cases = [
      (stateful_key_func, init_func, reduce_func, finalize_func),
      (key_func, stateful_init_func, reduce_func, finalize_func),
      (key_func, init_func, stateful_reduce_func, finalize_func),
      (key_func, init_func, reduce_func, stateful_finalize_func),
  ]

  for key_func, init_func, reduce_func, finalize_func in test_cases:
    dataset = dataset_ops.Dataset.range(10)
    reducer = grouping.Reducer(init_func, reduce_func, finalize_func)
    dataset = dataset.apply(grouping.group_by_reducer(key_func, reducer))
    self._assertNotCheckpointable(dataset)
def testTuple(self):

  def init_fn(_):
    return np.array([], dtype=np.int64), np.int64(0)

  def reduce_fn(state, value):
    s1, s2 = state
    v1, v2 = value
    return array_ops.concat([s1, [v1]], 0), s2 + v2

  def finalize_fn(s1, s2):
    return s1, s2

  reducer = grouping.Reducer(init_fn, reduce_fn, finalize_fn)
  dataset = dataset_ops.Dataset.zip(
      (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply(
          grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
  get_next = dataset.make_one_shot_iterator().get_next()
  with self.cached_session() as sess:
    x, y = sess.run(get_next)
    self.assertAllEqual(x, np.asarray([x for x in range(10)]))
    self.assertEqual(y, 45)
def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
  """Batches a window of dense tensors with padding."""

  padded_shape = math_ops.cast(
      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return padded_shape

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(array_ops.shape(value), padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ",
        array_ops.shape(value), padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, array_ops.shape(value))

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  dataset_output_types = dataset_ops.get_legacy_output_types(dataset)
  if padding_value is None:
    if dataset_output_types == dtypes.string:
      padding_value = ""
    elif dataset_output_types == dtypes.bool:
      padding_value = False
    elif dataset_output_types == dtypes.variant:
      raise TypeError("Unable to create padding for field of type 'variant'")
    else:
      padding_value = 0

  def batch_init_fn(_):
    batch_shape = array_ops.concat(
        [np.array([0], dtype=np.int32), padded_shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset_output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  def pad_fn(value):
    shape = array_ops.shape(value)
    left = array_ops.zeros_like(shape)
    right = padded_shape - shape
    return array_ops.pad(
        value, array_ops.stack([left, right], 1),
        constant_values=padding_value)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(pad_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
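# The padding step in isolation, as a hedged public-API sketch (`pad_to` is
# an illustrative helper, not part of this module): right-pad one tensor out
# to `padded_shape` with the chosen fill value, exactly as pad_fn does above.
import tensorflow as tf

def pad_to(value, padded_shape, padding_value=0):
  shape = tf.shape(value)
  # No padding before each dimension, (padded - actual) after it.
  paddings = tf.stack([tf.zeros_like(shape), padded_shape - shape], axis=1)
  return tf.pad(value, paddings, constant_values=padding_value)

# pad_to(tf.constant([[1, 2]]), tf.constant([2, 3])) evaluates to
# [[1, 2, 0], [0, 0, 0]].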