def _apply_fn(dataset):
  """Function from `Dataset` to `Dataset` that applies the transformation."""
  # Fast path: with no sparse components, plain unbatching is already correct.
  if not sparse.any_sparse(dataset.output_classes):
    return _UnbatchDataset(dataset)

  # NOTE(mrry): Any SparseTensors in `dataset` must first be normalized to
  # the rank-1 dense representation, so that the sparse-oblivious unbatching
  # logic will slice them appropriately. This incurs a somewhat inefficient
  # re-encoding step for all SparseTensor components.
  # TODO(mrry): Consider optimizing this in future if it turns out to be a
  # bottleneck.
  def normalize(first, *remainder):
    components = (first,) + remainder if remainder else first
    return sparse.serialize_many_sparse_tensors(components)

  serialized_dataset = dataset.map(normalize)

  # NOTE(mrry): `map()` discards the sparseness information of the
  # components, so re-impose the structure of the original dataset.
  restructured = _RestructuredDataset(
      serialized_dataset,
      dataset.output_types,
      dataset.output_shapes,
      dataset.output_classes,
      allow_unsafe_cast=True)
  return _UnbatchDataset(restructured)
def __init__(self, input_dataset, batch_size, padded_shapes, padding_values):
  """Initialize `PrependFromQueueAndPaddedBatchDataset`."""
  super(_PrependFromQueueAndPaddedBatchDataset, self).__init__()
  if sparse.any_sparse(input_dataset.output_classes):
    raise TypeError(
        "Batching of padded sparse tensors is not currently supported")
  self._input_dataset = input_dataset
  # The batch size is carried as an int64 scalar tensor.
  self._batch_size = ops.convert_to_tensor(
      batch_size, dtype=dtypes.int64, name="batch_size")
  # pylint: disable=protected-access
  if padded_shapes is None:
    # No explicit shapes given: derive them from the input's element shapes.
    self._padded_shapes = nest.map_structure(
        dataset_ops._partial_shape_to_tensor, input_dataset.output_shapes)
  else:
    self._padded_shapes = nest.map_structure_up_to(
        input_dataset.output_shapes, dataset_ops._partial_shape_to_tensor,
        padded_shapes)
  if padding_values is None:
    padding_values = dataset_ops._default_padding(input_dataset)
  self._padding_values = nest.map_structure_up_to(
      input_dataset.output_shapes, dataset_ops._padding_value_to_tensor,
      padding_values, input_dataset.output_types)
def _apply_fn(dataset):
  """Function from `Dataset` to `Dataset` that applies the transformation."""
  if not sparse.any_sparse(dataset.output_classes):
    # Dense-only datasets can be unbatched directly.
    return _UnbatchDataset(dataset)

  # NOTE(mrry): We must ensure that any SparseTensors in `dataset` are
  # normalized to the rank-1 dense representation, so that the
  # sparse-oblivious unbatching logic will slice them appropriately. This
  # leads to a somewhat inefficient re-encoding step for all SparseTensor
  # components.
  # TODO(mrry): Consider optimizing this in future if it turns out to be a
  # bottleneck.
  def normalize(first, *remainder):
    if not remainder:
      return sparse.serialize_many_sparse_tensors(first)
    return sparse.serialize_many_sparse_tensors((first,) + remainder)

  serialized_dataset = dataset.map(normalize)

  # NOTE(mrry): Our `map()` has lost information about the sparseness of any
  # SparseTensor components, so re-apply the structure of the original
  # dataset.
  restructured = _RestructuredDataset(
      serialized_dataset,
      dataset_ops.get_legacy_output_types(dataset),
      dataset_ops.get_legacy_output_shapes(dataset),
      dataset_ops.get_legacy_output_classes(dataset),
      allow_unsafe_cast=True)
  return _UnbatchDataset(restructured)
def testAnySparse(self):
  """Checks `sparse.any_sparse` over flat values and tuples of types."""
  test_cases = (
      ((), False),
      (None, False),
      (dtypes.string, False),
      ((None, -1, dtypes.string), False),
      (sparse.SparseType(dtypes.string), True),
      ((None, sparse.SparseType(dtypes.string)), True),
      ((sparse.SparseType(dtypes.string), dtypes.string), True),
      (sparse.SparseType(dtypes.string), True),
  )
  for types, expected in test_cases:
    self.assertEqual(sparse.any_sparse(types), expected)
def __init__(self, input_dataset, map_func, batch_size, num_parallel_batches):
  """See `Dataset.map()` for details."""
  super(_MapAndBatchDataset, self).__init__(input_dataset, map_func)
  if sparse.any_sparse(self._output_types):
    # TODO(b/63669786): support batching of sparse tensors
    raise TypeError("Batching of sparse tensors is not currently supported")
  # Both counts are carried as int64 scalar tensors for the batching op.
  self._batch_size = ops.convert_to_tensor(
      batch_size, name="batch_size", dtype=dtypes.int64)
  self._num_parallel_batches = ops.convert_to_tensor(
      num_parallel_batches, name="num_parallel_batches", dtype=dtypes.int64)
def testAnySparse(self):
  """Verifies `sparse.any_sparse` for dense-only and sparse-containing inputs."""
  for types, expected in (
      ((), False),
      (None, False),
      (dtypes.string, False),
      ((None, -1, dtypes.string), False),
      (sparse.SparseType(dtypes.string), True),
      ((None, sparse.SparseType(dtypes.string)), True),
      ((sparse.SparseType(dtypes.string), dtypes.string), True),
      (sparse.SparseType(dtypes.string), True),
  ):
    self.assertEqual(sparse.any_sparse(types), expected)
def testAnySparse(self):
  """Checks `sparse.any_sparse` on structures of Tensor/SparseTensor classes."""
  cases = (
      ((), False),
      (ops.Tensor, False),
      (ops.Tensor, False),
      ((ops.Tensor, ops.Tensor), False),
      ((ops.Tensor, sparse_tensor.SparseTensor), True),
      ((sparse_tensor.SparseTensor, sparse_tensor.SparseTensor), True),
      ((sparse_tensor.SparseTensor, ops.Tensor), True),
      (sparse_tensor.SparseTensor, True),
  )
  for classes, expected in cases:
    self.assertEqual(sparse.any_sparse(classes), expected)
def testAnySparse(self):
  """Checks `sparse.any_sparse` against a table of class structures."""
  test_cases = (
      {"classes": (), "expected": False},
      {"classes": ops.Tensor, "expected": False},
      {"classes": ops.Tensor, "expected": False},
      {"classes": (ops.Tensor, ops.Tensor), "expected": False},
      {"classes": (ops.Tensor, sparse_tensor.SparseTensor), "expected": True},
      {
          "classes": (sparse_tensor.SparseTensor,
                      sparse_tensor.SparseTensor),
          "expected": True
      },
      {"classes": (sparse_tensor.SparseTensor, ops.Tensor), "expected": True},
      {"classes": sparse_tensor.SparseTensor, "expected": True},
  )
  for case in test_cases:
    actual = sparse.any_sparse(case["classes"])
    self.assertEqual(actual, case["expected"])
def __init__(self, input_dataset, batch_size, padded_shapes, padding_values):
  """Initialize `PrependFromQueueAndPaddedBatchDataset`."""
  super(_PrependFromQueueAndPaddedBatchDataset, self).__init__()
  if sparse.any_sparse(input_dataset.output_classes):
    raise TypeError(
        "Batching of padded sparse tensors is not currently supported")
  self._input_dataset = input_dataset
  self._batch_size = ops.convert_to_tensor(
      batch_size, dtype=dtypes.int64, name="batch_size")
  # pylint: disable=protected-access
  shape_to_tensor = dataset_ops._partial_shape_to_tensor
  if padded_shapes is None:
    # Fall back to the input dataset's own (possibly partial) shapes.
    self._padded_shapes = nest.map_structure(shape_to_tensor,
                                             input_dataset.output_shapes)
  else:
    self._padded_shapes = nest.map_structure_up_to(
        input_dataset.output_shapes, shape_to_tensor, padded_shapes)
  effective_padding_values = (
      dataset_ops._default_padding(input_dataset)
      if padding_values is None else padding_values)
  self._padding_values = nest.map_structure_up_to(
      input_dataset.output_shapes, dataset_ops._padding_value_to_tensor,
      effective_padding_values, input_dataset.output_types)
def _apply_fn(dataset):
  """Function from `Dataset` to `Dataset` that applies the transformation."""
  # NOTE(review): sibling call sites pass `output_classes` to `any_sparse`;
  # confirm that `output_types` is intended here.
  if sparse.any_sparse(dataset.output_types):
    # TODO(b/63669786): support batching of sparse tensors
    raise TypeError("Batching of sparse tensors is not currently supported")
  return _MapAndBatchDataset(dataset, map_func, batch_size,
                             num_parallel_batches)
def testAnySparse(self, classes_fn, expected):
  """Parameterized check that `sparse.any_sparse` returns `expected`."""
  self.assertEqual(sparse.any_sparse(classes_fn()), expected)