def test_trim_and_pad_dataset(self):
    """Check `utils.trim_and_pad_dataset` against a hand-written expectation.

    Two examples are fed in with `feature_lengths` given only for "inputs"
    and "targets". The expected output has short sequences right-padded
    with 0, over-long sequences cut to the requested length, and the
    "idx" feature (which has no requested length) left as it was.
    """
    raw_examples = [
        {
            "inputs": [7, 8, 5, 6, 1],
            "targets": [3, 9, 1],
            "idx": [0],
        },
        {
            "inputs": [8, 4, 9, 3, 5, 7, 9, 1],
            "targets": [4, 1],
            "idx": [1, 2],
        },
    ]
    source_ds = create_default_dataset(
        raw_examples, feature_names=("inputs", "targets", "idx"))

    requested_lengths = {"inputs": 7, "targets": 3}
    result_ds = utils.trim_and_pad_dataset(
        source_ds, feature_lengths=requested_lengths)

    first_expected = {
        "inputs": [7, 8, 5, 6, 1, 0, 0],
        "targets": [3, 9, 1],
        "idx": [0],
    }
    second_expected = {
        # EOS is trimmed
        "inputs": [8, 4, 9, 3, 5, 7, 9],
        "targets": [4, 1, 0],
        "idx": [1, 2],
    }
    expected_dtypes = {"inputs": tf.int32, "targets": tf.int32}

    assert_dataset(
        result_ds, [first_expected, second_expected], expected_dtypes)
def _pack_or_pad(self,
                 ds: tf.data.Dataset,
                 packed_lengths: Mapping[str, int]) -> tf.data.Dataset:
    """Bring every feature in `ds` to the lengths in `packed_lengths`.

    Args:
      ds: the dataset to process.
      packed_lengths: mapping from feature name to its target length.

    Returns:
      The result of `utils.trim_and_pack_dataset` when `self.pack` is
      truthy, otherwise the result of `utils.trim_and_pad_dataset`.
    """
    if not self.pack:
        # Packing is disabled: only trim/pad each example on its own.
        return utils.trim_and_pad_dataset(ds, packed_lengths)

    # Packing is enabled; the custom-ops flag is forwarded untouched.
    return utils.trim_and_pack_dataset(
        ds, packed_lengths, self._use_custom_packing_ops)