def testPacking(self):
    """Compare pack_examples output against the reference packing.

    Packs the output of ``example_generator()`` with ``has_inputs=True``,
    ``packed_length=5``, ``queue_size=2`` and ``spacing=0``, then checks each
    packed example against the corresponding entry produced by
    ``reference_packing(trim_right)``: the key sets must match, and every
    value must be equal under the reference's keys.
    """
    packed = generator_utils.pack_examples(
        example_generator(),
        has_inputs=True,
        packed_length=5,
        queue_size=2,
        spacing=0)
    expected_examples = reference_packing(trim_right)
    # NOTE: zip() stops at the shorter iterable, so a difference in the
    # number of examples is not detected by this loop.
    for actual, expected in zip(packed, expected_examples):
        self.assertAllEqual(set(actual.keys()), set(expected.keys()))
        for key, expected_value in expected.items():
            self.assertAllEqual(actual[key], expected_value)
def _maybe_pack_examples(self, generator):
    """Helper to generate_data(): optionally pack the generated examples.

    Args:
        generator: the example generator to (possibly) wrap.

    Returns:
        ``generator`` itself when ``self.packed_length`` is falsy; otherwise
        the result of ``generator_utils.pack_examples`` called with
        ``self.has_inputs`` and ``self.packed_length``, with
        ``chop_long_sequences`` enabled only when ``self.has_inputs`` is
        falsy.
    """
    if not self.packed_length:
        # Packing is disabled: hand the generator back untouched.
        return generator
    return generator_utils.pack_examples(
        generator,
        self.has_inputs,
        self.packed_length,
        chop_long_sequences=not self.has_inputs)
def _maybe_pack_examples(self, generator):
    """Wrap ``generator`` with the packer when ``self.packed_length`` is set.

    Args:
        generator: the example generator to (possibly) wrap.

    Returns:
        The output of ``generator_utils.pack_examples`` (called with
        ``self.has_inputs``, ``self.packed_length`` and
        ``chop_long_sequences=not self.has_inputs``) when
        ``self.packed_length`` is truthy; otherwise ``generator`` unchanged.
    """
    if self.packed_length:
        packed = generator_utils.pack_examples(
            generator,
            self.has_inputs,
            self.packed_length,
            chop_long_sequences=not self.has_inputs)
        return packed
    # No packed length configured: nothing to wrap.
    return generator
def process(self, source_target_list):
    """Yield serialized examples built from (source, target) text pairs.

    Each pair is converted with ``self._make_spm_example_dict``. When
    ``self._packed_examples`` is truthy the resulting dicts are passed
    through ``pack_examples`` with ``has_inputs=True`` and
    ``packed_length=self._packed_length``. Every dict is then padded with
    ``self._pad_example_dict``; a ``ValueError`` from padding increments the
    ``'err_too_long'`` counter and the example is dropped.

    Args:
        source_target_list: iterable of ``(source_text, target_text)`` pairs.

    Yields:
        ``to_example(padded_dict).SerializeToString()`` for each example that
        was padded successfully.
    """
    examples = [
        self._make_spm_example_dict(src, tgt)
        for src, tgt in source_target_list
    ]
    if self._packed_examples:
        examples = pack_examples(
            examples, has_inputs=True, packed_length=self._packed_length)
    for example in examples:
        try:
            padded = self._pad_example_dict(example)
        except ValueError:
            # Padding rejected the example: count it and move on.
            metrics.Metrics.counter('err_too_long', 'count').inc()
            continue
        # Kept outside the try so errors raised here are never swallowed.
        yield to_example(padded).SerializeToString()