def test_pack_global_combiners(self):
  """Global combines nested in a composite are packed into one CombinePerKey.

  Also checks that the intermediate KeyWithVoid stage is deduplicated and
  attributed to the enclosing composite rather than either sub-combine.
  """

  class MultipleCombines(beam.PTransform):
    def expand(self, pcoll):
      # Two independent global combines over the same input; the
      # pack_combiners phase should fuse them.
      _ = pcoll | 'mean-globally' >> combiners.Mean.Globally()
      _ = pcoll | 'count-globally' >> combiners.Count.Globally()

  pipeline = beam.Pipeline()
  vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
  _ = pipeline | Create(vals) | 'multiple-combines' >> MultipleCombines()
  environment = environments.DockerEnvironment.from_options(
      pipeline_options.PortableOptions(sdk_location='container'))
  pipeline_proto = pipeline.to_runner_api(default_environment=environment)
  _, stages = translations.create_and_optimize_stages(
      pipeline_proto,
      [
          translations.eliminate_common_key_with_none,
          translations.pack_combiners,
      ],
      known_runner_urns=frozenset())

  # Exactly one KeyWithVoid stage should survive deduplication, and its
  # parent should be the composite, not one of the '-globally' sub-labels.
  keyed_stages = [s for s in stages if 'KeyWithVoid' in s.name]
  self.assertEqual(len(keyed_stages), 1)
  self.assertIn('multiple-combines', keyed_stages[0].parent)
  self.assertNotIn('-globally', keyed_stages[0].parent)

  # One entry per CombinePerKey transform found in any stage; packing
  # should leave exactly one, named as a packed combine.
  packed_stages = [
      s for s in stages for t in s.transforms
      if t.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn
  ]
  self.assertEqual(len(packed_stages), 1)
  self.assertIn('/Pack', packed_stages[0].name)
  self.assertIn('multiple-combines', packed_stages[0].parent)
  self.assertNotIn('-globally', packed_stages[0].parent)
def test_pack_combiners_in_composite(self):
  """Per-key combines inside a composite are packed into one CombinePerKey.

  NOTE(review): renamed from ``test_pack_combiners`` — a later method in
  this file also defines ``test_pack_combiners``, and in Python the later
  definition shadows this one, so this test was silently never run by the
  unittest framework. The rename restores it without affecting the other
  test.
  """

  class MultipleCombines(beam.PTransform):
    def expand(self, pcoll):
      # Three independent per-key combines over the same input; the
      # pack_combiners phase should fuse them into one packed combine.
      _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
      _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
      _ = pcoll | 'largest-perkey' >> core.CombinePerKey(combiners.Largest(1))

  pipeline = beam.Pipeline()
  vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
  _ = pipeline | Create([
      ('a', x) for x in vals
  ]) | 'multiple-combines' >> MultipleCombines()
  environment = environments.DockerEnvironment.from_options(
      pipeline_options.PortableOptions(sdk_location='container'))
  pipeline_proto = pipeline.to_runner_api(default_environment=environment)
  _, stages = translations.create_and_optimize_stages(
      pipeline_proto, [translations.pack_combiners],
      known_runner_urns=frozenset())

  # Collect every stage that carries a CombinePerKey transform.
  combine_per_key_stages = []
  for stage in stages:
    for transform in stage.transforms:
      if transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn:
        combine_per_key_stages.append(stage)

  # Packing should collapse all three combines into a single stage whose
  # names mark it as packed and whose parent is the composite itself,
  # not any of the '-perkey' sub-labels.
  self.assertEqual(len(combine_per_key_stages), 1)
  self.assertIn('Packed', combine_per_key_stages[0].name)
  self.assertIn('Packed', combine_per_key_stages[0].transforms[0].unique_name)
  self.assertIn('multiple-combines', combine_per_key_stages[0].parent)
  self.assertNotIn('-perkey', combine_per_key_stages[0].parent)
def test_pack_combiners(self):
  """Two sibling per-key combines over one PCollection get packed."""
  pipeline = beam.Pipeline()
  vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
  pcoll = pipeline | 'start-perkey' >> Create([('a', x) for x in vals])
  # Two independent combines consuming the same keyed input.
  _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
  _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()

  environment = environments.DockerEnvironment.from_options(
      pipeline_options.PortableOptions(sdk_location='container'))
  pipeline_proto = pipeline.to_runner_api(default_environment=environment)
  _, stages = translations.create_and_optimize_stages(
      pipeline_proto, [translations.pack_combiners],
      known_runner_urns=frozenset())

  # One entry per CombinePerKey transform found in any stage; the
  # pack_combiners phase should have merged the two combines into
  # exactly one such stage, named as a packed combine.
  packed = [
      stage for stage in stages for xform in stage.transforms
      if xform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn
  ]
  self.assertEqual(len(packed), 1)
  self.assertIn('/Pack', packed[0].name)