Пример #1
0
    def test_pack_global_combiners(self):
        """Verify that sibling global combines are packed into a single stage.

        Mean.Globally and Count.Globally are applied to the same input inside
        one composite transform. After the ``eliminate_common_key_with_none``
        and ``pack_combiners`` phases run, exactly one 'KeyWithVoid' stage and
        exactly one CombinePerKey stage are expected, each parented under the
        composite rather than under either '-globally' transform.
        """
        class MultipleCombines(beam.PTransform):
            """Composite applying two global combines to the same input."""
            def expand(self, pcoll):
                _ = pcoll | 'mean-globally' >> combiners.Mean.Globally()
                _ = pcoll | 'count-globally' >> combiners.Count.Globally()

        pipeline = beam.Pipeline()
        numbers = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
        source = pipeline | Create(numbers)
        _ = source | 'multiple-combines' >> MultipleCombines()

        # Translate the pipeline to its portable proto form.
        portable_options = pipeline_options.PortableOptions(
            sdk_location='container')
        environment = environments.DockerEnvironment.from_options(
            portable_options)
        pipeline_proto = pipeline.to_runner_api(
            default_environment=environment)

        # Run only the two optimization phases under test.
        phases = [
            translations.eliminate_common_key_with_none,
            translations.pack_combiners,
        ]
        _, stages = translations.create_and_optimize_stages(
            pipeline_proto, phases, known_runner_urns=frozenset())

        # The shared KeyWithVoid step must hang off the composite, not off
        # either of the individual global combines.
        key_with_void_stages = [s for s in stages if 'KeyWithVoid' in s.name]
        self.assertEqual(len(key_with_void_stages), 1)
        key_with_void_parent = key_with_void_stages[0].parent
        self.assertIn('multiple-combines', key_with_void_parent)
        self.assertNotIn('-globally', key_with_void_parent)

        # A stage is collected once per CombinePerKey transform it contains.
        combine_urn = common_urns.composites.COMBINE_PER_KEY.urn
        combine_per_key_stages = [
            s for s in stages for t in s.transforms
            if t.spec.urn == combine_urn
        ]
        self.assertEqual(len(combine_per_key_stages), 1)
        packed_stage = combine_per_key_stages[0]
        self.assertIn('/Pack', packed_stage.name)
        self.assertIn('multiple-combines', packed_stage.parent)
        self.assertNotIn('-globally', packed_stage.parent)
Пример #2
0
  def test_pack_combiners(self):
    """Verify that sibling per-key combines in a composite are packed.

    Three per-key combines (mean, count, largest) are applied to the same
    keyed input inside one composite transform. After the ``pack_combiners``
    phase runs, exactly one CombinePerKey stage is expected; its name and
    its first transform's unique name contain 'Packed', and it is parented
    under the composite rather than under any '-perkey' transform.
    """
    class MultipleCombines(beam.PTransform):
      """Composite applying three per-key combines to the same input."""
      def expand(self, pcoll):
        _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
        _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
        _ = pcoll | 'largest-perkey' >> core.CombinePerKey(combiners.Largest(1))

    pipeline = beam.Pipeline()
    numbers = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    keyed_numbers = [('a', x) for x in numbers]
    source = pipeline | Create(keyed_numbers)
    _ = source | 'multiple-combines' >> MultipleCombines()

    # Translate the pipeline to its portable proto form.
    portable_options = pipeline_options.PortableOptions(
        sdk_location='container')
    environment = environments.DockerEnvironment.from_options(
        portable_options)
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)

    # Run only the combiner-packing phase.
    phases = [translations.pack_combiners]
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, phases, known_runner_urns=frozenset())

    # A stage is collected once per CombinePerKey transform it contains.
    combine_urn = common_urns.composites.COMBINE_PER_KEY.urn
    combine_per_key_stages = [
        s for s in stages for t in s.transforms if t.spec.urn == combine_urn
    ]
    self.assertEqual(len(combine_per_key_stages), 1)
    packed_stage = combine_per_key_stages[0]
    self.assertIn('Packed', packed_stage.name)
    self.assertIn('Packed', packed_stage.transforms[0].unique_name)
    self.assertIn('multiple-combines', packed_stage.parent)
    self.assertNotIn('-perkey', packed_stage.parent)
Пример #3
0
  def test_pack_combiners(self):
    """Verify that two per-key combines on one PCollection are packed.

    Mean.PerKey and Count.PerKey are applied directly to the same keyed
    PCollection. After the ``pack_combiners`` phase runs, exactly one
    CombinePerKey stage is expected and its name contains '/Pack'.
    """
    pipeline = beam.Pipeline()
    numbers = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
    keyed_numbers = [('a', x) for x in numbers]
    pcoll = pipeline | 'start-perkey' >> Create(keyed_numbers)
    _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
    _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()

    # Translate the pipeline to its portable proto form.
    portable_options = pipeline_options.PortableOptions(
        sdk_location='container')
    environment = environments.DockerEnvironment.from_options(
        portable_options)
    pipeline_proto = pipeline.to_runner_api(default_environment=environment)

    # Run only the combiner-packing phase.
    phases = [translations.pack_combiners]
    _, stages = translations.create_and_optimize_stages(
        pipeline_proto, phases, known_runner_urns=frozenset())

    # A stage is collected once per CombinePerKey transform it contains.
    combine_urn = common_urns.composites.COMBINE_PER_KEY.urn
    combine_per_key_stages = [
        s for s in stages for t in s.transforms if t.spec.urn == combine_urn
    ]
    self.assertEqual(len(combine_per_key_stages), 1)
    self.assertIn('/Pack', combine_per_key_stages[0].name)