Example #1
    def test_top_shorthands(self):
        with TestPipeline() as pipeline:

            pcoll = pipeline | 'start' >> Create(
                [6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
            result_top = pcoll | 'top' >> beam.CombineGlobally(
                combine.Largest(5))
            result_bot = pcoll | 'bot' >> beam.CombineGlobally(
                combine.Smallest(4))
            assert_that(result_top,
                        equal_to([[9, 6, 6, 5, 3]]),
                        label='assert:top')
            assert_that(result_bot,
                        equal_to([[0, 1, 1, 1]]),
                        label='assert:bot')

            pcoll = pipeline | 'start-perkey' >> Create(
                [('a', x) for x in [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]])
            result_ktop = pcoll | 'top-perkey' >> beam.CombinePerKey(
                combine.Largest(5))
            result_kbot = pcoll | 'bot-perkey' >> beam.CombinePerKey(
                combine.Smallest(4))
            assert_that(result_ktop,
                        equal_to([('a', [9, 6, 6, 5, 3])]),
                        label='ktop')
            assert_that(result_kbot,
                        equal_to([('a', [0, 1, 1, 1])]),
                        label='kbot')
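The snippet above is a method of a test class and omits its imports. As a rough, self-contained sketch (assuming apache_beam is installed and that the combiners.Largest and combiners.Smallest CombineFns used throughout these examples are available in your Beam version), the same shorthands can be run directly:

import apache_beam as beam
from apache_beam.transforms import combiners

with beam.Pipeline() as pipeline:
  values = pipeline | 'create' >> beam.Create([6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
  # Largest(3) emits a single list of the three largest elements, sorted in
  # descending order; expected output: [9, 6, 6].
  _ = (values
       | 'largest' >> beam.CombineGlobally(combiners.Largest(3))
       | 'print-top' >> beam.Map(print))
  # Smallest(3) is the mirror image; expected output: [0, 1, 1].
  _ = (values
       | 'smallest' >> beam.CombineGlobally(combiners.Smallest(3))
       | 'print-bot' >> beam.Map(print))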
Example #2
  def test_combine_per_key_top_display_data(self):
    def individual_test_per_key_dd(combineFn):
      transform = beam.CombinePerKey(combineFn)
      dd = DisplayData.create_from(transform)
      expected_items = [
          DisplayDataItemMatcher('combine_fn', combineFn.__class__),
          DisplayDataItemMatcher('n', combineFn._n),
          DisplayDataItemMatcher('compare', combineFn._compare.__name__)]
      hc.assert_that(dd.items, hc.contains_inanyorder(*expected_items))

    individual_test_per_key_dd(combine.Largest(5))
    individual_test_per_key_dd(combine.Smallest(3))
    individual_test_per_key_dd(combine.TopCombineFn(8))
    individual_test_per_key_dd(combine.Largest(5))
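To see what those matchers are comparing against, the display data of the wrapped transform can also be dumped directly. A hedged sketch (the keys such as 'n' and 'compare' follow the expectations in the test above; exact item attributes may differ between Beam versions):

import apache_beam as beam
from apache_beam.transforms import combiners
from apache_beam.transforms.display import DisplayData

# Build the same transform the test wraps and list its display data items.
dd = DisplayData.create_from(beam.CombinePerKey(combiners.Largest(5)))
for item in dd.items:
  print(item.key, item.value)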
Example #3
 def expand(self, pcoll):
     # These CombinePerKey stages will be packed if and only if
     # translations.pack_combiners is enabled in the TestPipeline runner.
     assert_that(pcoll | 'mean-perkey' >> combiners.Mean.PerKey(),
                 equal_to([('a', 3.4)]),
                 label='assert-mean-perkey')
     assert_that(pcoll | 'count-perkey' >> combiners.Count.PerKey(),
                 equal_to([('a', 10)]),
                 label='assert-count-perkey')
     assert_that(pcoll
                 | 'largest-perkey' >> core.CombinePerKey(
                     combiners.Largest(1)),
                 equal_to([('a', [9])]),
                 label='assert-largest-perkey')
Example #4
 def expand(self, pcoll):
   # These CombineGlobally stages will be packed if and only if
   # translations.eliminate_common_key_with_void and
   # translations.pack_combiners are enabled in the TestPipeline runner.
   assert_that(
       pcoll | 'mean-globally' >> combiners.Mean.Globally(),
       equal_to([3.4]),
       label='assert-mean-globally')
   assert_that(
       pcoll | 'count-globally' >> combiners.Count.Globally(),
       equal_to([10]),
       label='assert-count-globally')
   assert_that(
       pcoll
       | 'largest-globally' >> core.CombineGlobally(combiners.Largest(1)),
       equal_to([[9]]),
       label='assert-largest-globally')
Example #5
 def test_optimize_multiple_combine_globally(self):
   pipeline = beam.Pipeline()
   vals = [6, 3, 1, 1, 9, 1, 5, 2, 0, 6]
   pcoll = pipeline | Create(vals)
   _ = pcoll | 'mean-globally' >> combiners.Mean.Globally()
   _ = pcoll | 'count-globally' >> combiners.Count.Globally()
   _ = pcoll | 'largest-globally' >> core.CombineGlobally(combiners.Largest(1))
   pipeline_proto = pipeline.to_runner_api()
   optimized_pipeline_proto = translations.optimize_pipeline(
       pipeline_proto, [
           translations.pack_combiners,
       ],
       known_runner_urns=frozenset(),
       partial=True)
   # Tests that Pipeline.from_runner_api() does not throw an exception.
   runner = runners.DirectRunner()
   beam.Pipeline.from_runner_api(
       optimized_pipeline_proto, runner, pipeline_options.PipelineOptions())
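The comment in Example #4 notes that packing CombineGlobally stages additionally requires translations.eliminate_common_key_with_void. A hedged variant of the optimization call above with both phases enabled (same imports and pipeline_proto as in Example #5; phase availability may vary across Beam versions):

optimized_pipeline_proto = translations.optimize_pipeline(
    pipeline_proto, [
        translations.eliminate_common_key_with_void,
        translations.pack_combiners,
    ],
    known_runner_urns=frozenset(),
    partial=True)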
Example #6
 def expand(self, pcoll):
   _ = pcoll | 'mean-perkey' >> combiners.Mean.PerKey()
   _ = pcoll | 'count-perkey' >> combiners.Count.PerKey()
   _ = pcoll | 'largest-perkey' >> core.CombinePerKey(combiners.Largest(1))
Example #7
 def expand(self, pcoll):
   _ = pcoll | 'mean-globally' >> combiners.Mean.Globally()
   _ = pcoll | 'count-globally' >> combiners.Count.Globally()
   _ = pcoll | 'largest-globally' >> core.CombineGlobally(
       combiners.Largest(1))
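Examples #3, #4, #6 and #7 show only the expand() method of a composite transform. A minimal hedged sketch of how such a method is typically hosted and applied (the class name MultipleCombines is illustrative, not taken from the original listings):

import apache_beam as beam
from apache_beam.transforms import combiners, core


class MultipleCombines(beam.PTransform):
  def expand(self, pcoll):
    # Each branch is applied for its side effects; the results are discarded,
    # mirroring the expand() bodies shown above.
    _ = pcoll | 'mean-globally' >> combiners.Mean.Globally()
    _ = pcoll | 'count-globally' >> combiners.Count.Globally()
    _ = pcoll | 'largest-globally' >> core.CombineGlobally(
        combiners.Largest(1))


with beam.Pipeline() as pipeline:
  _ = (pipeline
       | beam.Create([6, 3, 1, 1, 9, 1, 5, 2, 0, 6])
       | MultipleCombines())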