def test_non_liftable_combine(self):
    """Run the combine scenario with combiner lifting turned off.

    The pipeline is built with this test's runner and the
    ``--allow_unsafe_triggers`` flag; afterwards the teardown check runs.
    """
    unsafe_trigger_options = PipelineOptions(flags=['--allow_unsafe_triggers'])
    pipeline = TestPipeline(
        runner=self.runner(), options=unsafe_trigger_options)
    run_combine(pipeline, lift_combiners=False)
    self._assert_teardown_called()
Пример #2
0
    def test_compute_top_sessions(self):
        """Feed ``self.EDITS`` through ComputeTopSessions(1.0).

        The resulting PCollection must equal ``self.EXPECTED``.
        """
        with TestPipeline() as pipeline:
            top_sessions = (
                pipeline
                | beam.Create(self.EDITS)
                | top_wikipedia_sessions.ComputeTopSessions(1.0))
            assert_that(top_sessions, equal_to(self.EXPECTED))
Пример #3
0
 def test_forwards_batch_args(self):
     """Run 100 integers through RunInference and expect them back unchanged.

     The model handler used is FakeModelHandlerNeedsBigBatch.
     """
     inputs = list(range(100))
     with TestPipeline() as p:
         inferences = (
             p
             | 'start' >> beam.Create(inputs)
             | base.RunInference(FakeModelHandlerNeedsBigBatch()))
         assert_that(inferences, equal_to(inputs), label='assert:inferences')
Пример #4
0
 def test_singleton(self):
   """ApproximateQuantiles.Globally(5) over the lone value 389.

   The expected output is a single list holding 389 five times.
   """
   with TestPipeline() as pipeline:
     single_value = [389]
     quantiles = (
         pipeline
         | Create(single_value)
         | beam.ApproximateQuantiles.Globally(5))
     assert_that(quantiles, equal_to([[389] * 5]))
Пример #5
0
 def setUp(self):
   """Create the integration-test pipeline and decode its input options.

   ``self.inputOptions`` holds the JSON-decoded value of the pipeline's
   'input_options' option.
   """
   pipeline = TestPipeline(is_integration_test=True)
   self.pipeline = pipeline
   raw_input_options = pipeline.get_option('input_options')
   self.inputOptions = json.loads(raw_input_options)
Пример #6
0
 def test_combine_globally_with_default(self):
     """CombineGlobally(sum) over an empty Create is expected to emit [0]."""
     with TestPipeline() as pipeline:
         total = pipeline | Create([]) | CombineGlobally(sum)
         assert_that(total, equal_to([0]))
Пример #7
0
 def test_globally_empty(self):
     """Latest.Globally() over an empty input is expected to emit [None]."""
     no_elements = []
     with TestPipeline() as pipeline:
         latest = (
             pipeline
             | Create(no_elements)
             | Map(lambda elem: elem)
             | combine.Latest.Globally())
         assert_that(latest, equal_to([None]))
Пример #8
0
 def test_tostring_iterables_with_delimeter(self):
     """ToString.Iterables("\\t") joins each tuple's items with a tab."""
     with TestPipeline() as pipeline:
         words = [("one", "two", "three"), ("four", "five", "six")]
         created = pipeline | beam.Create(words)
         joined = created | util.ToString.Iterables("\t")
         expected = ["one\ttwo\tthree", "four\tfive\tsix"]
         assert_that(joined, equal_to(expected))
Пример #9
0
 def test_tostring_kvs(self):
     """ToString.Kvs() with no argument renders each pair as 'key,value'."""
     with TestPipeline() as pipeline:
         pairs = pipeline | beam.Create([("one", 1), ("two", 2)])
         rendered = pairs | util.ToString.Kvs()
         assert_that(rendered, equal_to(["one,1", "two,2"]))
Пример #10
0
    def test_tostring_elements(self):
        """ToString.Element() turns each integer into its string form."""
        with TestPipeline() as pipeline:
            numbers = pipeline | beam.Create([1, 1, 2, 3])
            rendered = numbers | util.ToString.Element()
            assert_that(rendered, equal_to(["1", "1", "2", "3"]))
Пример #11
0
 def test_tostring_iterables(self):
     """ToString.Iterables() with no argument joins tuple items with commas."""
     with TestPipeline() as pipeline:
         words = [("one", "two", "three"), ("four", "five", "six")]
         joined = pipeline | beam.Create(words) | util.ToString.Iterables()
         assert_that(joined, equal_to(["one,two,three", "four,five,six"]))
Пример #12
0
 def test_callable_k(self):
     """WithKeys with a callable keys each element by the callable's result.

     Elements come from ``self.l``; each is expected to be paired with its
     square as the key.
     """
     with TestPipeline() as p:
         pc = p | beam.Create(self.l)
         with_keys = pc | util.WithKeys(lambda x: x * x)
         # The assertion must be attached before the ``with`` block exits:
         # leaving the block runs the pipeline, so an assert_that added
         # afterwards would never be executed.
         assert_that(with_keys, equal_to([(1, 1), (4, 2), (9, 3)]))
Пример #13
0
 def test_reshuffle_contents_unchanged(self):
     """Reshuffle is expected to emit exactly the elements it was given."""
     pairs = [(1, 1), (2, 1), (3, 1), (1, 2), (2, 2), (1, 3)]
     pipeline = TestPipeline()
     reshuffled = pipeline | beam.Create(pairs) | beam.Reshuffle()
     assert_that(reshuffled, equal_to(pairs))
     # The pipeline is run explicitly; no context manager is used here.
     pipeline.run()
 def test_combining_value_state(self):
     """Run the ParDo scenario on this test's runner, then check teardown."""
     pipeline = TestPipeline(runner=self.runner())
     run_pardo(pipeline)
     self._assert_teardown_called()
Пример #15
0
 def test_create_transform(self):
     """Create(range(10)) is expected to emit the elements of range(10)."""
     with TestPipeline() as pipeline:
         created = pipeline | Create(range(10))
         assert_that(created, equal_to(range(10)))
Пример #16
0
 def test_tostring_kvs_delimeter(self):
     """ToString.Kvs("\\t") renders each pair as key, tab, value."""
     with TestPipeline() as pipeline:
         pairs = pipeline | beam.Create([("one", 1), ("two", 2)])
         rendered = pairs | util.ToString.Kvs("\t")
         assert_that(rendered, equal_to(["one\t1", "two\t2"]))
Пример #17
0
 def test_empty_global_top(self):
     """Top.Largest(10) over an empty input is expected to emit one empty list."""
     with TestPipeline() as pipeline:
         largest = pipeline | beam.Create([]) | combine.Top.Largest(10)
         assert_that(largest, equal_to([[]]))
Пример #18
0
 def test_leader_board_users(self):
     """CalculateUserScores(allowed_lateness=120) on the test data.

     The pipeline is built with ``--allow_unsafe_triggers`` and the output
     is expected to be empty.
     """
     unsafe_trigger_options = PipelineOptions(flags=['--allow_unsafe_triggers'])
     with TestPipeline(options=unsafe_trigger_options) as pipeline:
         events = self.create_data(pipeline)
         user_scores = events | leader_board.CalculateUserScores(
             allowed_lateness=120)
         assert_that(user_scores, equal_to([]))
Пример #19
0
 def test_combine_globally_without_default(self):
     """CombineGlobally(sum).without_defaults() over empty input emits nothing."""
     with TestPipeline() as pipeline:
         empty_input = pipeline | Create([])
         combined = empty_input | CombineGlobally(sum).without_defaults()
         assert_that(combined, equal_to([]))
Пример #20
0
 def test_create_singleton_pcollection(self):
   """Create over a one-element list whose element is itself a list."""
   nested = [[1, 2, 3]]
   with TestPipeline() as p:
     created = p | 'label' >> Create(nested)
     assert_that(created, equal_to([[1, 2, 3]]))
Пример #21
0
 def test_per_key_empty(self):
     """Latest.PerKey() over an empty input is expected to emit nothing."""
     no_elements = []
     with TestPipeline() as pipeline:
         latest = (
             pipeline
             | Create(no_elements)
             | Map(lambda elem: elem)
             | combine.Latest.PerKey())
         assert_that(latest, equal_to([]))
Пример #22
0
 def test_fake_read(self):
   """Read from FakeSource([1, 2, 3]) is expected to emit 1, 2 and 3."""
   with TestPipeline() as p:
     source = FakeSource([1, 2, 3])
     read_values = p | 'read' >> Read(source)
     assert_that(read_values, equal_to([1, 2, 3]))
Пример #23
0
 def test_log_distribution(self):
   """ApproximateQuantiles.Globally(5) over truncated natural logs of 1..999.

   The expected quantiles are [0, 5, 6, 6, 6].
   """
   with TestPipeline() as pipeline:
     truncated_logs = [int(math.log(value)) for value in range(1, 1000)]
     quantiles = (
         pipeline
         | Create(truncated_logs)
         | beam.ApproximateQuantiles.Globally(5))
     assert_that(quantiles, equal_to([[0, 5, 6, 6, 6]]))
Пример #24
0
 def test_apply_custom_transform(self):
   """Apply PipelineTest.CustomTransform to [1, 2, 3]; expect [2, 3, 4]."""
   with TestPipeline() as p:
     transformed = (
         p
         | 'pcoll' >> Create([1, 2, 3])
         | PipelineTest.CustomTransform())
     assert_that(transformed, equal_to([2, 3, 4]))
Пример #25
0
def test_output():
    """Feed a timed stream of "event" elements through ``apply_transform``.

    The TestStream emits one "event" per second for seconds 1 through 20 of
    2021-03-01 00:00 UTC, advancing the watermark at seconds 5, 10, 15, 20
    and 25 and finally to infinity. The results are compared per window
    against five 5-second windows starting at seconds 0, 5, 10, 15 and 20,
    with expected values [4], [5], [5], [5] and [1].
    NOTE(review): these look like per-window element counts -- confirm
    against apply_transform, which is defined elsewhere.
    """
    def at_second(second):
        # POSIX timestamp of 2021-03-01 00:00:<second> UTC.
        return datetime(
            2021, 3, 1, 0, 0, second, 0, tzinfo=pytz.UTC).timestamp()

    options = PipelineOptions()
    options.view_as(StandardOptions).streaming = True
    test_pipeline = TestPipeline(options=options)

    stream = TestStream()

    # Seconds 1-4 are emitted before the first watermark advance.
    for second in range(1, 5):
        stream = stream.add_elements(
            elements=["event"], event_timestamp=at_second(second))

    # Each of these watermark advances is followed by five events, one per
    # second, starting at the watermark itself.
    for watermark_second in (5, 10, 15):
        stream = stream.advance_watermark_to(at_second(watermark_second))
        for second in range(watermark_second, watermark_second + 5):
            stream = stream.add_elements(
                elements=["event"], event_timestamp=at_second(second))

    # After the advance to second 20 only a single event is emitted.
    stream = stream.advance_watermark_to(at_second(20))
    stream = stream.add_elements(
        elements=["event"], event_timestamp=at_second(20))
    stream = stream.advance_watermark_to(at_second(25))
    stream = stream.advance_watermark_to_infinity()

    events = test_pipeline | stream

    results = apply_transform(events)

    # Maps each window's start second to the single value expected in it.
    expected_by_start = ((0, 4), (5, 5), (10, 5), (15, 5), (20, 1))
    answers = {
        window.IntervalWindow(at_second(start), at_second(start + 5)):
        [expected]
        for start, expected in expected_by_start
    }

    assert_that(results,
                equal_to_per_window(answers),
                label='count assert per window')

    test_pipeline.run()
0
  def test_ptransform_override_multiple_outputs(self):
    """Checks replacing a multi-output ParDo with a multi-output composite.

    A ParDo labelled 'MyMultiOutput' is replaced, via a PTransformOverride,
    by MultiOutputComposite. The test asserts both that elements flow
    through the replacement and that the pipeline graph contains the
    replacement's PCollections and none of the replaced ones.
    """
    class MultiOutputComposite(PTransform):
      # Replacement composite: doubles every element, routes it to a
      # 'numbers' or 'letters' output, then multiplies letters by 3 and
      # numbers by 5.
      def __init__(self):
        self.output_tags = set()

      def expand(self, pcoll):
        def mux_input(x):
          # Double first, then tag ints as 'numbers' and the rest as 'letters'.
          x = x * 2
          if isinstance(x, int):
            yield TaggedOutput('numbers', x)
          else:
            yield TaggedOutput('letters', x)

        multi = pcoll | 'MyReplacement' >> beam.ParDo(mux_input).with_outputs()
        letters = multi.letters | 'LettersComposite' >> beam.Map(
            lambda x: x * 3)
        numbers = multi.numbers | 'NumbersComposite' >> beam.Map(
            lambda x: x * 5)

        # Outputs are returned as a dict keyed by tag name.
        return {
            'letters': letters,
            'numbers': numbers,
        }

    class MultiOutputOverride(PTransformOverride):
      # Matches the transform labelled 'MyMultiOutput' and swaps in the
      # composite above.
      def matches(self, applied_ptransform):
        return applied_ptransform.full_label == 'MyMultiOutput'

      def get_replacement_transform(self, ptransform):
        return MultiOutputComposite()

    def mux_input(x):
      # Original (to-be-replaced) splitter: tags elements without changing
      # them.
      if isinstance(x, int):
        yield TaggedOutput('numbers', x)
      else:
        yield TaggedOutput('letters', x)

    with TestPipeline() as p:
      multi = (
          p
          | beam.Create([1, 2, 3, 'a', 'b', 'c'])
          | 'MyMultiOutput' >> beam.ParDo(mux_input).with_outputs())
      letters = multi.letters | 'MyLetters' >> beam.Map(lambda x: x)
      numbers = multi.numbers | 'MyNumbers' >> beam.Map(lambda x: x)

      # Assert that the PCollection replacement worked correctly and that
      # elements are flowing through. The replacement transform first
      # multiplies by 2 then the leaf nodes inside the composite multiply by
      # an additional 3 and 5. Use prime numbers to ensure that each
      # transform is getting executed once.
      assert_that(
          letters,
          equal_to(['a' * 2 * 3, 'b' * 2 * 3, 'c' * 2 * 3]),
          label='assert letters')
      assert_that(
          numbers,
          equal_to([1 * 2 * 5, 2 * 2 * 5, 3 * 2 * 5]),
          label='assert numbers')

      # Do the replacement and run the element assertions.
      p.replace_all([MultiOutputOverride()])

    # The following checks the graph to make sure the replacement occurred.
    visitor = PipelineTest.Visitor(visited=[])
    p.visit(visitor)
    pcollections = visitor.visited
    composites = visitor.enter_composite

    # Assert the replacement is in the composite list and retrieve the
    # AppliedPTransform.
    self.assertIn(
        MultiOutputComposite, [t.transform.__class__ for t in composites])
    multi_output_composite = list(
        filter(
            lambda t: t.transform.__class__ == MultiOutputComposite,
            composites))[0]

    # Assert that all of the replacement PCollections are in the graph.
    for output in multi_output_composite.outputs.values():
      self.assertIn(output, pcollections)

    # Assert that all of the "old"/replaced PCollections are not in the graph.
    self.assertNotIn(multi[None], visitor.visited)
    self.assertNotIn(multi.letters, visitor.visited)
    self.assertNotIn(multi.numbers, visitor.visited)
Пример #27
0
    def test_multiple_outputs_with_watermark_advancement(self):
        """Tests that the TestStream can independently control output watermarks."""

        # The 'numbers' watermark is deliberately moved to 20 before the
        # 'letters' watermark is moved to 5, to check that watermark
        # advancement is tracked per PCollection.
        #
        # Two PCollections are produced, (a, b, c) and (1, 2, 3), emitted at
        # different times so that they land in different windows. The
        # watermark advancement is verified through those windows: if the
        # watermark did not advance, the windows would be [-inf, -inf); if
        # the two outputs did not advance separately, both PCollections would
        # be windowed in [15, 30).
        letters_elements = [
            TimestampedValue(value, timestamp)
            for value, timestamp in (('a', 6), ('b', 7), ('c', 8))
        ]
        numbers_elements = [
            TimestampedValue(value, timestamp)
            for value, timestamp in (('1', 21), ('2', 22), ('3', 23))
        ]

        test_stream = (
            TestStream()
            .advance_watermark_to(0, tag='letters')
            .advance_watermark_to(0, tag='numbers')
            .advance_watermark_to(20, tag='numbers')
            .advance_watermark_to(5, tag='letters')
            .add_elements(letters_elements, tag='letters')
            .advance_watermark_to(10, tag='letters')
            .add_elements(numbers_elements, tag='numbers')
            .advance_watermark_to(30, tag='numbers'))

        options = StandardOptions(streaming=True)
        p = TestPipeline(options=options)

        main = p | test_stream

        def window_and_group(tagged_output, noun):
            # Window into 15-unit fixed windows with an AfterWatermark trigger
            # that has an early firing after one element, discarding fired
            # panes, then key everything by 'k' and group. The early firing
            # lets the test check that the watermark advances properly and
            # that elements are emitted in the correct window.
            return (
                tagged_output
                | (noun + ' windows') >> beam.WindowInto(
                    FixedWindows(15),
                    trigger=trigger.AfterWatermark(
                        early=trigger.AfterCount(1)),
                    accumulation_mode=trigger.AccumulationMode.DISCARDING)
                | (noun + ' with key') >> beam.Map(lambda x: ('k', x))
                | (noun + ' gbk') >> beam.GroupByKey())

        letters = window_and_group(main['letters'], 'letter')
        numbers = window_and_group(main['numbers'], 'number')

        # The letters were emitted while the watermark was at 5, so they are
        # expected in the [0, 15) window. The early trigger is there so that
        # the empty ON_TIME pane is emitted as well: it carries no data
        # because the early firing releases the elements before the end of
        # the window and the discarding accumulation mode drops them once
        # fired.
        expected_letters = {
            window.IntervalWindow(0, 15): [
                ('k', ['a', 'b', 'c']),
                ('k', []),
            ],
        }

        # Likewise for the numbers, which were emitted at watermark = 20 and
        # therefore belong to the [15, 30) window.
        expected_numbers = {
            window.IntervalWindow(15, 30): [
                ('k', ['1', '2', '3']),
                ('k', []),
            ],
        }

        assert_that(letters,
                    equal_to_per_window(expected_letters),
                    label='letters assert per window')
        assert_that(numbers,
                    equal_to_per_window(expected_numbers),
                    label='numbers assert per window')

        p.run()
Пример #28
0
 def test_timestamp_param_map(self):
   """A Map taking DoFn.TimestampParam as a default argument.

   Both elements created from [1, 2] are expected to map to MIN_TIMESTAMP.
   """
   with TestPipeline() as pipeline:
     timestamps = (
         pipeline
         | Create([1, 2])
         | beam.Map(lambda _, t=DoFn.TimestampParam: t))
     assert_that(timestamps, equal_to([MIN_TIMESTAMP, MIN_TIMESTAMP]))
Пример #29
0
 def test_constant_k(self):
   """WithKeys with a constant pairs every element with that same key.

   Elements come from ``self.l``; each is expected to be keyed by 'k'.
   """
   with TestPipeline() as p:
     pc = p | beam.Create(self.l)
     with_keys = pc | util.WithKeys('k')
     # The assertion must be attached before the ``with`` block exits:
     # leaving the block runs the pipeline, so an assert_that added
     # afterwards would never be executed.
     assert_that(with_keys, equal_to([('k', 1), ('k', 2), ('k', 3)]))
 def test_combine(self):
     """Run the combine scenario on this test's runner, then check teardown."""
     pipeline = TestPipeline(runner=self.runner())
     run_combine(pipeline)
     self._assert_teardown_called()