Пример #1
0
 def test_fixed_watermark_with_early(self):
     """AfterWatermark with an early AfterCount(2) firing on FixedWindows(10).

     The same three elements are pushed through ``run_trigger_simple`` twice
     in ACCUMULATING mode.  The two runs differ only in the panes expected
     for IntervalWindow(0, 10) and in the trailing integer, which is
     forwarded positionally (presumably a grouping/bundle size -- confirm
     against ``run_trigger_simple``).
     """
     # (pane contents expected for the window, trailing positional argument)
     scenarios = (
         (('ab', 'abc'), 2),
         (('abc', 'abc'), 3),
     )
     for pane_contents, trailing_arg in scenarios:
         # Inputs are rebuilt for every run so the runs share no objects.
         elements = [(1, 'a'), (2, 'b'), (3, 'c')]
         expected_panes = {
             IntervalWindow(0, 10): [set(chars) for chars in pane_contents],
         }
         self.run_trigger_simple(
             FixedWindows(10),
             AfterWatermark(early=AfterCount(2)),
             AccumulationMode.ACCUMULATING,
             elements,
             expected_panes,
             trailing_arg)
Пример #2
0
 def test_sessions_repeatedly_after_count(self):
     """Repeatedly(AfterCount(2)) on Sessions(10) in both accumulation modes.

     The same five elements are submitted once with ACCUMULATING and once
     with DISCARDING; only the second expected pane of IntervalWindow(1, 25)
     differs between the modes.  The trailing ``1, 3`` are forwarded
     positionally to ``run_trigger_simple`` unchanged.
     """
     # (accumulation mode, pane contents expected for the merged session)
     scenarios = (
         (AccumulationMode.ACCUMULATING, ('abc', 'abcde')),
         (AccumulationMode.DISCARDING, ('abc', 'de')),
     )
     for mode, pane_contents in scenarios:
         elements = [(1, 'a'), (15, 'b'), (6, 'c'), (2, 'd'), (7, 'e')]
         expected_panes = {
             IntervalWindow(1, 25): [set(chars) for chars in pane_contents],
         }
         self.run_trigger_simple(
             Sessions(10),
             Repeatedly(AfterCount(2)),
             mode,
             elements,
             expected_panes,
             1,
             3)
Пример #3
0
 def test_fixed_after_count(self):
     """AfterCount(2) on FixedWindows(10) in ACCUMULATING mode.

     Two runs over identical input; they differ in the single pane expected
     for IntervalWindow(0, 10) and in the trailing integers handed to
     ``run_trigger_simple`` (``1, 2`` versus ``3, 4``).
     """
     # (expected pane contents, trailing positional arguments)
     scenarios = (
         ('ab', (1, 2)),
         ('abc', (3, 4)),
     )
     for pane, trailing_args in scenarios:
         elements = [(1, 'a'), (2, 'b'), (3, 'c'), (11, 'z')]
         expected_panes = {IntervalWindow(0, 10): [set(pane)]}
         self.run_trigger_simple(
             FixedWindows(10),
             AfterCount(2),
             AccumulationMode.ACCUMULATING,
             elements,
             expected_panes,
             *trailing_args)
Пример #4
0
 def test_fixed_watermark_with_early_late(self):
     """AfterWatermark with early and late AfterCount firings, DISCARDING.

     Nine elements (timestamps 0-8, values 'a'-'i') are the regular input
     and five more (timestamps 0-4, values 'v'-'z') are supplied through the
     ``late_data`` keyword.  All expected panes belong to
     IntervalWindow(0, 100).
     """
     window_fn = FixedWindows(100)
     trigger = AfterWatermark(early=AfterCount(3), late=AfterCount(2))
     regular_elements = zip(range(9), 'abcdefghi')
     late_elements = zip(range(5), 'vwxyz')
     pane_contents = (
         'abcd',
         'efgh',  # early
         'i',  # on time
         'vw',
         'xy',  # late
     )
     expected_panes = {
         IntervalWindow(0, 100): [set(chars) for chars in pane_contents],
     }
     self.run_trigger_simple(
         window_fn,
         trigger,
         AccumulationMode.DISCARDING,
         regular_elements,
         expected_panes,
         2,
         late_data=late_elements)
Пример #5
0
 def test_sessions_after_all(self):
     """AfterAll(AfterCount(n), AfterWatermark()) on Sessions(10).

     First run: n=2 with regular input only.  Second run: n=5 with three
     additional elements passed via ``late_data``.  Both runs expect a
     single pane for IntervalWindow(1, 13).
     """
     session_window = IntervalWindow(1, 13)

     # Run 1: count threshold of 2, no late data.
     small_count_trigger = AfterAll(AfterCount(2), AfterWatermark())
     self.run_trigger_simple(
         Sessions(10),
         small_count_trigger,
         AccumulationMode.ACCUMULATING,
         [(1, 'a'), (2, 'b'), (3, 'c')],
         {session_window: [set('abc')]},
         1,
         2)

     # Run 2: count threshold of 5, with late elements supplied separately.
     large_count_trigger = AfterAll(AfterCount(5), AfterWatermark())
     late_elements = [(1, 'x'), (2, 'y'), (3, 'z')]
     self.run_trigger_simple(
         Sessions(10),
         large_count_trigger,
         AccumulationMode.ACCUMULATING,
         [(1, 'a'), (2, 'b'), (3, 'c')],
         {session_window: [set('abcxy')]},
         1,
         2,
         late_data=late_elements)
Пример #6
0
 def test_fixed_after_first(self):
     """AfterFirst(AfterCount(n), AfterWatermark()) on FixedWindows(10).

     First run: n=2, expecting the pane {'a', 'b'}.  Second run: n=5 with
     three extra elements passed via ``late_data``, expecting the pane
     {'a', 'b', 'c'}.  Both panes belong to IntervalWindow(0, 10).
     """
     # (AfterCount threshold, expected pane contents, extra keyword arguments)
     scenarios = (
         (2, 'ab', {}),
         (5, 'abc', {'late_data': [(1, 'x'), (2, 'y'), (3, 'z')]}),
     )
     for threshold, pane, extra_kwargs in scenarios:
         elements = [(1, 'a'), (2, 'b'), (3, 'c')]
         expected_panes = {IntervalWindow(0, 10): [set(pane)]}
         # An empty extra_kwargs makes the call identical to passing no
         # keyword arguments at all.
         self.run_trigger_simple(
             FixedWindows(10),
             AfterFirst(AfterCount(threshold), AfterWatermark()),
             AccumulationMode.ACCUMULATING,
             elements,
             expected_panes,
             1,
             2,
             **extra_kwargs)
Пример #7
0
 def test_sessions_watermark_with_early_late(self):
     """AfterWatermark with early/late AfterCount firings on Sessions(10).

     Four regular elements plus three elements supplied through
     ``late_data`` are processed in ACCUMULATING mode.  Panes are expected
     for three windows: (1, 25), (30, 40) and (1, 40).
     """
     trigger = AfterWatermark(early=AfterCount(2), late=AfterCount(1))
     regular_elements = [(1, 'a'), (15, 'b'), (7, 'c'), (30, 'd')]
     late_elements = [(1, 'x'), (2, 'y'), (21, 'z')]

     expected_panes = {}
     expected_panes[IntervalWindow(1, 25)] = [
         set('abc'),  # early
         set('abc'),  # on time
         set('abcxy'),  # late
     ]
     expected_panes[IntervalWindow(30, 40)] = [
         set('d'),  # on time
     ]
     expected_panes[IntervalWindow(1, 40)] = [
         set('abcdxyz'),  # late
     ]

     self.run_trigger_simple(
         Sessions(10),
         trigger,
         AccumulationMode.ACCUMULATING,
         regular_elements,
         expected_panes,
         2,
         late_data=late_elements)
Пример #8
0
    def test_sessions_after_each(self):
        """AfterEach(AfterCount(2), AfterCount(3)) on Sessions(10).

        The trigger is exercised bare and then wrapped in ``Repeatedly``.
        Both runs use ten elements (timestamps 0-9, values 'a'-'j') in
        ACCUMULATING mode; the wrapped run expects one additional pane, for
        IntervalWindow(0, 17).
        """
        # Run 1: the bare AfterEach trigger.
        bare_trigger = AfterEach(AfterCount(2), AfterCount(3))
        bare_expected = {
            IntervalWindow(0, 11): [set('ab')],
            IntervalWindow(0, 15): [set('abcdef')],
        }
        self.run_trigger_simple(
            Sessions(10),
            bare_trigger,
            AccumulationMode.ACCUMULATING,
            zip(range(10), 'abcdefghij'),
            bare_expected,
            2)

        # Run 2: the same trigger wrapped in Repeatedly.
        repeated_trigger = Repeatedly(AfterEach(AfterCount(2), AfterCount(3)))
        repeated_expected = {
            IntervalWindow(0, 11): [set('ab')],
            IntervalWindow(0, 15): [set('abcdef')],
            IntervalWindow(0, 17): [set('abcdefgh')],
        }
        self.run_trigger_simple(
            Sessions(10),
            repeated_trigger,
            AccumulationMode.ACCUMULATING,
            zip(range(10), 'abcdefghij'),
            repeated_expected,
            2)
Пример #9
0
 def test_sessions_after_count(self):
     """AfterCount(2) on Sessions(10) in ACCUMULATING mode.

     Seven elements are submitted and one pane is expected for each of the
     windows (1, 25), (30, 41) and (50, 60).  The trailing ``1, 2, 3`` are
     forwarded positionally to ``run_trigger_simple`` unchanged.
     """
     elements = [
         (1, 'a'), (15, 'b'), (6, 'c'),
         (30, 's'), (31, 't'),
         (50, 'z'), (50, 'y'),
     ]
     expected_panes = {
         IntervalWindow(1, 25): [set('abc')],
         IntervalWindow(30, 41): [set('st')],
         IntervalWindow(50, 60): [set('yz')],
     }
     trailing_args = (1, 2, 3)
     self.run_trigger_simple(
         Sessions(10),
         AfterCount(2),
         AccumulationMode.ACCUMULATING,
         elements,
         expected_panes,
         *trailing_args)
Пример #10
0
 def test_repeatedly_after_first(self):
     """Repeatedly(AfterFirst(AfterCount(3), AfterWatermark())), ACCUMULATING.

     Seven regular elements (timestamps 0-6, values 'a'-'g') and three
     elements supplied via ``late_data`` (timestamps 0-2, values 'x'-'z')
     are processed on FixedWindows(100).  Six panes are expected for
     IntervalWindow(0, 100), each a superset of the one before it.
     """
     trigger = Repeatedly(AfterFirst(AfterCount(3), AfterWatermark()))
     regular_elements = zip(range(7), 'abcdefg')
     late_elements = zip(range(3), 'xyz')
     # Contents of each expected pane, in firing order.
     pane_contents = (
         'abc',
         'abcdef',
         'abcdefg',
         'abcdefgx',
         'abcdefgxy',
         'abcdefgxyz',
     )
     expected_panes = {
         IntervalWindow(0, 100): [set(chars) for chars in pane_contents],
     }
     self.run_trigger_simple(
         FixedWindows(100),
         trigger,
         AccumulationMode.ACCUMULATING,
         regular_elements,
         expected_panes,
         1,
         late_data=late_elements)
Пример #11
0
 def test_after_count(self):
     """Check AfterCount(3) in DISCARDING mode on a small keyed pipeline.

     Each input number ``t`` is expanded into ``('A', t)`` and
     ``('B', t + 5)``, wrapped in a ``TimestampedValue`` whose second
     argument is the number itself, windowed into FixedWindows(10) and
     grouped by key.  Every group is then reported as
     ``('<key>-<size>', set(values))`` and compared with the expected
     results.

     The helpers below unpack their single argument in the body instead of
     using tuple parameters (``lambda (k, t): ...``), which are a syntax
     error on Python 3; ``dict.items()`` replaces the Python-2-only
     ``iteritems()``.
     """
     def to_timestamped(keyed_value):
         # Pair the (key, number) element with the number as second argument.
         k, t = keyed_value
         return TimestampedValue((k, t), t)

     def summarize_group(grouped):
         # Label each group "<key>-<number of values>" and collect its values.
         k, v = grouped
         return ('%s-%s' % (k, len(v)), set(v))

     p = Pipeline('DirectPipelineRunner')
     result = (p
               | df.Create([1, 2, 3, 4, 5, 10, 11])
               | df.FlatMap(lambda t: [('A', t), ('B', t + 5)])
               | df.Map(to_timestamped)
               | df.WindowInto(
                   FixedWindows(10),
                   trigger=AfterCount(3),
                   accumulation_mode=AccumulationMode.DISCARDING)
               | df.GroupByKey()
               | df.Map(summarize_group))
     expected = {
         'A-5': {1, 2, 3, 4, 5},
         # A-10, A-11 never emitted due to AfterCount(3) never firing.
         'B-4': {6, 7, 8, 9},
         'B-3': {10, 15, 16},
     }
     # NOTE(review): no p.run() is visible in this block -- confirm the
     # pipeline is actually executed so the assertion below gets checked.
     assert_that(result, equal_to(list(expected.items())))