def test_fixed_watermark_with_early(self): self.run_trigger_simple( FixedWindows(10), # pyformat break AfterWatermark(early=AfterCount(2)), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c')], {IntervalWindow(0, 10): [set('ab'), set('abc')]}, 2) self.run_trigger_simple( FixedWindows(10), # pyformat break AfterWatermark(early=AfterCount(2)), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c')], {IntervalWindow(0, 10): [set('abc'), set('abc')]}, 3)
def test_sessions_repeatedly_after_count(self): self.run_trigger_simple( Sessions(10), # pyformat break Repeatedly(AfterCount(2)), AccumulationMode.ACCUMULATING, [(1, 'a'), (15, 'b'), (6, 'c'), (2, 'd'), (7, 'e')], {IntervalWindow(1, 25): [set('abc'), set('abcde')]}, 1, 3) self.run_trigger_simple( Sessions(10), # pyformat break Repeatedly(AfterCount(2)), AccumulationMode.DISCARDING, [(1, 'a'), (15, 'b'), (6, 'c'), (2, 'd'), (7, 'e')], {IntervalWindow(1, 25): [set('abc'), set('de')]}, 1, 3)
def test_fixed_after_count(self): self.run_trigger_simple( FixedWindows(10), # pyformat break AfterCount(2), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c'), (11, 'z')], {IntervalWindow(0, 10): [set('ab')]}, 1, 2) self.run_trigger_simple( FixedWindows(10), # pyformat break AfterCount(2), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c'), (11, 'z')], {IntervalWindow(0, 10): [set('abc')]}, 3, 4)
def test_fixed_watermark_with_early_late(self): self.run_trigger_simple( FixedWindows(100), # pyformat break AfterWatermark(early=AfterCount(3), late=AfterCount(2)), AccumulationMode.DISCARDING, zip(range(9), 'abcdefghi'), { IntervalWindow(0, 100): [ set('abcd'), set('efgh'), # early set('i'), # on time set('vw'), set('xy') # late ] }, 2, late_data=zip(range(5), 'vwxyz'))
def test_sessions_after_all(self): self.run_trigger_simple( Sessions(10), # pyformat break AfterAll(AfterCount(2), AfterWatermark()), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c')], {IntervalWindow(1, 13): [set('abc')]}, 1, 2) self.run_trigger_simple( Sessions(10), # pyformat break AfterAll(AfterCount(5), AfterWatermark()), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c')], {IntervalWindow(1, 13): [set('abcxy')]}, 1, 2, late_data=[(1, 'x'), (2, 'y'), (3, 'z')])
def test_fixed_after_first(self): self.run_trigger_simple( FixedWindows(10), # pyformat break AfterFirst(AfterCount(2), AfterWatermark()), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c')], {IntervalWindow(0, 10): [set('ab')]}, 1, 2) self.run_trigger_simple( FixedWindows(10), # pyformat break AfterFirst(AfterCount(5), AfterWatermark()), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (3, 'c')], {IntervalWindow(0, 10): [set('abc')]}, 1, 2, late_data=[(1, 'x'), (2, 'y'), (3, 'z')])
def test_sessions_watermark_with_early_late(self): self.run_trigger_simple( Sessions(10), # pyformat break AfterWatermark(early=AfterCount(2), late=AfterCount(1)), AccumulationMode.ACCUMULATING, [(1, 'a'), (15, 'b'), (7, 'c'), (30, 'd')], { IntervalWindow(1, 25): [ set('abc'), # early set('abc'), # on time set('abcxy') # late ], IntervalWindow(30, 40): [ set('d'), # on time ], IntervalWindow(1, 40): [ set('abcdxyz') # late ], }, 2, late_data=[(1, 'x'), (2, 'y'), (21, 'z')])
def test_sessions_after_each(self): self.run_trigger_simple( Sessions(10), # pyformat break AfterEach(AfterCount(2), AfterCount(3)), AccumulationMode.ACCUMULATING, zip(range(10), 'abcdefghij'), { IntervalWindow(0, 11): [set('ab')], IntervalWindow(0, 15): [set('abcdef')] }, 2) self.run_trigger_simple( Sessions(10), # pyformat break Repeatedly(AfterEach(AfterCount(2), AfterCount(3))), AccumulationMode.ACCUMULATING, zip(range(10), 'abcdefghij'), { IntervalWindow(0, 11): [set('ab')], IntervalWindow(0, 15): [set('abcdef')], IntervalWindow(0, 17): [set('abcdefgh')] }, 2)
def test_sessions_after_count(self): self.run_trigger_simple( Sessions(10), # pyformat break AfterCount(2), AccumulationMode.ACCUMULATING, [(1, 'a'), (15, 'b'), (6, 'c'), (30, 's'), (31, 't'), (50, 'z'), (50, 'y')], { IntervalWindow(1, 25): [set('abc')], IntervalWindow(30, 41): [set('st')], IntervalWindow(50, 60): [set('yz')] }, 1, 2, 3)
def test_repeatedly_after_first(self): self.run_trigger_simple( FixedWindows(100), # pyformat break Repeatedly(AfterFirst(AfterCount(3), AfterWatermark())), AccumulationMode.ACCUMULATING, zip(range(7), 'abcdefg'), { IntervalWindow(0, 100): [ set('abc'), set('abcdef'), set('abcdefg'), set('abcdefgx'), set('abcdefgxy'), set('abcdefgxyz') ] }, 1, late_data=zip(range(3), 'xyz'))
def test_after_count(self): p = Pipeline('DirectPipelineRunner') result = (p | df.Create([1, 2, 3, 4, 5, 10, 11]) | df.FlatMap(lambda t: [('A', t), ('B', t + 5)]) | df.Map(lambda (k, t): TimestampedValue((k, t), t)) | df.WindowInto( FixedWindows(10), trigger=AfterCount(3), accumulation_mode=AccumulationMode.DISCARDING) | df.GroupByKey() | df.Map(lambda (k, v): ('%s-%s' % (k, len(v)), set(v)))) assert_that( result, equal_to({ 'A-5': {1, 2, 3, 4, 5}, # A-10, A-11 never emitted due to AfterCount(3) never firing. 'B-4': {6, 7, 8, 9}, 'B-3': {10, 15, 16}, }.iteritems()))