def run_trigger(self, window_fn, trigger_fn, accumulation_mode,
                bundles, late_bundles, expected_panes):
    """Run a trigger over on-time and late bundles and verify its panes.

    A ``GeneralTriggerDriver`` is built from the given windowing pieces.
    Every bundle in ``bundles`` is processed and all pending timers are
    fired; the same two steps are then repeated for ``late_bundles``.
    Each emitted value is stored as a ``set`` in a list keyed by the single
    window it belongs to, and the final mapping is compared with
    ``expected_panes``.

    Args:
        window_fn: Window function passed to ``Windowing``.
        trigger_fn: Trigger passed to ``Windowing``.
        accumulation_mode: Accumulation mode passed to ``Windowing``.
        bundles: Iterable of bundles processed first.
        late_bundles: Iterable of bundles processed after the first timer
            drain.
        expected_panes: Mapping of window to list of sets that the
            collected panes must equal.
    """
    windowing = Windowing(window_fn, trigger_fn, accumulation_mode)
    driver = GeneralTriggerDriver(windowing)
    state = InMemoryUnmergedState()
    panes_by_window = collections.defaultdict(list)

    def record(emitted):
        """Store each emitted windowed value under its window."""
        for pane in emitted:
            # Unpacking raises unless the pane is in exactly one window.
            (pane_window,) = pane.windows
            panes_by_window[pane_window].append(set(pane.value))

    def drain_timers():
        """Fire timers until ``state.timers`` is empty."""
        while state.timers:
            for timer_window, timer in state.get_and_clear_timers():
                name, time_domain, timestamp = timer
                record(
                    driver.process_timer(
                        timer_window, name, time_domain, timestamp, state))

    # On-time bundles first, then the late ones; both phases are handled
    # the same way.
    for phase in (bundles, late_bundles):
        for bundle in phase:
            record(driver.process_elements(state, bundle, MIN_TIMESTAMP))
        drain_timers()

    self.assertEqual(expected_panes, panes_by_window)
def _execute(self, window_fn, trigger_fn, accumulation_mode,
             timestamp_combiner, transcript, unused_spec):
    """Replay a transcript of actions against a trigger driver.

    ``transcript`` is iterated as ``(action, params)`` pairs. Supported
    actions:

    * ``'input'``: ``params`` is an iterable of values ``t``; each becomes
      a ``WindowedValue(t, t, ...)`` assigned to windows by ``window_fn``.
      The bundle is processed and ready timers are fired.
    * ``'watermark'``: ``params`` becomes the new watermark and ready
      timers are fired.
    * ``'expect'``: each entry of ``params`` must match one pending output;
      matched outputs are removed.
    * ``'state'``: currently ignored.

    Any other action fails the test. Output still pending when a
    non-``'expect'`` action starts, or when the transcript ends, also fails
    the test.

    Args:
        window_fn: Window function used for the windowing and to assign
            windows to inputs.
        trigger_fn: Trigger passed to ``Windowing``.
        accumulation_mode: Accumulation mode passed to ``Windowing``.
        timestamp_combiner: Timestamp combiner passed to ``Windowing``.
        transcript: Iterable of ``(action, params)`` pairs.
        unused_spec: Not used by this method.
    """
    windowing = Windowing(
        window_fn, trigger_fn, accumulation_mode, timestamp_combiner)
    driver = GeneralTriggerDriver(windowing, TestClock())
    state = InMemoryUnmergedState()
    output = []
    watermark = MIN_TIMESTAMP

    def fire_timers():
        """Fire every timer released by the current watermark."""
        while True:
            ready = state.get_and_clear_timers(watermark)
            if not ready:
                break
            for timer_window, (name, time_domain, fire_time) in ready:
                fired = driver.process_timer(
                    timer_window, name, time_domain, fire_time, state)
                for wvalue in fired:
                    output.append(_windowed_value_info(wvalue))

    def agrees(candidate, expected_output):
        """True if candidate equals expected_output on all shared keys."""
        for key in candidate:
            if key in expected_output:
                if candidate[key] != expected_output[key]:
                    return False
        return True

    for action, params in transcript:
        if action != 'expect':
            # Fail if we have output that was not expected in the transcript.
            self.assertEqual(
                [],
                output,
                msg='Unexpected output: %s before %s: %s' %
                (output, action, params))

        if action == 'expect':
            for expected_output in params:
                for candidate in output:
                    if agrees(candidate, expected_output):
                        output.remove(candidate)
                        break
                else:
                    self.fail(
                        'Unmatched output %s in %s' %
                        (expected_output, output))
        elif action == 'watermark':
            watermark = params
            fire_timers()
        elif action == 'input':
            bundle = []
            for t in params:
                windows = window_fn.assign(WindowFn.AssignContext(t, t))
                bundle.append(WindowedValue(t, t, windows))
            output = list(
                map(
                    _windowed_value_info,
                    driver.process_elements(state, bundle, watermark)))
            fire_timers()
        elif action == 'state':
            # TODO(robertwb): Implement once we support allowed lateness.
            pass
        else:
            self.fail('Unknown action: ' + action)

    # Fail if we have output that was not expected in the transcript.
    self.assertEqual([], output, msg='Unexpected output: %s' % output)
def run_trigger(self, window_fn, trigger_fn, accumulation_mode,
                bundles, late_bundles, expected_panes):
    """Run a trigger over on-time and late bundles and verify its panes.

    The driver is built with an allowed lateness derived from
    ``common_urns.constants.MAX_TIMESTAMP_MILLIS`` (converted to micros).
    ``bundles`` are processed, and their timers fired, with
    ``MIN_TIMESTAMP`` passed as the trailing watermark arguments;
    ``late_bundles`` are then handled the same way with ``MAX_TIMESTAMP``.

    Every emitted value must carry a timestamp equal to its window's
    ``max_timestamp()``. Values are stored as sets in a list keyed by
    window, and that mapping is finally compared with ``expected_panes``.

    Args:
        window_fn: Window function passed to ``Windowing``.
        trigger_fn: Trigger passed to ``Windowing``.
        accumulation_mode: Accumulation mode passed to ``Windowing``.
        bundles: Iterable of bundles processed in the first phase.
        late_bundles: Iterable of bundles processed in the second phase.
        expected_panes: Mapping of window to list of sets that the
            collected panes must equal.
    """
    lateness_micros = (
        int(common_urns.constants.MAX_TIMESTAMP_MILLIS.constant) * 1000)
    windowing = Windowing(
        window_fn,
        trigger_fn,
        accumulation_mode,
        allowed_lateness=Duration(micros=lateness_micros))
    driver = GeneralTriggerDriver(windowing, TestClock())
    state = InMemoryUnmergedState()
    panes_by_window = collections.defaultdict(list)

    def record(emitted):
        """Check and store each emitted windowed value under its window."""
        for pane in emitted:
            # Unpacking raises unless the pane is in exactly one window.
            (pane_window,) = pane.windows
            self.assertEqual(pane_window.max_timestamp(), pane.timestamp)
            panes_by_window[pane_window].append(set(pane.value))

    def drain_timers(phase_watermark):
        """Fire timers until ``state.timers`` is empty."""
        while state.timers:
            for timer_window, timer in state.get_and_clear_timers():
                name, time_domain, timestamp = timer
                record(
                    driver.process_timer(
                        timer_window,
                        name,
                        time_domain,
                        timestamp,
                        state,
                        phase_watermark))

    phases = (
        (bundles, MIN_TIMESTAMP),
        (late_bundles, MAX_TIMESTAMP),
    )
    for phase_bundles, phase_watermark in phases:
        for bundle in phase_bundles:
            record(
                driver.process_elements(
                    state, bundle, phase_watermark, phase_watermark))
        drain_timers(phase_watermark)

    self.assertEqual(expected_panes, panes_by_window)
def _run_log(self, spec):
    """Build a trigger driver from ``spec`` and replay its transcript.

    Keys read from ``spec``:

    * ``'window_fn'`` (default ``'GlobalWindows'``) and ``'trigger_fn'``
      (default ``'Default'``): expressions such as ``'Foo(arg, kw=arg)'``,
      resolved against the ``apache_beam.transforms.window`` namespace and
      the ``trigger`` module namespace respectively.
    * ``'accumulation_mode'`` (default ``'ACCUMULATING'``) and
      ``'timestamp_combiner'`` (default ``'OUTPUT_AT_EOW'``): attribute
      names, upper-cased and looked up on ``AccumulationMode`` and
      ``TimestampCombiner``.
    * ``'transcript'`` (required): a sequence of mappings; the first
      ``action: params`` entry of each mapping is one step.

    Supported actions are ``'input'`` (process a bundle built from the
    listed values, then fire ready timers), ``'watermark'`` (advance the
    watermark, then fire ready timers), ``'expect'`` (each expected dict
    must match and consume one pending output) and ``'state'`` (ignored).
    Any other action fails the test. Output still pending when a
    non-``'expect'`` step starts, or at the end, also fails the test.

    Args:
        spec: Mapping describing the windowing and the transcript.

    Raises:
        ValueError: If an expression in ``spec`` names something unknown.
    """

    def parse_int_list(s):
        """Parses strings like '[1, 2, 3]'."""
        s = s.strip()
        assert s[0] == '[' and s[-1] == ']', s
        if not s[1:-1].strip():
            return []
        return [int(x) for x in s[1:-1].split(',')]

    def split_args(s):
        """Splits 'a, b, [c, d]' into ['a', 'b', '[c, d]']."""
        if not s.strip():
            # An empty argument list, as in 'Foo()', has no arguments;
            # returning [''] would make parse() index an empty string.
            return []
        args = []
        start = 0
        depth = 0
        for ix, c in enumerate(s):
            if c in '({[':
                depth += 1
            elif c in ')}]':
                depth -= 1
            elif c == ',' and depth == 0:
                # Only commas outside any bracket separate arguments.
                args.append(s[start:ix].strip())
                start = ix + 1
        assert depth == 0, s
        args.append(s[start:].strip())
        return args

    def parse(s, names):
        """Parse (recursive) 'Foo(arg, kw=arg)' for Foo in the names dict."""
        s = s.strip()
        if s in names:
            return names[s]
        elif s.startswith('['):
            return parse_int_list(s)
        elif '(' in s:
            assert s[-1] == ')', s
            open_paren = s.index('(')
            callee = parse(s[:open_paren], names)
            posargs = []
            kwargs = {}
            for arg in split_args(s[open_paren + 1:-1]):
                if '=' in arg:
                    kw, value = arg.split('=', 1)
                    kwargs[kw] = parse(value, names)
                else:
                    posargs.append(parse(arg, names))
            return callee(*posargs, **kwargs)
        else:
            try:
                return int(s)
            except ValueError:
                raise ValueError('Unknown function: %s' % s)

    def parse_fn(s, names):
        """Like parse(), but implicitly calls no-arg constructors."""
        fn = parse(s, names)
        if isinstance(fn, type):
            return fn()
        return fn

    def pane_info(window, wvalue):
        """Describe one output pane as a plain dict for matching."""
        return {
            # The window is reported with an inclusive upper bound.
            'window': [window.start, window.end - 1],
            'values': sorted(wvalue.value),
            'timestamp': wvalue.timestamp,
        }

    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.transforms import window as window_module
    # pylint: enable=wrong-import-order, wrong-import-position
    window_fn_names = dict(window_module.__dict__)
    window_fn_names.update({
        'CustomTimestampingFixedWindowsWindowFn':
            CustomTimestampingFixedWindowsWindowFn
    })
    trigger_names = {'Default': DefaultTrigger}
    trigger_names.update(trigger.__dict__)

    window_fn = parse_fn(
        spec.get('window_fn', 'GlobalWindows'), window_fn_names)
    trigger_fn = parse_fn(spec.get('trigger_fn', 'Default'), trigger_names)
    accumulation_mode = getattr(
        AccumulationMode, spec.get('accumulation_mode', 'ACCUMULATING').upper())
    timestamp_combiner = getattr(
        TimestampCombiner,
        spec.get('timestamp_combiner', 'OUTPUT_AT_EOW').upper())

    driver = GeneralTriggerDriver(
        Windowing(window_fn, trigger_fn, accumulation_mode, timestamp_combiner),
        TestClock())
    state = InMemoryUnmergedState()
    output = []
    watermark = MIN_TIMESTAMP

    def fire_timers():
        """Fire timers until none are returned for the current watermark."""
        to_fire = state.get_and_clear_timers(watermark)
        while to_fire:
            for timer_window, (name, time_domain, t_timestamp) in to_fire:
                for wvalue in driver.process_timer(
                        timer_window, name, time_domain, t_timestamp, state):
                    # Unpacking raises unless there is exactly one window.
                    window, = wvalue.windows
                    output.append(pane_info(window, wvalue))
            to_fire = state.get_and_clear_timers(watermark)

    for line in spec['transcript']:
        # dict.items() is a non-subscriptable view on Python 3, so take the
        # first entry through an iterator instead of indexing.
        action, params = next(iter(line.items()))

        if action != 'expect':
            # Fail if we have output that was not expected in the transcript.
            self.assertEqual(
                [],
                output,
                msg='Unexpected output: %s before %s' % (output, line))

        if action == 'input':
            bundle = [
                WindowedValue(
                    t, t, window_fn.assign(WindowFn.AssignContext(t, t)))
                for t in params
            ]
            output = [
                pane_info(wvalue.windows[0], wvalue)
                for wvalue in driver.process_elements(state, bundle, watermark)
            ]
            fire_timers()

        elif action == 'watermark':
            watermark = params
            fire_timers()

        elif action == 'expect':
            for expected_output in params:
                for candidate in output:
                    # Only keys present in both dicts are compared.
                    if all(candidate[k] == expected_output[k]
                           for k in candidate if k in expected_output):
                        output.remove(candidate)
                        break
                else:
                    self.fail(
                        'Unmatched output %s in %s' % (expected_output, output))

        elif action == 'state':
            # TODO(robertwb): Implement once we support allowed lateness.
            pass

        else:
            self.fail('Unknown action: ' + action)

    # Fail if we have output that was not expected in the transcript.
    self.assertEqual([], output, msg='Unexpected output: %s' % output)
def _run_log(self, spec):
    """Build a trigger driver from ``spec`` and replay its transcript.

    Keys read from ``spec``:

    * ``'window_fn'`` (default ``'GlobalWindows'``) and ``'trigger_fn'``
      (default ``'Default'``): expressions such as ``'Foo(arg, kw=arg)'``,
      resolved against the ``apache_beam.transforms.window`` namespace and
      the ``trigger`` module namespace respectively.
    * ``'accumulation_mode'`` (default ``'ACCUMULATING'``) and
      ``'timestamp_combiner'`` (default ``'OUTPUT_AT_EOW'``): attribute
      names, upper-cased and looked up on ``AccumulationMode`` and
      ``TimestampCombiner``.
    * ``'transcript'`` (required): a sequence of mappings; the first
      ``action: params`` entry of each mapping is one step.

    Supported actions are ``'input'`` (process a bundle built from the
    listed values, then fire ready timers), ``'watermark'`` (advance the
    watermark, then fire ready timers), ``'expect'`` (each expected dict
    must match and consume one pending output) and ``'state'`` (ignored).
    Any other action fails the test. Output still pending when a
    non-``'expect'`` step starts, or at the end, also fails the test.

    Args:
        spec: Mapping describing the windowing and the transcript.

    Raises:
        ValueError: If an expression in ``spec`` names something unknown.
    """

    def parse_int_list(s):
        """Parses strings like '[1, 2, 3]'."""
        s = s.strip()
        assert s[0] == '[' and s[-1] == ']', s
        if not s[1:-1].strip():
            return []
        return [int(x) for x in s[1:-1].split(',')]

    def split_args(s):
        """Splits 'a, b, [c, d]' into ['a', 'b', '[c, d]']."""
        if not s.strip():
            # An empty argument list, as in 'Foo()', has no arguments;
            # returning [''] would make parse() index an empty string.
            return []
        args = []
        start = 0
        depth = 0
        # enumerate() replaces the Python-2-only xrange(len(s)) index loop.
        for ix, c in enumerate(s):
            if c in '({[':
                depth += 1
            elif c in ')}]':
                depth -= 1
            elif c == ',' and depth == 0:
                # Only commas outside any bracket separate arguments.
                args.append(s[start:ix].strip())
                start = ix + 1
        assert depth == 0, s
        args.append(s[start:].strip())
        return args

    def parse(s, names):
        """Parse (recursive) 'Foo(arg, kw=arg)' for Foo in the names dict."""
        s = s.strip()
        if s in names:
            return names[s]
        elif s.startswith('['):
            return parse_int_list(s)
        elif '(' in s:
            assert s[-1] == ')', s
            open_paren = s.index('(')
            callee = parse(s[:open_paren], names)
            posargs = []
            kwargs = {}
            for arg in split_args(s[open_paren + 1:-1]):
                if '=' in arg:
                    kw, value = arg.split('=', 1)
                    kwargs[kw] = parse(value, names)
                else:
                    posargs.append(parse(arg, names))
            return callee(*posargs, **kwargs)
        else:
            try:
                return int(s)
            except ValueError:
                raise ValueError('Unknown function: %s' % s)

    def parse_fn(s, names):
        """Like parse(), but implicitly calls no-arg constructors."""
        fn = parse(s, names)
        if isinstance(fn, type):
            return fn()
        return fn

    def pane_info(window, wvalue):
        """Describe one output pane as a plain dict for matching."""
        return {
            # The window is reported with an inclusive upper bound.
            'window': [window.start, window.end - 1],
            'values': sorted(wvalue.value),
            'timestamp': wvalue.timestamp,
        }

    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.transforms import window as window_module
    # pylint: enable=wrong-import-order, wrong-import-position
    window_fn_names = dict(window_module.__dict__)
    window_fn_names.update({
        'CustomTimestampingFixedWindowsWindowFn':
            CustomTimestampingFixedWindowsWindowFn
    })
    trigger_names = {'Default': DefaultTrigger}
    trigger_names.update(trigger.__dict__)

    window_fn = parse_fn(
        spec.get('window_fn', 'GlobalWindows'), window_fn_names)
    trigger_fn = parse_fn(spec.get('trigger_fn', 'Default'), trigger_names)
    accumulation_mode = getattr(
        AccumulationMode, spec.get('accumulation_mode', 'ACCUMULATING').upper())
    timestamp_combiner = getattr(
        TimestampCombiner,
        spec.get('timestamp_combiner', 'OUTPUT_AT_EOW').upper())

    driver = GeneralTriggerDriver(
        Windowing(window_fn, trigger_fn, accumulation_mode, timestamp_combiner))
    state = InMemoryUnmergedState()
    output = []
    watermark = MIN_TIMESTAMP

    def fire_timers():
        """Fire timers until none are returned for the current watermark."""
        to_fire = state.get_and_clear_timers(watermark)
        while to_fire:
            for timer_window, (name, time_domain, t_timestamp) in to_fire:
                for wvalue in driver.process_timer(
                        timer_window, name, time_domain, t_timestamp, state):
                    # Unpacking raises unless there is exactly one window.
                    window, = wvalue.windows
                    output.append(pane_info(window, wvalue))
            to_fire = state.get_and_clear_timers(watermark)

    for line in spec['transcript']:
        # dict.items() is a non-subscriptable view on Python 3, so take the
        # first entry through an iterator instead of indexing.
        action, params = next(iter(line.items()))

        if action != 'expect':
            # Fail if we have output that was not expected in the transcript.
            self.assertEqual(
                [],
                output,
                msg='Unexpected output: %s before %s' % (output, line))

        if action == 'input':
            bundle = [
                WindowedValue(
                    t, t, window_fn.assign(WindowFn.AssignContext(t, t)))
                for t in params
            ]
            output = [
                pane_info(wvalue.windows[0], wvalue)
                for wvalue in driver.process_elements(state, bundle, watermark)
            ]
            fire_timers()

        elif action == 'watermark':
            watermark = params
            fire_timers()

        elif action == 'expect':
            for expected_output in params:
                for candidate in output:
                    # Only keys present in both dicts are compared.
                    if all(candidate[k] == expected_output[k]
                           for k in candidate if k in expected_output):
                        output.remove(candidate)
                        break
                else:
                    self.fail(
                        'Unmatched output %s in %s' % (expected_output, output))

        elif action == 'state':
            # TODO(robertwb): Implement once we support allowed lateness.
            pass

        else:
            self.fail('Unknown action: ' + action)

    # Fail if we have output that was not expected in the transcript.
    self.assertEqual([], output, msg='Unexpected output: %s' % output)