Пример #1
0
  def run_trigger(self, window_fn, trigger_fn, accumulation_mode,
                  bundles, late_bundles,
                  expected_panes):
    """Feed bundles through a trigger driver and verify the emitted panes.

    Processing happens in three steps:
      1. every bundle in ``bundles`` is handed to the driver;
      2. pending timers are fired until ``state.timers`` is empty;
      3. every bundle in ``late_bundles`` is handed to the driver, and the
         timers are drained again after each late bundle.

    Each output value is stored as a ``set`` in a per-window list, and the
    resulting mapping is compared against ``expected_panes``.

    Args:
      window_fn: forwarded to ``Windowing``.
      trigger_fn: forwarded to ``Windowing``.
      accumulation_mode: forwarded to ``Windowing``.
      bundles: iterable of bundles processed before any timer is fired.
      late_bundles: iterable of bundles processed after the first timer drain.
      expected_panes: mapping of window -> list of sets to compare against.
    """
    driver = GeneralTriggerDriver(
        Windowing(window_fn, trigger_fn, accumulation_mode))
    state = InMemoryUnmergedState()
    actual_panes = collections.defaultdict(list)

    def record(wvalues):
      # The one-element unpacking raises unless the output carries exactly
      # one window.
      for wvalue in wvalues:
        (pane_window,) = wvalue.windows
        actual_panes[pane_window].append(set(wvalue.value))

    def drain_timers():
      # Keep firing until the state reports no timers at all.
      while state.timers:
        for timer_window, timer in state.get_and_clear_timers():
          name, time_domain, timestamp = timer
          record(driver.process_timer(
              timer_window, name, time_domain, timestamp, state))

    # MIN_TIMESTAMP is the third argument for both regular and late bundles
    # (presumably the watermark -- confirm against process_elements).
    for bundle in bundles:
      record(driver.process_elements(state, bundle, MIN_TIMESTAMP))
    drain_timers()

    for bundle in late_bundles:
      record(driver.process_elements(state, bundle, MIN_TIMESTAMP))
      drain_timers()

    self.assertEqual(expected_panes, actual_panes)
Пример #2
0
  def _execute(
      self, window_fn, trigger_fn, accumulation_mode, timestamp_combiner,
      transcript, unused_spec):
    """Replay ``transcript`` against a fresh trigger driver and check outputs.

    ``transcript`` is iterated as ``(action, params)`` pairs. Supported
    actions:
      'input':     ``params`` is an iterable of values ``t``; each becomes a
                   ``WindowedValue(t, t, ...)`` assigned by ``window_fn`` and
                   the bundle is processed at the current watermark, after
                   which ready timers are fired.
      'watermark': ``params`` becomes the new watermark and ready timers are
                   fired.
      'expect':    ``params`` is an iterable of expected outputs; each must
                   match (on the keys it shares with a candidate) one pending
                   output, which is then consumed.
      'state':     currently ignored.
    Any other action fails the test, as does pending output that is left over
    when a non-'expect' action starts or when the transcript ends.

    ``unused_spec`` is not read.
    """
    windowing = Windowing(
        window_fn, trigger_fn, accumulation_mode, timestamp_combiner)
    driver = GeneralTriggerDriver(windowing, TestClock())
    state = InMemoryUnmergedState()
    output = []
    watermark = MIN_TIMESTAMP

    def fire_timers():
      # ``watermark`` and ``output`` are looked up in the enclosing scope on
      # every call, so rebinding them in the loop below is picked up here.
      while True:
        ready = state.get_and_clear_timers(watermark)
        if not ready:
          return
        for timer_window, (name, time_domain, t_timestamp) in ready:
          output.extend(
              _windowed_value_info(wvalue)
              for wvalue in driver.process_timer(
                  timer_window, name, time_domain, t_timestamp, state))

    def agrees(candidate, expected_output):
      # Only keys present in both the candidate and the expectation count.
      return all(
          candidate[k] == expected_output[k]
          for k in candidate if k in expected_output)

    for action, params in transcript:

      if action != 'expect':
        # Everything produced so far must have been claimed by an 'expect'.
        self.assertEqual(
            [], output, msg='Unexpected output: %s before %s: %s' % (
                output, action, params))

      if action == 'expect':
        for expected_output in params:
          hit = next(
              (candidate for candidate in output
               if agrees(candidate, expected_output)),
              None)
          if hit is None:
            self.fail('Unmatched output %s in %s' % (expected_output, output))
          output.remove(hit)

      elif action == 'input':
        bundle = []
        for t in params:
          windows = window_fn.assign(WindowFn.AssignContext(t, t))
          bundle.append(WindowedValue(t, t, windows))
        output = [
            _windowed_value_info(wv)
            for wv in driver.process_elements(state, bundle, watermark)]
        fire_timers()

      elif action == 'watermark':
        watermark = params
        fire_timers()

      elif action == 'state':
        # TODO(robertwb): Implement once we support allowed lateness.
        pass

      else:
        self.fail('Unknown action: ' + action)

    # Nothing may remain unclaimed once the transcript is exhausted.
    self.assertEqual([], output, msg='Unexpected output: %s' % output)
Пример #3
0
  def run_trigger(self, window_fn, trigger_fn, accumulation_mode,
                  bundles, late_bundles,
                  expected_panes):
    """Run a trigger over on-time and late bundles and compare the panes.

    The regular ``bundles`` are processed first, then timers are fired until
    ``state.timers`` is empty. Afterwards each bundle of ``late_bundles`` is
    processed, with another timer drain following every late bundle. Output
    values are collected as sets, grouped by their single window, and the
    collected mapping must equal ``expected_panes``.

    Args:
      window_fn: forwarded to ``Windowing``.
      trigger_fn: forwarded to ``Windowing``.
      accumulation_mode: forwarded to ``Windowing``.
      bundles: iterable of bundles processed before the first timer drain.
      late_bundles: iterable of bundles processed after it.
      expected_panes: mapping of window -> list of sets expected as output.
    """
    windowing = Windowing(window_fn, trigger_fn, accumulation_mode)
    driver = GeneralTriggerDriver(windowing)
    state = InMemoryUnmergedState()
    actual_panes = collections.defaultdict(list)

    def collect(outputs):
      # Unpacking into a 1-tuple raises unless there is exactly one window.
      for out in outputs:
        (only_window,) = out.windows
        actual_panes[only_window].append(set(out.value))

    def fire_all_timers():
      # Loop until the state holds no timers.
      while state.timers:
        pending = state.get_and_clear_timers()
        for timer_window, (name, time_domain, timestamp) in pending:
          collect(driver.process_timer(
              timer_window, name, time_domain, timestamp, state))

    for bundle in bundles:
      collect(driver.process_elements(state, bundle, MIN_TIMESTAMP))
    fire_all_timers()

    # Late bundles are also processed with MIN_TIMESTAMP as the third
    # argument; timers are drained after each one rather than once at the end.
    for bundle in late_bundles:
      collect(driver.process_elements(state, bundle, MIN_TIMESTAMP))
      fire_all_timers()

    self.assertEqual(expected_panes, actual_panes)
Пример #4
0
    def run_trigger(self, window_fn, trigger_fn, accumulation_mode, bundles,
                    late_bundles, expected_panes):
        """Run a trigger over on-time and late bundles and compare the panes.

        The windowing is built with an ``allowed_lateness`` derived from
        ``common_urns.constants.MAX_TIMESTAMP_MILLIS`` (converted to micros).
        Regular ``bundles`` are processed with ``MIN_TIMESTAMP`` and timers
        are then fired until none remain; each bundle of ``late_bundles`` is
        processed with ``MAX_TIMESTAMP`` and followed by its own timer drain.

        For every output this asserts that its timestamp equals the
        ``max_timestamp()`` of its single window, and records the value as a
        set under that window. The collected mapping must equal
        ``expected_panes``.

        Args:
            window_fn: forwarded to ``Windowing``.
            trigger_fn: forwarded to ``Windowing``.
            accumulation_mode: forwarded to ``Windowing``.
            bundles: iterable of bundles processed before the first drain.
            late_bundles: iterable of bundles processed after it.
            expected_panes: mapping of window -> list of sets expected.
        """
        lateness_micros = int(
            common_urns.constants.MAX_TIMESTAMP_MILLIS.constant) * 1000
        windowing = Windowing(window_fn,
                              trigger_fn,
                              accumulation_mode,
                              allowed_lateness=Duration(micros=lateness_micros))
        driver = GeneralTriggerDriver(windowing, TestClock())
        state = InMemoryUnmergedState()
        actual_panes = collections.defaultdict(list)

        def record(wvalues):
            # The 1-tuple unpacking raises unless there is exactly one window.
            for wvalue in wvalues:
                (pane_window,) = wvalue.windows
                self.assertEqual(pane_window.max_timestamp(),
                                 wvalue.timestamp)
                actual_panes[pane_window].append(set(wvalue.value))

        def drain_timers(phase_timestamp):
            # ``phase_timestamp`` is passed through as the last positional
            # argument of process_timer.
            while state.timers:
                for timer_window, timer in state.get_and_clear_timers():
                    name, time_domain, timestamp = timer
                    record(
                        driver.process_timer(timer_window, name, time_domain,
                                             timestamp, state,
                                             phase_timestamp))

        for bundle in bundles:
            record(
                driver.process_elements(state, bundle, MIN_TIMESTAMP,
                                        MIN_TIMESTAMP))
        drain_timers(MIN_TIMESTAMP)

        for bundle in late_bundles:
            record(
                driver.process_elements(state, bundle, MAX_TIMESTAMP,
                                        MAX_TIMESTAMP))
            drain_timers(MAX_TIMESTAMP)

        self.assertEqual(expected_panes, actual_panes)
Пример #5
0
    def _run_log(self, spec):
        """Replay the transcript described by ``spec`` through a trigger driver.

        ``spec`` is a mapping with these entries:
          window_fn: expression naming/constructing the window fn, resolved
            against the ``apache_beam.transforms.window`` namespace plus
            ``CustomTimestampingFixedWindowsWindowFn``
            (default ``'GlobalWindows'``).
          trigger_fn: expression resolved against the ``trigger`` namespace
            plus ``'Default'`` (default ``'Default'``).
          accumulation_mode: attribute name on ``AccumulationMode``,
            upper-cased before lookup (default ``'ACCUMULATING'``).
          timestamp_combiner: attribute name on ``TimestampCombiner``,
            upper-cased before lookup (default ``'OUTPUT_AT_EOW'``).
          transcript: iterable of mappings; the first ``{action: params}``
            item of each is used. Actions are ``'input'``, ``'watermark'``,
            ``'expect'`` and ``'state'`` (the latter is ignored).

        The test fails on an unknown action, on an expectation that matches
        no pending output, and on output that is still pending when a
        non-'expect' action starts or when the transcript ends.
        """
        def parse_int_list(s):
            """Parses strings like '[1, 2, 3]'."""
            s = s.strip()
            assert s[0] == '[' and s[-1] == ']', s
            if not s[1:-1].strip():
                return []
            return [int(x) for x in s[1:-1].split(',')]

        def split_args(s):
            """Splits 'a, b, [c, d]' into ['a', 'b', '[c, d]']."""
            args = []
            start = 0
            depth = 0
            for ix, c in enumerate(s):
                if c in '({[':
                    depth += 1
                elif c in ')}]':
                    depth -= 1
                elif c == ',' and depth == 0:
                    args.append(s[start:ix].strip())
                    start = ix + 1
            assert depth == 0, s
            args.append(s[start:].strip())
            return args

        def parse(s, names):
            """Parse (recursive) 'Foo(arg, kw=arg)' for Foo in the names dict."""
            s = s.strip()
            if s in names:
                return names[s]
            elif s.startswith('['):
                return parse_int_list(s)
            elif '(' in s:
                assert s[-1] == ')', s
                open_ix = s.index('(')
                callee = parse(s[:open_ix], names)
                posargs = []
                kwargs = {}
                arg_text = s[open_ix + 1:-1]
                # 'Foo()' has no arguments; split_args('') would return ['']
                # and the empty piece cannot be parsed.
                for arg in (split_args(arg_text) if arg_text.strip() else []):
                    kw, eq, value = arg.partition('=')
                    # Only an '=' that comes before any bracket separates a
                    # keyword from its value. A later '=' belongs to a nested
                    # call such as 'Inner(kw=1)' passed positionally.
                    if eq and not any(b in kw for b in '({['):
                        kwargs[kw.strip()] = parse(value, names)
                    else:
                        posargs.append(parse(arg, names))
                return callee(*posargs, **kwargs)
            else:
                try:
                    return int(s)
                except ValueError:
                    raise ValueError('Unknown function: %s' % s)

        def parse_fn(s, names):
            """Like parse(), but implicitly calls no-arg constructors."""
            fn = parse(s, names)
            if isinstance(fn, type):
                return fn()
            return fn

        # pylint: disable=wrong-import-order, wrong-import-position
        from apache_beam.transforms import window as window_module
        # pylint: enable=wrong-import-order, wrong-import-position
        window_fn_names = dict(window_module.__dict__)
        window_fn_names.update({
            'CustomTimestampingFixedWindowsWindowFn':
            CustomTimestampingFixedWindowsWindowFn
        })
        trigger_names = {'Default': DefaultTrigger}
        trigger_names.update(trigger.__dict__)

        window_fn = parse_fn(spec.get('window_fn', 'GlobalWindows'),
                             window_fn_names)
        trigger_fn = parse_fn(spec.get('trigger_fn', 'Default'), trigger_names)
        accumulation_mode = getattr(
            AccumulationMode,
            spec.get('accumulation_mode', 'ACCUMULATING').upper())
        timestamp_combiner = getattr(
            TimestampCombiner,
            spec.get('timestamp_combiner', 'OUTPUT_AT_EOW').upper())

        driver = GeneralTriggerDriver(
            Windowing(window_fn, trigger_fn, accumulation_mode,
                      timestamp_combiner), TestClock())
        state = InMemoryUnmergedState()
        output = []
        watermark = MIN_TIMESTAMP

        def pane_record(window, wvalue):
            """Builds the dict that 'expect' entries are matched against."""
            return {
                'window': [window.start, window.end - 1],
                'values': sorted(wvalue.value),
                'timestamp': wvalue.timestamp
            }

        def fire_timers():
            # ``watermark`` and ``output`` are resolved in the enclosing
            # scope at call time, so the rebinding done by the loop below is
            # visible here.
            to_fire = state.get_and_clear_timers(watermark)
            while to_fire:
                for timer_window, (name, time_domain, t_timestamp) in to_fire:
                    for wvalue in driver.process_timer(timer_window, name,
                                                       time_domain,
                                                       t_timestamp, state):
                        # Raises unless the output has exactly one window.
                        window, = wvalue.windows
                        output.append(pane_record(window, wvalue))
                to_fire = state.get_and_clear_timers(watermark)

        for line in spec['transcript']:

            # dict views cannot be indexed on Python 3; materialize first.
            action, params = list(line.items())[0]

            if action != 'expect':
                # Fail if we have output that was not expected in the transcript.
                self.assertEqual([],
                                 output,
                                 msg='Unexpected output: %s before %s' %
                                 (output, line))

            if action == 'input':
                bundle = [
                    WindowedValue(
                        t, t, window_fn.assign(WindowFn.AssignContext(t, t)))
                    for t in params
                ]
                output = [
                    pane_record(wvalue.windows[0], wvalue)
                    for wvalue in driver.process_elements(
                        state, bundle, watermark)
                ]
                fire_timers()

            elif action == 'watermark':
                watermark = params
                fire_timers()

            elif action == 'expect':
                for expected_output in params:
                    for candidate in output:
                        # Compare only the keys the expectation specifies.
                        if all(candidate[k] == expected_output[k]
                               for k in candidate if k in expected_output):
                            output.remove(candidate)
                            break
                    else:
                        self.fail('Unmatched output %s in %s' %
                                  (expected_output, output))

            elif action == 'state':
                # TODO(robertwb): Implement once we support allowed lateness.
                pass

            else:
                self.fail('Unknown action: ' + action)

        # Fail if we have output that was not expected in the transcript.
        self.assertEqual([], output, msg='Unexpected output: %s' % output)
Пример #6
0
  def _run_log(self, spec):
    """Replay the transcript described by ``spec`` through a trigger driver.

    ``spec`` is a mapping with these entries:
      window_fn: expression naming/constructing the window fn, resolved
        against the ``apache_beam.transforms.window`` namespace plus
        ``CustomTimestampingFixedWindowsWindowFn`` (default 'GlobalWindows').
      trigger_fn: expression resolved against the ``trigger`` namespace plus
        'Default' (default 'Default').
      accumulation_mode: attribute name on ``AccumulationMode``, upper-cased
        before lookup (default 'ACCUMULATING').
      timestamp_combiner: attribute name on ``TimestampCombiner``, upper-cased
        before lookup (default 'OUTPUT_AT_EOW').
      transcript: iterable of mappings; the first ``{action: params}`` item of
        each is used. Actions are 'input', 'watermark', 'expect' and 'state'
        (the latter is ignored).

    The test fails on an unknown action, on an expectation that matches no
    pending output, and on output that is still pending when a non-'expect'
    action starts or when the transcript ends.

    The body avoids constructs that exist only on Python 2 (``xrange``,
    indexing ``dict.items()``) so it runs on both Python 2.7 and Python 3.
    """

    def parse_int_list(s):
      """Parses strings like '[1, 2, 3]'."""
      s = s.strip()
      assert s[0] == '[' and s[-1] == ']', s
      if not s[1:-1].strip():
        return []
      return [int(x) for x in s[1:-1].split(',')]

    def split_args(s):
      """Splits 'a, b, [c, d]' into ['a', 'b', '[c, d]']."""
      args = []
      start = 0
      depth = 0
      for ix, c in enumerate(s):
        if c in '({[':
          depth += 1
        elif c in ')}]':
          depth -= 1
        elif c == ',' and depth == 0:
          args.append(s[start:ix].strip())
          start = ix + 1
      assert depth == 0, s
      args.append(s[start:].strip())
      return args

    def parse(s, names):
      """Parse (recursive) 'Foo(arg, kw=arg)' for Foo in the names dict."""
      s = s.strip()
      if s in names:
        return names[s]
      elif s.startswith('['):
        return parse_int_list(s)
      elif '(' in s:
        assert s[-1] == ')', s
        open_ix = s.index('(')
        callee = parse(s[:open_ix], names)
        posargs = []
        kwargs = {}
        arg_text = s[open_ix + 1:-1]
        # 'Foo()' has no arguments; split_args('') would return [''] and the
        # empty piece cannot be parsed.
        for arg in (split_args(arg_text) if arg_text.strip() else []):
          kw, eq, value = arg.partition('=')
          # Only an '=' that comes before any bracket separates a keyword from
          # its value. A later '=' belongs to a nested call such as
          # 'Inner(kw=1)' passed positionally.
          if eq and not any(b in kw for b in '({['):
            kwargs[kw.strip()] = parse(value, names)
          else:
            posargs.append(parse(arg, names))
        return callee(*posargs, **kwargs)
      else:
        try:
          return int(s)
        except ValueError:
          raise ValueError('Unknown function: %s' % s)

    def parse_fn(s, names):
      """Like parse(), but implicitly calls no-arg constructors."""
      fn = parse(s, names)
      if isinstance(fn, type):
        return fn()
      return fn

    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.transforms import window as window_module
    # pylint: enable=wrong-import-order, wrong-import-position
    window_fn_names = dict(window_module.__dict__)
    window_fn_names.update({'CustomTimestampingFixedWindowsWindowFn':
                            CustomTimestampingFixedWindowsWindowFn})
    trigger_names = {'Default': DefaultTrigger}
    trigger_names.update(trigger.__dict__)

    window_fn = parse_fn(spec.get('window_fn', 'GlobalWindows'),
                         window_fn_names)
    trigger_fn = parse_fn(spec.get('trigger_fn', 'Default'), trigger_names)
    accumulation_mode = getattr(
        AccumulationMode, spec.get('accumulation_mode', 'ACCUMULATING').upper())
    timestamp_combiner = getattr(
        TimestampCombiner,
        spec.get('timestamp_combiner', 'OUTPUT_AT_EOW').upper())

    driver = GeneralTriggerDriver(
        Windowing(window_fn, trigger_fn, accumulation_mode, timestamp_combiner))
    state = InMemoryUnmergedState()
    output = []
    watermark = MIN_TIMESTAMP

    def pane_record(window, wvalue):
      """Builds the dict that 'expect' entries are matched against."""
      return {'window': [window.start, window.end - 1],
              'values': sorted(wvalue.value),
              'timestamp': wvalue.timestamp}

    def fire_timers():
      # ``watermark`` and ``output`` are resolved in the enclosing scope at
      # call time, so the rebinding done by the loop below is visible here.
      to_fire = state.get_and_clear_timers(watermark)
      while to_fire:
        for timer_window, (name, time_domain, t_timestamp) in to_fire:
          for wvalue in driver.process_timer(
              timer_window, name, time_domain, t_timestamp, state):
            # Raises unless the output has exactly one window.
            window, = wvalue.windows
            output.append(pane_record(window, wvalue))
        to_fire = state.get_and_clear_timers(watermark)

    for line in spec['transcript']:

      # dict views cannot be indexed on Python 3; materialize first.
      action, params = list(line.items())[0]

      if action != 'expect':
        # Fail if we have output that was not expected in the transcript.
        self.assertEqual(
            [], output, msg='Unexpected output: %s before %s' % (output, line))

      if action == 'input':
        bundle = [
            WindowedValue(t, t, window_fn.assign(WindowFn.AssignContext(t, t)))
            for t in params]
        output = [pane_record(wvalue.windows[0], wvalue)
                  for wvalue
                  in driver.process_elements(state, bundle, watermark)]
        fire_timers()

      elif action == 'watermark':
        watermark = params
        fire_timers()

      elif action == 'expect':
        for expected_output in params:
          for candidate in output:
            # Compare only the keys the expectation specifies.
            if all(candidate[k] == expected_output[k]
                   for k in candidate if k in expected_output):
              output.remove(candidate)
              break
          else:
            self.fail('Unmatched output %s in %s' % (expected_output, output))

      elif action == 'state':
        # TODO(robertwb): Implement once we support allowed lateness.
        pass

      else:
        self.fail('Unknown action: ' + action)

    # Fail if we have output that was not expected in the transcript.
    self.assertEqual([], output, msg='Unexpected output: %s' % output)