def __init__(self, sdf, args_for_invoker, kwargs_for_invoker):
    """Store the DoFn, create its state tags and build its invoker.

    Args:
      sdf: the DoFn to wrap; it is handed to ``DoFnSignature``
        (presumably a splittable DoFn, going by the name -- confirm
        against the enclosing class).
      args_for_invoker: forwarded to the invoker as ``input_args``.
      kwargs_for_invoker: forwarded to the invoker as ``input_kwargs``.
    """
    self.sdf = sdf

    # One value-state tag per piece of state, keyed by the tag name.
    self._element_tag = _ValueStateTag('element')
    self._restriction_tag = _ValueStateTag('restriction')
    self.watermark_hold_tag = _ValueStateTag('watermark_hold')

    # Not created here; starts out unset.
    self._process_element_invoker = None

    # The context handed to the invoker is only a placeholder, as its
    # name says.
    signature = DoFnSignature(self.sdf)
    placeholder_context = DoFnContext('unused_context')
    self.sdf_invoker = DoFnInvoker.create_invoker(
        signature,
        context=placeholder_context,
        input_args=args_for_invoker,
        input_kwargs=kwargs_for_invoker)

    # Not created here either; starts out unset.
    self._step_context = None
Пример #2
0
  def __init__(self, sdf, args_for_invoker, kwargs_for_invoker):
    """Record the DoFn, set up its state tags and create its invoker.

    Args:
      sdf: the DoFn to wrap; it is handed to ``DoFnSignature``
        (presumably a splittable DoFn, going by the name -- confirm
        against the enclosing class).
      args_for_invoker: forwarded to the invoker as ``input_args``.
      kwargs_for_invoker: forwarded to the invoker as ``input_kwargs``.
    """
    self.sdf = sdf

    # One value-state tag per piece of state, keyed by the tag name.
    self._element_tag = _ValueStateTag('element')
    self._restriction_tag = _ValueStateTag('restriction')
    self.watermark_hold_tag = _ValueStateTag('watermark_hold')

    # Not created here; starts out unset.
    self._process_element_invoker = None

    # The context handed to the invoker is only a placeholder, as its
    # name says.
    signature = DoFnSignature(self.sdf)
    placeholder_context = DoFnContext('unused_context')
    self.sdf_invoker = DoFnInvoker.create_invoker(
        signature,
        context=placeholder_context,
        input_args=args_for_invoker,
        input_kwargs=kwargs_for_invoker)

    # Not created here either; starts out unset.
    self._step_context = None
Пример #3
0
class _WatermarkControllerEvaluator(_TransformEvaluator):
    """TransformEvaluator for the _WatermarkController transform.

    This is used to enable multiple output watermarks for the TestStream.
    The current watermark is kept in a per-transform state object so that it
    survives between evaluator instances, and is reported as a watermark hold
    at the end of every bundle.
    """

    # The state tag used to store the watermark.
    WATERMARK_TAG = _ValueStateTag(
        '_WatermarkControllerEvaluator_Watermark_Tag')

    def __init__(self, evaluation_context, applied_ptransform,
                 input_committed_bundle, side_inputs):
        """Initializes the evaluator and loads (or creates) its state.

        Args:
          evaluation_context: forwarded to the base class; also the source
            of the per-transform keyed states and of new output bundles.
          applied_ptransform: the applied transform being evaluated.
          input_committed_bundle: forwarded to the base class.
          side_inputs: must be empty; this transform takes no side inputs.
        """
        assert not side_inputs
        self.transform = applied_ptransform.transform
        super(_WatermarkControllerEvaluator,
              self).__init__(evaluation_context, applied_ptransform,
                             input_committed_bundle, side_inputs)
        self._init_state()

    def _init_state(self):
        """Gets and sets the initial state.

        This is used to keep track of the watermark hold between calls. The
        state lives in the evaluation context, keyed by the applied
        transform and then by WATERMARK_TAG; on first use it is created with
        the watermark set to MIN_TIMESTAMP.
        """
        transform_states = self._evaluation_context._transform_keyed_states
        state = transform_states[self._applied_ptransform]
        if self.WATERMARK_TAG not in state:
            watermark_state = InMemoryUnmergedState()
            watermark_state.set_global_state(self.WATERMARK_TAG, MIN_TIMESTAMP)
            state[self.WATERMARK_TAG] = watermark_state
        self._state = state[self.WATERMARK_TAG]

    @property
    def _watermark(self):
        """The watermark currently stored in the persistent state."""
        return self._state.get_global_state(self.WATERMARK_TAG)

    @_watermark.setter
    def _watermark(self, watermark):
        self._state.set_global_state(self.WATERMARK_TAG, watermark)

    def start_bundle(self):
        """Resets the list of output bundles produced for this input."""
        self.bundles = []

    def process_element(self, element):
        """Applies a single TestStream event.

        A WatermarkEvent updates the stored watermark. An ElementEvent emits
        its timestamped values into a new bundle on the main output. Any
        other event type is ignored.

        Args:
          element: the input element; its ``value`` is the event to apply.
        """
        # In order to keep the order of the elements between the script and what
        # flows through the pipeline the same, emit the elements here.
        event = element.value
        if isinstance(event, WatermarkEvent):
            self._watermark = event.new_watermark
        elif isinstance(event, ElementEvent):
            main_output = list(self._outputs)[0]
            bundle = self._evaluation_context.create_bundle(main_output)
            for tv in event.timestamped_values:
                # Unreify the value into the correct window. A value that
                # cannot be expanded into WindowedValue keyword arguments is
                # a plain value and goes into the global window instead.
                # Only the construction is guarded, so a TypeError raised
                # while emitting is not mistaken for "not a reified value".
                try:
                    windowed_value = WindowedValue(**tv.value)
                except TypeError:
                    windowed_value = GlobalWindows.windowed_value(
                        tv.value, timestamp=tv.timestamp)
                bundle.output(windowed_value)
            self.bundles.append(bundle)

    def finish_bundle(self):
        """Returns the bundle's result with the watermark as a hold.

        Returns:
          A TransformResult carrying the output bundles collected since
          start_bundle and a hold mapping ``None`` to the stored watermark.
        """
        # The watermark hold we set here is the way we allow the TestStream events
        # to control the output watermark.
        return TransformResult(self, self.bundles, [], None,
                               {None: self._watermark})