示例#1
0
  def __init__(self, sdf, args_for_invoker, kwargs_for_invoker):
    """Keep the wrapped object and build the invoker that will call it.

    Args:
      sdf: object stored as ``self.sdf`` and wrapped in a ``DoFnSignature``
        (presumably a splittable DoFn, judging by the name -- confirm).
      args_for_invoker: handed to the invoker as ``input_args``.
      kwargs_for_invoker: handed to the invoker as ``input_kwargs``.
    """
    self.sdf = sdf

    # One read-modify-write tag per named slot, created in this order.
    element_tag, restriction_tag, hold_tag = (
        _ReadModifyWriteStateTag(label)
        for label in ('element', 'restriction', 'watermark_hold'))
    self._element_tag = element_tag
    self._restriction_tag = restriction_tag
    self.watermark_hold_tag = hold_tag

    # Not created here; starts out unset.
    self._process_element_invoker = None

    output_processor = _OutputProcessor()
    self._output_processor = output_processor

    # The invoker shares the output processor kept on this instance.
    signature = DoFnSignature(self.sdf)
    placeholder_context = DoFnContext('unused_context')
    self.sdf_invoker = DoFnInvoker.create_invoker(
        signature,
        context=placeholder_context,
        output_processor=output_processor,
        input_args=args_for_invoker,
        input_kwargs=kwargs_for_invoker)

    # Not provided at construction time; starts out unset.
    self._step_context = None
    def __init__(self, step_context, dofn, key_coder):
        """Record the inputs and map every user state spec to a state tag.

        Args:
            step_context: stored as ``self.step_context``.
            dofn: object whose state and timer specs are obtained through
                ``userstate.get_dofn_specs``.
            key_coder: stored as ``self.key_coder``.

        Raises:
            ValueError: if a state spec matches none of the handled spec
                types.
        """
        self.step_context = step_context
        self.dofn = dofn
        self.key_coder = key_coder

        specs = userstate.get_dofn_specs(dofn)
        self.all_state_specs, self.all_timer_specs = specs

        # Ordered (spec type(s), tag class) pairs; the first match wins.
        # Bag and combining-value specs are both backed by a list tag.
        tag_classes = (
            (userstate.ReadModifyWriteStateSpec, _ReadModifyWriteStateTag),
            ((userstate.BagStateSpec, userstate.CombiningValueStateSpec),
             _ListStateTag),
            (userstate.SetStateSpec, _SetStateTag),
        )

        self.state_tags = {}
        for spec in self.all_state_specs:
            # Build the key first so a spec without a name fails here,
            # before any type matching is attempted.
            tag_name = 'user/%s' % spec.name
            for spec_types, tag_class in tag_classes:
                if isinstance(spec, spec_types):
                    break
            else:
                raise ValueError('Invalid state spec: %s' % spec)
            self.state_tags[spec] = tag_class(tag_name)

        # Both caches start empty.
        self.cached_states = {}
        self.cached_timers = {}
示例#3
0
class _WatermarkControllerEvaluator(_TransformEvaluator):
    """Evaluator that runs the _WatermarkController transform.

    It exists so that a TestStream can drive multiple output watermarks.
    """

    # Tag under which the watermark value is stored.
    WATERMARK_TAG = _ReadModifyWriteStateTag(
        '_WatermarkControllerEvaluator_Watermark_Tag')

    def __init__(self, evaluation_context, applied_ptransform,
                 input_committed_bundle, side_inputs):
        """Set up the evaluator and make sure watermark state exists.

        ``side_inputs`` must be empty; this is checked with an assertion.
        """
        assert not side_inputs
        self.transform = applied_ptransform.transform
        super().__init__(
            evaluation_context,
            applied_ptransform,
            input_committed_bundle,
            side_inputs,
        )
        self._init_state()

    def _init_state(self):
        """Fetch the stored watermark state, creating it on first use.

        The state lives in the evaluation context, keyed by the applied
        transform, so the watermark hold survives between calls.
        """
        keyed_states = self._evaluation_context._transform_keyed_states
        per_transform = keyed_states[self._applied_ptransform]
        tag = self.WATERMARK_TAG
        if tag not in per_transform:
            # First use for this transform: seed with the minimum timestamp.
            fresh_state = InMemoryUnmergedState()
            fresh_state.set_global_state(tag, MIN_TIMESTAMP)
            per_transform[tag] = fresh_state
        self._state = per_transform[tag]

    @property
    def _watermark(self):
        """Watermark value currently held in the stored state."""
        tag = self.WATERMARK_TAG
        return self._state.get_global_state(tag)

    @_watermark.setter
    def _watermark(self, watermark):
        tag = self.WATERMARK_TAG
        self._state.set_global_state(tag, watermark)

    def start_bundle(self):
        """Reset the list of output bundles collected for this input bundle."""
        self.bundles = []

    def process_element(self, element):
        """Apply one event: update the watermark or emit its elements.

        Elements are emitted here so that their order in the pipeline
        matches the order in which the script produced them. Events that
        are neither a WatermarkEvent nor an ElementEvent are ignored.
        """
        event = element.value

        if isinstance(event, WatermarkEvent):
            self._watermark = event.new_watermark
            return

        if not isinstance(event, ElementEvent):
            return

        first_output = list(self._outputs)[0]
        out_bundle = self._evaluation_context.create_bundle(first_output)
        for timestamped in event.timestamped_values:
            if isinstance(timestamped.value, WindowedValueHolder):
                # Unreify: the holder already carries the windowed value.
                windowed = timestamped.value.windowed_value
            else:
                windowed = GlobalWindows.windowed_value(
                    timestamped.value, timestamp=timestamped.timestamp)
            out_bundle.output(windowed)
        self.bundles.append(out_bundle)

    def finish_bundle(self):
        """Return the collected bundles together with the watermark hold.

        The hold returned here is how TestStream events control the output
        watermark.
        """
        collected = self.bundles
        watermark_hold = {None: self._watermark}
        return TransformResult(self, collected, [], None, watermark_hold)