Пример #1
0
    def open(self, function_context: FunctionContext):
        """
        Create the timer service, window function, trigger context and window context.

        :param function_context: context passed to the per-window data view store that
                                 the window aggregator is opened with.
        """
        backend = self._state_backend
        assigner = self._window_assigner
        aggregator = self._window_aggregator

        timer_service = InternalTimerServiceImpl(backend)
        self._internal_timer_service = timer_service
        aggregator.open(PerWindowStateDataViewStore(function_context, backend))

        # Choose the window process function from the assigner type; only the merging
        # variant is additionally handed the state backend.
        if isinstance(assigner, PanedWindowAssigner):
            window_function = PanedWindowProcessFunction(
                self._allowed_lateness, assigner, aggregator)
        elif isinstance(assigner, MergingWindowAssigner):
            window_function = MergingWindowProcessFunction(
                self._allowed_lateness, assigner, aggregator, backend)
        else:
            window_function = GeneralWindowProcessFunction(
                self._allowed_lateness, assigner, aggregator)
        self._window_function = window_function

        trigger_context = TriggerContext(self._trigger, timer_service, backend)
        self._trigger_context = trigger_context
        trigger_context.open()

        self._window_context = WindowContext(
            self, trigger_context, backend, self._state_value_coder,
            timer_service, assigner.is_event_time())
        window_function.open(self._window_context)
Пример #2
0
class GroupWindowAggFunctionBase(Generic[K, W]):
    """
    Base class for group window aggregate functions.

    Each input row is assigned to windows, folded into the per-window accumulators kept
    in the ``"window_state"`` value state, and a result is emitted whenever the trigger
    fires. Subclasses implement :meth:`_emit_window_result` to produce that result.
    """

    def __init__(self, allowed_lateness: int, key_selector: RowKeySelector,
                 state_backend: RemoteKeyedStateBackend,
                 state_value_coder: Coder, window_assigner: WindowAssigner[W],
                 window_aggregator: NamespaceAggsHandleFunctionBase[W],
                 trigger: Trigger[W], rowtime_index: int, shift_timezone: str):
        """
        :param allowed_lateness: amount added to a window's max timestamp to obtain its
                                 event-time cleanup time.
        :param key_selector: extracts the current key from the values of an input row.
        :param state_backend: keyed state backend holding the window state.
        :param state_value_coder: coder used for the ``"window_state"`` value state.
        :param window_assigner: decides which windows a row belongs to and whether the
                                operator runs in event time.
        :param window_aggregator: aggregator that owns the per-window accumulators.
        :param trigger: trigger deciding when a window result is emitted.
        :param rowtime_index: position of the rowtime field in the row values; only read
                              in event-time mode.
        :param shift_timezone: time zone name used to shift timestamps; ``"UTC"`` means
                               no shifting.
        """
        self._allowed_lateness = allowed_lateness
        self._key_selector = key_selector
        self._state_backend = state_backend
        self._state_value_coder = state_value_coder
        self._window_assigner = window_assigner
        self._window_aggregator = window_aggregator
        self._rowtime_index = rowtime_index
        self._shift_timezone = shift_timezone
        # The members below are created in open().
        self._window_function = None  # type: InternalWindowProcessFunction[K, W]
        self._internal_timer_service = None  # type: InternalTimerServiceImpl
        self._window_context = None  # type: WindowContext
        self._trigger = trigger
        self._trigger_context = None  # type: TriggerContext
        self._window_state = self._state_backend.get_value_state(
            "window_state", state_value_coder)

    def open(self, function_context: FunctionContext):
        """
        Create the timer service, window function, trigger context and window context.

        :param function_context: context passed to the per-window data view store that
                                 the window aggregator is opened with.
        """
        self._internal_timer_service = InternalTimerServiceImpl(
            self._state_backend)
        self._window_aggregator.open(
            PerWindowStateDataViewStore(function_context, self._state_backend))

        # Choose the window process function from the assigner type; only the merging
        # variant is additionally handed the state backend.
        if isinstance(self._window_assigner, PanedWindowAssigner):
            self._window_function = PanedWindowProcessFunction(
                self._allowed_lateness, self._window_assigner,
                self._window_aggregator)
        elif isinstance(self._window_assigner, MergingWindowAssigner):
            self._window_function = MergingWindowProcessFunction(
                self._allowed_lateness, self._window_assigner,
                self._window_aggregator, self._state_backend)
        else:
            self._window_function = GeneralWindowProcessFunction(
                self._allowed_lateness, self._window_assigner,
                self._window_aggregator)
        self._trigger_context = TriggerContext(self._trigger,
                                               self._internal_timer_service,
                                               self._state_backend)
        self._trigger_context.open()
        self._window_context = WindowContext(
            self, self._trigger_context, self._state_backend,
            self._state_value_coder, self._internal_timer_service,
            self._window_assigner.is_event_time())
        self._window_function.open(self._window_context)

    def process_element(self, input_row: Row):
        """
        Fold one input row into its windows and emit results for fired windows.

        :param input_row: the row to accumulate or retract.
        :return: a list with one emitted result per actual window whose trigger fired
                 for this row (possibly empty).
        """
        input_value = input_row._values
        current_key = self._key_selector.get_key(input_value)
        self._state_backend.set_current_key(current_key)
        if self._window_assigner.is_event_time():
            # The rowtime field is a datetime whose wall-clock fields are read as UTC
            # and converted to epoch milliseconds.
            rowtime = input_value[self._rowtime_index]
            seconds = int(
                rowtime.replace(tzinfo=datetime.timezone.utc).timestamp())
            timestamp = seconds * 1000 + rowtime.microsecond // 1000
        else:
            timestamp = self._internal_timer_service.current_processing_time()

        timestamp = self.to_utc_timestamp_mills(timestamp)

        # the windows which the input row should be placed into
        affected_windows = self._window_function.assign_state_namespace(
            input_value, timestamp)
        for window in affected_windows:
            self._window_state.set_current_namespace(window)
            acc = self._window_state.value()  # type: List
            if acc is None:
                acc = self._window_aggregator.create_accumulators()
            self._window_aggregator.set_accumulators(window, acc)

            if input_row._is_accumulate_msg():
                self._window_aggregator.accumulate(input_row)
            else:
                self._window_aggregator.retract(input_row)
            acc = self._window_aggregator.get_accumulators()
            self._window_state.update(acc)

        # the actual windows which the input row belongs to
        actual_windows = self._window_function.assign_actual_windows(
            input_value, timestamp)
        result = []
        for window in actual_windows:
            self._trigger_context.window = window
            trigger_result = self._trigger_context.on_element(
                input_row, timestamp)
            if trigger_result:
                result.append(self._emit_window_result(current_key, window))
            self._register_cleanup_timer(window)
        return result

    def process_watermark(self, watermark: int):
        """Forward the watermark to the internal timer service."""
        self._internal_timer_service.advance_watermark(watermark)

    def on_event_time(self, timer: InternalTimer):
        """
        Handle an event-time timer.

        :param timer: the timer carrying key, namespace (window) and timestamp.
        :return: a list holding the emitted window result if the trigger fired,
                 otherwise an empty list.
        """
        return self._on_timer(timer, True)

    def on_processing_time(self, timer: InternalTimer):
        """
        Handle a processing-time timer.

        :param timer: the timer carrying key, namespace (window) and timestamp.
        :return: a list holding the emitted window result if the trigger fired,
                 otherwise an empty list.
        """
        return self._on_timer(timer, False)

    def _on_timer(self, timer: InternalTimer, is_event_time_timer: bool):
        """Shared timer handling: ask the trigger, emit if it fires, then clean up."""
        timestamp = timer.get_timestamp()
        key = timer.get_key()
        self._state_backend.set_current_key(key)
        window = timer.get_namespace()
        self._trigger_context.window = window
        if is_event_time_timer:
            fired = self._trigger_context.on_event_time(timestamp)
        else:
            fired = self._trigger_context.on_processing_time(timestamp)

        result = []
        if fired:
            result.append(self._emit_window_result(key, window))

        # Window cleanup is only attempted by the timer kind that matches the time
        # domain of the assigner.
        if bool(self._window_assigner.is_event_time()) == is_event_time_timer:
            self._window_function.clean_window_if_needed(window, timestamp)
        return result

    def get_timers(self):
        """
        Yield the keys of the timer service's ``timers`` collection, then clear it.

        This is a generator: the collection is cleared only once all keys have been
        consumed.
        """
        yield from self._internal_timer_service.timers.keys()
        self._internal_timer_service.timers.clear()

    def to_utc_timestamp_mills(self, epoch_mills):
        """
        Shift an epoch timestamp by the configured time zone.

        The instant is rendered as wall-clock time in the shift time zone and that
        wall-clock reading is then interpreted as if it were UTC.

        :param epoch_mills: milliseconds since the Unix epoch.
        :return: ``epoch_mills`` unchanged when the shift time zone is ``"UTC"``,
                 otherwise the shifted milliseconds as an ``int``.
        """
        if self._shift_timezone == "UTC":
            return epoch_mills

        timezone = pytz.timezone(self._shift_timezone)
        local_date_time = datetime.datetime.fromtimestamp(
            epoch_mills / 1000., timezone).replace(tzinfo=None)
        # A literal naive epoch replaces datetime.utcfromtimestamp(0), which is
        # deprecated since Python 3.12.
        epoch = datetime.datetime(1970, 1, 1)
        # Integer timedelta division is exact; truncating total_seconds() * 1000.0
        # could land one millisecond short because of float rounding.
        return (local_date_time - epoch) // datetime.timedelta(milliseconds=1)

    def close(self):
        """Close the window aggregator."""
        self._window_aggregator.close()

    def _register_cleanup_timer(self, window: W):
        """
        Register a timer at the cleanup time of ``window``.

        Nothing is registered when the cleanup time equals ``MAX_LONG_VALUE``.
        """
        cleanup_time = self.cleanup_time(window)
        if cleanup_time == MAX_LONG_VALUE:
            return

        if self._window_assigner.is_event_time():
            self._trigger_context.register_event_time_timer(cleanup_time)
        else:
            self._trigger_context.register_processing_time_timer(cleanup_time)

    def cleanup_time(self, window: W) -> int:
        """
        Return the time at which the state of ``window`` may be cleaned up.

        In event time this is the window's max timestamp plus the allowed lateness,
        floored at 0 and capped at ``MAX_LONG_VALUE``. In processing time it is the
        window's max timestamp, floored at 0.
        """
        max_timestamp = window.max_timestamp()
        if not self._window_assigner.is_event_time():
            return max(0, max_timestamp)

        cleanup_time = max(0, max_timestamp + self._allowed_lateness)
        if cleanup_time < max_timestamp:
            return MAX_LONG_VALUE
        # Python ints never wrap around, so the comparison above cannot detect a sum
        # that exceeds MAX_LONG_VALUE; saturate explicitly so that
        # _register_cleanup_timer still recognises the "no cleanup" value.
        return min(cleanup_time, MAX_LONG_VALUE)

    @abstractmethod
    def _emit_window_result(self, key: List, window: W):
        """
        Produce the result for ``window`` under ``key``.

        Called whenever the trigger fires; the returned value is appended to the
        result list of the calling method.
        """
        pass