def open(self, runtime_context: RuntimeContext):
    """Register the value state used by this function and keep a handle to it.

    A ``ValueStateDescriptor`` named ``"state"`` holding ``Types.FLOAT()``
    values is created with a time-to-live configuration attached, and the
    state obtained from ``runtime_context`` is stored on ``self.state``.

    :param runtime_context: context from which the state handle is obtained.
    """
    # TTL settings: 1 second, update type OnReadAndWrite, background cleanup
    # disabled.
    ttl_config = (
        StateTtlConfig.new_builder(Time.seconds(1))
        .set_update_type(StateTtlConfig.UpdateType.OnReadAndWrite)
        .disable_cleanup_in_background()
        .build()
    )

    descriptor = ValueStateDescriptor("state", Types.FLOAT())
    descriptor.enable_time_to_live(ttl_config)

    self.state = runtime_context.get_state(descriptor)
# Source: a small in-memory collection of (STRING, INT) tuples.
records = [('hi', 1), ('hi', 2), ('hi', 3), ('hi', 4), ('hi', 5),
           ('hi', 8), ('hi', 9), ('hi', 15)]
record_type = Types.TUPLE([Types.STRING(), Types.INT()])
data_stream = env.from_collection(records, type_info=record_type)

# Watermarks: monotonous timestamps, extracted by MyTimestampAssigner.
watermark_strategy = (
    WatermarkStrategy.for_monotonous_timestamps()
    .with_timestamp_assigner(MyTimestampAssigner())
)

# Key by the first tuple field, group into 5 ms tumbling event-time windows,
# and hand each window to CountWindowProcessFunction.
result_type = Types.TUPLE(
    [Types.STRING(), Types.INT(), Types.INT(), Types.INT()])
ds = (
    data_stream.assign_timestamps_and_watermarks(watermark_strategy)
    .key_by(lambda record: record[0], key_type=Types.STRING())
    .window(TumblingEventTimeWindows.of(Time.milliseconds(5)))
    .process(CountWindowProcessFunction(), result_type)
)

# Sink: print to stdout when no output path is given, otherwise write
# row-formatted files under output_path.
if output_path is None:
    print("Printing result to stdout. Use --output to specify output path.")
    ds.print()
else:
    sink_builder = FileSink.for_row_format(
        base_path=output_path,
        encoder=Encoder.simple_string_encoder(),
    )
    file_config = (
        OutputFileConfig.builder()
        .with_part_prefix("prefix")
        .with_part_suffix(".ext")
        .build()
    )
    file_sink = (
        sink_builder.with_output_file_config(file_config)
        .with_rolling_policy(RollingPolicy.default_rolling_policy())
        .build()
    )
    ds.sink_to(sink=file_sink)
# Source: a small in-memory collection of (STRING, INT) tuples.
records = [('hi', 1), ('hi', 2), ('hi', 3), ('hi', 4),
           ('hi', 8), ('hi', 9), ('hi', 15)]
record_type = Types.TUPLE([Types.STRING(), Types.INT()])
data_stream = env.from_collection(records, type_info=record_type)

# Watermarks: monotonous timestamps, extracted by MyTimestampAssigner.
watermark_strategy = (
    WatermarkStrategy.for_monotonous_timestamps()
    .with_timestamp_assigner(MyTimestampAssigner())
)

# Key by the first tuple field, group into event-time session windows with a
# 5 ms gap, and hand each window to CountWindowProcessFunction.
result_type = Types.TUPLE(
    [Types.STRING(), Types.INT(), Types.INT(), Types.INT()])
ds = (
    data_stream.assign_timestamps_and_watermarks(watermark_strategy)
    .key_by(lambda record: record[0], key_type=Types.STRING())
    .window(EventTimeSessionWindows.with_gap(Time.milliseconds(5)))
    .process(CountWindowProcessFunction(), result_type)
)

# Sink: print to stdout when no output path is given, otherwise write
# row-formatted files under output_path.
if output_path is None:
    print("Printing result to stdout. Use --output to specify output path.")
    ds.print()
else:
    sink_builder = FileSink.for_row_format(
        base_path=output_path,
        encoder=Encoder.simple_string_encoder(),
    )
    file_config = (
        OutputFileConfig.builder()
        .with_part_prefix("prefix")
        .with_part_suffix(".ext")
        .build()
    )
    file_sink = (
        sink_builder.with_output_file_config(file_config)
        .with_rolling_policy(RollingPolicy.default_rolling_policy())
        .build()
    )
    ds.sink_to(sink=file_sink)
# Source: a small in-memory collection of (STRING, INT) tuples.
records = [('hi', 1), ('hi', 2), ('hi', 3), ('hi', 4), ('hi', 5),
           ('hi', 8), ('hi', 9), ('hi', 15)]
record_type = Types.TUPLE([Types.STRING(), Types.INT()])
data_stream = env.from_collection(records, type_info=record_type)

# Watermarks: monotonous timestamps, extracted by MyTimestampAssigner.
watermark_strategy = (
    WatermarkStrategy.for_monotonous_timestamps()
    .with_timestamp_assigner(MyTimestampAssigner())
)

# Key by the first tuple field, group into sliding event-time windows built
# from Time.milliseconds(5) and Time.milliseconds(2), and hand each window to
# CountWindowProcessFunction.
window_assigner = SlidingEventTimeWindows.of(
    Time.milliseconds(5), Time.milliseconds(2))
result_type = Types.TUPLE(
    [Types.STRING(), Types.INT(), Types.INT(), Types.INT()])
ds = (
    data_stream.assign_timestamps_and_watermarks(watermark_strategy)
    .key_by(lambda record: record[0], key_type=Types.STRING())
    .window(window_assigner)
    .process(CountWindowProcessFunction(), result_type)
)

# Sink: print to stdout when no output path is given, otherwise write
# row-formatted files under output_path.
if output_path is None:
    print("Printing result to stdout. Use --output to specify output path.")
    ds.print()
else:
    sink_builder = FileSink.for_row_format(
        base_path=output_path,
        encoder=Encoder.simple_string_encoder(),
    )
    file_config = (
        OutputFileConfig.builder()
        .with_part_prefix("prefix")
        .with_part_suffix(".ext")
        .build()
    )
    file_sink = (
        sink_builder.with_output_file_config(file_config)
        .with_rolling_policy(RollingPolicy.default_rolling_policy())
        .build()
    )
    ds.sink_to(sink=file_sink)