def test_event_time_dynamic_gap_session_window(self):
    """Exercise an event-time session window that uses a dynamic gap.

    Seven ``('hi', n)`` records are fed through a single-parallelism
    pipeline: timestamps/watermarks are assigned, the stream is keyed by the
    first tuple field, grouped with
    ``EventTimeSessionWindows.with_dynamic_gap`` and reduced by
    ``CountWindowProcessFunction``. The sink is expected to hold exactly two
    rows, compared order-insensitively.
    """
    # Single parallelism, as in every run of this test.
    self.env.set_parallelism(1)

    source = self.env.from_collection(
        [
            ('hi', 1),
            ('hi', 2),
            ('hi', 3),
            ('hi', 4),
            ('hi', 9),
            ('hi', 9),
            ('hi', 15),
        ],
        type_info=Types.TUPLE([Types.STRING(), Types.INT()]),
    )  # type: DataStream

    # Monotonous watermarks with a project-defined timestamp assigner.
    # NOTE(review): presumably the second tuple field is used as the event
    # timestamp -- confirm against SecondColumnTimestampAssigner.
    strategy = WatermarkStrategy.for_monotonous_timestamps()
    strategy = strategy.with_timestamp_assigner(SecondColumnTimestampAssigner())

    timestamped = source.assign_timestamps_and_watermarks(strategy)
    keyed = timestamped.key_by(lambda record: record[0], key_type=Types.STRING())

    # The session gap comes from a project-defined extractor rather than a
    # fixed duration.
    windowed = keyed.window(
        EventTimeSessionWindows.with_dynamic_gap(MySessionWindowTimeGapExtractor())
    )

    # NOTE(review): the four output fields look like (key, window start,
    # window end, element count) -- confirm against CountWindowProcessFunction.
    counted = windowed.process(
        CountWindowProcessFunction(),
        Types.TUPLE([Types.STRING(), Types.LONG(), Types.LONG(), Types.INT()]),
    )
    counted.add_sink(self.test_sink)

    self.env.execute('test_event_time_dynamic_gap_session_window')

    actual = self.test_sink.get_results()
    self.assert_equals_sorted(['(hi,1,8,4)', '(hi,9,30,3)'], actual)
env = StreamExecutionEnvironment.get_execution_environment() # write all the data to one file env.set_parallelism(1) # define the source data_stream = env.from_collection([ ('hi', 1), ('hi', 2), ('hi', 3), ('hi', 4), ('hi', 8), ('hi', 9), ('hi', 15)], type_info=Types.TUPLE([Types.STRING(), Types.INT()])) # define the watermark strategy watermark_strategy = WatermarkStrategy.for_monotonous_timestamps() \ .with_timestamp_assigner(MyTimestampAssigner()) ds = data_stream.assign_timestamps_and_watermarks(watermark_strategy) \ .key_by(lambda x: x[0], key_type=Types.STRING()) \ .window(EventTimeSessionWindows.with_dynamic_gap(MySessionWindowTimeGapExtractor())) \ .process(CountWindowProcessFunction(), Types.TUPLE([Types.STRING(), Types.INT(), Types.INT(), Types.INT()])) # define the sink if output_path is not None: ds.sink_to( sink=FileSink.for_row_format( base_path=output_path, encoder=Encoder.simple_string_encoder()) .with_output_file_config( OutputFileConfig.builder() .with_part_prefix("prefix") .with_part_suffix(".ext") .build()) .with_rolling_policy(RollingPolicy.default_rolling_policy())