Exemplo n.º 1
0
    def test_seq_source(self):
        """Verify that NumberSequenceSource(1, 10) exposes 1 and 10 on the Java side.

        The ``from`` and ``to`` fields declared on the Java
        ``NumberSequenceSource`` class are read reflectively from the object
        returned by ``get_java_function()`` and compared with the bounds
        passed to the Python constructor.
        """
        seq_source = NumberSequenceSource(1, 10)
        java_class = load_java_class(
            "org.apache.flink.api.connector.source.lib.NumberSequenceSource")

        # Each field is looked up with getDeclaredField and made accessible
        # before reading (presumably because the fields are not public).
        for field_name, expected in (("from", 1), ("to", 10)):
            java_field = java_class.getDeclaredField(field_name)
            java_field.setAccessible(True)
            self.assertEqual(expected, java_field.get(seq_source.get_java_function()))
def state_access_demo():
    """Build and run the '11-data_stream_state_access' batch job.

    A ``NumberSequenceSource(1, 10)`` feeds a pipeline that turns every number
    ``n`` into ``Row(n % 4, 1)``, keys by the first field, applies
    ``MyMapFunction``, keys by the first field again, applies
    ``MyKeyedProcessFunction`` and writes the results to a row-format
    ``FileSink``. ``MyMapFunction`` and ``MyKeyedProcessFunction`` are defined
    outside this block.
    """
    env = StreamExecutionEnvironment.get_execution_environment()
    env.set_parallelism(1)
    env.set_runtime_mode(RuntimeExecutionMode.BATCH)

    number_source = NumberSequenceSource(1, 10)

    # Sink: plain string encoding, part files named with prefix 'pre' and
    # suffix 'suf'.
    sink_dir = '/opt/examples/datastream/output/state_access'
    sink_builder = FileSink.for_row_format(sink_dir, Encoder.simple_string_encoder())
    part_file_config = OutputFileConfig.builder() \
        .with_part_prefix('pre') \
        .with_part_suffix('suf') \
        .build()
    result_sink = sink_builder.with_output_file_config(part_file_config).build()

    numbers = env.from_source(
        source=number_source,
        watermark_strategy=WatermarkStrategy.for_monotonous_timestamps(),
        source_name='seq_num_source',
        type_info=Types.LONG())

    # Pair each number with its remainder modulo 4 (the key) and a constant 1.
    bucketed = numbers.map(
        lambda number: Row(number % 4, 1),
        output_type=Types.ROW([Types.LONG(), Types.LONG()]))

    mapped = bucketed.key_by(lambda row: row[0]).map(
        MyMapFunction(),
        output_type=Types.ROW([Types.LONG(), Types.LONG()]))

    processed = mapped.key_by(lambda row: row[0]).process(
        MyKeyedProcessFunction(), Types.LONG())

    processed.sink_to(result_sink)

    env.execute('11-data_stream_state_access')
def batch_seq_num_test():
    """Build and run the '9-data_stream_batch_seq_num' batch job.

    A ``NumberSequenceSource(1, 1000)`` feeds a pipeline that turns every
    number ``n`` into ``Row(n % 4, 1)``, keys by the first field and reduces
    each key by summing the second field. The reduced rows are written to a
    row-format ``FileSink`` under ``/opt/examples/output/batch_seq_num``.

    The job runs in BATCH runtime mode with parallelism 2.
    """
    env = StreamExecutionEnvironment.get_execution_environment()
    env.set_parallelism(2)
    env.set_runtime_mode(RuntimeExecutionMode.BATCH)

    seq_num_source = NumberSequenceSource(1, 1000)

    output_path = '/opt/examples/output/batch_seq_num'
    # Part files are named with prefix 'pre' and suffix 'suf'.
    output_file_config = OutputFileConfig.builder() \
        .with_part_prefix('pre') \
        .with_part_suffix('suf') \
        .build()
    file_sink = FileSink \
        .for_row_format(output_path, Encoder.simple_string_encoder()) \
        .with_output_file_config(output_file_config) \
        .build()

    ds = env.from_source(
        source=seq_num_source,
        watermark_strategy=WatermarkStrategy.for_monotonous_timestamps(),
        # Name the operator after the source it actually wraps: this is a
        # number sequence source, not a file source.
        source_name='seq_num_source',
        type_info=Types.LONG())

    # Key each number by its remainder modulo 4 and sum the constant 1s per key.
    ds.map(lambda a: Row(a % 4, 1), output_type=Types.ROW([Types.LONG(), Types.LONG()])) \
        .key_by(lambda a: a[0]) \
        .reduce(lambda a, b: Row(a[0], a[1] + b[1])) \
        .sink_to(file_sink)

    env.execute('9-data_stream_batch_seq_num')