Example no. 1
# Imports as used by the PyFlink releases that still ship FlinkKafkaConsumer;
# the module paths may differ in newer releases.
from pyflink.common.serialization import JsonRowDeserializationSchema
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors import FlinkKafkaConsumer


def read_from_kafka(env):
    deserialization_schema = JsonRowDeserializationSchema.builder() \
        .type_info(Types.ROW([Types.INT(), Types.STRING()])) \
        .build()

    kafka_consumer = FlinkKafkaConsumer(
        topics='test_csv_topic',
        deserialization_schema=deserialization_schema,
        properties={'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group_1'}
    )
    kafka_consumer.set_start_from_earliest()

    env.add_source(kafka_consumer).print()
    env.execute()
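A minimal driver for this example might look like the sketch below. It assumes a Kafka broker on localhost:9092 and a locally available flink-sql-connector-kafka jar; the jar path is a placeholder, not an exact artifact name.

from pyflink.datastream import StreamExecutionEnvironment

env = StreamExecutionEnvironment.get_execution_environment()
# The Kafka connector classes ship in a separate jar that has to be put on the
# pipeline classpath; the file name below is illustrative only.
env.add_jars("file:///path/to/flink-sql-connector-kafka.jar")
read_from_kafka(env)
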
Example no. 2
    def test_add_classpaths(self):
        # find kafka connector jars
        flink_source_root = _find_flink_source_root()
        jars_abs_path = flink_source_root + '/flink-connectors/flink-sql-connector-kafka'
        specific_jars = glob.glob(jars_abs_path + '/target/flink*.jar')
        specific_jars = [
            'file://' + specific_jar for specific_jar in specific_jars
        ]

        self.env.add_classpaths(*specific_jars)
        source_topic = 'test_source_topic'
        props = {
            'bootstrap.servers': 'localhost:9092',
            'group.id': 'test_group'
        }
        type_info = Types.ROW([Types.INT(), Types.STRING()])

        # Test for kafka consumer
        deserialization_schema = JsonRowDeserializationSchema.builder() \
            .type_info(type_info=type_info).build()

        # It will raise a ClassNotFoundException if the kafka connector is not added to
        # the pipeline classpaths.
        kafka_consumer = FlinkKafkaConsumer(source_topic,
                                            deserialization_schema, props)
        self.env.add_source(kafka_consumer).print()
        self.env.get_execution_plan()
Example no. 3
# As above, the import paths match the PyFlink releases that ship FlinkKafkaConsumer;
# they may differ in newer releases.
from pyflink.common.serialization import AvroRowDeserializationSchema
from pyflink.datastream.connectors import FlinkKafkaConsumer


def read_from_kafka(env):
    deserialization_schema = AvroRowDeserializationSchema(
        avro_schema_string="""
            {
                "type": "record",
                "name": "TestRecord",
                "fields": [
                    {"name": "id", "type": "int"},
                    {"name": "name", "type": "string"}
                ]
            }""")

    kafka_consumer = FlinkKafkaConsumer(
        topics='test_avro_topic',
        deserialization_schema=deserialization_schema,
        properties={
            'bootstrap.servers': 'localhost:9092',
            'group.id': 'test_group_1'
        })
    kafka_consumer.set_start_from_earliest()

    env.add_source(kafka_consumer).print()
    env.execute()
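The Avro variant can be driven the same way, with the additional requirement that Flink's Avro format support is on the classpath alongside the Kafka connector. The sketch below is illustrative; the jar names are placeholders and the exact artifacts depend on the Flink version in use.

from pyflink.datastream import StreamExecutionEnvironment

env = StreamExecutionEnvironment.get_execution_environment()
# Both the Kafka connector jar and the Avro format jar are assumed to exist at
# these placeholder paths.
env.add_jars("file:///path/to/flink-sql-connector-kafka.jar",
             "file:///path/to/flink-sql-avro.jar")
read_from_kafka(env)
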
Example no. 4
    def test_legacy_kafka_connector(self):
        source_topic = 'test_source_topic'
        sink_topic = 'test_sink_topic'
        props = {
            'bootstrap.servers': 'localhost:9092',
            'group.id': 'test_group'
        }
        type_info = Types.ROW([Types.INT(), Types.STRING()])

        # Test for kafka consumer
        deserialization_schema = JsonRowDeserializationSchema.builder() \
            .type_info(type_info=type_info).build()

        flink_kafka_consumer = FlinkKafkaConsumer(source_topic,
                                                  deserialization_schema,
                                                  props)
        flink_kafka_consumer.set_start_from_earliest()
        flink_kafka_consumer.set_commit_offsets_on_checkpoints(True)

        j_properties = get_field_value(
            flink_kafka_consumer.get_java_function(), 'properties')
        self.assertEqual('localhost:9092',
                         j_properties.getProperty('bootstrap.servers'))
        self.assertEqual('test_group', j_properties.getProperty('group.id'))
        self.assertTrue(
            get_field_value(flink_kafka_consumer.get_java_function(),
                            'enableCommitOnCheckpoints'))
        j_start_up_mode = get_field_value(
            flink_kafka_consumer.get_java_function(), 'startupMode')

        j_deserializer = get_field_value(
            flink_kafka_consumer.get_java_function(), 'deserializer')
        j_deserialize_type_info = invoke_java_object_method(
            j_deserializer, "getProducedType")
        deserialize_type_info = typeinfo._from_java_type(
            j_deserialize_type_info)
        self.assertTrue(deserialize_type_info == type_info)
        self.assertTrue(
            j_start_up_mode.equals(
                get_gateway().jvm.org.apache.flink.streaming.connectors.kafka.
                config.StartupMode.EARLIEST))
        j_topic_desc = get_field_value(
            flink_kafka_consumer.get_java_function(), 'topicsDescriptor')
        j_topics = invoke_java_object_method(j_topic_desc, 'getFixedTopics')
        self.assertEqual(['test_source_topic'], list(j_topics))

        # Test for kafka producer
        serialization_schema = JsonRowSerializationSchema.builder().with_type_info(type_info) \
            .build()
        flink_kafka_producer = FlinkKafkaProducer(sink_topic,
                                                  serialization_schema, props)
        flink_kafka_producer.set_write_timestamp_to_kafka(False)

        j_producer_config = get_field_value(
            flink_kafka_producer.get_java_function(), 'producerConfig')
        self.assertEqual('localhost:9092',
                         j_producer_config.getProperty('bootstrap.servers'))
        self.assertEqual('test_group',
                         j_producer_config.getProperty('group.id'))
        self.assertFalse(
            get_field_value(flink_kafka_producer.get_java_function(),
                            'writeTimestampToKafka'))