def test_es_sink_dynamic(self):
    """
    Build an Elasticsearch 7 sink that uses a dynamic-index emitter and attach it to a stream.

    Verifies that the ``emitter`` field of the underlying Java sink is a
    ``SimpleElasticsearchEmitter``.
    """
    records = [{'name': 'ada', 'id': '1'}, {'name': 'luna', 'id': '2'}]
    ds = self.env.from_collection(
        records, type_info=Types.MAP(Types.STRING(), Types.STRING()))

    dynamic_sink = (
        Elasticsearch7SinkBuilder()
        .set_emitter(ElasticsearchEmitter.dynamic_index('name', 'id'))
        .set_hosts(['localhost:9200'])
        .build()
    )

    emitter_class = 'org.apache.flink.connector.elasticsearch.sink.SimpleElasticsearchEmitter'
    j_emitter = get_field_value(dynamic_sink.get_java_function(), 'emitter')
    self.assertTrue(is_instance_of(j_emitter, emitter_class))

    ds.sink_to(dynamic_sink).name('es dynamic index sink')
def _create_parquet_map_row_and_data() -> Tuple[RowType, RowTypeInfo, List[Row]]:
    """
    Create the fixtures for a row with a single ``map`` column (INT keys, STRING values).

    :return: A tuple of the table row type, the matching named row type information and a
             one-element list holding a sample row.
    """
    int_to_string_map = DataTypes.MAP(DataTypes.INT(), DataTypes.STRING())
    table_row_type = DataTypes.ROW([
        DataTypes.FIELD('map', int_to_string_map),
    ])

    map_type_info = Types.MAP(Types.INT(), Types.STRING())
    stream_row_type_info = Types.ROW_NAMED(['map'], [map_type_info])

    sample_rows = [Row(map={0: 'a', 1: 'b', 2: 'c'})]
    return table_row_type, stream_row_type_info, sample_rows
def __init__(self,
             name: str,
             key_type_info: TypeInformation,
             value_type_info: TypeInformation):
    """
    Create a MapStateDescriptor.

    The key and value type information are combined into a single MAP type information
    that is handed to the parent constructor together with the state name.

    :param name: The name of the state.
    :param key_type_info: The type information of the map keys.
    :param value_type_info: The type information of the map values.
    """
    map_type_info = Types.MAP(key_type_info, value_type_info)
    super(MapStateDescriptor, self).__init__(name, map_type_info)
def __init__(self,
             name: str,
             key_type_info: TypeInformation,
             value_type_info: TypeInformation):
    """
    Create a MapStateDescriptor.

    Both the key and the value type information must be PickledBytesTypeInfo instances;
    the key is validated first, then the value. The two are then combined into a MAP type
    information that is handed to the parent constructor together with the state name.

    :param name: The name of the state.
    :param key_type_info: The type information of the map keys.
    :param value_type_info: The type information of the map values.
    :raises ValueError: If either type information is not a PickledBytesTypeInfo.
    """
    if not isinstance(key_type_info, PickledBytesTypeInfo):
        key_message = (
            "The type information of the key could only be PickledBytesTypeInfo "
            "(created via Types.PICKLED_BYTE_ARRAY()) currently, got %s")
        raise ValueError(key_message % type(key_type_info))

    if not isinstance(value_type_info, PickledBytesTypeInfo):
        value_message = (
            "The type information of the value could only be PickledBytesTypeInfo "
            "(created via Types.PICKLED_BYTE_ARRAY()) currently, got %s")
        raise ValueError(value_message % type(value_type_info))

    map_type_info = Types.MAP(key_type_info, value_type_info)
    super(MapStateDescriptor, self).__init__(name, map_type_info)
def test_from_java_type(self):
    """
    Check that ``_from_java_type`` restores an equal Python type information from the
    Java type information of each supported type.
    """

    def assert_round_trip(type_info):
        # Convert to the Java type information and back, then compare with the original.
        self.assertEqual(type_info, _from_java_type(type_info.get_java_type_info()))

    # Basic types.
    assert_round_trip(Types.INT())
    assert_round_trip(Types.SHORT())
    assert_round_trip(Types.LONG())
    assert_round_trip(Types.FLOAT())
    assert_round_trip(Types.DOUBLE())
    assert_round_trip(Types.CHAR())
    assert_round_trip(Types.BYTE())
    assert_round_trip(Types.BIG_INT())
    assert_round_trip(Types.BIG_DEC())

    # SQL date/time types.
    assert_round_trip(Types.SQL_DATE())
    assert_round_trip(Types.SQL_TIME())
    assert_round_trip(Types.SQL_TIMESTAMP())

    # Composite types.
    assert_round_trip(Types.ROW([Types.INT(), Types.STRING()]))
    assert_round_trip(Types.TUPLE([Types.CHAR(), Types.INT()]))

    # Array types.
    assert_round_trip(Types.PRIMITIVE_ARRAY(Types.INT()))
    assert_round_trip(Types.OBJECT_ARRAY(Types.SQL_DATE()))

    assert_round_trip(Types.PICKLED_BYTE_ARRAY())

    # SQL_DATE is checked a second time here, mirroring the original sequence of checks.
    assert_round_trip(Types.SQL_DATE())

    # Collection types.
    assert_round_trip(Types.MAP(Types.INT(), Types.STRING()))
    assert_round_trip(Types.LIST(Types.INT()))
def test_es_sink(self):
    """
    Configure an Elasticsearch 7 sink through the builder and verify the resulting Java sink.

    Checks the emitter class, the host, the delivery guarantee, the bulk processor settings
    and the network client settings, then attaches the sink to a stream.
    """
    records = [{'name': 'ada', 'id': '1'}, {'name': 'luna', 'id': '2'}]
    ds = self.env.from_collection(
        records, type_info=Types.MAP(Types.STRING(), Types.STRING()))

    es_sink = (
        Elasticsearch7SinkBuilder()
        .set_emitter(ElasticsearchEmitter.static_index('foo', 'id'))
        .set_hosts(['localhost:9200'])
        .set_delivery_guarantee(DeliveryGuarantee.AT_LEAST_ONCE)
        .set_bulk_flush_max_actions(1)
        .set_bulk_flush_max_size_mb(2)
        .set_bulk_flush_interval(1000)
        .set_bulk_flush_backoff_strategy(FlushBackoffType.CONSTANT, 3, 3000)
        .set_connection_username('foo')
        .set_connection_password('bar')
        .set_connection_path_prefix('foo-bar')
        .set_connection_request_timeout(30000)
        .set_connection_timeout(31000)
        .set_socket_timeout(32000)
        .build()
    )

    def java_field(field_name):
        # Read the named field from the Java sink wrapped by es_sink.
        return get_field_value(es_sink.get_java_function(), field_name)

    emitter_class = 'org.apache.flink.connector.elasticsearch.sink.SimpleElasticsearchEmitter'
    self.assertTrue(is_instance_of(java_field('emitter'), emitter_class))

    # The configured host is expected to be reported with an http scheme.
    self.assertEqual(java_field('hosts')[0].toString(), 'http://localhost:9200')
    self.assertEqual(java_field('deliveryGuarantee').toString(), 'at-least-once')

    # Bulk processor settings.
    bulk_config = java_field('buildBulkProcessorConfig')
    self.assertEqual(bulk_config.getBulkFlushMaxActions(), 1)
    self.assertEqual(bulk_config.getBulkFlushMaxMb(), 2)
    self.assertEqual(bulk_config.getBulkFlushInterval(), 1000)
    self.assertEqual(bulk_config.getFlushBackoffType().toString(), 'CONSTANT')
    self.assertEqual(bulk_config.getBulkFlushBackoffRetries(), 3)
    self.assertEqual(bulk_config.getBulkFlushBackOffDelay(), 3000)

    # Network client settings.
    network_config = java_field('networkClientConfig')
    self.assertEqual(network_config.getUsername(), 'foo')
    self.assertEqual(network_config.getPassword(), 'bar')
    self.assertEqual(network_config.getConnectionRequestTimeout(), 30000)
    self.assertEqual(network_config.getConnectionTimeout(), 31000)
    self.assertEqual(network_config.getSocketTimeout(), 32000)
    self.assertEqual(network_config.getConnectionPathPrefix(), 'foo-bar')

    ds.sink_to(es_sink).name('es sink')