def setUp(self):
     """Create the streaming environment (parallelism 2) and a test sink."""
     env = StreamExecutionEnvironment.get_execution_environment()
     env.set_parallelism(2)
     self.env = env
     self.test_sink = DataStreamTestSinkFunction()
Example #2
0
 def setUp(self) -> None:
     """Prepare the streaming environment, a test sink, and the file-connector jars."""
     self.env = StreamExecutionEnvironment.get_execution_environment()
     self.test_sink = DataStreamTestSinkFunction()
     # Make the file-connector classes available to the gateway JVM.
     jar_module = '/flink-connectors/flink-connector-files'
     _load_specific_flink_module_jars(jar_module)
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.table import *

from ml_udf import label_encode

# Build a streaming TableEnvironment backed by the Blink planner.
settings = EnvironmentSettings.new_instance().use_blink_planner().build()
exec_env = StreamExecutionEnvironment.get_execution_environment()
t_env = StreamTableEnvironment.create(exec_env, environment_settings=settings)

# Register the Python UDF imported from ml_udf so SQL/Table queries can call
# it by the name "label_encode".
t_env.create_temporary_function("label_encode", label_encode)

# DDL for the `users` source table: thirteen STRING columns read from
# users.csv through the filesystem connector in CSV format.
# Fix: the triple-quoted string opened here was never terminated, which is a
# syntax error — the closing quotes are added after the DDL body.
CREATE_USER_TABLE_DDL = """
CREATE TABLE users (
    user_id STRING,
    source STRING,
    sex_name STRING,
    age_name STRING,
    city_name STRING,
    pic_vip_type STRING,
    lt30 STRING,
    last_pic_app_active_device_type STRING,
    last_pic_app_active_device_model STRING,
    country_name STRING,
    province_name STRING,
    is_encodephone STRING,
    is_wechat STRING
) WITH (
    'connector' = 'filesystem',
    'format' = 'csv',
    'path' = 'users.csv'
)
"""
 def create_new_env():
     """Return a fresh stream execution environment with parallelism 2."""
     new_env = StreamExecutionEnvironment.get_execution_environment()
     new_env.set_parallelism(2)
     return new_env
Example #5
0
 def setUp(self) -> None:
     """Set up the environment, capture the JVM context class loader, and load JDBC jars."""
     self.env = StreamExecutionEnvironment.get_execution_environment()
     # Remember the current JVM thread's context class loader — presumably so
     # it can be restored after the test; confirm against tearDown.
     jvm_thread = get_gateway().jvm.Thread.currentThread()
     self._cxt_clz_loader = jvm_thread.getContextClassLoader()
     _load_specific_flink_module_jars('/flink-connectors/flink-connector-jdbc')
Example #6
0
 def setUp(self) -> None:
     """Create the sink used to collect results and the streaming environment."""
     self.test_sink = DataStreamTestSinkFunction()
     self.env = StreamExecutionEnvironment.get_execution_environment()
Example #7
0
 def setUp(self):
     """Run the base-class setup, then build a parallelism-2 table environment."""
     super(PyFlinkStreamTableTestCase, self).setUp()
     env = StreamExecutionEnvironment.get_execution_environment()
     env.set_parallelism(2)
     self.env = env
     self.t_env = StreamTableEnvironment.create(env)
Example #8
0
def session_time_window_streaming():
    """Streaming job: read JSON records (fields a, b, c, time) from the Kafka
    topic ``user``, group them into 10-minute event-time session windows keyed
    by ``a``, and write each window's ``a`` and ``max(b)`` to a CSV file.

    NOTE(review): uses the legacy descriptor API
    (connect/with_format/with_schema) and ``use_blink_planner``-era methods —
    deprecated in later Flink releases; confirm the target Flink version.
    """
    s_env = StreamExecutionEnvironment.get_execution_environment()
    s_env.set_parallelism(1)
    # Event-time semantics are required for the rowtime-based session window.
    s_env.set_stream_time_characteristic(TimeCharacteristic.EventTime)
    st_env = StreamTableEnvironment.create(s_env)
    result_file = "/tmp/session_time_window_streaming.csv"
    # Drop any output left by a previous run so the CSV sink starts clean.
    if os.path.exists(result_file):
        os.remove(result_file)
    # Source: Kafka 0.11 topic "user", JSON-decoded, with "time" mapped to an
    # event-time attribute "rowtime" using bounded 60 s watermarks.
    st_env \
        .connect(  # declare the external system to connect to
            Kafka()
            .version("0.11")
            .topic("user")
            .start_from_earliest()
            .property("zookeeper.connect", "localhost:2181")
            .property("bootstrap.servers", "localhost:9092")
        ) \
        .with_format(  # declare a format for this system
            Json()
            .fail_on_missing_field(True)
            .json_schema(
                "{"
                "  type: 'object',"
                "  properties: {"
                "    a: {"
                "      type: 'string'"
                "    },"
                "    b: {"
                "      type: 'string'"
                "    },"
                "    c: {"
                "      type: 'string'"
                "    },"
                "    time: {"
                "      type: 'string',"
                "      format: 'date-time'"
                "    }"
                "  }"
                "}"
             )
         ) \
        .with_schema(  # declare the schema of the table
             Schema()
             .field("rowtime", DataTypes.TIMESTAMP())
             .rowtime(
                Rowtime()
                .timestamps_from_field("time")
                .watermarks_periodic_bounded(60000))
             .field("a", DataTypes.STRING())
             .field("b", DataTypes.STRING())
             .field("c", DataTypes.STRING())
         ) \
        .in_append_mode() \
        .register_table_source("source")

    # Sink: two STRING columns written to the CSV file.
    st_env.register_table_sink(
        "result",
        CsvTableSink(
            ["a", "b"],
            [DataTypes.STRING(), DataTypes.STRING()], result_file))

    # 10-minute session windows on the event-time attribute, keyed by "a";
    # emit the key and the max of "b" per session.
    st_env.scan("source").window(Session.with_gap("10.minutes").on("rowtime").alias("w")) \
        .group_by("w, a") \
        .select("a, max(b)").insert_into("result")

    st_env.execute("session time window streaming")
Example #9
0
 def setUp(self):
     """Create the stream execution environment used by the tests."""
     environment = StreamExecutionEnvironment.get_execution_environment()
     self.env = environment
Example #10
0
 def setUp(self) -> None:
     """Create the environment, raise the akka ask timeout, and build a test sink."""
     self.env = StreamExecutionEnvironment.get_execution_environment()
     # Reach into the wrapped Java environment and bump the RPC ask timeout —
     # presumably to avoid timeouts in slow test runs; confirm.
     j_env = self.env._j_stream_execution_environment
     configuration = invoke_java_object_method(j_env, "getConfiguration")
     configuration.setString("akka.ask.timeout", "20 s")
     self.test_sink = DataStreamTestSinkFunction()