def input_output_table():
        """Train through a CSV-backed table and write the result to a CSV sink.

        Registers ``input.csv`` as the table ``source``, passes it to ``train``
        together with a ``TFConfig`` that points at ``input_output.py`` /
        ``map_func``, and inserts the table returned by ``train`` into the
        ``table_row_sink`` CSV sink (opened with ``WriteMode.OVERWRITE``).
        If the executed statement set exposes a job client, the job execution
        result is requested and ``.result()`` is called on it.

        Every file location is built from the current working directory.
        """
        exec_env = StreamExecutionEnvironment.get_execution_environment()
        t_env = StreamTableEnvironment.create(exec_env)
        stmt_set = t_env.create_statement_set()

        work_num, ps_num = 2, 1
        cwd = os.getcwd()
        script_path = cwd + "/../../src/test/python/input_output.py"

        # One coding class and one type list are used for both directions.
        row_csv_coding = "org.flinkextended.flink.ml.operator.coding.RowCSVCoding"
        csv_types = ",".join(
            ["INT_32", "INT_64", "FLOAT_32", "FLOAT_64", "STRING"])
        properties = {
            MLCONSTANTS.ENCODING_CLASS: row_csv_coding,
            MLCONSTANTS.DECODING_CLASS: row_csv_coding,
            "sys:csv_encode_types": csv_types,
            "sys:csv_decode_types": csv_types,
            MLCONSTANTS.PYTHON_VERSION: "3.7",
        }

        csv_in = cwd + "/../../src/test/resources/input.csv"
        csv_out = cwd + "/../../src/test/resources/output.csv"

        def columns():
            # A fresh list per consumer (source, schema, sink).
            return ["a", "b", "c", "d", "e"]

        def column_types():
            # Fresh DataType objects per consumer, one per column above.
            return [
                DataTypes.INT(),
                DataTypes.BIGINT(),
                DataTypes.FLOAT(),
                DataTypes.DOUBLE(),
                DataTypes.STRING(),
            ]

        csv_source = CsvTableSource(csv_in, columns(), column_types())
        t_env.register_table_source("source", csv_source)
        source_table = t_env.from_path("source")

        result_schema = TableSchema(columns(), column_types())

        csv_sink = CsvTableSink(columns(),
                                column_types(),
                                csv_out,
                                write_mode=WriteMode.OVERWRITE)
        t_env.register_table_sink("table_row_sink", csv_sink)

        tf_config = TFConfig(work_num, ps_num, properties, script_path,
                             "map_func", None)

        # NOTE(review): an `inference(...)` call with the same argument list
        # was kept commented out here as an alternative to `train(...)`.
        trained_table = train(exec_env, t_env, stmt_set, source_table,
                              tf_config, result_schema)

        stmt_set.add_insert("table_row_sink", trained_table)
        client = stmt_set.execute().get_job_client()
        if client is None:
            return
        client.get_job_execution_result(user_class_loader=None).result()
    def add_train_chief_alone_table():
        """Run ``train`` on ``add.py`` with ``TF_IS_CHIEF_ALONE`` set to "true".

        No input table and no output schema are supplied (both are ``None``).
        The statement set is executed afterwards and, if a job client is
        returned, ``.result()`` is called on the job execution result.
        """
        exec_env = StreamExecutionEnvironment.get_execution_environment()
        t_env = StreamTableEnvironment.create(exec_env)
        stmt_set = t_env.create_statement_set()

        # Script location is built from the current working directory.
        script_path = os.getcwd() + "/../../src/test/python/add.py"
        properties = {
            TFCONSTANS.TF_IS_CHIEF_ALONE: "true",
            MLCONSTANTS.PYTHON_VERSION: "3.7",
        }

        # Positional TFConfig arguments: work_num=2, ps_num=1, properties,
        # python file, entry function name, env_path (None).
        tf_config = TFConfig(2, 1, properties, script_path, "map_func", None)

        # NOTE(review): an `inference(...)` call with the same argument list
        # was kept commented out here as an alternative to `train(...)`.
        train(exec_env, t_env, stmt_set, None, tf_config, None)

        client = stmt_set.execute().get_job_client()
        if client is None:
            return
        client.get_job_execution_result(user_class_loader=None).result()
 def get_tf_config() -> TFConfig:
     """Build the ``TFConfig`` for ``add_one.py`` / ``flink_stream_train``.

     The script is looked up next to this module. The properties select
     ``RowCSVCoding`` for both encoding and decoding and set
     ``sys:csv_encode_types`` to ``STRING``; no decode types are set here.

     Returns:
         A ``TFConfig`` created with work_num=1, ps_num=1 and no env path.
     """
     csv_coding = 'org.flinkextended.flink.ml.operator.coding.RowCSVCoding'

     properties = {}
     # NOTE(review): the Python version is deliberately an empty string here;
     # presumably this defers to a default interpreter - confirm.
     properties[MLCONSTANTS.PYTHON_VERSION] = ''
     properties[MLCONSTANTS.ENCODING_CLASS] = csv_coding
     properties[MLCONSTANTS.DECODING_CLASS] = csv_coding
     properties['sys:csv_encode_types'] = 'STRING'

     script_path = os.path.join(os.path.dirname(__file__), "add_one.py")
     return TFConfig(1, 1, properties, script_path, "flink_stream_train",
                     None)
示例#4
0
    def worker_zero_finish():
        """Run ``train`` on ``worker_0_finish.py`` and execute the job "train".

        Uses work_num=3 and ps_num=2, with neither an input table nor an
        output schema (both ``None``). The job is started through
        ``table_env.execute``.
        """
        exec_env = StreamExecutionEnvironment.get_execution_environment()
        t_env = StreamTableEnvironment.create(exec_env)

        # Script location is built from the current working directory.
        script_path = os.getcwd() + "/../../src/test/python/worker_0_finish.py"
        properties = {MLCONSTANTS.PYTHON_VERSION: '3.7'}

        # Positional TFConfig arguments: work_num, ps_num, properties,
        # python file, entry function name, env_path (None).
        tf_config = TFConfig(3, 2, properties, script_path, "map_func", None)

        # NOTE(review): an `inference(...)` call with the same argument list
        # was kept commented out here as an alternative to `train(...)`.
        train(exec_env, t_env, None, tf_config, None)

        t_env.execute("train")
示例#5
0
    def add_train_chief_alone_table():
        """Run ``train`` on ``add.py`` with ``TF_IS_CHIEF_ALONE`` set to "true".

        No input table and no output schema are supplied (both are ``None``).
        The job is started through ``table_env.execute`` under the name
        "train".
        """
        exec_env = StreamExecutionEnvironment.get_execution_environment()
        t_env = StreamTableEnvironment.create(exec_env)

        # Script location is built from the current working directory.
        script_path = os.getcwd() + "/../../src/test/python/add.py"
        properties = {
            TFCONSTANS.TF_IS_CHIEF_ALONE: "true",
            MLCONSTANTS.PYTHON_VERSION: "3.7",
        }

        # Positional TFConfig arguments: work_num=2, ps_num=1, properties,
        # python file, entry function name, env_path (None).
        tf_config = TFConfig(2, 1, properties, script_path, "map_func", None)

        # NOTE(review): an `inference(...)` call with the same argument list
        # was kept commented out here as an alternative to `train(...)`.
        train(exec_env, t_env, None, tf_config, None)

        t_env.execute("train")
    def worker_zero_finish():
        """Run ``train`` on ``worker_0_finish.py`` through a statement set.

        Uses work_num=3 and ps_num=2, with neither an input table nor an
        output schema (both ``None``). After executing the statement set,
        ``.result()`` is called on the job execution result when a job client
        is available.
        """
        exec_env = StreamExecutionEnvironment.get_execution_environment()
        t_env = StreamTableEnvironment.create(exec_env)
        stmt_set = t_env.create_statement_set()

        # Script location is built from the current working directory.
        script_path = os.getcwd() + "/../../src/test/python/worker_0_finish.py"
        properties = {MLCONSTANTS.PYTHON_VERSION: '3.7'}

        # Positional TFConfig arguments: work_num, ps_num, properties,
        # python file, entry function name, env_path (None).
        tf_config = TFConfig(3, 2, properties, script_path, "map_func", None)

        # NOTE(review): an `inference(...)` call with the same argument list
        # was kept commented out here as an alternative to `train(...)`.
        train(exec_env, t_env, stmt_set, None, tf_config, None)

        client = stmt_set.execute().get_job_client()
        if client is None:
            return
        client.get_job_execution_result(user_class_loader=None).result()
示例#7
0
    def input_output_table():
        """Run ``train`` on ``input_output.py`` with ``input.csv`` as input.

        Registers the CSV file as the table ``source``, scans it, and passes
        the resulting table plus an output schema with the same five columns
        to ``train``. The job is started through ``table_env.execute`` under
        the name "train". No sink is registered in this function.

        Every file location is built from the current working directory.
        """
        exec_env = StreamExecutionEnvironment.get_execution_environment()
        t_env = StreamTableEnvironment.create(exec_env)

        work_num, ps_num = 2, 1
        cwd = os.getcwd()
        script_path = cwd + "/../../src/test/python/input_output.py"

        # One coding class and one type list are used for both directions.
        row_csv_coding = "com.alibaba.flink.ml.operator.coding.RowCSVCoding"
        csv_types = ",".join(
            ["INT_32", "INT_64", "FLOAT_32", "FLOAT_64", "STRING"])
        properties = {
            MLCONSTANTS.ENCODING_CLASS: row_csv_coding,
            MLCONSTANTS.DECODING_CLASS: row_csv_coding,
            "SYS:csv_encode_types": csv_types,
            "SYS:csv_decode_types": csv_types,
            MLCONSTANTS.PYTHON_VERSION: "3.7",
        }

        csv_in = cwd + "/../../src/test/resources/input.csv"

        def columns():
            # A fresh list per consumer (source and schema).
            return ["a", "b", "c", "d", "e"]

        def column_types():
            # NOTE(review): column "b" is DataTypes.INT() although the CSV
            # type list above says INT_64 - confirm this is intended.
            return [
                DataTypes.INT(),
                DataTypes.INT(),
                DataTypes.FLOAT(),
                DataTypes.DOUBLE(),
                DataTypes.STRING(),
            ]

        csv_source = CsvTableSource(csv_in, columns(), column_types())
        t_env.register_table_source("source", csv_source)
        source_table = t_env.scan("source")

        result_schema = TableSchema(columns(), column_types())

        tf_config = TFConfig(work_num, ps_num, properties, script_path,
                             "map_func", None)

        # NOTE(review): an `inference(...)` call with the same argument list
        # was kept commented out here as an alternative to `train(...)`.
        train(exec_env, t_env, source_table, tf_config, result_schema)

        t_env.execute("train")