def input_output_table():
    """Run the input_output.py TF script over a CSV source table and write
    the transformed rows to a CSV sink, submitted through a statement set.

    Blocks until the Flink job finishes (when a job client is available).
    """
    stream_env = StreamExecutionEnvironment.get_execution_environment()
    table_env = StreamTableEnvironment.create(stream_env)
    statement_set = table_env.create_statement_set()

    work_num = 2
    ps_num = 1
    python_file = os.getcwd() + "/../../src/test/python/input_output.py"
    func = "map_func"
    env_path = None

    # Rows cross the Flink <-> TF boundary encoded as CSV; the type spec
    # below must line up with the 5-column schema registered further down.
    codec = "org.flinkextended.flink.ml.operator.coding.RowCSVCoding"
    type_spec = ",".join(["INT_32", "INT_64", "FLOAT_32", "FLOAT_64", "STRING"])
    prop = {
        MLCONSTANTS.ENCODING_CLASS: codec,
        MLCONSTANTS.DECODING_CLASS: codec,
        "sys:csv_encode_types": type_spec,
        "sys:csv_decode_types": type_spec,
        MLCONSTANTS.PYTHON_VERSION: "3.7",
    }

    source_file = os.getcwd() + "/../../src/test/resources/input.csv"
    sink_file = os.getcwd() + "/../../src/test/resources/output.csv"

    # Source, sink, and output schema all share the same 5-column layout.
    field_names = ["a", "b", "c", "d", "e"]
    field_types = [
        DataTypes.INT(),
        DataTypes.BIGINT(),
        DataTypes.FLOAT(),
        DataTypes.DOUBLE(),
        DataTypes.STRING(),
    ]

    table_env.register_table_source(
        "source", CsvTableSource(source_file, field_names, field_types))
    input_tb = table_env.from_path("source")
    output_schema = TableSchema(field_names, field_types)

    sink = CsvTableSink(field_names, field_types, sink_file,
                        write_mode=WriteMode.OVERWRITE)
    table_env.register_table_sink("table_row_sink", sink)

    tf_config = TFConfig(work_num, ps_num, prop, python_file, func, env_path)
    output_table = train(stream_env, table_env, statement_set, input_tb,
                         tf_config, output_schema)
    # output_table = inference(stream_env, table_env, statement_set, input_tb,
    #                          tf_config, output_schema)

    statement_set.add_insert("table_row_sink", output_table)
    job_client = statement_set.execute().get_job_client()
    if job_client is not None:
        job_client.get_job_execution_result(user_class_loader=None).result()
def add_train_chief_alone_table():
    """Launch the add.py TF job with a chief-alone node and no input/output
    table, submitted through a statement set; waits for job completion.

    NOTE(review): a later function in this file reuses this exact name and
    will shadow this definition at import time — confirm which is intended.
    """
    stream_env = StreamExecutionEnvironment.get_execution_environment()
    table_env = StreamTableEnvironment.create(stream_env)
    statement_set = table_env.create_statement_set()

    tf_config = TFConfig(
        2,  # work_num
        1,  # ps_num
        {
            TFCONSTANS.TF_IS_CHIEF_ALONE: "true",
            MLCONSTANTS.PYTHON_VERSION: "3.7",
        },
        os.getcwd() + "/../../src/test/python/add.py",
        "map_func",
        None,  # env_path
    )

    # No input table and no output schema for this job.
    train(stream_env, table_env, statement_set, None, tf_config, None)
    # inference(stream_env, table_env, statement_set, None, tf_config, None)

    job_client = statement_set.execute().get_job_client()
    if job_client is not None:
        job_client.get_job_execution_result(user_class_loader=None).result()
def get_tf_config() -> TFConfig:
    """Build a TFConfig for the add_one.py ``flink_stream_train`` entry
    point: 1 worker, 1 PS, CSV row coding with a single STRING column."""
    csv_coding = "org.flinkextended.flink.ml.operator.coding.RowCSVCoding"
    properties = {
        MLCONSTANTS.PYTHON_VERSION: "",
        MLCONSTANTS.ENCODING_CLASS: csv_coding,
        MLCONSTANTS.DECODING_CLASS: csv_coding,
        "sys:csv_encode_types": "STRING",
    }
    script = os.path.join(os.path.dirname(__file__), "add_one.py")
    return TFConfig(1, 1, properties, script, "flink_stream_train", None)
def worker_zero_finish():
    """Run worker_0_finish.py with 3 workers / 2 PS using the legacy
    ``table_env.execute()`` submission path.

    NOTE(review): this ``train()`` call passes no statement_set, unlike the
    6-argument calls elsewhere in this file — confirm which train() signature
    this file targets. A later duplicate definition of this function also
    shadows this one at import time.
    """
    stream_env = StreamExecutionEnvironment.get_execution_environment()
    table_env = StreamTableEnvironment.create(stream_env)

    python_file = os.getcwd() + "/../../src/test/python/worker_0_finish.py"
    tf_config = TFConfig(
        3,  # work_num
        2,  # ps_num
        {MLCONSTANTS.PYTHON_VERSION: "3.7"},
        python_file,
        "map_func",
        None,  # env_path
    )

    # No input table and no output schema for this job.
    train(stream_env, table_env, None, tf_config, None)
    # inference(stream_env, table_env, None, tf_config, None)
    table_env.execute("train")
def add_train_chief_alone_table():
    """Legacy variant of the chief-alone add.py training job, submitted via
    ``table_env.execute()``.

    NOTE(review): this redefines add_train_chief_alone_table from earlier in
    the file and shadows it at import time; it also calls ``train()`` without
    a statement_set — confirm which variant is intended.
    """
    stream_env = StreamExecutionEnvironment.get_execution_environment()
    table_env = StreamTableEnvironment.create(stream_env)

    props = {
        TFCONSTANS.TF_IS_CHIEF_ALONE: "true",
        MLCONSTANTS.PYTHON_VERSION: "3.7",
    }
    tf_config = TFConfig(
        2,  # work_num
        1,  # ps_num
        props,
        os.getcwd() + "/../../src/test/python/add.py",
        "map_func",
        None,  # env_path
    )

    # No input table and no output schema for this job.
    train(stream_env, table_env, None, tf_config, None)
    # inference(stream_env, table_env, None, tf_config, None)
    table_env.execute("train")
def worker_zero_finish():
    """Run worker_0_finish.py with 3 workers / 2 PS via a statement set,
    blocking until the job completes when a job client is available.

    NOTE(review): redefines worker_zero_finish from earlier in the file and
    shadows it at import time — confirm the duplicate is intentional.
    """
    stream_env = StreamExecutionEnvironment.get_execution_environment()
    table_env = StreamTableEnvironment.create(stream_env)
    statement_set = table_env.create_statement_set()

    script = os.getcwd() + "/../../src/test/python/worker_0_finish.py"
    tf_config = TFConfig(
        3,  # work_num
        2,  # ps_num
        {MLCONSTANTS.PYTHON_VERSION: "3.7"},
        script,
        "map_func",
        None,  # env_path
    )

    # No input table and no output schema for this job.
    train(stream_env, table_env, statement_set, None, tf_config, None)
    # inference(stream_env, table_env, statement_set, None, tf_config, None)

    job_client = statement_set.execute().get_job_client()
    if job_client is not None:
        job_client.get_job_execution_result(user_class_loader=None).result()
def input_output_table():
    """Legacy variant of the input_output.py job using ``table_env.scan()``
    and ``table_env.execute()``.

    NOTE(review): redefines input_output_table from earlier in the file and
    shadows it at import time; it also uses the ``com.alibaba`` coding
    classes, "SYS:"-prefixed property keys, an INT (not BIGINT) second
    column, and a 5-argument ``train()`` call — confirm which variant this
    file should keep.
    """
    stream_env = StreamExecutionEnvironment.get_execution_environment()
    table_env = StreamTableEnvironment.create(stream_env)

    # Rows cross the Flink <-> TF boundary encoded as CSV; the type spec
    # below must line up with the 5-column schema registered further down.
    coding = "com.alibaba.flink.ml.operator.coding.RowCSVCoding"
    type_spec = ",".join(["INT_32", "INT_64", "FLOAT_32", "FLOAT_64", "STRING"])
    props = {
        MLCONSTANTS.ENCODING_CLASS: coding,
        MLCONSTANTS.DECODING_CLASS: coding,
        "SYS:csv_encode_types": type_spec,
        "SYS:csv_decode_types": type_spec,
        MLCONSTANTS.PYTHON_VERSION: "3.7",
    }

    # Source table and output schema share the same 5-column layout.
    names = ["a", "b", "c", "d", "e"]
    types = [
        DataTypes.INT(),
        DataTypes.INT(),
        DataTypes.FLOAT(),
        DataTypes.DOUBLE(),
        DataTypes.STRING(),
    ]

    source_file = os.getcwd() + "/../../src/test/resources/input.csv"
    table_env.register_table_source(
        "source", CsvTableSource(source_file, names, types))
    input_tb = table_env.scan("source")
    output_schema = TableSchema(names, types)

    python_file = os.getcwd() + "/../../src/test/python/input_output.py"
    tf_config = TFConfig(2, 1, props, python_file, "map_func", None)

    train(stream_env, table_env, input_tb, tf_config, output_schema)
    # inference(stream_env, table_env, input_tb, tf_config, output_schema)
    table_env.execute("train")