def run_flink_job():
    input_file = "/test1.csv"
    output_file = "/output_test1.csv"
    if os.path.exists(output_file):
        os.remove(output_file)

    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")
    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")

    flink_config = faf.LocalFlinkJobConfig()
    flink_config.flink_home = "/Users/chenwuchao/soft/apache/flink-1.10.0"
    with af.config(flink_config):
        # Source table DDL; batch and stream jobs share the same properties.
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a STRING, b STRING, c STRING)
                 WITH ('connector' = 'filesystem',
                       'path' = 'INPUT',
                       'format' = 'csv')"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name
        stream_args_1 = batch_args_1

        # Sink table DDL.
        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa STRING, bb STRING)
                 WITH ('connector' = 'filesystem',
                       'path' = 'OUTPUT',
                       'format' = 'csv')"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        # Source -> transform (Java executor) -> sink.
        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteProperties(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1))
        processed = af.transform(input_data_list=[input_example],
                                 executor=faf.FlinkJavaExecutor(
                                     java_class="com.apache.flink.ai.flow.TestTransformer"))
        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteProperties(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2))

    workflow = af.compile_workflow(test_util.get_project_path())
    print(json_utils.dumps(list(workflow.jobs.values())[0]))
def test_run_pyflink_job(self):
    project_path = os.path.dirname(__file__) + '/../'
    af.set_project_config_file(project_path + "project.yaml")
    input_file = get_parent_dir(get_file_dir(__file__)) + '/resources/word_count.txt'
    output_file = get_file_dir(__file__) + "/word_count_output.csv"
    if os.path.exists(output_file):
        os.remove(output_file)

    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")
    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")

    flink_config = faf.LocalFlinkJobConfig()
    flink_config.local_mode = 'cluster'
    flink_config.flink_home = '/Users/chenwuchao/soft/apache/flink-1.11.0/'
    flink_config.set_table_env_create_func(TableEnvCreator())
    with af.config(flink_config):
        input_example = af.read_example(example_info=example_1,
                                        executor=faf.flink_executor.FlinkPythonExecutor(
                                            python_object=Source()))
        processed = af.transform(input_data_list=[input_example],
                                 executor=faf.flink_executor.FlinkPythonExecutor(
                                     python_object=Transformer()))
        af.write_example(input_data=processed,
                         example_info=example_2,
                         executor=faf.flink_executor.FlinkPythonExecutor(
                             python_object=Sink()))

    workflow_id = af.run(project_path)
    res = af.wait_workflow_execution_finished(workflow_id)
def test_run_pyflink_job(self):
    input_file = get_parent_dir(get_file_dir(__file__)) + '/resources/word_count.txt'
    output_file = get_file_dir(__file__) + "/word_count_output.csv"
    if os.path.exists(output_file):
        os.remove(output_file)

    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")
    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")

    flink_config = faf.LocalFlinkJobConfig()
    flink_config.local_mode = 'python'
    flink_config.set_table_env_create_func(TableEnvCreator())
    with af.config(flink_config):
        input_example = af.read_example(example_info=example_1,
                                        executor=faf.flink_executor.FlinkPythonExecutor(
                                            python_object=Source()))
        processed = af.transform(input_data_list=[input_example],
                                 executor=faf.flink_executor.FlinkPythonExecutor(
                                     python_object=Transformer()))
        af.write_example(input_data=processed,
                         example_info=example_2,
                         executor=faf.flink_executor.FlinkPythonExecutor(
                             python_object=Sink()))

    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)
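
# The Source/Transformer/Sink objects handed to FlinkPythonExecutor above are user-defined
# and not shown in this section. As a rough, hedged illustration only (plain PyFlink, not the
# flink-ai-flow executor API; assumes PyFlink >= 1.13 and caller-supplied paths), the sketch
# below shows the kind of read -> transform -> write pipeline such a job boils down to, using
# the same filesystem/CSV connector style as the DDL in the Java jobs in this section.
def run_plain_pyflink_pipeline(input_path: str, output_path: str):
    from pyflink.table import EnvironmentSettings, TableEnvironment

    # Create a batch table environment (PyFlink >= 1.13 style).
    t_env = TableEnvironment.create(EnvironmentSettings.in_batch_mode())

    # Source table backed by the filesystem connector.
    t_env.execute_sql(f"""
        CREATE TABLE input_table (a STRING, b STRING, c STRING) WITH (
            'connector' = 'filesystem',
            'path' = '{input_path}',
            'format' = 'csv'
        )""")

    # Sink table with two columns, mirroring the output_table DDL used elsewhere in this section.
    t_env.execute_sql(f"""
        CREATE TABLE output_table (aa STRING, bb STRING) WITH (
            'connector' = 'filesystem',
            'path' = '{output_path}',
            'format' = 'csv'
        )""")

    # Trivial projection standing in for the Transformer step; wait() blocks until the job finishes.
    t_env.execute_sql("INSERT INTO output_table SELECT a, b FROM input_table").wait()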
def run_flink_predict_job():
    input_file = "/test1.csv"
    output_file = "/output_test2.csv"

    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")
    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")

    flink_config = faf.LocalFlinkJobConfig()
    flink_config.flink_home = ''
    with af.config(flink_config):
        # Source table DDL; batch and stream jobs share the same properties.
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a INT, b INT, c INT)
                 WITH ('connector' = 'filesystem',
                       'path' = 'INPUT',
                       'format' = 'csv')"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name
        stream_args_1 = batch_args_1

        # Sink table DDL.
        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa INT, cc INT)
                 WITH ('connector' = 'filesystem',
                       'path' = 'OUTPUT',
                       'format' = 'csv')"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        # Source -> predict with a registered model version (Java executor) -> sink.
        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteArgs(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1))
        model_meta = af.ModelMeta(name="test", model_type="saved_model")
        model_version = af.ModelVersionMeta(version="11111",
                                            model_path="./tmp/saved_model/",
                                            model_metric="./tmp/saved_model/",
                                            model_id=0)
        processed = af.predict(input_data_list=[input_example],
                               model_info=model_meta,
                               model_version_info=model_version,
                               executor=faf.flink_executor.FlinkJavaExecutor(
                                   java_class="com.apache.flink.ai.flow.TestPredict"))
        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteArgs(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2))

    g = af.default_graph()
    workflow = af.compile_workflow(project_path=test_util.get_project_path())
    print(dumps(list(workflow.jobs.values())[0]))