Exemplo n.º 1
0
def run_flink_job():
    """Define a read -> Java transform -> write Flink workflow and print its first job.

    Two examples are registered against fixed CSV paths, a local Flink job
    config is activated, and inside that config the input example is read,
    passed through the ``TestTransformer`` Java class and written to the
    output example.  The workflow is then compiled and the first job in
    ``workflow.jobs`` is printed via ``json_utils.dumps``.
    """
    source_path = "/test1.csv"
    sink_path = "/output_test1.csv"
    # Remove the output file if one is already present at that path.
    if os.path.exists(sink_path):
        os.remove(sink_path)

    source_example = af.create_example(
        name="example_1",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=source_path,
        stream_uri=source_path,
        data_format="csv")

    sink_example = af.create_example(
        name="example_2",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=sink_path,
        stream_uri=sink_path,
        data_format="csv")

    job_config = faf.LocalFlinkJobConfig()
    job_config.flink_home = "/Users/chenwuchao/soft/apache/flink-1.10.0"
    with af.config(job_config):
        input_ddl = """CREATE TABLE input_table (a STRING, b STRING, c STRING) WITH ('connector' = 'filesystem',
                'path' = 'INPUT',
                'format' = 'csv'
                )"""
        # The same dict object is handed over as both the batch and the
        # stream properties.
        source_props: Properties = {
            'ddl': input_ddl,
            'table_name': "input_table",
        }

        output_ddl = """CREATE TABLE output_table (aa STRING, bb STRING) WITH ('connector' = 'filesystem',
                'path' = 'OUTPUT',
                'format' = 'csv'
                )"""
        sink_props: Properties = {
            'ddl': output_ddl,
            'table_name': "output_table",
        }

        source_data = af.read_example(
            example_info=source_example,
            exec_args=ExecuteProperties(batch_properties=source_props,
                                        stream_properties=source_props))

        transformed = af.transform(
            input_data_list=[source_data],
            executor=faf.FlinkJavaExecutor(
                java_class="com.apache.flink.ai.flow.TestTransformer"))

        af.write_example(
            input_data=transformed,
            example_info=sink_example,
            exec_args=ExecuteProperties(batch_properties=sink_props,
                                        stream_properties=sink_props))

    compiled = af.compile_workflow(test_util.get_project_path())
    first_job = list(compiled.jobs.values())[0]
    print(json_utils.dumps(first_job))
    def test_run_pyflink_job(self):
        """Run the PyFlink word-count workflow in 'cluster' local mode and check it.

        The project config is loaded from ``project.yaml`` one directory above
        this file.  A Source -> Transformer -> Sink pipeline of
        ``FlinkPythonExecutor`` steps is declared inside a local Flink job
        config, the project is run, and the value returned by
        ``af.wait_workflow_execution_finished`` is asserted to be 0.
        """
        project_path = os.path.dirname(__file__) + '/../'
        af.set_project_config_file(project_path + "project.yaml")
        input_file = get_parent_dir(
            get_file_dir(__file__)) + '/resources/word_count.txt'
        output_file = get_file_dir(__file__) + "/word_count_output.csv"
        # Remove the output file if one is already present at that path.
        if os.path.exists(output_file):
            os.remove(output_file)

        example_1 = af.create_example(
            name="example_1",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=input_file,
            stream_uri=input_file,
            data_format="csv")

        example_2 = af.create_example(
            name="example_2",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=output_file,
            stream_uri=output_file,
            data_format="csv")
        flink_config = faf.LocalFlinkJobConfig()
        flink_config.local_mode = 'cluster'
        # NOTE(review): hard-coded, machine-specific Flink installation path;
        # it has to exist on the machine running this test.
        flink_config.flink_home = '/Users/chenwuchao/soft/apache/flink-1.11.0/'
        flink_config.set_table_env_create_func(TableEnvCreator())
        with af.config(flink_config):
            input_example = af.read_example(
                example_info=example_1,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Source()))
            processed = af.transform(
                input_data_list=[input_example],
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Transformer()))

            af.write_example(input_data=processed,
                             example_info=example_2,
                             executor=faf.flink_executor.FlinkPythonExecutor(
                                 python_object=Sink()))
        workflow_id = af.run(project_path)
        res = af.wait_workflow_execution_finished(workflow_id)
        # Without this check the test passes regardless of the workflow
        # outcome; the expected value 0 matches the sibling 'python' mode test.
        self.assertEqual(0, res)
    def test_run_pyflink_job(self):
        """Run the PyFlink word-count workflow in 'python' local mode.

        Declares a Source -> Transformer -> Sink pipeline of
        ``FlinkPythonExecutor`` steps between two CSV examples, runs the
        project returned by ``test_util.get_project_path()`` and asserts that
        ``af.wait_workflow_execution_finished`` returns 0.
        """
        parent_dir = get_parent_dir(get_file_dir(__file__))
        source_path = parent_dir + '/resources/word_count.txt'
        sink_path = get_file_dir(__file__) + "/word_count_output.csv"
        # Remove the output file if one is already present at that path.
        if os.path.exists(sink_path):
            os.remove(sink_path)

        source_example = af.create_example(
            name="example_1",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=source_path,
            stream_uri=source_path,
            data_format="csv")

        sink_example = af.create_example(
            name="example_2",
            support_type=af.ExampleSupportType.EXAMPLE_BOTH,
            batch_uri=sink_path,
            stream_uri=sink_path,
            data_format="csv")

        job_config = faf.LocalFlinkJobConfig()
        job_config.local_mode = 'python'
        job_config.set_table_env_create_func(TableEnvCreator())
        with af.config(job_config):
            source_data = af.read_example(
                example_info=source_example,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Source()))

            transformed = af.transform(
                input_data_list=[source_data],
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Transformer()))

            af.write_example(
                input_data=transformed,
                example_info=sink_example,
                executor=faf.flink_executor.FlinkPythonExecutor(
                    python_object=Sink()))

        execution_id = af.run(test_util.get_project_path())
        result = af.wait_workflow_execution_finished(execution_id)
        self.assertEqual(0, result)
def run_flink_predict_job():
    """Define a read -> Java predict -> write Flink workflow and print its first job.

    Two examples are registered against fixed CSV paths.  Inside a local
    Flink job config the input example is read, fed to ``af.predict`` with a
    ``saved_model`` model meta / version and the ``TestPredict`` Java class,
    and the prediction is written to the output example.  The workflow is
    then compiled and the first job in ``workflow.jobs`` is printed via
    ``dumps``.
    """
    source_path = "/test1.csv"
    sink_path = "/output_test2.csv"

    source_example = af.create_example(
        name="example_1",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=source_path,
        stream_uri=source_path,
        data_format="csv")

    sink_example = af.create_example(
        name="example_2",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=sink_path,
        stream_uri=sink_path,
        data_format="csv")

    job_config = faf.LocalFlinkJobConfig()
    job_config.flink_home = ''
    with af.config(job_config):
        input_ddl = """CREATE TABLE input_table (a INT, b INT, c INT) WITH ('connector' = 'filesystem',
                        'path' = 'INPUT',
                        'format' = 'csv'
                        )"""
        # The same dict object is handed over as both the batch and the
        # stream properties.
        source_props: Properties = {
            'ddl': input_ddl,
            'table_name': "input_table",
        }

        output_ddl = """CREATE TABLE output_table (aa INT, cc INT) WITH ('connector' = 'filesystem',
                        'path' = 'OUTPUT',
                        'format' = 'csv'
                        )"""
        sink_props: Properties = {
            'ddl': output_ddl,
            'table_name': "output_table",
        }

        source_data = af.read_example(
            example_info=source_example,
            exec_args=ExecuteArgs(batch_properties=source_props,
                                  stream_properties=source_props))

        model = af.ModelMeta(name="test", model_type="saved_model")
        version = af.ModelVersionMeta(
            version="11111",
            model_path="./tmp/saved_model/",
            model_metric="./tmp/saved_model/",
            model_id=0)

        predicted = af.predict(
            input_data_list=[source_data],
            model_info=model,
            model_version_info=version,
            executor=faf.flink_executor.FlinkJavaExecutor(
                java_class="com.apache.flink.ai.flow.TestPredict"))

        af.write_example(
            input_data=predicted,
            example_info=sink_example,
            exec_args=ExecuteArgs(batch_properties=sink_props,
                                  stream_properties=sink_props))

    # The returned graph is not used below; the call itself is kept.
    af.default_graph()
    compiled = af.compile_workflow(project_path=test_util.get_project_path())
    first_job = list(compiled.jobs.values())[0]
    print(dumps(first_job))