Example #1
def submit_workflow(workflow_name: Text = None,
                    args: Dict = None) -> WorkflowInfo:
    """
    Submit the ai flow workflow to the scheduler.
    :param workflow_name: The ai flow workflow identify.
    :param args: The arguments of the submit action.
    :return: The result of the submit action.
    """
    call_path = os.path.abspath(sys._getframe(1).f_code.co_filename)
    project_path = os.path.abspath(project_description().project_path)
    # len('/python_codes/') == 14 and len('.py') == 3: strip both to obtain the caller's dotted module path.
    entry_module_path = call_path[len(project_path) + 14:-3].replace('/', '.')
    namespace = project_config().get_project_name()
    translator = get_default_translator()
    workflow = translator.translate(graph=default_graph(),
                                    project_desc=project_description())
    for job in workflow.jobs.values():
        _register_job_meta(workflow_id=workflow.workflow_id, job=job)
    _set_entry_module_path(workflow, entry_module_path)
    _upload_project_package(workflow)
    return proto_to_workflow(get_ai_flow_client().submit_workflow_to_scheduler(
        namespace=namespace,
        workflow_json=json_utils.dumps(workflow),
        workflow_name=workflow_name,
        args=args))
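A minimal call sketch for the function above; the workflow name and args are hypothetical and assume the project configuration and default graph have already been set up:

# Hypothetical usage of submit_workflow(); the name and args are illustrative only.
workflow_info = submit_workflow(workflow_name='demo_workflow',
                                args={'owner': 'demo'})
print(workflow_info)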
Example #2
    def generate_job_resource(self, job: LocalFlinkJob) -> None:
        """
        Generate flink job resource.

        :param job: Local flink job.
        """
        # Generate the job execution config file under <project_path>/temp.
        project_path = job.job_config.project_path
        if project_path is None:
            project_path = "/tmp"
        project_path_temp = project_path + "/temp"
        if not os.path.exists(project_path_temp):
            os.mkdir(project_path_temp)

        # Java jobs: write the job as JSON text and record the absolute config file path.
        if job.job_config.language_type == LanguageType.JAVA:
            execution_config_file = project_path_temp + '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id

            with open(execution_config_file, 'w') as f:
                f.write(dumps(job))
            job.config_file = execution_config_file

        else:
            # Python jobs: serialize the job object; config_file keeps only the bare
            # '/job_execution_config_*' file name.
            execution_config_file = '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id
            real_execution_config_file = project_path_temp + execution_config_file
            with open(real_execution_config_file, 'wb') as f:
                f.write(serialization_utils.serialize(job))
            job.config_file = execution_config_file
Example #3
def run_flink_job():
    input_file = "/test1.csv"
    output_file = "/output_test1.csv"
    if os.path.exists(output_file):
        os.remove(output_file)

    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")

    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    flink_config.flink_home = "/Users/chenwuchao/soft/apache/flink-1.10.0"
    with af.config(flink_config):
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a STRING, b STRING, c STRING) WITH ('connector' = 'filesystem',
                'path' = 'INPUT',
                'format' = 'csv'
                )"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name

        stream_args_1 = batch_args_1

        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa STRING, bb STRING) WITH ('connector' = 'filesystem',
                'path' = 'OUTPUT',
                'format' = 'csv'
                )"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteProperties(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1)
                                        )
        processed = af.transform(input_data_list=[input_example],
                                 executor=faf.FlinkJavaExecutor(
                                     java_class="com.apache.flink.ai.flow.TestTransformer"))

        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteProperties(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2)
                         )

    workflow = af.compile_workflow(test_util.get_project_path())
    print(json_utils.dumps(list(workflow.jobs.values())[0]))
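A small follow-up sketch, assuming workflow.jobs is a plain dict keyed by job name as the .values() calls above suggest; it prints every compiled job instead of only the first:

# Hypothetical inspection loop over all jobs in the compiled workflow.
for job_name, job in workflow.jobs.items():
    print(job_name)
    print(json_utils.dumps(job))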
Example #4
    def submit_workflow(self, ex_workflow: Workflow) -> Optional[int]:
        """
        Submit Workflow.

        :param ex_workflow: The generated workflow.
        :return: Workflow id.
        """
        return self.get_client().submit_workflow(
            json_utils.dumps(ex_workflow))[1]
def validate_engine_type(ai_sub_graph: AISubGraph):
    if len(ai_sub_graph.nodes) <= 0:
        raise Exception("AISubGraph is empty!")
    nodes = list(ai_sub_graph.nodes.values())
    engine_name = nodes[0].properties[ENGINE_NAME]
    for node in ai_sub_graph.nodes.values():
        engine = node.properties[ENGINE_NAME]
        if engine_name != engine:
            raise Exception(
                "AISubGraph engine names do not match: {} != {}, graph: {}".
                format(engine_name, engine, dumps(ai_sub_graph)))
    ai_sub_graph.job_type = engine_name
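The function above only checks that every node carries the same ENGINE_NAME property; a standalone sketch of that idea over plain dictionaries (hypothetical structures, not the real AISubGraph API):

# Hypothetical stand-in for the engine consistency check above.
def engines_consistent(node_properties, engine_key='engine_name'):
    engines = {props[engine_key] for props in node_properties}
    return len(engines) == 1

assert engines_consistent([{'engine_name': 'flink'}, {'engine_name': 'flink'}])
assert not engines_consistent([{'engine_name': 'flink'}, {'engine_name': 'python'}])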
    def generate_code(self, op_index, job: AbstractJob):
        from tempfile import NamedTemporaryFile

        with NamedTemporaryFile(
                mode='w+t',
                prefix=job.job_name,
                suffix='.json',
                dir=job.job_config.project_desc.get_absolute_temp_path(),
                delete=False) as f:
            f.write(json_utils.dumps(job))
            K8S_FLINK = """k8s_flink_{0} = "{2}"\nop_{0} = KubernetesFlinkOperator(task_id='{1}', dag=dag, job_file=k8s_flink_{0})\n"""
            return K8S_FLINK.format(op_index,
                                    job_name_to_task_id(job.job_name), f.name)
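For illustration, rendering the K8S_FLINK template with sample values (the index, task id, and file path are hypothetical) yields an Airflow-style operator definition:

# Hypothetical rendering of the K8S_FLINK template defined above.
K8S_FLINK = """k8s_flink_{0} = "{2}"\nop_{0} = KubernetesFlinkOperator(task_id='{1}', dag=dag, job_file=k8s_flink_{0})\n"""
print(K8S_FLINK.format(0, 'transform_task', '/tmp/project/temp/job_0.json'))
# Output:
# k8s_flink_0 = "/tmp/project/temp/job_0.json"
# op_0 = KubernetesFlinkOperator(task_id='transform_task', dag=dag, job_file=k8s_flink_0)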
    def register_workflow_execution(self, workflow: Workflow):
        exec_name = generate_time_str()
        workflow_meta: WorkflowExecutionMeta = self.get_client(
        ).register_workflow_execution(
            name=exec_name,
            project_id=int(
                workflow.project_desc.project_config.get_project_uuid()),
            execution_state=State.INIT,
            workflow_json=dumps(workflow))
        workflow.workflow_id = workflow_meta.uuid
        workflow.execution_name = exec_name

        # set workflow execution id to job context
        for job in workflow.jobs.values():
            job.job_context.workflow_execution_id = workflow_meta.uuid
    def optimize(graph: AIGraph) -> AIGraph:
        """
        rewrite AIGraph copy the node belong to different sub graph
        :param graph: Default AIGraph
        :return:
        """
        has_circle = True
        while has_circle:
            has_circle, do_split, err_message = DefaultTranslator.compute_cluster_graph(
                graph)
            if has_circle and not do_split:
                raise Exception("{} cluster contains a cycle".format(
                    dumps(err_message)))

        return graph
Example #9
def _upload_project_package(workflow: Workflow):
    """
    Upload the project package.

    :param workflow: The generated workflow.
    """
    project_desc = project_description()
    workflow_json_file = os.path.join(
        project_desc.get_absolute_temp_path(),
        project_desc.project_config.get_project_uuid() + "_workflow.json")
    with open(workflow_json_file, 'w') as f:
        f.write(json_utils.dumps(workflow))
    blob_manager = BlobManagerFactory.get_blob_manager(
        project_desc.project_config['blob'])
    uploaded_project_path = blob_manager.upload_blob(str(workflow.workflow_id),
                                                     project_desc.project_path)
    project_desc.project_config.set_uploaded_project_path(
        uploaded_project_path)
    for job in workflow.jobs.values():
        job.job_config.project_path = uploaded_project_path
Example #10
    def upload_project_package(self, workflow: Workflow):
        """
        Upload the project package.

        :param workflow: The generated workflow.
        """
        # TODO: add support for updating the project URI.
        with open(
                self.project_desc.get_absolute_temp_path() + "/" +
                self.project_desc.project_config.get_project_uuid() +
                "_workflow.json", 'w') as f:
            f.write(json_utils.dumps(workflow))
        blob_manager = BlobManagerFactory.get_blob_manager(
            self.project_desc.project_config)
        uploaded_project_path = blob_manager.upload_blob(
            str(workflow.workflow_id), self.project_desc.project_path)
        self.project_desc.project_config[
            'uploaded_project_path'] = uploaded_project_path
        for job in workflow.jobs.values():
            job.job_config.project_path = uploaded_project_path
            job.job_config.project_local_path = self.project_desc.project_path
Example #11
def generate_workflow_config():
    workflow_config = WorkFlowConfig()
    workflow_config.add_job_config(
        config_key="global_config_key",
        job_config=af.BaseJobConfig(
            platform="local",
            engine="python",
            properties={"common_key": "common_value"}))
    workflow_config.add_job_config(
        config_key="test_job",
        job_config=af.BaseJobConfig(
            platform=None,
            engine=None,
            properties={"job_key": "job_value"}))
    workflow_config.add_job_config(
        config_key="test_job_1",
        job_config=af.BaseJobConfig(
            platform='kubernetes',
            engine='flink',
            properties={"job_key_1": "job_value_1"}))
    with open(config_file, 'w') as f:
        f.write(json_utils.dumps(workflow_config))
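A hedged round-trip sketch, assuming config_file points at the file written above and that json_utils.loads (used in the test_json examples) restores the object:

# Hypothetical round trip of the generated workflow config.
with open(config_file) as f:
    restored_config: WorkFlowConfig = json_utils.loads(f.read())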
    def generate_job_resource(self, job: KubernetesFlinkJob) -> None:
        """
        Generate kubernetes flink job resource.

        :param job: Kubernetes flink job.
        """
        # Generate the job execution config file under <project_path>/temp.
        project_path = job.job_config.project_path
        if project_path is None:
            project_path = "/tmp"
        project_path_temp = project_path + "/temp"

        if not os.path.exists(project_path_temp):
            os.mkdir(project_path_temp)
        execution_config_file = 'job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id
        real_execution_config_file = project_path_temp + '/' + execution_config_file

        if job.job_config.language_type == LanguageType.JAVA:
            with open(real_execution_config_file, 'w') as f:
                f.write(dumps(job))
            job.config_file = Path(real_execution_config_file).name

        else:
            with open(real_execution_config_file, 'wb') as f:
                f.write(serialization_utils.serialize(job))
            job.config_file = execution_config_file

        # generate python_codes.zip
        python_codes = '{}/python_codes'.format(project_path)

        if os.path.exists(python_codes):
            zip_dir = '{}/zip'.format(project_path)
            if os.path.exists(zip_dir):
                shutil.rmtree(zip_dir)
            shutil.copytree(python_codes, zip_dir + '/python_codes')
            shutil.make_archive(python_codes, 'zip', zip_dir)
Example #13
    def test_json(self):
        node = BaseNode(name="a")
        json_text = json_utils.dumps(node)
        print(json_text)
        n2: BaseNode = json_utils.loads(json_text)
        self.assertEqual(node.name, n2.name)
    def generate_code(self, op_index, job):
        K8S_CMD = """k8s_cmd_{0} = \"""{2}\"""\nop_{0} = KubernetesCMDOperator(task_id='{1}', dag=dag, job=k8s_cmd_{0})\n"""
        return K8S_CMD.format(op_index, job_name_to_task_id(job.job_name), json_utils.dumps(job))
Example #15
    def generate_job_resource(self, job: LocalFlinkJob) -> None:
        """
        Generate flink job resource.

        :param job: Local flink job.
        """
        # Generate the job execution config file under <project_path>/temp.
        project_path = job.job_config.project_path
        if project_path is None:
            project_path = "/tmp"
        project_path_temp = project_path + "/temp"
        if not os.path.exists(project_path_temp):
            os.mkdir(project_path_temp)

        if job.job_config.language_type == LanguageType.JAVA:
            execution_config_file = project_path_temp + '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id

            with open(execution_config_file, 'w') as f:
                f.write(dumps(job))
            job.config_file = execution_config_file

            exec_cmd = ['flink', 'run']
            exec_cmd.extend(['-m', job.job_config.jm_host_port])
            if job.job_config.class_path is not None:
                exec_cmd.extend(['-C', job.job_config.class_path])

            if job.job_config.project_desc.jar_dependencies is not None:
                for jar in job.job_config.project_desc.jar_dependencies:
                    exec_cmd.extend(['-C', "file://{}".format(jar)])
            if job.job_config.main_class is not None:
                exec_cmd.extend(['-c', job.job_config.main_class])

            exec_cmd.extend([job.job_config.jar_path])
            exec_cmd.extend(['--execution-config', execution_config_file])

            if job.job_config.args is not None:
                exec_cmd.extend(job.job_config.args)
        else:
            if 'entry_module_path' not in job.job_config.project_desc.project_config:
                entry_module_path = (file_path_to_absolute_module(
                    sys.argv[0])).split('.')[-1]
            else:
                entry_module_path = job.job_config.project_desc.project_config[
                    'entry_module_path']
            execution_config_file = '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id
            real_execution_config_file = project_path_temp + execution_config_file
            with open(real_execution_config_file, 'wb') as f:
                f.write(serialization_utils.serialize(job))
            job.config_file = execution_config_file
            python3_location = sys.executable
            if job.job_config.local_mode == 'python':
                exec_cmd = [
                    python3_location, version.py_main_file,
                    job.job_config.project_path, execution_config_file,
                    entry_module_path
                ]
            else:
                exec_cmd = [
                    'flink', 'run', '-pym', version.py_cluster_main_file,
                    '-pyfs', job.job_config.project_path + ',' +
                    job.job_config.project_path + '/python_codes/', '-pyexec',
                    python3_location, '--project-path',
                    job.job_config.project_path, '--config-file',
                    execution_config_file, '--entry-module-path',
                    entry_module_path
                ]

        job.exec_cmd = exec_cmd
        logging.info(' '.join(exec_cmd))
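For orientation, an illustrative command line that the Java branch above could assemble; the host, jar paths, and class name are hypothetical:

# Hypothetical exec_cmd produced by the Java branch above.
exec_cmd = ['flink', 'run',
            '-m', 'localhost:8081',
            '-C', 'file:///tmp/project/dependencies/udf.jar',
            '-c', 'com.example.MainJob',
            '/tmp/project/target/job.jar',
            '--execution-config', '/tmp/project/temp/job_execution_config_xxx_1']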
Example #16
    def generate_handler(self, op, configs: List[MetConfig]):
        return DAGTemplate.MET_HANDLER.format(op, json_utils.dumps(configs))
def run_flink_predict_job():
    input_file = "/test1.csv"
    output_file = "/output_test2.csv"
    example_1 = af.create_example(
        name="example_1",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=input_file,
        stream_uri=input_file,
        data_format="csv")

    example_2 = af.create_example(
        name="example_2",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=output_file,
        stream_uri=output_file,
        data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    flink_config.flink_home = ''
    with af.config(flink_config):
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a INT, b INT, c INT) WITH ('connector' = 'filesystem',
                        'path' = 'INPUT',
                        'format' = 'csv'
                        )"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name

        stream_args_1 = batch_args_1

        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa INT, cc INT) WITH ('connector' = 'filesystem',
                        'path' = 'OUTPUT',
                        'format' = 'csv'
                        )"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteArgs(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1))
        model_meta = af.ModelMeta(name="test", model_type="saved_model")
        model_version = af.ModelVersionMeta(version="11111",
                                            model_path="./tmp/saved_model/",
                                            model_metric="./tmp/saved_model/",
                                            model_id=0)
        processed = af.predict(
            input_data_list=[input_example],
            model_info=model_meta,
            model_version_info=model_version,
            executor=faf.flink_executor.FlinkJavaExecutor(
                java_class="com.apache.flink.ai.flow.TestPredict"))

        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteArgs(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2))

    g = af.default_graph()
    workflow = af.compile_workflow(project_path=test_util.get_project_path())
    print(dumps(list(workflow.jobs.values())[0]))
Example #18
    def test_json(self):
        channel = DataEdge("a", 0)
        json_text = json_utils.dumps(channel)
        c2: DataEdge = json_utils.loads(json_text)
        self.assertEqual(channel.target_node_id, c2.target_node_id)
        self.assertEqual(channel.port, c2.port)
Example #19
def compile_graph(graph: Graph = _default_ai_graph) -> str:
    return dumps(graph)
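A minimal usage sketch, assuming a populated default graph and that json_utils.loads reverses dumps as shown in the test_json examples above:

# Hypothetical round trip: serialize the default graph and load it back.
graph_json = compile_graph()
restored_graph: Graph = json_utils.loads(graph_json)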