def submit_workflow(workflow_name: Text = None, args: Dict = None) -> WorkflowInfo:
    """
    Submit the ai flow workflow to the scheduler.

    :param workflow_name: The ai flow workflow identifier.
    :param args: The arguments of the submit action.
    :return: The result of the submit action.
    """
    call_path = os.path.abspath(sys._getframe(1).f_code.co_filename)
    project_path = os.path.abspath(project_description().project_path)
    # Derive the entry module from the caller's file path: strip the
    # '/python_codes/' prefix (14 chars) and the '.py' suffix (3 chars),
    # then turn the remaining relative path into a dotted module path.
    entry_module_path = call_path[len(project_path) + 14:-3].replace('/', '.')
    namespace = project_config().get_project_name()
    translator = get_default_translator()
    workflow = translator.translate(graph=default_graph(),
                                    project_desc=project_description())
    for job in workflow.jobs.values():
        _register_job_meta(workflow_id=workflow.workflow_id, job=job)
    _set_entry_module_path(workflow, entry_module_path)
    _upload_project_package(workflow)
    return proto_to_workflow(get_ai_flow_client().submit_workflow_to_scheduler(
        namespace=namespace,
        workflow_json=json_utils.dumps(workflow),
        workflow_name=workflow_name,
        args=args))
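# A worked example of the entry-module computation above, using hypothetical
# paths (not taken from any real project): len('/python_codes/') == 14 and
# len('.py') == 3, so the slice strips the '<project>/python_codes/' prefix
# and the '.py' suffix before converting '/' separators to '.'.
def entry_module_path_example():
    project_path = "/tmp/my_project"
    call_path = "/tmp/my_project/python_codes/sub/my_flow.py"
    entry_module_path = call_path[len(project_path) + 14:-3].replace('/', '.')
    assert entry_module_path == "sub.my_flow"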
def generate_job_resource(self, job: LocalFlinkJob) -> None:
    """
    Generate flink job resource.

    :param job: Local flink job.
    """
    # gen config file
    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    if not os.path.exists(project_path_temp):
        os.mkdir(project_path_temp)
    if job.job_config.language_type == LanguageType.JAVA:
        # Java jobs read a JSON config file, referenced by its absolute path.
        execution_config_file = project_path_temp + '/job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id
        with open(execution_config_file, 'w') as f:
            f.write(dumps(job))
        job.config_file = execution_config_file
    else:
        # Python jobs read a binary serialized config file, referenced
        # relative to the temp directory.
        execution_config_file = '/job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id
        real_execution_config_file = project_path_temp + execution_config_file
        with open(real_execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))
        job.config_file = execution_config_file
def run_flink_job():
    input_file = "/test1.csv"
    output_file = "/output_test1.csv"
    if os.path.exists(output_file):
        os.remove(output_file)
    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")
    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    flink_config.flink_home = "/Users/chenwuchao/soft/apache/flink-1.10.0"
    with af.config(flink_config):
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a STRING, b STRING, c STRING) WITH ('connector' = 'filesystem', 'path' = 'INPUT', 'format' = 'csv')"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name
        # batch and stream share the same properties dict
        stream_args_1 = batch_args_1

        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa STRING, bb STRING) WITH ('connector' = 'filesystem', 'path' = 'OUTPUT', 'format' = 'csv')"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteProperties(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1))
        processed = af.transform(input_data_list=[input_example],
                                 executor=faf.FlinkJavaExecutor(
                                     java_class="com.apache.flink.ai.flow.TestTransformer"))
        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteProperties(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2))
    workflow = af.compile_workflow(test_util.get_project_path())
    print(json_utils.dumps(list(workflow.jobs.values())[0]))
def submit_workflow(self, ex_workflow: Workflow) -> Optional[int]:
    """
    Submit Workflow.

    :param ex_workflow: The generated workflow.
    :return: Workflow id.
    """
    return self.get_client().submit_workflow(
        json_utils.dumps(ex_workflow))[1]
def validate_engine_type(ai_sub_graph: AISubGraph):
    if len(ai_sub_graph.nodes) <= 0:
        raise Exception("AISubGraph is empty!")
    nodes = list(ai_sub_graph.nodes.values())
    engine_name = nodes[0].properties[ENGINE_NAME]
    # All nodes in a sub-graph must share the same engine.
    for node in ai_sub_graph.nodes.values():
        engine = node.properties[ENGINE_NAME]
        if engine_name != engine:
            raise Exception(
                "AISubGraph engine names are not the same: {} != {} graph: {}".format(
                    engine_name, engine, dumps(ai_sub_graph)))
    ai_sub_graph.job_type = engine_name
def generate_code(self, op_index, job: AbstractJob):
    from tempfile import NamedTemporaryFile
    # Dump the job definition to a temp JSON file that the operator loads.
    with NamedTemporaryFile(
            mode='w+t', prefix=job.job_name, suffix='.json',
            dir=job.job_config.project_desc.get_absolute_temp_path(),
            delete=False) as f:
        f.write(json_utils.dumps(job))
    K8S_FLINK = """k8s_flink_{0} = "{2}"\nop_{0} = KubernetesFlinkOperator(task_id='{1}', dag=dag, job_file=k8s_flink_{0})\n"""
    return K8S_FLINK.format(op_index, job_name_to_task_id(job.job_name), f.name)
def register_workflow_execution(self, workflow: Workflow):
    exec_name = generate_time_str()
    workflow_meta: WorkflowExecutionMeta = self.get_client(
    ).register_workflow_execution(
        name=exec_name,
        project_id=int(
            workflow.project_desc.project_config.get_project_uuid()),
        execution_state=State.INIT,
        workflow_json=dumps(workflow))
    workflow.workflow_id = workflow_meta.uuid
    workflow.execution_name = exec_name
    # set workflow execution id to job context
    for job in workflow.jobs.values():
        job.job_context.workflow_execution_id = workflow_meta.uuid
def optimize(graph: AIGraph) -> AIGraph:
    """
    Rewrite the AIGraph: copy the nodes that belong to different sub-graphs
    until the cluster graph is acyclic.

    :param graph: Default AIGraph.
    :return: The optimized AIGraph.
    """
    has_circle = True
    while has_circle:
        has_circle, do_split, err_message = DefaultTranslator.compute_cluster_graph(
            graph)
        if has_circle and not do_split:
            raise Exception("{} cluster contains a cycle".format(
                dumps(err_message)))
    return graph
def _upload_project_package(workflow: Workflow):
    """
    Upload the project package.

    :param workflow: The generated workflow.
    """
    project_desc = project_description()
    workflow_json_file = os.path.join(
        project_desc.get_absolute_temp_path(),
        project_desc.project_config.get_project_uuid() + "_workflow.json")
    with open(workflow_json_file, 'w') as f:
        f.write(json_utils.dumps(workflow))
    blob_manager = BlobManagerFactory.get_blob_manager(
        project_desc.project_config['blob'])
    uploaded_project_path = blob_manager.upload_blob(str(workflow.workflow_id),
                                                     project_desc.project_path)
    project_desc.project_config.set_uploaded_project_path(
        uploaded_project_path)
    for job in workflow.jobs.values():
        job.job_config.project_path = uploaded_project_path
def upload_project_package(self, workflow: Workflow):
    """
    Upload the project package.

    :param workflow: The generated workflow.
    """
    # TODO: update the project uri after the upload.
    with open(
            self.project_desc.get_absolute_temp_path() + "/"
            + self.project_desc.project_config.get_project_uuid()
            + "_workflow.json", 'w') as f:
        f.write(json_utils.dumps(workflow))
    blob_manager = BlobManagerFactory.get_blob_manager(
        self.project_desc.project_config)
    uploaded_project_path = blob_manager.upload_blob(
        str(workflow.workflow_id), self.project_desc.project_path)
    self.project_desc.project_config[
        'uploaded_project_path'] = uploaded_project_path
    for job in workflow.jobs.values():
        job.job_config.project_path = uploaded_project_path
        job.job_config.project_local_path = self.project_desc.project_path
def generate_workflow_config():
    workflow_config = WorkFlowConfig()
    workflow_config.add_job_config(
        config_key="global_config_key",
        job_config=af.BaseJobConfig(
            platform="local",
            engine="python",
            properties={"common_key": "common_value"}))
    workflow_config.add_job_config(
        config_key="test_job",
        job_config=af.BaseJobConfig(
            platform=None,
            engine=None,
            properties={"job_key": "job_value"}))
    workflow_config.add_job_config(
        config_key="test_job_1",
        job_config=af.BaseJobConfig(
            platform='kubernetes',
            engine='flink',
            properties={"job_key_1": "job_value_1"}))
    with open(config_file, 'w') as f:
        f.write(json_utils.dumps(workflow_config))
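# Since generate_workflow_config() writes the config with json_utils.dumps,
# it can be restored with the matching json_utils.loads -- the same round
# trip exercised by the test_json cases in this section. A minimal sketch,
# assuming config_file points at the file written above:
def load_workflow_config():
    with open(config_file, 'r') as f:
        loaded: WorkFlowConfig = json_utils.loads(f.read())
    return loaded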
def generate_job_resource(self, job: KubernetesFlinkJob) -> None:
    """
    Generate kubernetes flink job resource.

    :param job: Kubernetes flink job.
    """
    # gen config file
    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    if not os.path.exists(project_path_temp):
        os.mkdir(project_path_temp)
    execution_config_file = 'job_execution_config_' + str(
        uuid.uuid4()) + "_" + job.instance_id
    real_execution_config_file = project_path_temp + '/' + execution_config_file
    if job.job_config.language_type == LanguageType.JAVA:
        # Java jobs get a JSON config file; only the file name is recorded.
        with open(real_execution_config_file, 'w') as f:
            f.write(dumps(job))
        job.config_file = Path(real_execution_config_file).name
    else:
        # Python jobs get a binary serialized config file.
        with open(real_execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))
        job.config_file = execution_config_file
    # generate python_codes.zip
    python_codes = '{}/python_codes'.format(project_path)
    if os.path.exists(python_codes):
        zip_dir = '{}/zip'.format(project_path)
        if os.path.exists(zip_dir):
            shutil.rmtree(zip_dir)
        shutil.copytree(python_codes, zip_dir + '/python_codes')
        shutil.make_archive(python_codes, 'zip', zip_dir)
def test_json(self):
    node = BaseNode(name="a")
    json_text = json_utils.dumps(node)
    print(json_text)
    n2: BaseNode = json_utils.loads(json_text)
    self.assertEqual(node.name, n2.name)
def generate_code(self, op_index, job):
    K8S_CMD = """k8s_cmd_{0} = \"""{2}\"""\nop_{0} = KubernetesCMDOperator(task_id='{1}', dag=dag, job=k8s_cmd_{0})\n"""
    return K8S_CMD.format(op_index, job_name_to_task_id(job.job_name),
                          json_utils.dumps(job))
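# A sketch of what generate_code() above emits, using hypothetical values
# for the op index, task id, and serialized job:
def k8s_cmd_template_example():
    K8S_CMD = """k8s_cmd_{0} = \"""{2}\"""\nop_{0} = KubernetesCMDOperator(task_id='{1}', dag=dag, job=k8s_cmd_{0})\n"""
    print(K8S_CMD.format(0, 'my_task', '{"job_name": "my_job"}'))
    # Prints two lines of generated DAG code:
    # k8s_cmd_0 = """{"job_name": "my_job"}"""
    # op_0 = KubernetesCMDOperator(task_id='my_task', dag=dag, job=k8s_cmd_0)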
def generate_job_resource(self, job: LocalFlinkJob) -> None:
    """
    Generate flink job resource.

    :param job: Local flink job.
    """
    # gen config file
    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    if not os.path.exists(project_path_temp):
        os.mkdir(project_path_temp)
    if job.job_config.language_type == LanguageType.JAVA:
        execution_config_file = project_path_temp + '/job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id
        with open(execution_config_file, 'w') as f:
            f.write(dumps(job))
        job.config_file = execution_config_file
        # Build the 'flink run' command for the java job.
        exec_cmd = ['flink', 'run']
        exec_cmd.extend(['-m', job.job_config.jm_host_port])
        if job.job_config.class_path is not None:
            exec_cmd.extend(['-C', job.job_config.class_path])
        if job.job_config.project_desc.jar_dependencies is not None:
            for jar in job.job_config.project_desc.jar_dependencies:
                exec_cmd.extend(['-C', "file://{}".format(jar)])
        if job.job_config.main_class is not None:
            exec_cmd.extend(['-c', job.job_config.main_class])
        exec_cmd.extend([job.job_config.jar_path])
        exec_cmd.extend(['--execution-config', execution_config_file])
        if job.job_config.args is not None:
            exec_cmd.extend(job.job_config.args)
    else:
        if 'entry_module_path' not in job.job_config.project_desc.project_config:
            entry_module_path = (file_path_to_absolute_module(
                sys.argv[0])).split('.')[-1]
        else:
            entry_module_path = job.job_config.project_desc.project_config[
                'entry_module_path']
        execution_config_file = '/job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id
        real_execution_config_file = project_path_temp + execution_config_file
        with open(real_execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))
        job.config_file = execution_config_file
        python3_location = sys.executable
        if job.job_config.local_mode == 'python':
            # Run the job directly with the local python interpreter.
            exec_cmd = [
                python3_location, version.py_main_file,
                job.job_config.project_path,
                execution_config_file, entry_module_path
            ]
        else:
            # Submit the python job to the flink cluster.
            exec_cmd = [
                'flink', 'run',
                '-pym', version.py_cluster_main_file,
                '-pyfs', job.job_config.project_path + ','
                + job.job_config.project_path + '/python_codes/',
                '-pyexec', python3_location,
                '--project-path', job.job_config.project_path,
                '--config-file', execution_config_file,
                '--entry-module-path', entry_module_path
            ]
    job.exec_cmd = exec_cmd
    logging.info(' '.join(exec_cmd))
def generate_handler(self, op, configs: List[MetConfig]):
    return DAGTemplate.MET_HANDLER.format(op, json_utils.dumps(configs))
def run_flink_predict_job():
    input_file = "/test1.csv"
    output_file = "/output_test2.csv"
    example_1 = af.create_example(
        name="example_1",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=input_file,
        stream_uri=input_file,
        data_format="csv")
    example_2 = af.create_example(
        name="example_2",
        support_type=af.ExampleSupportType.EXAMPLE_BOTH,
        batch_uri=output_file,
        stream_uri=output_file,
        data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    flink_config.flink_home = ''
    with af.config(flink_config):
        batch_args_1: Properties = {}
        ddl = """CREATE TABLE input_table (a INT, b INT, c INT) WITH ('connector' = 'filesystem', 'path' = 'INPUT', 'format' = 'csv')"""
        table_name = "input_table"
        batch_args_1['ddl'] = ddl
        batch_args_1['table_name'] = table_name
        stream_args_1 = batch_args_1

        batch_args_2: Properties = {}
        ddl = """CREATE TABLE output_table (aa INT, cc INT) WITH ('connector' = 'filesystem', 'path' = 'OUTPUT', 'format' = 'csv')"""
        table_name = "output_table"
        batch_args_2['ddl'] = ddl
        batch_args_2['table_name'] = table_name
        stream_args_2 = batch_args_2

        input_example = af.read_example(example_info=example_1,
                                        exec_args=ExecuteArgs(
                                            batch_properties=batch_args_1,
                                            stream_properties=stream_args_1))
        model_meta = af.ModelMeta(name="test", model_type="saved_model")
        model_version = af.ModelVersionMeta(version="11111",
                                            model_path="./tmp/saved_model/",
                                            model_metric="./tmp/saved_model/",
                                            model_id=0)
        processed = af.predict(
            input_data_list=[input_example],
            model_info=model_meta,
            model_version_info=model_version,
            executor=faf.flink_executor.FlinkJavaExecutor(
                java_class="com.apache.flink.ai.flow.TestPredict"))
        af.write_example(input_data=processed,
                         example_info=example_2,
                         exec_args=ExecuteArgs(
                             batch_properties=batch_args_2,
                             stream_properties=stream_args_2))
    g = af.default_graph()
    workflow = af.compile_workflow(project_path=test_util.get_project_path())
    print(dumps(list(workflow.jobs.values())[0]))
def test_json(self):
    channel = DataEdge("a", 0)
    json_text = json_utils.dumps(channel)
    c2: DataEdge = json_utils.loads(json_text)
    self.assertEqual(channel.target_node_id, c2.target_node_id)
    self.assertEqual(channel.port, c2.port)
def compile_graph(graph: Graph = _default_ai_graph) -> str:
    return dumps(graph)
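# Because compile_graph() returns the json_utils.dumps serialization, the
# graph can be restored with json_utils.loads, mirroring the round trip in
# the test_json cases above. A minimal sketch, assuming the default graph
# has already been populated:
def compile_and_restore_graph():
    json_text = compile_graph()
    restored: Graph = json_utils.loads(json_text)
    return restored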