def generate_code(self, op_index, job: AbstractJob):
    LOCAL_PYTHON_OPERATOR = """env_{0}={{'PYTHONPATH': '{3}'}}
op_{0} = BashOperator(task_id='{1}', dag=dag, bash_command='{2}', env=env_{0})\n"""
    blob_manager = BlobManagerFactory.get_blob_manager(
        job.job_config.properties)
    copy_path = sys.path.copy()
    if job.job_config.project_path is not None:
        downloaded_blob_path = blob_manager.download_blob(
            job.instance_id, job.job_config.project_path)
        python_codes_path = downloaded_blob_path + '/python_codes'
        copy_path.append(python_codes_path)
    if job.job_config.project_desc.python_paths is not None:
        copy_path.extend(job.job_config.project_desc.python_paths)
    current_path = os.path.abspath(__file__)
    python_package_path = os.path.abspath(
        os.path.dirname(current_path) + os.path.sep + ".")
    script_path = python_package_path + '/local_job_run.py'
    entry_module_path = job.job_config.properties['entry_module_path']
    python3_location = sys.executable
    cmd = [
        python3_location, script_path, job.job_config.project_path,
        job.exec_func_file, job.exec_args_file, entry_module_path
    ]
    cmd_str = ' '.join(cmd)
    add_path = ':'.join(copy_path)
    code_text = LOCAL_PYTHON_OPERATOR.format(
        op_index, job_name_to_task_id(job.job_name), cmd_str, add_path)
    return code_text

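# Hedged illustration only (paths below are hypothetical, not from the source):
# for op_index=0 and a task id of 'batch_predict' (as derived by
# job_name_to_task_id), the LOCAL_PYTHON_OPERATOR template above renders roughly:
#
#   env_0={'PYTHONPATH': '/tmp/workflow_1_project/project/python_codes:/usr/lib/python3.8'}
#   op_0 = BashOperator(task_id='batch_predict', dag=dag,
#                       bash_command='/usr/bin/python3 /.../local_job_run.py ...', env=env_0)
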
def submit_job(self, job: VVPJob) -> VVPJobHandler:
    blob_manager = BlobManagerFactory.get_blob_manager(
        job.job_config.properties)
    # The download is performed for its side effect of materializing the project
    # locally; the returned path is not used by the VVP submission below.
    if job.job_config.project_path is not None:
        downloaded_blob_path = blob_manager.download_blob(
            job.instance_id, job.job_config.project_path)
    vvp_config: VVPJobConfig = job.job_config
    dp_id, job_id = job.vvp_restful.submit_job(
        name=job.job_config.deployment_name,
        artifact_path=job.job_config.jar_path,
        entry_class=job.job_config.entry_class,
        main_args=job.job_config.main_args,
        addition_dependencies=job.job_config.addition_dependencies,
        flink_image_info=vvp_config.flink_image_info,
        parallelism=vvp_config.parallelism,
        resources=vvp_config.resources,
        flink_config=vvp_config.flink_config,
        logging=vvp_config.logging,
        kubernetes=vvp_config.kubernetes,
        upgrade_strategy=job.job_config.upgrade_strategy,
        restore_strategy=job.job_config.restore_strategy,
        spec=job.job_config.spec)
    job.vvp_deployment_id = dp_id
    job.vvp_restful.start_deployment(job.vvp_deployment_id)
    job.vvp_job_id = job_id
    return VVPJobHandler(vvp_restful=job.vvp_restful,
                         vvp_job_id=job_id,
                         vvp_deployment_id=dp_id,
                         job_instance_id=job.instance_id,
                         job_uuid=job.uuid,
                         workflow_id=job.job_context.workflow_execution_id)

def generate_code(self, op_index, job: AbstractJob):
    sys_env = os.environ.copy()
    if job.job_config.flink_home is not None:
        sys_env['PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
    python_path = sys.path.copy()
    if job.job_config.project_path is not None:
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        downloaded_blob_path = blob_manager.download_blob(
            job.instance_id, job.job_config.project_path)
        python_path.append(downloaded_blob_path + '/python_codes')
    if job.job_config.project_desc.python_paths is not None:
        python_path.extend(job.job_config.project_desc.python_paths)
    sys_env['PYTHONPATH'] = ':'.join(python_path)
    sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
    return """from flink_ai_flow.local_flink_job import LocalFlinkOperator\nenv_{0}={{'PYTHONPATH': '{4}', 'PATH': '{5}', 'PYFLINK_CLIENT_EXECUTABLE': '{6}'}}\nop_{0} = LocalFlinkOperator(task_id='{1}', bash_command='{2}', properties='{3}', dag=dag, env=env_{0})\n""".format(
        op_index, job_name_to_task_id(job.job_name), ' '.join(job.exec_cmd),
        json.dumps({
            'project_path': job.job_config.project_path,
            'workflow_execution_id': job.job_context.workflow_execution_id,
            'instance_id': job.instance_id
        }), sys_env['PYTHONPATH'], sys_env['PATH'],
        sys_env['PYFLINK_CLIENT_EXECUTABLE'])

def test_project_upload_download_local_2(self):
    project_path = get_file_dir(__file__)
    config = {'local_repository': '/tmp', 'remote_repository': '/tmp'}
    # blob_server.type = local
    blob_manager = BlobManagerFactory.get_blob_manager(config)
    uploaded_path = blob_manager.upload_blob('1', project_path)
    downloaded_path = blob_manager.download_blob('1', uploaded_path)
    self.assertEqual('/tmp/workflow_1_project/project', downloaded_path)

def test_custom_blob_manager(self):
    config = {
        'blob_server.type':
            'ai_flow.test.project.test_blob_manager.MockBlockManager'
    }
    blob_manager = BlobManagerFactory.get_blob_manager(config)
    uploaded_path = blob_manager.upload_blob('1', None)
    self.assertEqual('upload', uploaded_path)
    downloaded_path = blob_manager.download_blob('1', uploaded_path)
    self.assertEqual('download', downloaded_path)

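# A minimal sketch (an assumption, not the project's actual test helper) of the
# custom manager that test_custom_blob_manager above loads via 'blob_server.type'.
# It assumes a BlobManager base class with upload_blob/download_blob hooks;
# constructor details are deliberately omitted:
class MockBlockManager(BlobManager):
    def upload_blob(self, workflow_id, prj_path):
        # Fixed marker so the test can assert that the factory loaded this class.
        return 'upload'

    def download_blob(self, workflow_id, remote_path, local_path=None):
        return 'download'
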
def submit_job(self, job: LocalPythonJob) -> Any:
    """
    Submit the local python job for execution.

    :param job: A local python job object which contains the necessary information for an execution.
    :return: A job handler that maintains the handler of the job in runtime.
    """
    blob_manager = BlobManagerFactory.get_blob_manager(
        job.job_config.properties)
    copy_path = sys.path.copy()
    if job.job_config.project_path is not None:
        downloaded_blob_path = blob_manager.download_blob(
            job.instance_id, job.job_config.project_path)
        python_codes_path = downloaded_blob_path + '/python_codes'
        copy_path.append(python_codes_path)
    if job.job_config.project_desc.python_paths is not None:
        copy_path.extend(job.job_config.project_desc.python_paths)
    env = os.environ.copy()
    env['PYTHONPATH'] = ':'.join(copy_path)
    current_path = os.path.abspath(__file__)
    father_path = os.path.abspath(
        os.path.dirname(current_path) + os.path.sep + ".")
    script_path = father_path + '/local_job_run.py'
    entry_module_path = job.job_config.properties['entry_module_path']
    python3_location = sys.executable
    cmd = [
        python3_location, script_path, job.job_config.project_path,
        job.exec_func_file, job.exec_args_file, entry_module_path
    ]
    logging.info(' '.join(cmd))
    # Every job submitter needs to set the job log files.
    # A local python job names them LocalPythonJob_{workflow_execution_id}_{stdout,stderr}.log.
    stdout_log = log_path_utils.stdout_log_path(
        job.job_config.project_desc.get_absolute_log_path(), job.job_name)
    stderr_log = log_path_utils.stderr_log_path(
        job.job_config.project_desc.get_absolute_log_path(), job.job_name)
    if not os.path.exists(
            job.job_config.project_desc.get_absolute_log_path()):
        os.mkdir(job.job_config.project_desc.get_absolute_log_path())
    with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
        process = sp.Popen(cmd,
                           stderr=err,
                           stdout=out,
                           shell=False,
                           env=env)
        job_handler = LocalJobHandler(
            job_instance_id=job.instance_id,
            job_uuid=job.uuid,
            workflow_id=job.job_context.workflow_execution_id,
            process_object=process)
        self.job_handler_map[job.uuid] = job_handler
        return job_handler

def test_project_upload_download_local(self):
    project_path = get_file_dir(__file__)
    project_desc = get_project_description_from(project_path + "/../")
    # blob_server.type = local
    blob_manager = BlobManagerFactory.get_blob_manager(
        project_desc.project_config)
    uploaded_path = blob_manager.upload_blob('1', project_path)
    self.assertEqual(uploaded_path, project_path)
    downloaded_path = blob_manager.download_blob('1', uploaded_path)
    self.assertEqual(project_path, downloaded_path)

def submit_job(self, job: LocalFlinkJob):
    """
    Submit the flink job to run locally.

    :param job: A flink job object which contains the necessary information for an execution.
    :return: A job handler that maintains the handler of a job in runtime.
    """
    sys_env = os.environ.copy()
    if job.job_config.flink_home is not None:
        sys_env['PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
    blob_manager = BlobManagerFactory.get_blob_manager(
        job.job_config.properties)
    copy_path = sys.path.copy()
    if job.job_config.project_path is not None:
        downloaded_blob_path = blob_manager.download_blob(
            job.instance_id, job.job_config.project_path)
        python_codes_path = downloaded_blob_path + '/python_codes'
        copy_path.append(python_codes_path)
    if job.job_config.project_desc.python_paths is not None:
        copy_path.extend(job.job_config.project_desc.python_paths)
    sys_env['PYTHONPATH'] = ':'.join(copy_path)
    logging.info(sys_env['PYTHONPATH'])
    # Every job submitter needs to set the job log files.
    # A local flink job names them LocalFlinkJob_{workflow_execution_id}_{stdout,stderr}.log.
    stdout_log = log_path_utils.stdout_log_path(
        job.job_config.project_desc.get_absolute_log_path(), job.job_name)
    stderr_log = log_path_utils.stderr_log_path(
        job.job_config.project_desc.get_absolute_log_path(), job.job_name)
    if not os.path.exists(
            job.job_config.project_desc.get_absolute_log_path()):
        os.mkdir(job.job_config.project_desc.get_absolute_log_path())
    sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
    with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
        submitted_process = subprocess.Popen(args=job.exec_cmd,
                                             shell=False,
                                             stdout=out,
                                             stderr=err,
                                             env=sys_env)
        exec_handle = LocalJobHandler(
            job_uuid=job.uuid,
            job_instance_id=job.instance_id,
            workflow_id=job.job_context.workflow_execution_id,
            process_object=submitted_process)
        self.job_handler_map[job.uuid] = exec_handle
        return exec_handle

def _upload_project_package(workflow: Workflow):
    """
    Upload the project package.

    :param workflow: The generated workflow.
    """
    project_desc = project_description()
    workflow_json_file = os.path.join(
        project_desc.get_absolute_temp_path(),
        project_desc.project_config.get_project_uuid() + "_workflow.json")
    with open(workflow_json_file, 'w') as f:
        f.write(json_utils.dumps(workflow))
    blob_manager = BlobManagerFactory.get_blob_manager(
        project_desc.project_config['blob'])
    uploaded_project_path = blob_manager.upload_blob(
        str(workflow.workflow_id), project_desc.project_path)
    project_desc.project_config.set_uploaded_project_path(
        uploaded_project_path)
    for job in workflow.jobs.values():
        job.job_config.project_path = uploaded_project_path

def upload_project_package(self, workflow: Workflow):
    """
    Upload the project package.

    :param workflow: The generated workflow.
    """
    # TODO: update the project uri after the upload.
    with open(
            self.project_desc.get_absolute_temp_path() + "/" +
            self.project_desc.project_config.get_project_uuid() +
            "_workflow.json", 'w') as f:
        f.write(json_utils.dumps(workflow))
    blob_manager = BlobManagerFactory.get_blob_manager(
        self.project_desc.project_config)
    uploaded_project_path = blob_manager.upload_blob(
        str(workflow.workflow_id), self.project_desc.project_path)
    self.project_desc.project_config[
        'uploaded_project_path'] = uploaded_project_path
    for job in workflow.jobs.values():
        job.job_config.project_path = uploaded_project_path
        job.job_config.project_local_path = self.project_desc.project_path

def submitWorkflow(self, request, context):
    try:
        rq: ScheduleWorkflowRequest = request
        workflow: Workflow = json_utils.loads(rq.workflow_json)
        workflow.workflow_name = rq.workflow_name
        config = {}
        config.update(workflow.project_desc.project_config['blob'])
        # config['local_repository'] = self._scheduler_config.repository()
        blob_manager = BlobManagerFactory.get_blob_manager(config)
        project_path: Text = blob_manager \
            .download_blob(workflow_id=workflow.workflow_id,
                           remote_path=workflow.project_desc.project_config.get('uploaded_project_path'),
                           local_path=self._scheduler_config.repository())
        project_desc: ProjectDesc = get_project_description_from(
            project_path)
        project_name = project_desc.project_name
        # update workflow
        workflow.project_desc = project_desc
        for n, j in workflow.jobs.items():
            j.job_config.project_desc = project_desc
            j.job_config.project_path = project_path
        workflow_info = self._scheduler.submit_workflow(
            workflow, project_desc)
        if workflow_info is None:
            return WorkflowInfoResponse(result=ResultProto(
                status=StatusProto.ERROR,
                error_message='{}, {} do not exist!'.format(
                    project_name, workflow.workflow_name)))
        return WorkflowInfoResponse(
            result=ResultProto(status=StatusProto.OK),
            workflow=workflow_to_proto(workflow_info))
    except Exception as err:
        return WorkflowInfoResponse(
            result=ResultProto(status=StatusProto.ERROR,
                               error_message=traceback.format_exc()))

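# Hedged usage sketch (field names are taken from the handler above; the stub
# setup itself is assumed, not shown in the source): a scheduler client would
# call this RPC roughly as
#
#   request = ScheduleWorkflowRequest(workflow_name='my_workflow',
#                                     workflow_json=json_utils.dumps(workflow))
#   response = scheduling_stub.submitWorkflow(request)
#   # response.result.status is StatusProto.OK on success and
#   # response.workflow carries the submitted workflow's proto.
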
def generate_code(self, op_index, job):
    blob_manager = BlobManagerFactory.get_blob_manager(
        job.job_config.properties)
    copy_path = sys.path.copy()
    if job.job_config.project_path is not None:
        downloaded_blob_path = blob_manager.download_blob(
            job.instance_id, job.job_config.project_path)
        python_codes_path = downloaded_blob_path + '/python_codes'
        copy_path.append(python_codes_path)
    if job.job_config.project_desc.python_paths is not None:
        copy_path.extend(job.job_config.project_desc.python_paths)
    copy_set = set(copy_path)
    add_path = ':'.join(copy_set)
    VVP_OPERATOR = """env_{0} = {{'PYTHONPATH': '{7}'}}\nop_{0} = VVPFlinkOperator(task_id='{1}', dag=dag, bash_command='{2}', """ \
                   + """id_file='{3}', base_url='{4}', namespace='{5}', token='{6}', env=env_{0})\n"""
    id_file = '{}/temp/vvp/{}/{}'.format(
        job.job_config.project_path,
        str(job.job_context.workflow_execution_id), job.instance_id)
    return VVP_OPERATOR.format(op_index, job_name_to_task_id(job.job_name),
                               job.exec_cmd, id_file,
                               job.job_config.base_url,
                               job.job_config.namespace,
                               job.job_config.token, add_path)

def test_project_upload_download_oss(self):
    project_path = get_file_dir(__file__)
    config = {
        'blob_server.type': 'oss',
        'local_repository': '/tmp',
        'blob_server.access_key_id': os.environ.get('blob_server.access_key_id'),
        'blob_server.access_key_secret': os.environ.get('blob_server.access_key_secret'),
        'blob_server.endpoint': os.environ.get('blob_server.endpoint'),
        'blob_server.bucket': os.environ.get('blob_server.bucket'),
        'blob_server.repo_name': os.environ.get('blob_server.repo_name')
    }
    blob_manager = BlobManagerFactory.get_blob_manager(config)
    uploaded_path = blob_manager.upload_blob('1', project_path)
    downloaded_path = blob_manager.download_blob('1', uploaded_path)
    self.assertEqual('/tmp/workflow_1_project/project', downloaded_path)

def submit_job(self, job: LocalFlinkJob):
    """
    Submit the flink job to run locally.

    :param job: A flink job object which contains the necessary information for an execution.
    :return: A job handler that maintains the handler of a job in runtime.
    """
    # Generate the submission command.
    if job.job_config.language_type == LanguageType.JAVA:
        exec_cmd = ['flink', 'run']
        exec_cmd.extend(['-m', job.job_config.jm_host_port])
        if job.job_config.class_path is not None:
            exec_cmd.extend(['-C', job.job_config.class_path])
        if job.job_config.project_desc.jar_dependencies is not None:
            for jar in job.job_config.project_desc.jar_dependencies:
                exec_cmd.extend(['-C', "file://{}".format(jar)])
        if job.job_config.main_class is not None:
            exec_cmd.extend(['-c', job.job_config.main_class])
        exec_cmd.extend([job.job_config.jar_path])
        exec_cmd.extend(['--execution-config', job.config_file])
        if job.job_config.args is not None:
            exec_cmd.extend(job.job_config.args)
    else:
        if 'entry_module_path' not in job.job_config.project_desc.project_config:
            entry_module_path = (file_path_to_absolute_module(sys.argv[0])).split('.')[-1]
        else:
            entry_module_path = job.job_config.project_desc.project_config['entry_module_path']

        python3_location = sys.executable
        if job.job_config.local_mode == 'python':
            exec_cmd = [python3_location, version.py_main_file,
                        job.job_config.project_path,
                        job.config_file, entry_module_path]
        else:
            exec_cmd = ['flink', 'run',
                        '-pym', version.py_cluster_module,
                        '-pyfs', job.job_config.project_path + ',' + job.job_config.project_path + '/python_codes/',
                        '-pyexec', python3_location,
                        '--project-path', job.job_config.project_path,
                        '--config-file', job.config_file,
                        '--entry-module-path', entry_module_path]

    job.exec_cmd = exec_cmd
    logging.info(' '.join(exec_cmd))

    sys_env = os.environ.copy()
    if job.job_config.flink_home is not None:
        sys_env['PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
    blob_manager = BlobManagerFactory.get_blob_manager(job.job_config.properties)
    copy_path = sys.path.copy()
    if job.job_config.project_path is not None:
        downloaded_blob_path = blob_manager.download_blob(job.instance_id, job.job_config.project_path)
        python_codes_path = downloaded_blob_path + '/python_codes'
        copy_path.append(python_codes_path)
    if job.job_config.project_desc.python_paths is not None:
        copy_path.extend(job.job_config.project_desc.python_paths)
    sys_env['PYTHONPATH'] = ':'.join(copy_path)
    logging.info(sys_env['PYTHONPATH'])

    # Every job submitter needs to set the job log files.
    # A local flink job names them LocalFlinkJob_{workflow_execution_id}_{stdout,stderr}.log.
    stdout_log = log_path_utils.stdout_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                job.job_name)
    stderr_log = log_path_utils.stderr_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                job.job_name)
    if not os.path.exists(job.job_config.project_desc.get_absolute_log_path()):
        os.mkdir(job.job_config.project_desc.get_absolute_log_path())

    sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
    with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
        submitted_process = subprocess.Popen(
            args=job.exec_cmd,
            shell=False,
            stdout=out,
            stderr=err,
            env=sys_env
        )
        exec_handle = LocalJobHandler(job_uuid=job.uuid,
                                      job_instance_id=job.instance_id,
                                      workflow_id=job.job_context.workflow_execution_id,
                                      process_object=submitted_process)
        self.job_handler_map[job.uuid] = exec_handle
        return exec_handle

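# Hedged illustration (values are hypothetical, not from the source): when
# local_mode is not 'python', the else-branch above assembles a command along
# the lines of
#
#   flink run -pym <version.py_cluster_module> \
#         -pyfs /tmp/project,/tmp/project/python_codes/ \
#         -pyexec /usr/bin/python3 \
#         --project-path /tmp/project \
#         --config-file <job.config_file> \
#         --entry-module-path <entry module>
#
# which subprocess.Popen then executes with PATH extended by flink_home/bin and
# PYTHONPATH covering the downloaded project's python_codes directory.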