def create_job(job_id, role, party_id):
    src_fate_ver = request.json.get('src_fate_ver')
    if src_fate_ver is not None and compare_version(src_fate_ver, '1.7.0') == 'lt':
        return get_json_result(retcode=RetCode.INCOMPATIBLE_FATE_VER,
                               retmsg='Incompatible FATE versions',
                               data={'src_fate_ver': src_fate_ver,
                                     'current_fate_ver': RuntimeConfig.get_env('FATE')})
    try:
        result = JobController.create_job(job_id=job_id, role=role,
                                          party_id=int(party_id),
                                          job_info=request.json)
        return get_json_result(retcode=0, retmsg='success', data=result)
    except RuntimeError as e:
        return get_json_result(retcode=RetCode.OPERATING_ERROR,
                               retmsg=str(e),
                               data={"job_id": job_id})
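
# The handlers in this section branch on the string returned by compare_version
# ('lt', 'eq' or 'gt'). Below is a minimal sketch of that convention, assuming plain
# dotted numeric version strings; it is illustrative only, not FATE Flow's actual
# implementation.
def _compare_version_sketch(ver_a: str, ver_b: str) -> str:
    # compare the first three numeric fields of each version string
    a = tuple(int(x) for x in ver_a.split('.')[:3])
    b = tuple(int(x) for x in ver_b.split('.')[:3])
    if a < b:
        return 'lt'
    if a > b:
        return 'gt'
    return 'eq'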
def deploy():
    request_data = request.json
    require_parameters = ['model_id', 'model_version']
    check_config(request_data, require_parameters)
    model_id = request_data.get("model_id")
    model_version = request_data.get("model_version")
    retcode, retmsg, model_info = model_utils.query_model_info_from_file(
        model_id=model_id, model_version=model_version, to_dict=True)
    if not model_info:
        raise Exception(f'Deploy model failed, no model {model_id} {model_version} found.')

    # locate the model saved by the initiator party; models older than FATE 1.5.0 are skipped
    for key, value in model_info.items():
        version_check = model_utils.compare_version(value.get('f_fate_version'), '1.5.0')
        if version_check == 'lt':
            continue
        init_role = key.split('/')[-2].split('#')[0]
        init_party_id = key.split('/')[-2].split('#')[1]
        model_init_role = value.get('f_initiator_role') if value.get('f_initiator_role') \
            else value.get('f_train_runtime_conf', {}).get('initiator', {}).get('role', '')
        model_init_party_id = value.get('f_initiator_role_party_id') if value.get('f_initiator_role_party_id') \
            else value.get('f_train_runtime_conf', {}).get('initiator', {}).get('party_id', '')
        if (init_role == model_init_role) and (init_party_id == str(model_init_party_id)):
            break
    else:
        raise Exception("Deploy model failed, can not find the initiator role's model, "
                        "or the FATE version of the model is older than 1.5.0.")

    # distribute federated deploy task
    _job_id = job_utils.generate_job_id()
    request_data['child_model_version'] = _job_id

    initiator_party_id = model_init_party_id
    initiator_role = model_init_role
    request_data['initiator'] = {'role': initiator_role, 'party_id': initiator_party_id}
    deploy_status = True
    deploy_status_info = {}
    deploy_status_msg = 'success'
    deploy_status_info['detail'] = {}

    for role_name, role_partys in value.get("f_train_runtime_conf", {}).get('role', {}).items():
        if role_name not in ['arbiter', 'host', 'guest']:
            continue
        deploy_status_info[role_name] = deploy_status_info.get(role_name, {})
        deploy_status_info['detail'][role_name] = {}
        adapter = JobRuntimeConfigAdapter(value.get("f_train_runtime_conf", {}))
        work_mode = adapter.get_job_work_mode()

        for _party_id in role_partys:
            request_data['local'] = {'role': role_name, 'party_id': _party_id}
            try:
                response = federated_api(job_id=_job_id,
                                         method='POST',
                                         endpoint='/model/deploy/do',
                                         src_party_id=initiator_party_id,
                                         dest_party_id=_party_id,
                                         src_role=initiator_role,
                                         json_body=request_data,
                                         federated_mode=FederatedMode.MULTIPLE if work_mode else FederatedMode.SINGLE)
                deploy_status_info[role_name][_party_id] = response['retcode']
                detail = {_party_id: {}}
                detail[_party_id]['retcode'] = response['retcode']
                detail[_party_id]['retmsg'] = response['retmsg']
                deploy_status_info['detail'][role_name].update(detail)
                if response['retcode']:
                    deploy_status = False
                    deploy_status_msg = 'failed'
            except Exception as e:
                stat_logger.exception(e)
                deploy_status = False
                deploy_status_msg = 'failed'
                deploy_status_info[role_name][_party_id] = 100

    deploy_status_info['model_id'] = request_data['model_id']
    deploy_status_info['model_version'] = _job_id
    return get_json_result(retcode=(0 if deploy_status else 101),
                           retmsg=deploy_status_msg,
                           data=deploy_status_info)
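
# A minimal example of the JSON body accepted by the deploy endpoint above. Values are
# placeholders; only model_id and model_version are required by check_config, and any
# extra keys (e.g. cpn_list or dsl) are forwarded unchanged to each party's
# /model/deploy/do handler.
example_deploy_request = {
    "model_id": "arbiter-10000#guest-9999#host-10000#model",  # placeholder training model id
    "model_version": "202201011200000000000",                 # placeholder training job id
    "cpn_list": ["reader_0", "dataio_0", "hetero_lr_0"],      # optional: components to keep in the predict DSL
}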
def operate_model(model_operation):
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [ModelOperation.STORE, ModelOperation.RESTORE,
                               ModelOperation.EXPORT, ModelOperation.IMPORT]:
        raise Exception('Can not support this operation now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(model_id=request_config["model_id"],
                                                    role=request_config["role"],
                                                    party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                file = request.files.get('file')
                file_path = os.path.join(TEMP_DIRECTORY, file.filename)
                # if not os.path.exists(file_path):
                #     raise Exception('The file is obtained from the fate flow client machine, but it does not exist, '
                #                     'please check the path: {}'.format(file_path))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    file.save(file_path)
                except Exception as e:
                    shutil.rmtree(file_path)
                    raise e
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                model.unpack_model(file_path)

                pipeline = model.read_component_model('pipeline', 'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)

                # the importing party must appear in the model's role configuration
                permitted_party_id = []
                for key, value in train_runtime_conf.get('role', {}).items():
                    for v in value:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception("party id {} is not in model roles, please check if the party id is valid.".format(
                        request_config["party_id"]))

                try:
                    adapter = JobRuntimeConfigAdapter(train_runtime_conf)
                    job_parameters = adapter.get_common_parameters().to_dict()
                    with DB.connection_context():
                        db_model = MLModel.get_or_none(
                            MLModel.f_job_id == job_parameters.get("model_version"),
                            MLModel.f_role == request_config["role"])
                    if not db_model:
                        model_info = model_utils.gather_model_info_data(model)
                        model_info['imported'] = 1
                        model_info['job_id'] = model_info['f_model_version']
                        model_info['size'] = model.calculate_model_file_size()
                        model_info['role'] = request_config["model_id"].split('#')[0]
                        model_info['party_id'] = request_config["model_id"].split('#')[1]
                        if model_utils.compare_version(model_info['f_fate_version'], '1.5.1') == 'lt':
                            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
                            model_info['initiator_role'] = model_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
                            model_info['initiator_party_id'] = model_info.get(
                                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
                            model_info['work_mode'] = adapter.get_job_work_mode()
                            model_info['parent'] = False if model_info.get('f_inference_dsl') else True
                        model_utils.save_model_info(model_info)
                    else:
                        stat_logger.info(f'job id: {job_parameters.get("model_version")}, '
                                         f'role: {request_config["role"]} model info already existed in database.')
                except peewee.IntegrityError as e:
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(model_id=request_config["model_id"],
                                                       model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(response_code=210,
                                         retmsg="Model {} {} does not exist.".format(
                                             request_config.get("model_id"),
                                             request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(request_config, model_operation)
        submit_result = DAGScheduler.submit({'job_dsl': job_dsl,
                                             'job_runtime_conf': job_runtime_conf}, job_id=job_id)
        data.update(submit_result)
        operation_record(data=job_runtime_conf, oper_type=model_operation, oper_status='')
        return get_json_result(job_id=job_id, data=data)
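
# A minimal example of the request config consumed by operate_model above for
# export / store / restore (placeholder values; an IMPORT request additionally uploads
# the model archive as the multipart field "file"):
example_operate_request = {
    "model_id": "arbiter-10000#guest-9999#host-10000#model",  # placeholder; converted to a party model id above
    "model_version": "202201011200000000000",                 # placeholder training job id
    "role": "guest",
    "party_id": 9999,
}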
def migration(config_data: dict):
    try:
        party_model_id = model_utils.gen_party_model_id(model_id=config_data["model_id"],
                                                        role=config_data["local"]["role"],
                                                        party_id=config_data["local"]["party_id"])
        model = pipelined_model.PipelinedModel(model_id=party_model_id,
                                               model_version=config_data["model_version"])
        if not model.exists():
            raise Exception("Can not find {} {} model local cache".format(config_data["model_id"],
                                                                          config_data["model_version"]))
        with DB.connection_context():
            if MLModel.get_or_none(MLModel.f_model_version == config_data["unify_model_version"]):
                raise Exception("Unify model version {} has been occupied in database. "
                                "Please choose another unify model version and try again.".format(
                                    config_data["unify_model_version"]))

        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not find pipeline file in model.")

        migrate_model = pipelined_model.PipelinedModel(
            model_id=model_utils.gen_party_model_id(
                model_id=model_utils.gen_model_id(config_data["migrate_role"]),
                role=config_data["local"]["role"],
                party_id=config_data["local"]["migrate_party_id"]),
            model_version=config_data["unify_model_version"])

        # migrate_model.create_pipelined_model()
        shutil.copytree(src=model.model_path, dst=migrate_model.model_path)

        pipeline = migrate_model.read_component_model('pipeline', 'pipeline')['Pipeline']

        # collect model data via the pipelined model and modify the related inner information of the model
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        train_runtime_conf["role"] = config_data["migrate_role"]
        train_runtime_conf["initiator"] = config_data["migrate_initiator"]

        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_id=model_utils.gen_model_id(train_runtime_conf["role"]),
            model_version=migrate_model.model_version)

        # update pipeline.pb file
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)
        pipeline.model_id = bytes(adapter.get_common_parameters().to_dict().get("model_id"), "utf-8")
        pipeline.model_version = bytes(adapter.get_common_parameters().to_dict().get("model_version"), "utf-8")

        if model_utils.compare_version(pipeline.fate_version, '1.5.0') == 'gt':
            pipeline.initiator_role = config_data["migrate_initiator"]['role']
            pipeline.initiator_party_id = config_data["migrate_initiator"]['party_id']

        # save updated pipeline.pb file
        migrate_model.save_pipeline(pipeline)
        shutil.copyfile(os.path.join(migrate_model.model_path, "pipeline.pb"),
                        os.path.join(migrate_model.model_path, "variables", "data", "pipeline", "pipeline", "Pipeline"))

        # modify proto
        with open(os.path.join(migrate_model.model_path, 'define', 'define_meta.yaml'), 'r') as fin:
            define_yaml = yaml.safe_load(fin)

        for key, value in define_yaml['model_proto'].items():
            if key == 'pipeline':
                continue
            for v in value.keys():
                buffer_obj = migrate_model.read_component_model(key, v)
                module_name = define_yaml['component_define'].get(key, {}).get('module_name')
                modified_buffer = model_migration(model_contents=buffer_obj,
                                                  module_name=module_name,
                                                  old_guest_list=config_data['role']['guest'],
                                                  new_guest_list=config_data['migrate_role']['guest'],
                                                  old_host_list=config_data['role']['host'],
                                                  new_host_list=config_data['migrate_role']['host'],
                                                  old_arbiter_list=config_data.get('role', {}).get('arbiter', None),
                                                  new_arbiter_list=config_data.get('migrate_role', {}).get('arbiter', None))
                migrate_model.save_component_model(component_name=key,
                                                   component_module_name=module_name,
                                                   model_alias=v,
                                                   model_buffers=modified_buffer)

        archive_path = migrate_model.packaging_model()
        shutil.rmtree(os.path.abspath(migrate_model.model_path))

        return (0,
                "Model migrated successfully. "
                "The configuration of the model has been modified automatically. "
                "New model id is: {}, model version is: {}. "
                "Model files can be found at '{}'.".format(adapter.get_common_parameters().to_dict().get("model_id"),
                                                           migrate_model.model_version,
                                                           os.path.abspath(archive_path)),
                {"model_id": migrate_model.model_id,
                 "model_version": migrate_model.model_version,
                 "path": os.path.abspath(archive_path)})
    except Exception as e:
        stat_logger.exception(e)
        return 100, str(e), {}
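
# A sketch of the config_data dict migration() reads (placeholder party ids; only keys
# actually accessed by the function above are shown):
example_migration_config = {
    "model_id": "arbiter-10000#guest-9999#host-10000#model",
    "model_version": "202201011200000000000",
    "unify_model_version": "202201011300000000000",
    "local": {"role": "guest", "party_id": 9999, "migrate_party_id": 8888},
    "role": {"guest": [9999], "host": [10000], "arbiter": [10000]},
    "migrate_role": {"guest": [8888], "host": [7777], "arbiter": [7777]},
    "migrate_initiator": {"role": "guest", "party_id": 8888},
}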
def deploy(config_data):
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')

    try:
        party_model_id = model_utils.gen_party_model_id(model_id=model_id,
                                                        role=local_role,
                                                        party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id, model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not find pipeline file in model.")

        # check whether the deploy process can be executed on this model (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from parent model and generate a child model
        deploy_model = PipelinedModel(model_id=party_model_id, model_version=child_model_version)
        shutil.copytree(src=model.model_path, dst=deploy_model.model_path)
        pipeline = deploy_model.read_component_model('pipeline', 'pipeline')['Pipeline']

        # modify the two pipeline files (model version / train_runtime_conf)
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(model_version=deploy_model.model_version)
        pipeline.model_version = child_model_version
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)

        parser = get_dsl_parser_by_version(train_runtime_conf.get('dsl_version', '1'))
        train_dsl = json_loads(pipeline.train_dsl)
        parent_predict_dsl = json_loads(pipeline.inference_dsl)

        if str(train_runtime_conf.get('dsl_version', '1')) == '1':
            predict_dsl = json_loads(pipeline.inference_dsl)
        else:
            if config_data.get('dsl') or config_data.get('predict_dsl'):
                predict_dsl = config_data.get('dsl') if config_data.get('dsl') else config_data.get('predict_dsl')
                if not isinstance(predict_dsl, dict):
                    predict_dsl = json_loads(predict_dsl)
            else:
                if config_data.get('cpn_list', None):
                    cpn_list = config_data.pop('cpn_list')
                else:
                    cpn_list = list(train_dsl.get('components', {}).keys())
                parser_version = train_runtime_conf.get('dsl_version', '1')
                if str(parser_version) == '1':
                    predict_dsl = parent_predict_dsl
                else:
                    parser = schedule_utils.get_dsl_parser_by_version(parser_version)
                    predict_dsl = parser.deploy_component(cpn_list, train_dsl)

        # save predict dsl into child model file
        parser.verify_dsl(predict_dsl, "predict")
        inference_dsl = parser.get_predict_dsl(role=local_role,
                                               predict_dsl=predict_dsl,
                                               setting_conf_prefix=file_utils.get_federatedml_setting_conf_directory())
        pipeline.inference_dsl = json_dumps(inference_dsl, byte=True)

        if model_utils.compare_version(pipeline.fate_version, '1.5.0') == 'gt':
            pipeline.parent_info = json_dumps({'parent_model_id': model_id,
                                               'parent_model_version': model_version}, byte=True)
            pipeline.parent = False
            runtime_conf_on_party = json_loads(pipeline.runtime_conf_on_party)
            runtime_conf_on_party['job_parameters']['model_version'] = child_model_version
            pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party, byte=True)

        # save model file
        deploy_model.save_pipeline(pipeline)
        shutil.copyfile(os.path.join(deploy_model.model_path, "pipeline.pb"),
                        os.path.join(deploy_model.model_path, "variables", "data", "pipeline", "pipeline", "Pipeline"))

        model_info = model_utils.gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['work_mode'] = adapter.get_job_work_mode()
        model_info['parent'] = False if model_info.get('f_inference_dsl') else True
        if model_utils.compare_version(model_info['f_fate_version'], '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
        model_utils.save_model_info(model_info)
    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        return 0, f"deploy model of role {local_role} {local_party_id} success"
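
# A sketch of the config_data dict this deploy controller reads (placeholder values;
# the function returns a (retcode, retmsg) tuple, 0 on success and 100 on failure):
example_deploy_config = {
    "model_id": "arbiter-10000#guest-9999#host-10000#model",
    "model_version": "202201011200000000000",        # parent model version (training job id)
    "child_model_version": "202201011400000000000",  # version assigned to the deployed child model
    "local": {"role": "guest", "party_id": 9999},
    "cpn_list": ["reader_0", "dataio_0", "hetero_lr_0"],  # optional: components kept in the predict DSL
}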
def deploy(config_data):
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')
    components_checkpoint = config_data.get('components_checkpoint', {})
    warning_msg = ""

    try:
        party_model_id = gen_party_model_id(model_id=model_id,
                                            role=local_role,
                                            party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id, model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not find pipeline file in model.")

        # check whether the deploy process can be executed on this model (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from parent model and generate a child model
        deploy_model = PipelinedModel(model_id=party_model_id, model_version=child_model_version)
        shutil.copytree(src=model.model_path, dst=deploy_model.model_path,
                        ignore=lambda src, names: {'checkpoint'} if src == model.model_path else {})
        pipeline_model = deploy_model.read_pipeline_model()

        train_runtime_conf = json_loads(pipeline_model.train_runtime_conf)
        runtime_conf_on_party = json_loads(pipeline_model.runtime_conf_on_party)
        dsl_version = train_runtime_conf.get("dsl_version", "1")

        parser = get_dsl_parser_by_version(dsl_version)
        train_dsl = json_loads(pipeline_model.train_dsl)
        parent_predict_dsl = json_loads(pipeline_model.inference_dsl)

        if config_data.get('dsl') or config_data.get('predict_dsl'):
            inference_dsl = config_data.get('dsl') if config_data.get('dsl') else config_data.get('predict_dsl')
            if not isinstance(inference_dsl, dict):
                inference_dsl = json_loads(inference_dsl)
        else:
            if config_data.get('cpn_list', None):
                cpn_list = config_data.pop('cpn_list')
            else:
                cpn_list = list(train_dsl.get('components', {}).keys())
            if int(dsl_version) == 1:
                # convert v1 dsl to v2 dsl
                inference_dsl, warning_msg = parser.convert_dsl_v1_to_v2(parent_predict_dsl)
            else:
                parser = get_dsl_parser_by_version(dsl_version)
                inference_dsl = parser.deploy_component(cpn_list, train_dsl)

        # convert v1 conf to v2 conf
        if int(dsl_version) == 1:
            components = parser.get_components_light_weight(inference_dsl)

            from fate_flow.db.component_registry import ComponentRegistry
            job_providers = parser.get_job_providers(dsl=inference_dsl,
                                                     provider_detail=ComponentRegistry.REGISTRY)
            cpn_role_parameters = dict()
            for cpn in components:
                cpn_name = cpn.get_name()
                role_params = parser.parse_component_role_parameters(
                    component=cpn_name,
                    dsl=inference_dsl,
                    runtime_conf=train_runtime_conf,
                    provider_detail=ComponentRegistry.REGISTRY,
                    provider_name=job_providers[cpn_name]["provider"]["name"],
                    provider_version=job_providers[cpn_name]["provider"]["version"])
                cpn_role_parameters[cpn_name] = role_params
            train_runtime_conf = parser.convert_conf_v1_to_v2(train_runtime_conf, cpn_role_parameters)

        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(model_version=deploy_model.model_version)
        pipeline_model.model_version = child_model_version
        pipeline_model.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)

        # save inference dsl into child model file
        parser = get_dsl_parser_by_version(2)
        parser.verify_dsl(inference_dsl, "predict")
        inference_dsl = JobSaver.fill_job_inference_dsl(job_id=model_version,
                                                        role=local_role,
                                                        party_id=local_party_id,
                                                        dsl_parser=parser,
                                                        origin_inference_dsl=inference_dsl)
        pipeline_model.inference_dsl = json_dumps(inference_dsl, byte=True)

        if compare_version(pipeline_model.fate_version, '1.5.0') == 'gt':
            pipeline_model.parent_info = json_dumps({'parent_model_id': model_id,
                                                     'parent_model_version': model_version}, byte=True)
            pipeline_model.parent = False
            runtime_conf_on_party['job_parameters']['model_version'] = child_model_version
            pipeline_model.runtime_conf_on_party = json_dumps(runtime_conf_on_party, byte=True)

        # save model file
        deploy_model.save_pipeline(pipeline_model)
        shutil.copyfile(os.path.join(deploy_model.model_path, "pipeline.pb"),
                        os.path.join(deploy_model.model_path, "variables", "data", "pipeline", "pipeline", "Pipeline"))

        model_info = gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['parent'] = False if model_info.get('f_inference_dsl') else True
        if compare_version(model_info['f_fate_version'], '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf', {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('party_id')
        save_model_info(model_info)

        # deploy the requested checkpoints of each component, if any
        for component_name, component in train_dsl.get('components', {}).items():
            step_index = components_checkpoint.get(component_name, {}).get('step_index')
            step_name = components_checkpoint.get(component_name, {}).get('step_name')

            if step_index is not None:
                step_index = int(step_index)
                step_name = None
            elif step_name is None:
                continue

            checkpoint_manager = CheckpointManager(
                role=local_role, party_id=local_party_id,
                model_id=model_id, model_version=model_version,
                component_name=component_name, mkdir=False,
            )
            checkpoint_manager.load_checkpoints_from_disk()
            if checkpoint_manager.latest_checkpoint is not None:
                checkpoint_manager.deploy(
                    child_model_version,
                    component['output']['model'][0] if component.get('output', {}).get('model') else 'default',
                    step_index,
                    step_name,
                )
    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        msg = f"deploy model of role {local_role} {local_party_id} success"
        if warning_msg:
            msg = msg + f", warning: {warning_msg}"
        return 0, msg
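
# A sketch of the optional components_checkpoint mapping read by the newer deploy
# controller above (placeholder values; per component, either a step_index or a
# step_name selects which checkpoint to deploy, with step_index taking precedence):
example_components_checkpoint = {
    "hetero_lr_0": {"step_index": 5},                    # select the checkpoint saved at step 5
    "hetero_secureboost_0": {"step_name": "round_10"},   # or select a checkpoint by name
}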