def version_history(name_space, scene_id=None, my_party_id=None, partner_party_id=None, my_role=None,
                    commit_id=None, tag=None, branch="master", limit=10):
    """Return commit metadata dicts for a version table.

    If ``commit_id`` is given, return just that commit's info; otherwise walk
    the parent chain starting from the current commit of ``branch``, collecting
    up to ``limit`` commits (newest first).

    :param name_space: namespace used to locate the version table
    :param scene_id: optional scene identifier forwarded to get_version_table
    :param my_party_id: optional party id forwarded to get_version_table
    :param partner_party_id: optional partner party id forwarded to get_version_table
    :param my_role: optional role forwarded to get_version_table
    :param commit_id: if set, fetch only this commit's info
    :param tag: unused, kept for interface compatibility
    :param branch: branch whose head starts the walk (default "master")
    :param limit: maximum number of commits to return when walking a branch
    :return: list of commit info dicts (possibly empty)
    """
    version_table, scene_key = get_version_table(name_space=name_space,
                                                 scene_id=scene_id,
                                                 my_party_id=my_party_id,
                                                 partner_party_id=partner_party_id,
                                                 my_role=my_role)
    data_table_infos = list()
    if commit_id:
        # Get this single commit's information directly.
        data_table_infos.append(json_loads(version_table.get(commit_id, use_serialize=False)))
    else:
        branch_current_commit = version_table.get(branch, use_serialize=False)
        if branch_current_commit:
            commit_id = bytes_string(branch_current_commit)
            # BUG FIX: the loop bound was hard-coded to 10, silently ignoring
            # the ``limit`` parameter; honor it here.
            for _ in range(limit):
                info = version_table.get(commit_id, use_serialize=False)
                if info:
                    commit_info = json_loads(info)
                    data_table_infos.append(commit_info)
                    # Follow the parent pointer to the previous commit.
                    commit_id = commit_info["parent"]
                else:
                    # Reached the root (or a dangling parent) — stop walking.
                    break
    # NOTE: removed a leftover debug print(data_table_infos).
    return data_table_infos
def submit_job(job_data):
    """Validate a submitted job, persist its configuration, and enqueue it.

    :param job_data: dict with optional 'job_dsl' and 'job_runtime_conf'
    :return: (job_id, dsl path, runtime conf path, model info dict, board url)
    """
    job_id = generate_job_id()
    schedule_logger.info('submit job, job_id {}, body {}'.format(job_id, job_data))
    job_dsl = job_data.get('job_dsl', {})
    job_runtime_conf = job_data.get('job_runtime_conf', {})
    job_utils.check_pipeline_job_runtime_conf(job_runtime_conf)
    job_parameters = job_runtime_conf['job_parameters']
    job_initiator = job_runtime_conf['initiator']
    if job_parameters.get('job_type', '') == 'predict':
        detect_utils.check_config(job_parameters, ['model_id', 'model_version'])
        # Predict jobs reuse the trained pipeline's inference dsl as their dsl.
        job_tracker = Tracking(job_id=job_id,
                               role=job_initiator['role'],
                               party_id=job_initiator['party_id'],
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'])
        pipeline_model = job_tracker.get_output_model('pipeline')
        job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
        train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)
    else:
        # Training job: mint a fresh model id/version pair.
        job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model'])
        job_parameters['model_version'] = job_id
        train_runtime_conf = {}
    job_dsl_path, job_runtime_conf_path = save_job_conf(job_id=job_id,
                                                        job_dsl=job_dsl,
                                                        job_runtime_conf=job_runtime_conf)
    job = Job()
    job.f_job_id = job_id
    job.f_roles = json_dumps(job_runtime_conf['role'])
    job.f_work_mode = job_parameters['work_mode']
    job.f_initiator_party_id = job_initiator['party_id']
    job.f_dsl = json_dumps(job_dsl)
    job.f_runtime_conf = json_dumps(job_runtime_conf)
    job.f_train_runtime_conf = json_dumps(train_runtime_conf)
    job.f_run_ip = ''
    job.f_status = JobStatus.WAITING
    job.f_progress = 0
    job.f_create_time = current_timestamp()
    # save job info
    TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator)
    # push into queue
    RuntimeConfig.JOB_QUEUE.put_event({'job_id': job_id,
                                       "initiator_role": job_initiator['role'],
                                       "initiator_party_id": job_initiator['party_id']})
    schedule_logger.info(
        'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id']))
    board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id'])
    model_info = {'model_id': job_parameters['model_id'], 'model_version': job_parameters['model_version']}
    return job_id, job_dsl_path, job_runtime_conf_path, model_info, board_url
def get_job_configuration(job_id, role, party_id):
    """Load a job's dsl, runtime conf and train runtime conf from the DB.

    Returns three empty dicts when no matching job row exists.
    """
    with DB.connection_context():
        rows = Job.select(Job.f_dsl, Job.f_runtime_conf, Job.f_train_runtime_conf).where(
            Job.f_job_id == job_id, Job.f_role == role, Job.f_party_id == party_id)
        if not rows:
            return {}, {}, {}
        record = rows[0]
        return (json_loads(record.f_dsl),
                json_loads(record.f_runtime_conf),
                json_loads(record.f_train_runtime_conf))
def update_job_status(job_id, role, party_id, job_info, create=False):
    """Persist job status info for one (role, party) and, on creation, build the job view.

    :param job_id: job identifier
    :param role: this party's role in the job
    :param party_id: this party's id
    :param job_info: dict of Job fields (f_-prefixed); mutated to record the server host
    :param create: when True, also save the job conf and log a job view
        (partner parties, dataset inputs, visible roles)
    """
    job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id)
    # Record which server instance handled this update.
    job_info['f_run_ip'] = RuntimeConfig.JOB_SERVER_HOST
    if create:
        dsl = json_loads(job_info['f_dsl'])
        runtime_conf = json_loads(job_info['f_runtime_conf'])
        train_runtime_conf = json_loads(job_info['f_train_runtime_conf'])
        if USE_AUTHENTICATION:
            # Verify the submitting party is allowed to run this dsl/conf here.
            authentication_check(src_role=job_info.get('src_role', None),
                                 src_party_id=job_info.get('src_party_id', None),
                                 dsl=dsl, runtime_conf=runtime_conf, role=role, party_id=party_id)
        save_job_conf(job_id=job_id, job_dsl=dsl, job_runtime_conf=runtime_conf,
                      train_runtime_conf=train_runtime_conf, pipeline_dsl=None)
        roles = json_loads(job_info['f_roles'])
        partner = {}
        show_role = {}
        is_initiator = job_info.get('f_is_initiator', 0)
        # The initiator sees every role/party; a non-initiator sees only itself.
        for _role, _role_party in roles.items():
            if is_initiator or _role == role:
                show_role[_role] = show_role.get(_role, [])
                for _party_id in _role_party:
                    if is_initiator or _party_id == party_id:
                        show_role[_role].append(_party_id)
            if _role != role:
                # Every party under a different role is a partner.
                partner[_role] = partner.get(_role, [])
                partner[_role].extend(_role_party)
            else:
                # Same role: partners are the other parties, excluding ourselves.
                for _party_id in _role_party:
                    if _party_id != party_id:
                        partner[_role] = partner.get(_role, [])
                        partner[_role].append(_party_id)
        dag = get_job_dsl_parser(dsl=dsl,
                                 runtime_conf=runtime_conf,
                                 train_runtime_conf=train_runtime_conf)
        job_args = dag.get_args_input()
        dataset = {}
        # Collect each visible party's input tables as "namespace.name" strings.
        # NOTE(review): assumes args for a role are ordered to match roles[_role] — confirm in dsl parser.
        for _role, _role_party_args in job_args.items():
            if is_initiator or _role == role:
                for _party_index in range(len(_role_party_args)):
                    _party_id = roles[_role][_party_index]
                    if is_initiator or _party_id == party_id:
                        dataset[_role] = dataset.get(_role, {})
                        dataset[_role][_party_id] = dataset[_role].get(_party_id, {})
                        for _data_type, _data_location in _role_party_args[_party_index]['args']['data'].items():
                            dataset[_role][_party_id][_data_type] = '{}.{}'.format(_data_location['namespace'],
                                                                                   _data_location['name'])
        job_tracker.log_job_view({'partner': partner, 'dataset': dataset, 'roles': show_role})
    job_tracker.save_job_info(role=role, party_id=party_id, job_info=job_info, create=create)
def pipeline_dag_dependency(job_id):
    """Build and return the component dependency graph for a stored job.

    Raises if the job cannot be found; any failure is logged then re-raised.
    """
    try:
        found = job_utils.query_job(job_id=job_id)
        if not found:
            raise Exception('query job {} failed'.format(job_id))
        record = found[0]
        parser = job_utils.get_job_dsl_parser(
            dsl=json_loads(record.f_dsl),
            runtime_conf=json_loads(record.f_runtime_conf),
            train_runtime_conf=json_loads(record.f_train_runtime_conf))
        return parser.get_dependency()
    except Exception as e:
        stat_logger.exception(e)
        raise e
def read_feature_header(commit_id=None, tag=None, branch="master"):
    """Read the features and labels header for a commit.

    Resolves the commit via the feature-header version table; returns
    (features, labels) as decoded JSON, or (None, None) when no commit resolves.
    """
    version_table, data_table_info, scene_key, parent, commit_id = read_version(
        "feature_header_version", commit_id=commit_id, tag=tag, branch=branch)
    # commit_id is either the explicit parameter or the branch's current commit.
    if not commit_id:
        return None, None
    if not data_table_info:
        data_table_info = gen_data_table_info("feature_header", scene_key=scene_key, commit_id=commit_id)
    data_table = get_data_table(data_table_info=data_table_info, create_if_missing=False)
    features = json_loads(data_table.get("features", use_serialize=False))
    labels = json_loads(data_table.get("labels", use_serialize=False))
    return features, labels
def get_job_dsl_parser_by_job_id(job_id):
    """Build a dsl parser from a job's stored configuration, or None if absent."""
    with DB.connection_context():
        rows = Job.select(Job.f_dsl, Job.f_runtime_conf, Job.f_train_runtime_conf).where(
            Job.f_job_id == job_id)
        if not rows:
            return None
        row = rows[0]
        return get_job_dsl_parser(dsl=json_loads(row.f_dsl),
                                  runtime_conf=json_loads(row.f_runtime_conf),
                                  train_runtime_conf=json_loads(row.f_train_runtime_conf))
def job_config():
    """HTTP handler: return a job's dsl, confs, and model info as JSON."""
    matched = job_utils.query_job(**request.json)
    if not matched:
        return get_json_result(retcode=101, retmsg='find job failed')
    job = matched[0]
    runtime_conf = json_loads(job.f_runtime_conf)
    response_data = {
        'job_id': job.f_job_id,
        'dsl': json_loads(job.f_dsl),
        'runtime_conf': runtime_conf,
        'train_runtime_conf': json_loads(job.f_train_runtime_conf),
        'model_info': {'model_id': runtime_conf['job_parameters']['model_id'],
                       'model_version': runtime_conf['job_parameters']['model_version']},
    }
    return get_json_result(retcode=0, retmsg='success', data=response_data)
def cancel_job(job_id, role, party_id, job_initiator):
    """Cancel a job that is still waiting in the scheduling queue.

    Only the initiator's job record can be cancelled; otherwise an Exception
    is raised explaining why.

    :param job_id: job identifier
    :param role: role of the caller (used for logging/errors)
    :param party_id: party id of the caller (used for logging/errors)
    :param job_initiator: unused here, kept for interface compatibility
    :return: True if the waiting event was removed, False if removal failed
    :raises Exception: when no initiator job (or no job at all) is found
    """
    schedule_logger(job_id).info(
        '{} {} get cancel waiting job {} command'.format(role, party_id, job_id))
    jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
    if jobs:
        job = jobs[0]
        job_runtime_conf = json_loads(job.f_runtime_conf)
        event = job_utils.job_event(job.f_job_id,
                                    job_runtime_conf['initiator']['role'],
                                    job_runtime_conf['initiator']['party_id'])
        try:
            RuntimeConfig.JOB_QUEUE.del_event(event)
        except Exception as e:
            # BUG FIX: was a bare ``except:`` that also swallowed SystemExit/
            # KeyboardInterrupt and hid the failure reason; catch Exception
            # and log it before reporting failure.
            schedule_logger(job_id).exception(e)
            return False
        schedule_logger(job_id).info(
            'cancel waiting job successfully, job id is {}'.format(job.f_job_id))
        return True
    else:
        jobs = job_utils.query_job(job_id=job_id)
        if jobs:
            raise Exception(
                'role {} party id {} cancel waiting job {} failed, not is initiator'.format(role, party_id, job_id))
        # BUG FIX: error message typo "jod" -> "job".
        raise Exception(
            'role {} party id {} cancel waiting job failed, no find job {}'.format(role, party_id, job_id))
def test_model(role1, role2):
    """Smoke test: round-trip model meta, params, and a data-transform object."""
    with open("%s_runtime_conf.json" % role1) as fh:
        conf = json_loads(fh.read())
    federation.init(job_id=job_id, runtime_conf=conf)
    print(federation.get_field("role"))
    # Save then read back the model meta.
    meta_out = ModelMeta()
    meta_out.name = "HeteroLR%s" % (role2)
    commit_id = save_model("model_meta", meta_out, commit_log="xxx")
    print("save guest model success, commit id is %s" % commit_id)
    meta_in = ModelMeta()
    read_model("model_meta", meta_in)
    print(meta_in)
    # Save then read back the model parameters.
    param_out = ModelParam()
    param_out.weight["k1"] = 1
    param_out.weight["k2"] = 2
    commit_id = save_model("model_param", param_out, commit_log="xxx")
    print("save guest model success, commit id is %s" % commit_id)
    # read
    param_in = ModelParam()
    read_model("model_param", param_in)
    print(param_in)
    # Save a data-transform configuration object.
    transform_srv = DataTransformServer()
    transform_srv.missing_replace_method = "xxxx"
    save_model("data_transform", transform_srv)
def get_data_table_meta(key, data_table_name, data_table_namespace):
    """
    get data table meta information
    :param key: meta key to look up
    :param data_table_name: table name of this data table
    :param data_table_namespace: namespace of this data table
    :return: decoded JSON value for ``key``, or None when the table/key is absent
    """
    from arch.api.utils.core import json_loads
    meta_table = FateSession.get_instance().table(name="%s.meta" % data_table_name,
                                                  namespace=data_table_namespace,
                                                  create_if_missing=True,
                                                  error_if_exist=False,
                                                  in_place_computing=False,
                                                  persistent=True,
                                                  partition=1)
    if not meta_table:
        return None
    raw = meta_table.get(key, use_serialize=False)
    return json_loads(raw) if raw else None
def get_commit_tmp(commit_id, data_table_namespace):
    """Return (tag, branch) recorded for a pending commit.

    Falls back to (None, "master") when the commit has no temp record.
    """
    tmp_table = get_commit_tmp_table(data_table_namespace=data_table_namespace)
    raw = tmp_table.get(commit_id, use_serialize=False)
    if not raw:
        return None, "master"
    info = json_loads(raw)
    return info["tag"], info["branch"]
def check_request_parameters(request_data):
    """Fill in 'role' and 'party_id' from the initiator job when both are missing.

    Mutates ``request_data`` in place; does nothing if either key is present
    or no initiator job row matches.
    """
    with DB.connection_context():
        if 'role' in request_data or 'party_id' in request_data:
            return
        jobs = Job.select(Job.f_runtime_conf).where(Job.f_job_id == request_data.get('job_id', ''),
                                                    Job.f_is_initiator == 1)
        if not jobs:
            return
        conf = json_loads(jobs[0].f_runtime_conf)
        initiator = conf.get('initiator', {})
        request_data['role'] = initiator.get('role', '')
        request_data['party_id'] = initiator.get('party_id', 0)
def pipeline_dag_dependency(job_info):
    """Return the component dependency graph for a job.

    Uses the stored job row when 'job_id' is present in ``job_info``;
    otherwise builds the parser from the confs supplied inline.
    Failures are logged then re-raised.
    """
    try:
        if job_info.get('job_id'):
            found = job_utils.query_job(job_id=job_info.get('job_id', ''))
            if not found:
                raise Exception('query job {} failed'.format(job_info.get('job_id', '')))
            job = found[0]
            parser = job_utils.get_job_dsl_parser(
                dsl=json_loads(job.f_dsl),
                runtime_conf=json_loads(job.f_runtime_conf),
                train_runtime_conf=json_loads(job.f_train_runtime_conf))
        else:
            parser = job_utils.get_job_dsl_parser(
                dsl=job_info.get('job_dsl', {}),
                runtime_conf=job_info.get('job_runtime_conf', {}),
                train_runtime_conf=job_info.get('job_train_runtime_conf', {}))
        return parser.get_dependency(role=job_info.get('role', ''),
                                     party_id=job_info.get('party_id', ''))
    except Exception as e:
        stat_logger.exception(e)
        raise e
def stop_job(job_id, end_status=JobStatus.FAILED):
    """Stop (kill) or cancel a running job across all participating parties.

    Looks up the initiator's job record, marks the job with ``end_status``
    on every party, then sends each party a kill/cancel command over the
    federated API.

    :param job_id: job identifier
    :param end_status: terminal status to set; JobStatus.CANCELED triggers
        cancel semantics (stop after the first successful party)
    :return: for cancel, True if at least one party acknowledged; for kill, None
    :raises Exception: when no initiator job record is found
    """
    schedule_logger(job_id).info('get {} job {} command'.format("cancel" if end_status == JobStatus.CANCELED else "stop", job_id))
    jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
    cancel_success = False
    is_cancel = (end_status == JobStatus.CANCELED)
    if jobs:
        initiator_job = jobs[0]
        job_info = {'f_job_id': job_id, 'f_status': end_status}
        roles = json_loads(initiator_job.f_roles)
        job_work_mode = initiator_job.f_work_mode
        initiator_party_id = initiator_job.f_party_id
        # set status first
        TaskScheduler.sync_job_status(job_id=job_id, roles=roles,
                                      initiator_party_id=initiator_party_id,
                                      initiator_role=initiator_job.f_role,
                                      work_mode=job_work_mode,
                                      job_info=job_info)
        # Send the kill/cancel command to every party of every role.
        for role, partys in roles.items():
            for party_id in partys:
                response = federated_api(job_id=job_id,
                                         method='POST',
                                         endpoint='/{}/schedule/{}/{}/{}/{}'.format(
                                             API_VERSION,
                                             job_id,
                                             role,
                                             party_id,
                                             "cancel" if is_cancel else "kill"
                                         ),
                                         src_party_id=initiator_party_id,
                                         dest_party_id=party_id,
                                         src_role=initiator_job.f_role,
                                         json_body={'job_initiator': {'party_id': initiator_job.f_party_id,
                                                                      'role': initiator_job.f_role},
                                                    'timeout': end_status == JobStatus.TIMEOUT
                                                    },
                                         work_mode=job_work_mode)
                if response['retcode'] == 0:
                    cancel_success = True
                    schedule_logger(job_id).info(
                        'send {} {} {} job {} command successfully'.format(role, party_id,
                                                                           "cancel" if is_cancel else "kill", job_id))
                    # One successful cancel is enough for this role's parties.
                    if is_cancel:
                        break
                else:
                    schedule_logger(job_id).info(
                        'send {} {} {} job {} command failed: {}'.format(role, party_id,
                                                                         "cancel" if is_cancel else "kill", job_id,
                                                                         response['retmsg']))
        if is_cancel:
            return cancel_success
    else:
        schedule_logger(job_id).info('send {} job {} command failed'.format("cancel" if is_cancel else "kill", job_id))
        raise Exception('can not found job: {}'.format(job_id))
def get_job_configuration(job_id, role, party_id, tasks=None):
    """Load job configuration from the DB.

    Without ``tasks``: return (dsl, runtime_conf, train_runtime_conf) for the
    (job_id, role, party_id) row, or three empty dicts when absent.
    With ``tasks``: return {job_id: upload_0 local role-parameters dict with a
    'notes' entry taken from the job description} for each task's job.
    """
    with DB.connection_context():
        if not tasks:
            rows = Job.select(Job.f_dsl, Job.f_runtime_conf, Job.f_train_runtime_conf).where(
                Job.f_job_id == job_id, Job.f_role == role, Job.f_party_id == party_id)
            if not rows:
                return {}, {}, {}
            row = rows[0]
            return (json_loads(row.f_dsl),
                    json_loads(row.f_runtime_conf),
                    json_loads(row.f_train_runtime_conf))
        jobs_run_conf = {}
        for task in tasks:
            rows = Job.select(Job.f_job_id, Job.f_runtime_conf, Job.f_description).where(
                Job.f_job_id == task.f_job_id)
            row = rows[0]
            upload_conf = json_loads(row.f_runtime_conf)["role_parameters"]["local"]["upload_0"]
            upload_conf["notes"] = row.f_description
            jobs_run_conf[row.f_job_id] = upload_conf
        return jobs_run_conf
def stop_job(job_id):
    """Kill a running job on every participating party.

    Looks up the initiator's job record, marks the job FAILED on all parties,
    then sends each party a kill command over the federated API.

    :param job_id: job identifier
    :raises Exception: when no initiator job record is found
    """
    schedule_logger.info('get stop job {} command'.format(job_id))
    jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
    if jobs:
        initiator_job = jobs[0]
        job_info = {'f_job_id': job_id, 'f_status': JobStatus.FAILED}
        roles = json_loads(initiator_job.f_roles)
        job_work_mode = initiator_job.f_work_mode
        initiator_party_id = initiator_job.f_party_id
        # set status first
        TaskScheduler.sync_job_status(
            job_id=job_id, roles=roles,
            initiator_party_id=initiator_party_id,
            work_mode=job_work_mode,
            job_info=job_info)
        # Send the kill command to every party of every role.
        for role, partys in roles.items():
            for party_id in partys:
                response = federated_api(
                    job_id=job_id,
                    method='POST',
                    endpoint='/{}/job/{}/{}/{}/kill'.format(
                        API_VERSION,
                        job_id,
                        role,
                        party_id),
                    src_party_id=initiator_party_id,
                    dest_party_id=party_id,
                    json_body={
                        'job_initiator': {
                            'party_id': initiator_job.f_party_id,
                            'role': initiator_job.f_role
                        }
                    },
                    work_mode=job_work_mode)
                if response['retcode'] == 0:
                    schedule_logger.info(
                        'send {} {} kill job {} command successfully'.
                        format(role, party_id, job_id))
                else:
                    # Failure to reach one party does not stop the others.
                    schedule_logger.info(
                        'send {} {} kill job {} command failed: {}'.format(
                            role, party_id, job_id, response['retmsg']))
    else:
        schedule_logger.info(
            'send stop job {} command failed'.format(job_id))
        raise Exception('can not found job: {}'.format(job_id))
def get_data_table_metas(data_table_name, data_table_namespace):
    """
    get data table meta information
    :param data_table_name: table name of this data table
    :param data_table_namespace: namespace of this data table
    :return: dict of all meta key -> decoded JSON value, or None if no meta table
    """
    meta_table = eggroll.table(name="%s.meta" % data_table_name,
                               namespace=data_table_namespace,
                               create_if_missing=True,
                               error_if_exist=False)
    if not meta_table:
        return None
    return {k: json_loads(v) for k, v in meta_table.collect(use_serialize=False)}
def test_model(role):
    """Smoke test: round-trip model meta, params, and a data-transform object
    against the model table configured in the role's runtime conf."""
    with open("%s_runtime_conf.json" % role) as fh:
        conf = json_loads(fh.read())
    tbl_name = conf.get("WorkFlowParam").get("model_table")
    tbl_ns = conf.get("WorkFlowParam").get("model_namespace")
    print(tbl_name, tbl_ns)
    # Save then read back the model meta.
    meta_out = ModelMeta()
    meta_out.name = "HeteroLR%s" % (camel_to_pascal(role))
    save_model("model_meta", meta_out, name=tbl_name, namespace=tbl_ns)
    meta_in = ModelMeta()
    read_model("model_meta", meta_in, name=tbl_name, namespace=tbl_ns)
    print(meta_in)
    # Save then read back the model parameters.
    param_out = ModelParam()
    param_out.weight["k1"] = 1
    param_out.weight["k2"] = 2
    save_model("model_param", param_out, name=tbl_name, namespace=tbl_ns)
    # read
    param_in = ModelParam()
    read_model("model_param", param_in, name=tbl_name, namespace=tbl_ns)
    print(param_in)
    # Save a data-transform configuration object.
    transform_srv = DataTransformServer()
    transform_srv.missing_replace_method = "xxxx"
    save_model("data_transform", transform_srv, name=tbl_name, namespace=tbl_ns)
def get_data_table_meta(key, data_table_name, data_table_namespace):
    """
    get data table meta information
    :param key: meta key to look up
    :param data_table_name: table name of this data table
    :param data_table_namespace: namespace of this data table
    :return: decoded JSON value for ``key``, or None when the table/key is absent
    """
    meta_table = eggroll.table(name="%s.meta" % data_table_name,
                               namespace=data_table_namespace,
                               create_if_missing=True,
                               error_if_exist=False)
    if not meta_table:
        return None
    raw = meta_table.get(key, use_serialize=False)
    return json_loads(raw) if raw else None
def get_data_table_metas(data_table_name, data_table_namespace):
    """
    get data table meta information
    :param data_table_name: table name of this data table
    :param data_table_namespace: namespace of this data table
    :return: dict of all meta key -> decoded JSON value, or None if no meta table
    """
    from arch.api.utils.core import json_loads
    meta_table = FateSession.get_instance().table(name="%s.meta" % data_table_name,
                                                  namespace=data_table_namespace,
                                                  partition=1,
                                                  persistent=True,
                                                  in_place_computing=False,
                                                  create_if_missing=True,
                                                  error_if_exist=False)
    if not meta_table:
        return None
    return {k: json_loads(v) for k, v in meta_table.collect(use_serialize=False)}
def get_version_info(version_table, commit_id):
    """Return the commit's metadata dict from ``version_table``, or an empty
    dict when the commit is not present."""
    raw = version_table.get(commit_id, use_serialize=False)
    return json_loads(raw) if raw else dict()
def submit_job(job_data):
    """Validate a submitted job, persist its configuration, and enqueue it.

    :param job_data: dict with optional 'job_dsl' and 'job_runtime_conf'
    :return: (job_id, dsl path, runtime conf path, logs directory,
              model info dict, board url)
    :raises Exception: when the initiator party id is not among its role's
        parties, or when enqueueing fails
    """
    job_id = generate_job_id()
    schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data))
    job_dsl = job_data.get('job_dsl', {})
    job_runtime_conf = job_data.get('job_runtime_conf', {})
    job_utils.check_pipeline_job_runtime_conf(job_runtime_conf)
    job_parameters = job_runtime_conf['job_parameters']
    job_initiator = job_runtime_conf['initiator']
    job_type = job_parameters.get('job_type', '')
    if job_type != 'predict':
        # generate job model info
        job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model'])
        job_parameters['model_version'] = job_id
        train_runtime_conf = {}
    else:
        detect_utils.check_config(job_parameters, ['model_id', 'model_version'])
        # get inference dsl from pipeline model as job dsl
        job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                               model_id=job_parameters['model_id'], model_version=job_parameters['model_version'])
        pipeline_model = job_tracker.get_output_model('pipeline')
        job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
        train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)
    path_dict = save_job_conf(job_id=job_id,
                              job_dsl=job_dsl,
                              job_runtime_conf=job_runtime_conf,
                              train_runtime_conf=train_runtime_conf,
                              pipeline_dsl=None)
    job = Job()
    job.f_job_id = job_id
    job.f_roles = json_dumps(job_runtime_conf['role'])
    job.f_work_mode = job_parameters['work_mode']
    job.f_initiator_party_id = job_initiator['party_id']
    job.f_dsl = json_dumps(job_dsl)
    job.f_runtime_conf = json_dumps(job_runtime_conf)
    job.f_train_runtime_conf = json_dumps(train_runtime_conf)
    job.f_run_ip = ''
    job.f_status = JobStatus.WAITING
    job.f_progress = 0
    job.f_create_time = current_timestamp()
    initiator_role = job_initiator['role']
    initiator_party_id = job_initiator['party_id']
    if initiator_party_id not in job_runtime_conf['role'][initiator_role]:
        # BUG FIX: this log message was garbled by a line break inside the
        # string literal; reassembled into a single-line message.
        schedule_logger(job_id).info("initiator party id error:{}".format(initiator_party_id))
        raise Exception("initiator party id error {}".format(initiator_party_id))
    # Validate the dsl/conf combination before distributing the job.
    get_job_dsl_parser(dsl=job_dsl,
                       runtime_conf=job_runtime_conf,
                       train_runtime_conf=train_runtime_conf)
    TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator)
    # push into queue
    job_event = job_utils.job_event(job_id, initiator_role, initiator_party_id)
    try:
        RuntimeConfig.JOB_QUEUE.put_event(job_event)
    except Exception as e:
        # BUG FIX: the original re-raise dropped the underlying cause;
        # chain it so the queue failure is diagnosable.
        raise Exception('push job into queue failed') from e
    schedule_logger(job_id).info(
        'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id']))
    board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id'])
    logs_directory = get_job_log_directory(job_id)
    return job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory, \
        {'model_id': job_parameters['model_id'], 'model_version': job_parameters['model_version']}, board_url