def import_id():
    """Import one chunk of an ID library and publish it once it is complete.

    Reads the JSON request body. A chunk whose ``rangeStart`` is 0 starts a
    new data table (its generated id is remembered under ``tmp_data_id`` in
    the ``info`` table); any other chunk is appended to the table recorded
    there. Every entry of ``ids`` is stored as a key with an empty value.

    When ``total - rangeEnd == 1`` the chunk is treated as the last one: if
    the table's row count equals ``total`` the salt settings are stored, the
    table becomes the ``use_data_id`` table and the previously used table is
    destroyed; otherwise the new table is destroyed.

    Returns:
        ``get_json_result()`` on success, ``get_json_result(2, ...)`` when the
        row count does not match ``total``, and ``get_json_result(1, ...)``
        when any exception is raised.
    """
    eggroll.init(job_id=generate_job_id(), mode=WORK_MODE)
    request_data = request.json
    table_name_space = "id_library"
    try:
        id_library_info = eggroll.table("info", table_name_space, partition=10,
                                        create_if_missing=True, error_if_exist=False)
        if request_data.get("rangeStart") == 0:
            data_id = generate_job_id()
            id_library_info.put("tmp_data_id", data_id)
        else:
            data_id = id_library_info.get("tmp_data_id")
        data_table = eggroll.table(data_id, table_name_space, partition=50,
                                   create_if_missing=True, error_if_exist=False)
        for i in request_data.get("ids", []):
            data_table.put(i, "")

        range_end = request_data.get("rangeEnd")
        total = request_data.get("total")
        if range_end and total and (total - range_end == 1):
            # Last chunk: verify that every id arrived before publishing.
            new_id_count = data_table.count()
            if new_id_count == request_data["total"]:
                id_library_info.put(
                    data_id,
                    json.dumps({
                        "salt": request_data.get("salt"),
                        "saltMethod": request_data.get("saltMethod")
                    }))
                old_data_id = id_library_info.get("use_data_id")
                id_library_info.put("use_data_id", data_id)
                logger.info(
                    "import id success, dtable name is {}, namespace is {}".format(
                        data_id, table_name_space))

                # TODO: destroy DTable, should be use a lock
                # Nothing to clean up when no library was in use before.
                if old_data_id:
                    old_data_table = eggroll.table(old_data_id, table_name_space, partition=50,
                                                   create_if_missing=True, error_if_exist=False)
                    old_data_table.destroy()
                    id_library_info.delete(old_data_id)
            else:
                data_table.destroy()
                return get_json_result(
                    2, "the actual amount of data is not equal to total.")
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(1, "import error.")
def import_offline_feature():
    """Import offline feature data for an existing job and mark it successful.

    Expects a JSON body carrying ``jobId``. The job is looked up with
    ``query_job_by_id`` and its stored ``config`` (a JSON string) is passed,
    together with the request body, to ``GetFeature.import_data``.

    Returns:
        ``get_json_result()`` when the import reports status 0 (the job is
        then updated to ``"success"`` with the current time as ``end_date``);
        status 2 when no job id was supplied; status 3 when the job is not
        found; status 1 when the import fails or an exception is raised.
    """
    eggroll.init(job_id=generate_job_id(), mode=WORK_MODE)
    request_data = request.json
    try:
        job_id = request_data.get("jobId")
        if not job_id:
            return get_json_result(status=2, msg="no job id")

        job_rows = query_job_by_id(job_id=job_id)
        if not job_rows:
            return get_json_result(status=3, msg="can not found this job id: %s" % job_id)

        job_config = json.loads(job_rows[0]["config"])
        response = GetFeature.import_data(request_data, job_config)
        if response.get("status", 1) != 0:
            return get_json_result(status=1,
                                   msg="request offline feature error: %s" % response.get("msg", ""))

        finished = {
            "status": "success",
            "end_date": datetime.datetime.now()
        }
        update_job_by_id(job_id=job_id, update_data=finished)
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1, msg="request offline feature error: %s" % e)
def submit_job():
    """Queue the JSON request body as a new job.

    A fresh job id is generated and logged, then the body is handed to
    ``push_into_job_queue`` as the job's config.

    Returns:
        ``get_json_result(0, ...)`` naming the job id on success, or
        ``get_json_result(1, ...)`` carrying the exception text when queuing
        raises.
    """
    payload = request.json
    new_job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(new_job_id, payload))
    try:
        push_into_job_queue(job_id=new_job_id, config=payload)
    except Exception as e:
        return get_json_result(1, "failed, error: {}".format(e))
    return get_json_result(0, "success, job_id {}".format(new_job_id))
def download_data(data_func):
    """Launch the download or upload utility script as a child process.

    Args:
        data_func: ``"download"`` runs ``arch/api/utils/download.py`` (and
            passes the job id with ``-j``); any other value runs
            ``arch/api/utils/upload.py``. The value is also used as the stem
            of the pid file name.

    The JSON request body must provide ``config_path``, which is passed to
    the script as an absolute path. The child's stdout and stderr go to
    ``std.log`` in the job directory and its pid is written to
    ``pids/<data_func>.pid`` there.

    Returns:
        ``get_json_result(0, ...)`` once the process has been started, or
        ``get_json_result(-104, ...)`` if anything in the launch sequence
        raises.
    """
    _data = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, _data))
    _job_dir = get_job_directory(_job_id)
    os.makedirs(_job_dir, exist_ok=True)

    base_dir = file_utils.get_project_base_directory()
    if data_func == "download":
        _module = os.path.join(base_dir, "arch/api/utils/download.py")
    else:
        _module = os.path.join(base_dir, "arch/api/utils/upload.py")

    try:
        config_path = os.path.abspath(_data.get("config_path"))
        if data_func == "download":
            progs = ["python3", _module, "-j", _job_id, "-c", config_path]
        else:
            progs = ["python3", _module, "-c", config_path]
        logger.info('Starting progs: {}'.format(progs))

        task_pid_path = os.path.join(_job_dir, 'pids')

        if os.name == 'nt':
            # Keep the child from opening a console window on Windows.
            startupinfo = subprocess.STARTUPINFO()
            startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = subprocess.SW_HIDE
        else:
            startupinfo = None

        # The child gets its own copy of the log handle, so the parent's copy
        # can be closed as soon as the process has been spawned.
        with open(os.path.join(_job_dir, 'std.log'), 'w') as std_log:
            p = subprocess.Popen(progs,
                                 stdout=std_log,
                                 stderr=std_log,
                                 startupinfo=startupinfo)

        os.makedirs(task_pid_path, exist_ok=True)
        with open(os.path.join(task_pid_path, data_func + ".pid"), 'w') as f:
            f.write(str(p.pid) + "\n")
        return get_json_result(0, "success, job_id {}".format(_job_id))
    except Exception as e:
        logger.exception(e)
        return get_json_result(-104, "failed, job_id {}".format(_job_id))
def load_model():
    """Send a ``/model/load/do`` request to every party named in the request.

    The JSON body's ``role`` mapping is flattened into the set of distinct
    party ids, and the unmodified body is POSTed to each of them through
    ``federated_api`` under one freshly generated job id. The per-party
    results are not inspected.

    Returns:
        ``get_json_result(job_id=...)`` carrying the generated job id.
    """
    request_config = request.json
    _job_id = generate_job_id()

    # Collect each party once, even if it appears under several roles.
    distinct_parties = set()
    for ids_for_role in request_config.get('role').values():
        distinct_parties |= set(ids_for_role)

    for target_party in distinct_parties:
        _status, _message = federated_api(job_id=_job_id,
                                          method='POST',
                                          url='/model/load/do',
                                          party_id=target_party,
                                          json_body=request_config)
    return get_json_result(job_id=_job_id)
def new_federated_job(request, overall_timeout=DEFAULT_GRPC_OVERALL_TIMEOUT):
    """Forward a request to its ``/<path>/do`` counterpart via ``federated_api``.

    Args:
        request: the incoming request; its ``json``, ``base_url`` and
            ``host_url`` attributes are read.
        overall_timeout: passed through to ``federated_api``.

    The target party is ``local.party_id`` from the JSON body, falling back
    to ``PARTY_ID``.

    Returns:
        When ``federated_api`` reports status 0, its message is parsed as
        JSON and relayed (``status``, ``msg``, ``data``, ``jobId``);
        otherwise the returned status and message are relayed together with
        the locally generated job id.
    """
    body = request.json
    local_job_id = generate_job_id()

    # Path of the current endpoint relative to the host root.
    relative_path = request.base_url.replace(request.host_url, '')
    target_party = body.get('local', {}).get('party_id', PARTY_ID)

    st, msg = federated_api(job_id=local_job_id,
                            method='POST',
                            url='/{}/do'.format(relative_path),
                            party_id=target_party,
                            json_body=body,
                            overall_timeout=overall_timeout)
    if st != 0:
        return get_json_result(status=st, msg=msg, job_id=local_job_id)

    reply = json.loads(msg)
    return get_json_result(status=reply['status'],
                           msg=reply['msg'],
                           data=reply.get('data'),
                           job_id=reply['jobId'])
def load_model():
    """Send a ``/model/load/do`` request to each non-arbiter party, role by role.

    If the JSON body sets ``gen_table_info``,
    ``publish_model.generate_model_info`` is called on the body first. Then,
    for every party listed under every role except ``arbiter``, the body's
    ``local`` entry is overwritten with that role and party id and the body
    is POSTed to the party through ``federated_api``. The per-party results
    are not inspected.

    Returns:
        ``get_json_result(job_id=...)`` carrying the generated job id.
    """
    request_config = request.json
    _job_id = generate_job_id()

    if request_config.get('gen_table_info', False):
        publish_model.generate_model_info(request_config)

    for role, members in request_config.get("role").items():
        if role != 'arbiter':
            for member_id in members:
                # Each recipient is told which role/party it is acting as.
                request_config['local'] = dict(role=role, party_id=member_id)
                _status, _message = federated_api(job_id=_job_id,
                                                  method='POST',
                                                  url='/model/load/do',
                                                  party_id=member_id,
                                                  json_body=request_config)
    return get_json_result(job_id=_job_id)
def load_model():
    """Send a ``/model/load/do`` packet to every configured party over gRPC.

    The config is loaded from the ``config_path`` given in the JSON body.
    For each id in its ``party_ids``, ``my_party_id`` is set to that id and
    the config is wrapped into a packet and sent with ``stub.unaryCall``.

    Returns:
        ``get_json_result()`` after all parties were called, or
        ``get_json_result(-101, ...)`` as soon as one call raises
        ``grpc.RpcError`` (remaining parties are then not contacted).
    """
    config = file_utils.load_json_conf(request.json.get("config_path"))
    _job_id = generate_job_id()
    channel, stub = get_proxy_data_channel()

    # Every party receives the same verb and endpoint.
    http_method = 'POST'
    endpoint = '/model/load/do'

    for party in config.get("party_ids"):
        config['my_party_id'] = party
        packet = wrap_grpc_packet(config, http_method, endpoint, party, _job_id)
        logger.info(
            'Starting load model job_id:{} party_id:{} method:{} url:{}'.format(
                _job_id, party, http_method, endpoint))
        try:
            reply = stub.unaryCall(packet)
            logger.info("Grpc unary response: {}".format(reply))
        except grpc.RpcError:
            failure = 'job_id:{} party_id:{} method:{} url:{} Failed to start load model'.format(
                _job_id, party, http_method, endpoint)
            logger.exception(failure)
            return get_json_result(-101, 'UnaryCall submit to remote manager failed')
    return get_json_result()
def download_upload(data_func):
    """Prepare a runtime config and start the download or upload subprocess.

    Args:
        data_func: ``"upload"`` or ``"download"``; selects the entry of
            ``JOB_MODULE_CONF`` to run and is passed to ``new_runtime_conf``
            and ``run_subprocess``.

    For an upload, a relative ``file`` path in the JSON body is resolved
    against the project base directory. The body is completed with
    ``work_mode``, ``table_name`` and ``namespace``, dumped to the runtime
    conf file, and the module script is started with ``python3`` (a download
    additionally receives the job id via ``-j``).

    Returns:
        ``get_json_result`` with the job id and a ``data`` dict holding the
        child's ``pid``, ``table_name`` and ``namespace``; status 102 when
        no table name/namespace could be determined; status -104 when any
        step raises (including a missing ``file`` entry on upload).
    """
    request_config = request.json
    _job_id = generate_job_id()
    logger.info('generated job_id {}, body {}'.format(_job_id, request_config))
    _job_dir = get_job_directory(_job_id)
    os.makedirs(_job_dir, exist_ok=True)
    module = data_func
    try:
        # Kept inside the try so a malformed body yields the logged -104
        # response instead of an unhandled exception.
        if module == "upload":
            if not os.path.isabs(request_config.get("file", "")):
                request_config["file"] = os.path.join(file_utils.get_project_base_directory(),
                                                      request_config["file"])

        request_config["work_mode"] = request_config.get('work_mode', WORK_MODE)
        table_name, namespace = dtable_utils.get_table_info(config=request_config,
                                                            create=(module == 'upload'))
        if not table_name or not namespace:
            return get_json_result(status=102, msg='no table name and namespace')
        request_config['table_name'] = table_name
        request_config['namespace'] = namespace

        local_conf = request_config.get('local', {})
        conf_file_path = new_runtime_conf(job_dir=_job_dir, method=data_func, module=module,
                                          role=local_conf.get("role"),
                                          party_id=local_conf.get("party_id", PARTY_ID))
        file_utils.dump_json_conf(request_config, conf_file_path)

        module_script = os.path.join(file_utils.get_project_base_directory(),
                                     JOB_MODULE_CONF[module]["module_path"])
        if module == "download":
            progs = ["python3", module_script, "-j", _job_id, "-c", conf_file_path]
        else:
            progs = ["python3", module_script, "-c", conf_file_path]

        p = run_subprocess(job_dir=_job_dir, job_role=data_func, progs=progs)
        return get_json_result(job_id=_job_id,
                               data={'pid': p.pid,
                                     'table_name': request_config['table_name'],
                                     'namespace': request_config['namespace']})
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=-104, msg="failed", job_id=_job_id)
def submit_workflow_job():
    """Queue the JSON request body as a new workflow job.

    A fresh job id is generated and logged, and the body is pushed onto the
    job queue as the job's config. Exceptions from queuing are not caught
    here.

    Returns:
        ``get_json_result(job_id=...)`` carrying the generated job id.
    """
    workflow_conf = request.json
    queued_job_id = generate_job_id()
    log_line = 'generated job_id {}, body {}'.format(queued_job_id, workflow_conf)
    logger.info(log_line)
    push_into_job_queue(job_id=queued_job_id, config=workflow_conf)
    return get_json_result(job_id=queued_job_id)