示例#1
0
def rerun_job(job_id, role, party_id):
    DAGScheduler.set_job_rerun(
        job_id=job_id,
        initiator_role=role,
        initiator_party_id=party_id,
        component_name=request.json.get("component_name"),
        force=request.json.get("force", False),
        auto=False)
    #todo: 判断状态
    return get_json_result(retcode=0, retmsg='success')
示例#2
0
def submit_job():
    submit_result = DAGScheduler.submit(JobConfigurationBase(**request.json))
    return get_json_result(retcode=submit_result["code"],
                           retmsg=submit_result["message"],
                           job_id=submit_result["job_id"],
                           data=submit_result if submit_result["code"]
                           == RetCode.SUCCESS else None)
示例#3
0
def download_upload(access_module):
    job_id = job_utils.generate_job_id()
    if access_module == "upload" and UPLOAD_DATA_FROM_CLIENT and not (request.json and request.json.get("use_local_data") == 0):
        file = request.files['file']
        filename = os.path.join(job_utils.get_job_directory(job_id), 'fate_upload_tmp', file.filename)
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        try:
            file.save(filename)
        except Exception as e:
            shutil.rmtree(os.path.join(job_utils.get_job_directory(job_id), 'fate_upload_tmp'))
            raise e
        job_config = request.args.to_dict()
        if "namespace" in job_config and "table_name" in job_config:
            pass
        else:
            # higher than version 1.5.1, support eggroll run parameters
            job_config = json_loads(list(job_config.keys())[0])
        job_config['file'] = filename
    else:
        job_config = request.json
    required_arguments = ['work_mode', 'namespace', 'table_name']
    if access_module == 'upload':
        required_arguments.extend(['file', 'head', 'partition'])
    elif access_module == 'download':
        required_arguments.extend(['output_path'])
    else:
        raise Exception('can not support this operating: {}'.format(access_module))
    detect_utils.check_config(job_config, required_arguments=required_arguments)
    data = {}
    # compatibility
    if "table_name" in job_config:
        job_config["name"] = job_config["table_name"]
    if "backend" not in job_config:
        job_config["backend"] = 0
    for _ in ["work_mode", "backend", "head", "partition", "drop"]:
        if _ in job_config:
            job_config[_] = int(job_config[_])
    if access_module == "upload":
        if job_config.get('drop', 0) == 1:
            job_config["destroy"] = True
        else:
            job_config["destroy"] = False
        data['table_name'] = job_config["table_name"]
        data['namespace'] = job_config["namespace"]
        data_table_meta = storage.StorageTableMeta(name=job_config["table_name"], namespace=job_config["namespace"])
        if data_table_meta and not job_config["destroy"]:
            return get_json_result(retcode=100,
                                   retmsg='The data table already exists.'
                                          'If you still want to continue uploading, please add the parameter -drop.'
                                          ' 0 means not to delete and continue uploading, '
                                          '1 means to upload again after deleting the table')
    job_dsl, job_runtime_conf = gen_data_access_job_config(job_config, access_module)
    submit_result = DAGScheduler.submit({'job_dsl': job_dsl, 'job_runtime_conf': job_runtime_conf}, job_id=job_id)
    data.update(submit_result)
    return get_json_result(job_id=job_id, data=data)
示例#4
0
def submit_job():
    work_mode = JobRuntimeConfigAdapter(
        request.json.get('job_runtime_conf', {})).get_job_work_mode()
    detect_utils.check_config({'work_mode': work_mode},
                              required_arguments=[('work_mode',
                                                   (WorkMode.CLUSTER,
                                                    WorkMode.STANDALONE))])
    submit_result = DAGScheduler.submit(request.json)
    return get_json_result(retcode=0,
                           retmsg='success',
                           job_id=submit_result.get("job_id"),
                           data=submit_result)
示例#5
0
def update_parameters():
    job_info = request.json
    component_parameters = job_info.pop("component_parameters", None)
    job_parameters = job_info.pop("job_parameters", None)
    job_info["is_initiator"] = True
    jobs = JobSaver.query_job(**job_info)
    if not jobs:
        return get_json_result(
            retcode=RetCode.DATA_ERROR,
            retmsg=log_utils.failed_log(f"query job by {job_info}"))
    else:
        retcode, retdata = DAGScheduler.update_parameters(
            jobs[0], job_parameters, component_parameters)
        return get_json_result(retcode=retcode, data=retdata)
示例#6
0
def operate_model(model_operation):
    request_config = request.json or request.form.to_dict()
    job_id = job_utils.generate_job_id()
    if model_operation not in [
            ModelOperation.STORE, ModelOperation.RESTORE,
            ModelOperation.EXPORT, ModelOperation.IMPORT
    ]:
        raise Exception(
            'Can not support this operating now: {}'.format(model_operation))
    required_arguments = ["model_id", "model_version", "role", "party_id"]
    check_config(request_config, required_arguments=required_arguments)
    request_config["model_id"] = gen_party_model_id(
        model_id=request_config["model_id"],
        role=request_config["role"],
        party_id=request_config["party_id"])
    if model_operation in [ModelOperation.EXPORT, ModelOperation.IMPORT]:
        if model_operation == ModelOperation.IMPORT:
            try:
                file = request.files.get('file')
                file_path = os.path.join(TEMP_DIRECTORY, file.filename)
                # if not os.path.exists(file_path):
                #     raise Exception('The file is obtained from the fate flow client machine, but it does not exist, '
                #                     'please check the path: {}'.format(file_path))
                try:
                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
                    file.save(file_path)
                except Exception as e:
                    shutil.rmtree(file_path)
                    raise e
                request_config['file'] = file_path
                model = pipelined_model.PipelinedModel(
                    model_id=request_config["model_id"],
                    model_version=request_config["model_version"])
                model.unpack_model(file_path)

                pipeline = model.read_component_model('pipeline',
                                                      'pipeline')['Pipeline']
                train_runtime_conf = json_loads(pipeline.train_runtime_conf)
                permitted_party_id = []
                for key, value in train_runtime_conf.get('role', {}).items():
                    for v in value:
                        permitted_party_id.extend([v, str(v)])
                if request_config["party_id"] not in permitted_party_id:
                    shutil.rmtree(model.model_path)
                    raise Exception(
                        "party id {} is not in model roles, please check if the party id is valid."
                    )
                try:
                    adapter = JobRuntimeConfigAdapter(train_runtime_conf)
                    job_parameters = adapter.get_common_parameters().to_dict()
                    with DB.connection_context():
                        db_model = MLModel.get_or_none(
                            MLModel.f_job_id == job_parameters.get(
                                "model_version"),
                            MLModel.f_role == request_config["role"])
                    if not db_model:
                        model_info = model_utils.gather_model_info_data(model)
                        model_info['imported'] = 1
                        model_info['job_id'] = model_info['f_model_version']
                        model_info['size'] = model.calculate_model_file_size()
                        model_info['role'] = request_config["model_id"].split(
                            '#')[0]
                        model_info['party_id'] = request_config[
                            "model_id"].split('#')[1]
                        if model_utils.compare_version(
                                model_info['f_fate_version'], '1.5.1') == 'lt':
                            model_info['roles'] = model_info.get(
                                'f_train_runtime_conf', {}).get('role', {})
                            model_info['initiator_role'] = model_info.get(
                                'f_train_runtime_conf',
                                {}).get('initiator', {}).get('role')
                            model_info['initiator_party_id'] = model_info.get(
                                'f_train_runtime_conf',
                                {}).get('initiator', {}).get('party_id')
                            model_info[
                                'work_mode'] = adapter.get_job_work_mode()
                            model_info['parent'] = False if model_info.get(
                                'f_inference_dsl') else True
                        model_utils.save_model_info(model_info)
                    else:
                        stat_logger.info(
                            f'job id: {job_parameters.get("model_version")}, '
                            f'role: {request_config["role"]} model info already existed in database.'
                        )
                except peewee.IntegrityError as e:
                    stat_logger.exception(e)
                operation_record(request_config, "import", "success")
                return get_json_result()
            except Exception:
                operation_record(request_config, "import", "failed")
                raise
        else:
            try:
                model = pipelined_model.PipelinedModel(
                    model_id=request_config["model_id"],
                    model_version=request_config["model_version"])
                if model.exists():
                    archive_file_path = model.packaging_model()
                    operation_record(request_config, "export", "success")
                    return send_file(archive_file_path,
                                     attachment_filename=os.path.basename(
                                         archive_file_path),
                                     as_attachment=True)
                else:
                    operation_record(request_config, "export", "failed")
                    res = error_response(
                        response_code=210,
                        retmsg="Model {} {} is not exist.".format(
                            request_config.get("model_id"),
                            request_config.get("model_version")))
                    return res
            except Exception as e:
                operation_record(request_config, "export", "failed")
                stat_logger.exception(e)
                return error_response(response_code=210, retmsg=str(e))
    else:
        data = {}
        job_dsl, job_runtime_conf = gen_model_operation_job_config(
            request_config, model_operation)
        submit_result = DAGScheduler.submit(
            {
                'job_dsl': job_dsl,
                'job_runtime_conf': job_runtime_conf
            },
            job_id=job_id)
        data.update(submit_result)
        operation_record(data=job_runtime_conf,
                         oper_type=model_operation,
                         oper_status='')
        return get_json_result(job_id=job_id, data=data)
示例#7
0
def stop_job(job_id, role, party_id, stop_status):
    retcode, retmsg = DAGScheduler.stop_job(job_id=job_id,
                                            role=role,
                                            party_id=party_id,
                                            stop_status=stop_status)
    return get_json_result(retcode=retcode, retmsg=retmsg)
示例#8
0
def rerun_job(job_id, role, party_id):
    DAGScheduler.rerun_job(job_id=job_id, initiator_role=role, initiator_party_id=party_id,
                           component_name=request.json.get("component_name"))
    return get_json_result(retcode=0, retmsg='success')
示例#9
0
    init_arch_db()
    # init runtime config
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--standalone_node',
                        default=False,
                        help="if standalone node mode or not ",
                        action='store_true')
    args = parser.parse_args()
    RuntimeConfig.init_env()
    RuntimeConfig.set_process_role(ProcessRole.DRIVER)
    PrivilegeAuth.init()
    ServiceUtils.register()
    ResourceManager.initialize()
    Detector(interval=5 * 1000).start()
    DAGScheduler(interval=2 * 1000).start()
    thread_pool_executor = ThreadPoolExecutor(
        max_workers=GRPC_SERVER_MAX_WORKERS)
    stat_logger.info(
        f"start grpc server thread pool by {thread_pool_executor._max_workers} max workers"
    )
    server = grpc.server(
        thread_pool=thread_pool_executor,
        options=[(cygrpc.ChannelArgKey.max_send_message_length, -1),
                 (cygrpc.ChannelArgKey.max_receive_message_length, -1)])

    proxy_pb2_grpc.add_DataTransferServiceServicer_to_server(
        UnaryService(), server)
    server.add_insecure_port("{}:{}".format(IP, GRPC_PORT))
    server.start()
    stat_logger.info("FATE Flow grpc server start successfully")