Пример #1
0
    def update_job_status(job_id, role, party_id, job_info, create=False):
        job_info['f_run_ip'] = RuntimeConfig.JOB_SERVER_HOST
        if create:
            dsl = json_loads(job_info['f_dsl'])
            runtime_conf = json_loads(job_info['f_runtime_conf'])
            train_runtime_conf = json_loads(job_info['f_train_runtime_conf'])
            if USE_AUTHENTICATION:
                authentication_check(src_role=job_info.get('src_role', None), src_party_id=job_info.get('src_party_id', None),
                                     dsl=dsl, runtime_conf=runtime_conf, role=role, party_id=party_id)
            save_job_conf(job_id=job_id,
                          job_dsl=dsl,
                          job_runtime_conf=runtime_conf,
                          train_runtime_conf=train_runtime_conf,
                          pipeline_dsl=None)

            job_parameters = runtime_conf['job_parameters']
            job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id,
                                   model_id=job_parameters["model_id"],
                                   model_version=job_parameters["model_version"])
            if job_parameters.get("job_type", "") != "predict":
                job_tracker.init_pipelined_model()
            roles = json_loads(job_info['f_roles'])
            partner = {}
            show_role = {}
            is_initiator = job_info.get('f_is_initiator', 0)
            for _role, _role_party in roles.items():
                if is_initiator or _role == role:
                    show_role[_role] = show_role.get(_role, [])
                    for _party_id in _role_party:
                        if is_initiator or _party_id == party_id:
                            show_role[_role].append(_party_id)

                if _role != role:
                    partner[_role] = partner.get(_role, [])
                    partner[_role].extend(_role_party)
                else:
                    for _party_id in _role_party:
                        if _party_id != party_id:
                            partner[_role] = partner.get(_role, [])
                            partner[_role].append(_party_id)

            dag = get_job_dsl_parser(dsl=dsl,
                                     runtime_conf=runtime_conf,
                                     train_runtime_conf=train_runtime_conf)
            job_args = dag.get_args_input()
            dataset = {}
            for _role, _role_party_args in job_args.items():
                if is_initiator or _role == role:
                    for _party_index in range(len(_role_party_args)):
                        _party_id = roles[_role][_party_index]
                        if is_initiator or _party_id == party_id:
                            dataset[_role] = dataset.get(_role, {})
                            dataset[_role][_party_id] = dataset[_role].get(_party_id, {})
                            for _data_type, _data_location in _role_party_args[_party_index]['args']['data'].items():
                                dataset[_role][_party_id][_data_type] = '{}.{}'.format(_data_location['namespace'],
                                                                                       _data_location['name'])
            job_tracker.log_job_view({'partner': partner, 'dataset': dataset, 'roles': show_role})
        else:
            job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id)
        job_tracker.save_job_info(role=role, party_id=party_id, job_info=job_info, create=create)
Пример #2
0
    def create_job(cls, job_id, role, party_id, job_info):
        # parse job configuration
        dsl = job_info['dsl']
        runtime_conf = job_info['runtime_conf']
        train_runtime_conf = job_info['train_runtime_conf']
        if USE_AUTHENTICATION:
            authentication_check(src_role=job_info.get('src_role', None),
                                 src_party_id=job_info.get(
                                     'src_party_id', None),
                                 dsl=dsl,
                                 runtime_conf=runtime_conf,
                                 role=role,
                                 party_id=party_id)
        job_parameters = RunParameters(**runtime_conf['job_parameters'])
        job_initiator = runtime_conf['initiator']

        dsl_parser = schedule_utils.get_job_dsl_parser(
            dsl=dsl,
            runtime_conf=runtime_conf,
            train_runtime_conf=train_runtime_conf)

        # save new job into db
        if role == job_initiator['role'] and party_id == job_initiator[
                'party_id']:
            is_initiator = True
        else:
            is_initiator = False
        job_info["status"] = JobStatus.WAITING
        roles = job_info['roles']
        # this party configuration
        job_info["role"] = role
        job_info["party_id"] = party_id
        job_info["is_initiator"] = is_initiator
        job_info["progress"] = 0
        engines_info = cls.get_job_engines_address(
            job_parameters=job_parameters)
        cls.special_role_parameters(role=role, job_parameters=job_parameters)
        cls.check_parameters(job_parameters=job_parameters,
                             engines_info=engines_info)
        runtime_conf["job_parameters"] = job_parameters.to_dict()

        JobSaver.create_job(job_info=job_info)
        job_utils.save_job_conf(job_id=job_id,
                                job_dsl=dsl,
                                job_runtime_conf=runtime_conf,
                                train_runtime_conf=train_runtime_conf,
                                pipeline_dsl=None)

        cls.initialize_tasks(job_id, role, party_id, True, job_initiator,
                             job_parameters, dsl_parser)
        cls.initialize_job_tracker(job_id=job_id,
                                   role=role,
                                   party_id=party_id,
                                   job_info=job_info,
                                   is_initiator=is_initiator,
                                   dsl_parser=dsl_parser)
Пример #3
0
    def update_job_status(job_id, role, party_id, job_info, create=False):
        job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id)
        job_info['f_run_ip'] = RuntimeConfig.JOB_SERVER_HOST
        if create:
            dsl = json_loads(job_info['f_dsl'])
            runtime_conf = json_loads(job_info['f_runtime_conf'])
            train_runtime_conf = json_loads(job_info['f_train_runtime_conf'])
            save_job_conf(job_id=job_id,
                          job_dsl=dsl,
                          job_runtime_conf=runtime_conf)
            roles = json_loads(job_info['f_roles'])
            partner = {}
            show_role = {}
            is_initiator = job_info.get('f_is_initiator', 0)
            for _role, _role_party in roles.items():
                if is_initiator or _role == role:
                    show_role[_role] = show_role.get(_role, [])
                    for _party_id in _role_party:
                        if is_initiator or _party_id == party_id:
                            show_role[_role].append(_party_id)

                if _role != role:
                    partner[_role] = partner.get(_role, [])
                    partner[_role].extend(_role_party)
                else:
                    for _party_id in _role_party:
                        if _party_id != party_id:
                            partner[_role] = partner.get(_role, [])
                            partner[_role].append(_party_id)

            dag = get_job_dsl_parser(dsl=dsl,
                                     runtime_conf=runtime_conf,
                                     train_runtime_conf=train_runtime_conf)
            job_args = dag.get_args_input()
            dataset = {}
            for _role, _role_party_args in job_args.items():
                if is_initiator or _role == role:
                    for _party_index in range(len(_role_party_args)):
                        _party_id = roles[_role][_party_index]
                        if is_initiator or _party_id == party_id:
                            dataset[_role] = dataset.get(_role, {})
                            dataset[_role][_party_id] = dataset[_role].get(_party_id, {})
                            for _data_type, _data_location in _role_party_args[_party_index]['args']['data'].items():
                                dataset[_role][_party_id][_data_type] = '{}.{}'.format(_data_location['namespace'],
                                                                                       _data_location['name'])
            job_tracker.log_job_view({'partner': partner, 'dataset': dataset, 'roles': show_role})
        job_tracker.save_job_info(role=role, party_id=party_id, job_info=job_info, create=create)
Пример #4
0
    def create_job(cls, job_id, role, party_id, job_info):
        # parse job configuration
        dsl = job_info['dsl']
        runtime_conf = job_info['runtime_conf']
        train_runtime_conf = job_info['train_runtime_conf']
        if USE_AUTHENTICATION:
            authentication_check(src_role=job_info.get('src_role', None), src_party_id=job_info.get('src_party_id', None),
                                 dsl=dsl, runtime_conf=runtime_conf, role=role, party_id=party_id)

        dsl_parser = schedule_utils.get_job_dsl_parser(dsl=dsl,
                                                       runtime_conf=runtime_conf,
                                                       train_runtime_conf=train_runtime_conf)
        job_parameters = dsl_parser.get_job_parameters().get(role, {}).get(party_id, {})
        schedule_logger(job_id).info('job parameters:{}'.format(job_parameters))
        job_parameters = RunParameters(**job_parameters)

        # save new job into db
        if role == job_info["initiator_role"] and party_id == job_info["initiator_party_id"]:
            is_initiator = True
        else:
            is_initiator = False
        job_info["status"] = JobStatus.WAITING
        # this party configuration
        job_info["role"] = role
        job_info["party_id"] = party_id
        job_info["is_initiator"] = is_initiator
        job_info["progress"] = 0
        cls.adapt_job_parameters(role=role, job_parameters=job_parameters)
        engines_info = cls.get_job_engines_address(job_parameters=job_parameters)
        cls.check_parameters(job_parameters=job_parameters, role=role, party_id=party_id, engines_info=engines_info)
        job_info["runtime_conf_on_party"]["job_parameters"] = job_parameters.to_dict()
        job_utils.save_job_conf(job_id=job_id,
                                role=role,
                                job_dsl=dsl,
                                job_runtime_conf=runtime_conf,
                                job_runtime_conf_on_party=job_info["runtime_conf_on_party"],
                                train_runtime_conf=train_runtime_conf,
                                pipeline_dsl=None)

        cls.initialize_tasks(job_id=job_id, role=role, party_id=party_id, run_on_this_party=True, initiator_role=job_info["initiator_role"], initiator_party_id=job_info["initiator_party_id"], job_parameters=job_parameters, dsl_parser=dsl_parser)
        job_parameters = job_info['runtime_conf_on_party']['job_parameters']
        roles = job_info['roles']
        cls.initialize_job_tracker(job_id=job_id, role=role, party_id=party_id, job_parameters=job_parameters, roles=roles, is_initiator=is_initiator, dsl_parser=dsl_parser)
        JobSaver.create_job(job_info=job_info)
Пример #5
0
    def submit_job(job_data):
        job_id = generate_job_id()
        schedule_logger.info('submit job, job_id {}, body {}'.format(job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_utils.check_pipeline_job_runtime_conf(job_runtime_conf)
        job_parameters = job_runtime_conf['job_parameters']
        job_initiator = job_runtime_conf['initiator']
        job_type = job_parameters.get('job_type', '')
        if job_type != 'predict':
            # generate job model info
            job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model'])
            job_parameters['model_version'] = job_id
            train_runtime_conf = {}
        else:
            detect_utils.check_config(job_parameters, ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                                   model_id=job_parameters['model_id'], model_version=job_parameters['model_version'])
            pipeline_model = job_tracker.get_output_model('pipeline')
            job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
            train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)
        job_dsl_path, job_runtime_conf_path = save_job_conf(job_id=job_id,
                                                            job_dsl=job_dsl,
                                                            job_runtime_conf=job_runtime_conf)

        job = Job()
        job.f_job_id = job_id
        job.f_roles = json_dumps(job_runtime_conf['role'])
        job.f_work_mode = job_parameters['work_mode']
        job.f_initiator_party_id = job_initiator['party_id']
        job.f_dsl = json_dumps(job_dsl)
        job.f_runtime_conf = json_dumps(job_runtime_conf)
        job.f_train_runtime_conf = json_dumps(train_runtime_conf)
        job.f_run_ip = ''
        job.f_status = JobStatus.WAITING
        job.f_progress = 0
        job.f_create_time = current_timestamp()

        # save job info
        TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator)

        # push into queue
        RuntimeConfig.JOB_QUEUE.put_event({
            'job_id': job_id,
            "initiator_role": job_initiator['role'],
            "initiator_party_id": job_initiator['party_id']
        }
        )
        schedule_logger.info(
            'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id']))
        board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id'])
        return job_id, job_dsl_path, job_runtime_conf_path, {'model_id': job_parameters['model_id'],
                                                             'model_version': job_parameters[
                                                                 'model_version']}, board_url
Пример #6
0
    def create_job(cls, job_id, role, party_id, job_info):
        # parse job configuration
        dsl = job_info['dsl']
        runtime_conf = job_info['runtime_conf']
        train_runtime_conf = job_info['train_runtime_conf']
        if USE_AUTHENTICATION:
            authentication_check(src_role=job_info.get('src_role', None),
                                 src_party_id=job_info.get(
                                     'src_party_id', None),
                                 dsl=dsl,
                                 runtime_conf=runtime_conf,
                                 role=role,
                                 party_id=party_id)

        dsl_parser = schedule_utils.get_job_dsl_parser(
            dsl=dsl,
            runtime_conf=runtime_conf,
            train_runtime_conf=train_runtime_conf)
        job_parameters = dsl_parser.get_job_parameters(runtime_conf)
        schedule_logger(job_id).info(
            'job parameters:{}'.format(job_parameters))
        dest_user = job_parameters.get(role, {}).get(party_id,
                                                     {}).get('user', '')
        user = {}
        src_party_id = int(
            job_info['src_party_id']) if job_info.get('src_party_id') else 0
        src_role = job_info.get('src_role', '')
        src_user = job_parameters.get(src_role, {}).get(src_party_id, {}).get(
            'user', '') if src_role else ''
        for _role, party_id_item in job_parameters.items():
            user[_role] = {}
            for _party_id, _parameters in party_id_item.items():
                user[_role][_party_id] = _parameters.get("user", "")
        schedule_logger(job_id).info('job user:{}'.format(user))
        if USE_DATA_AUTHENTICATION:
            job_args = dsl_parser.get_args_input()
            schedule_logger(job_id).info('job args:{}'.format(job_args))
            dataset_dict = cls.get_dataset(False, role, party_id,
                                           runtime_conf.get("role"), job_args)
            dataset_list = []
            if dataset_dict.get(role, {}).get(party_id):
                for k, v in dataset_dict[role][party_id].items():
                    dataset_list.append({
                        "namespace": v.split('.')[0],
                        "table_name": v.split('.')[1]
                    })
            data_authentication_check(
                src_role=job_info.get('src_role'),
                src_party_id=job_info.get('src_party_id'),
                src_user=src_user,
                dest_user=dest_user,
                dataset_list=dataset_list)
        job_parameters = RunParameters(
            **job_parameters.get(role, {}).get(party_id, {}))

        # save new job into db
        if role == job_info["initiator_role"] and party_id == job_info[
                "initiator_party_id"]:
            is_initiator = True
        else:
            is_initiator = False
        job_info["status"] = JobStatus.READY
        job_info["user_id"] = dest_user
        job_info["src_user"] = src_user
        job_info["user"] = user
        # this party configuration
        job_info["role"] = role
        job_info["party_id"] = party_id
        job_info["is_initiator"] = is_initiator
        job_info["progress"] = 0
        cls.create_job_parameters_on_party(role=role,
                                           party_id=party_id,
                                           job_parameters=job_parameters)
        # update job parameters on party
        job_info["runtime_conf_on_party"][
            "job_parameters"] = job_parameters.to_dict()
        JobSaver.create_job(job_info=job_info)
        schedule_logger(job_id).info("start initialize tasks")
        initialized_result, provider_group = cls.initialize_tasks(
            job_id=job_id,
            role=role,
            party_id=party_id,
            run_on_this_party=True,
            initiator_role=job_info["initiator_role"],
            initiator_party_id=job_info["initiator_party_id"],
            job_parameters=job_parameters,
            dsl_parser=dsl_parser)
        schedule_logger(job_id).info("initialize tasks success")
        for provider_key, group_info in provider_group.items():
            for cpn in group_info["components"]:
                dsl["components"][cpn]["provider"] = provider_key

        roles = job_info['roles']
        cls.initialize_job_tracker(job_id=job_id,
                                   role=role,
                                   party_id=party_id,
                                   job_parameters=job_parameters,
                                   roles=roles,
                                   is_initiator=is_initiator,
                                   dsl_parser=dsl_parser)

        job_utils.save_job_conf(
            job_id=job_id,
            role=role,
            party_id=party_id,
            dsl=dsl,
            runtime_conf=runtime_conf,
            runtime_conf_on_party=job_info["runtime_conf_on_party"],
            train_runtime_conf=train_runtime_conf,
            pipeline_dsl=None)
        return {"components": initialized_result}
Пример #7
0
    def submit_job(job_data):
        job_id = generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_utils.check_pipeline_job_runtime_conf(job_runtime_conf)
        job_parameters = job_runtime_conf['job_parameters']
        job_initiator = job_runtime_conf['initiator']
        job_type = job_parameters.get('job_type', '')
        if job_type != 'predict':
            # generate job model info
            job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model'])
            job_parameters['model_version'] = job_id
            train_runtime_conf = {}
        else:
            detect_utils.check_config(job_parameters, ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                                   model_id=job_parameters['model_id'], model_version=job_parameters['model_version'])
            pipeline_model = job_tracker.get_output_model('pipeline')
            job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
            train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)
        path_dict = save_job_conf(job_id=job_id,
                                  job_dsl=job_dsl,
                                  job_runtime_conf=job_runtime_conf,
                                  train_runtime_conf=train_runtime_conf,
                                  pipeline_dsl=None)

        job = Job()
        job.f_job_id = job_id
        job.f_roles = json_dumps(job_runtime_conf['role'])
        job.f_work_mode = job_parameters['work_mode']
        job.f_initiator_party_id = job_initiator['party_id']
        job.f_dsl = json_dumps(job_dsl)
        job.f_runtime_conf = json_dumps(job_runtime_conf)
        job.f_train_runtime_conf = json_dumps(train_runtime_conf)
        job.f_run_ip = ''
        job.f_status = JobStatus.WAITING
        job.f_progress = 0
        job.f_create_time = current_timestamp()

        initiator_role = job_initiator['role']
        initiator_party_id = job_initiator['party_id']
        if initiator_party_id not in job_runtime_conf['role'][initiator_role]:
            schedule_logger(job_id).info("initiator party id error:{}".format(initiator_party_id))
            raise Exception("initiator party id error {}".format(initiator_party_id))

        get_job_dsl_parser(dsl=job_dsl,
                           runtime_conf=job_runtime_conf,
                           train_runtime_conf=train_runtime_conf)

        TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator)

        # push into queue
        job_event = job_utils.job_event(job_id, initiator_role,  initiator_party_id)
        try:
            RuntimeConfig.JOB_QUEUE.put_event(job_event)
        except Exception as e:
            raise Exception('push job into queue failed')

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id']))
        board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id'])
        logs_directory = get_job_log_directory(job_id)
        return job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory, \
               {'model_id': job_parameters['model_id'],'model_version': job_parameters['model_version']}, board_url
Пример #8
0
    def submit(cls, job_data, job_id=None):
        if not job_id:
            job_id = job_utils.generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(
            job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_utils.check_job_runtime_conf(job_runtime_conf)
        authentication_utils.check_constraint(job_runtime_conf, job_dsl)

        job_initiator = job_runtime_conf['initiator']
        conf_adapter = JobRuntimeConfigAdapter(job_runtime_conf)
        common_job_parameters = conf_adapter.get_common_parameters()

        if common_job_parameters.job_type != 'predict':
            # generate job model info
            common_job_parameters.model_id = model_utils.gen_model_id(
                job_runtime_conf['role'])
            common_job_parameters.model_version = job_id
            train_runtime_conf = {}
        else:
            # check predict job parameters
            detect_utils.check_config(common_job_parameters.to_dict(),
                                      ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            tracker = Tracker(
                job_id=job_id,
                role=job_initiator['role'],
                party_id=job_initiator['party_id'],
                model_id=common_job_parameters.model_id,
                model_version=common_job_parameters.model_version)
            pipeline_model = tracker.get_output_model('pipeline')
            train_runtime_conf = json_loads(
                pipeline_model['Pipeline'].train_runtime_conf)
            if not model_utils.check_if_deployed(
                    role=job_initiator['role'],
                    party_id=job_initiator['party_id'],
                    model_id=common_job_parameters.model_id,
                    model_version=common_job_parameters.model_version):
                raise Exception(
                    f"Model {common_job_parameters.model_id} {common_job_parameters.model_version} has not been deployed yet."
                )
            job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)

        job = Job()
        job.f_job_id = job_id
        job.f_dsl = job_dsl
        job.f_train_runtime_conf = train_runtime_conf
        job.f_roles = job_runtime_conf['role']
        job.f_work_mode = common_job_parameters.work_mode
        job.f_initiator_role = job_initiator['role']
        job.f_initiator_party_id = job_initiator['party_id']
        job.f_role = job_initiator['role']
        job.f_party_id = job_initiator['party_id']

        path_dict = job_utils.save_job_conf(
            job_id=job_id,
            role=job.f_initiator_role,
            job_dsl=job_dsl,
            job_runtime_conf=job_runtime_conf,
            job_runtime_conf_on_party={},
            train_runtime_conf=train_runtime_conf,
            pipeline_dsl=None)

        if job.f_initiator_party_id not in job_runtime_conf['role'][
                job.f_initiator_role]:
            schedule_logger(job_id).info("initiator party id error:{}".format(
                job.f_initiator_party_id))
            raise Exception("initiator party id error {}".format(
                job.f_initiator_party_id))

        # create common parameters on initiator
        JobController.backend_compatibility(
            job_parameters=common_job_parameters)
        JobController.adapt_job_parameters(
            role=job.f_initiator_role,
            job_parameters=common_job_parameters,
            create_initiator_baseline=True)

        job.f_runtime_conf = conf_adapter.update_common_parameters(
            common_parameters=common_job_parameters)
        dsl_parser = schedule_utils.get_job_dsl_parser(
            dsl=job.f_dsl,
            runtime_conf=job.f_runtime_conf,
            train_runtime_conf=job.f_train_runtime_conf)

        # initiator runtime conf as template
        job.f_runtime_conf_on_party = job.f_runtime_conf.copy()
        job.f_runtime_conf_on_party[
            "job_parameters"] = common_job_parameters.to_dict()

        if common_job_parameters.work_mode == WorkMode.CLUSTER:
            # Save the status information of all participants in the initiator for scheduling
            for role, party_ids in job.f_roles.items():
                for party_id in party_ids:
                    if role == job.f_initiator_role and party_id == job.f_initiator_party_id:
                        continue
                    JobController.initialize_tasks(job_id, role, party_id,
                                                   False, job.f_initiator_role,
                                                   job.f_initiator_party_id,
                                                   common_job_parameters,
                                                   dsl_parser)

        status_code, response = FederatedScheduler.create_job(job=job)
        if status_code != FederatedSchedulingStatusCode.SUCCESS:
            job.f_status = JobStatus.FAILED
            job.f_tag = "submit_failed"
            FederatedScheduler.sync_job_status(job=job)
            raise Exception("create job failed", response)

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(
                job.f_job_id, common_job_parameters.model_id))
        logs_directory = job_utils.get_job_log_directory(job_id)
        submit_result = {
            "job_id":
            job_id,
            "model_info": {
                "model_id": common_job_parameters.model_id,
                "model_version": common_job_parameters.model_version
            },
            "logs_directory":
            logs_directory,
            "board_url":
            job_utils.get_board_url(job_id, job_initiator['role'],
                                    job_initiator['party_id'])
        }
        submit_result.update(path_dict)
        return submit_result
Пример #9
0
    def submit(cls, submit_job_conf: JobConfigurationBase, job_id: str = None):
        if not job_id:
            job_id = job_utils.generate_job_id()
        submit_result = {"job_id": job_id}
        schedule_logger(job_id).info(
            f"submit job, body {submit_job_conf.to_dict()}")
        try:
            dsl = submit_job_conf.dsl
            runtime_conf = deepcopy(submit_job_conf.runtime_conf)
            job_utils.check_job_runtime_conf(runtime_conf)
            authentication_utils.check_constraint(runtime_conf, dsl)
            job_initiator = runtime_conf["initiator"]
            conf_adapter = JobRuntimeConfigAdapter(runtime_conf)
            common_job_parameters = conf_adapter.get_common_parameters()

            if common_job_parameters.job_type != "predict":
                # generate job model info
                conf_version = schedule_utils.get_conf_version(runtime_conf)
                if conf_version != 2:
                    raise Exception(
                        "only the v2 version runtime conf is supported")
                common_job_parameters.model_id = model_utils.gen_model_id(
                    runtime_conf["role"])
                common_job_parameters.model_version = job_id
                train_runtime_conf = {}
            else:
                # check predict job parameters
                detect_utils.check_config(common_job_parameters.to_dict(),
                                          ["model_id", "model_version"])
                # get inference dsl from pipeline model as job dsl
                tracker = Tracker(
                    job_id=job_id,
                    role=job_initiator["role"],
                    party_id=job_initiator["party_id"],
                    model_id=common_job_parameters.model_id,
                    model_version=common_job_parameters.model_version)
                pipeline_model = tracker.get_pipeline_model()
                train_runtime_conf = json_loads(
                    pipeline_model.train_runtime_conf)
                if not model_utils.check_if_deployed(
                        role=job_initiator["role"],
                        party_id=job_initiator["party_id"],
                        model_id=common_job_parameters.model_id,
                        model_version=common_job_parameters.model_version):
                    raise Exception(
                        f"Model {common_job_parameters.model_id} {common_job_parameters.model_version} has not been deployed yet."
                    )
                dsl = json_loads(pipeline_model.inference_dsl)
            # dsl = ProviderManager.fill_fate_flow_provider(dsl)

            job = Job()
            job.f_job_id = job_id
            job.f_dsl = dsl
            job.f_train_runtime_conf = train_runtime_conf
            job.f_roles = runtime_conf["role"]
            job.f_initiator_role = job_initiator["role"]
            job.f_initiator_party_id = job_initiator["party_id"]
            job.f_role = job_initiator["role"]
            job.f_party_id = job_initiator["party_id"]

            path_dict = job_utils.save_job_conf(
                job_id=job_id,
                role=job.f_initiator_role,
                party_id=job.f_initiator_party_id,
                dsl=dsl,
                runtime_conf=runtime_conf,
                runtime_conf_on_party={},
                train_runtime_conf=train_runtime_conf,
                pipeline_dsl=None)

            if job.f_initiator_party_id not in runtime_conf["role"][
                    job.f_initiator_role]:
                msg = f"initiator party id {job.f_initiator_party_id} not in roles {runtime_conf['role']}"
                schedule_logger(job_id).info(msg)
                raise Exception(msg)

            # create common parameters on initiator
            JobController.create_common_job_parameters(
                job_id=job.f_job_id,
                initiator_role=job.f_initiator_role,
                common_job_parameters=common_job_parameters)
            job.f_runtime_conf = conf_adapter.update_common_parameters(
                common_parameters=common_job_parameters)
            dsl_parser = schedule_utils.get_job_dsl_parser(
                dsl=job.f_dsl,
                runtime_conf=job.f_runtime_conf,
                train_runtime_conf=job.f_train_runtime_conf)

            # initiator runtime conf as template
            job.f_runtime_conf_on_party = job.f_runtime_conf.copy()
            job.f_runtime_conf_on_party[
                "job_parameters"] = common_job_parameters.to_dict()

            # inherit job
            job.f_inheritance_info = common_job_parameters.inheritance_info
            job.f_inheritance_status = JobInheritanceStatus.WAITING if common_job_parameters.inheritance_info else JobInheritanceStatus.PASS
            if job.f_inheritance_info:
                inheritance_jobs = JobSaver.query_job(
                    job_id=job.f_inheritance_info.get("job_id"),
                    role=job_initiator["role"],
                    party_id=job_initiator["party_id"])
                inheritance_tasks = JobSaver.query_task(
                    job_id=job.f_inheritance_info.get("job_id"),
                    role=job_initiator["role"],
                    party_id=job_initiator["party_id"],
                    only_latest=True)
                job_utils.check_job_inheritance_parameters(
                    job, inheritance_jobs, inheritance_tasks)

            status_code, response = FederatedScheduler.create_job(job=job)
            if status_code != FederatedSchedulingStatusCode.SUCCESS:
                job.f_status = JobStatus.FAILED
                job.f_tag = "submit_failed"
                FederatedScheduler.sync_job_status(job=job)
                raise Exception("create job failed", response)
            else:
                need_run_components = {}
                for role in response:
                    need_run_components[role] = {}
                    for party, res in response[role].items():
                        need_run_components[role][party] = [
                            name for name, value in response[role][party]
                            ["data"]["components"].items()
                            if value["need_run"] is True
                        ]
                if common_job_parameters.federated_mode == FederatedMode.MULTIPLE:
                    # create the task holder in db to record information of all participants in the initiator for scheduling
                    for role, party_ids in job.f_roles.items():
                        for party_id in party_ids:
                            if role == job.f_initiator_role and party_id == job.f_initiator_party_id:
                                continue
                            if not need_run_components[role][party_id]:
                                continue
                            JobController.initialize_tasks(
                                job_id=job_id,
                                role=role,
                                party_id=party_id,
                                run_on_this_party=False,
                                initiator_role=job.f_initiator_role,
                                initiator_party_id=job.f_initiator_party_id,
                                job_parameters=common_job_parameters,
                                dsl_parser=dsl_parser,
                                components=need_run_components[role][party_id])
                job.f_status = JobStatus.WAITING
                status_code, response = FederatedScheduler.sync_job_status(
                    job=job)
                if status_code != FederatedSchedulingStatusCode.SUCCESS:
                    raise Exception("set job to waiting status failed")

            schedule_logger(job_id).info(
                f"submit job successfully, job id is {job.f_job_id}, model id is {common_job_parameters.model_id}"
            )
            logs_directory = job_utils.get_job_log_directory(job_id)
            result = {
                "code":
                RetCode.SUCCESS,
                "message":
                "success",
                "model_info": {
                    "model_id": common_job_parameters.model_id,
                    "model_version": common_job_parameters.model_version
                },
                "logs_directory":
                logs_directory,
                "board_url":
                job_utils.get_board_url(job_id, job_initiator["role"],
                                        job_initiator["party_id"])
            }
            warn_parameter = JobRuntimeConfigAdapter(
                submit_job_conf.runtime_conf).check_removed_parameter()
            if warn_parameter:
                result[
                    "message"] = f"[WARN]{warn_parameter} is removed,it does not take effect!"
            submit_result.update(result)
            submit_result.update(path_dict)
        except Exception as e:
            submit_result["code"] = RetCode.OPERATING_ERROR
            submit_result["message"] = exception_to_trace_string(e)
            schedule_logger(job_id).exception(e)
        return submit_result
Пример #10
0
    def create_job(cls, job_id, role, party_id, job_info):
        # parse job configuration
        dsl = job_info['dsl']
        runtime_conf = job_info['runtime_conf']
        train_runtime_conf = job_info['train_runtime_conf']
        if USE_AUTHENTICATION:
            authentication_check(src_role=job_info.get('src_role', None),
                                 src_party_id=job_info.get(
                                     'src_party_id', None),
                                 dsl=dsl,
                                 runtime_conf=runtime_conf,
                                 role=role,
                                 party_id=party_id)

        dsl_parser = schedule_utils.get_job_dsl_parser(
            dsl=dsl,
            runtime_conf=runtime_conf,
            train_runtime_conf=train_runtime_conf)
        job_parameters = dsl_parser.get_job_parameters().get(role, {}).get(
            party_id, {})
        schedule_logger(job_id).info(
            'job parameters:{}'.format(job_parameters))
        dest_user = dsl_parser.get_job_parameters().get(role, {}).get(
            party_id, {}).get("user", '')
        user = {}
        src_party_id = int(job_info.get('src_party_id')) if job_info.get(
            'src_party_id') else 0
        src_user = dsl_parser.get_job_parameters().get(
            job_info.get('src_role'), {}).get(src_party_id,
                                              {}).get("user", '')
        for _role, party_id_item in dsl_parser.get_job_parameters().items():
            user[_role] = {}
            for _party_id, _parameters in party_id_item.items():
                user[_role][_party_id] = _parameters.get("user", "")
        schedule_logger(job_id).info('job user:{}'.format(user))
        if USE_DATA_AUTHENTICATION:
            job_args = dsl_parser.get_args_input()
            schedule_logger(job_id).info('job args:{}'.format(job_args))
            dataset_dict = cls.get_dataset(False, role, party_id,
                                           runtime_conf.get("role"), job_args)
            dataset_list = []
            if dataset_dict.get(role, {}).get(party_id):
                for k, v in dataset_dict[role][party_id].items():
                    dataset_list.append({
                        "namespace": v.split('.')[0],
                        "table_name": v.split('.')[1]
                    })
            data_authentication_check(
                src_role=job_info.get('src_role'),
                src_party_id=job_info.get('src_party_id'),
                src_user=src_user,
                dest_user=dest_user,
                dataset_list=dataset_list)
        job_parameters = RunParameters(**job_parameters)

        # save new job into db
        if role == job_info["initiator_role"] and party_id == job_info[
                "initiator_party_id"]:
            is_initiator = True
        else:
            is_initiator = False
        job_info["status"] = JobStatus.WAITING
        job_info["user_id"] = dest_user
        job_info["src_user"] = src_user
        job_info["user"] = user
        # this party configuration
        job_info["role"] = role
        job_info["party_id"] = party_id
        job_info["is_initiator"] = is_initiator
        job_info["progress"] = 0
        cls.adapt_job_parameters(role=role, job_parameters=job_parameters)
        engines_info = cls.get_job_engines_address(
            job_parameters=job_parameters)
        cls.check_parameters(job_parameters=job_parameters,
                             role=role,
                             party_id=party_id,
                             engines_info=engines_info)
        job_info["runtime_conf_on_party"][
            "job_parameters"] = job_parameters.to_dict()
        job_utils.save_job_conf(
            job_id=job_id,
            role=role,
            job_dsl=dsl,
            job_runtime_conf=runtime_conf,
            job_runtime_conf_on_party=job_info["runtime_conf_on_party"],
            train_runtime_conf=train_runtime_conf,
            pipeline_dsl=None)

        cls.initialize_tasks(job_id=job_id,
                             role=role,
                             party_id=party_id,
                             run_on_this_party=True,
                             initiator_role=job_info["initiator_role"],
                             initiator_party_id=job_info["initiator_party_id"],
                             job_parameters=job_parameters,
                             dsl_parser=dsl_parser)
        job_parameters = job_info['runtime_conf_on_party']['job_parameters']
        roles = job_info['roles']
        cls.initialize_job_tracker(job_id=job_id,
                                   role=role,
                                   party_id=party_id,
                                   job_parameters=job_parameters,
                                   roles=roles,
                                   is_initiator=is_initiator,
                                   dsl_parser=dsl_parser)
        JobSaver.create_job(job_info=job_info)
Пример #11
0
    def submit(cls, job_data, job_id=None):
        if not job_id:
            job_id = job_utils.generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_initiator = job_runtime_conf['initiator']
        job_parameters = RunParameters(**job_runtime_conf['job_parameters'])
        cls.backend_compatibility(job_parameters=job_parameters)

        job_utils.check_job_runtime_conf(job_runtime_conf)
        if job_parameters.job_type != 'predict':
            # generate job model info
            job_parameters.model_id = model_utils.gen_model_id(job_runtime_conf['role'])
            job_parameters.model_version = job_id
            train_runtime_conf = {}
        else:
            detect_utils.check_config(job_parameters.to_dict(), ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            tracker = Tracker(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                              model_id=job_parameters.model_id, model_version=job_parameters.model_version)
            pipeline_model = tracker.get_output_model('pipeline')
            if not job_dsl:
                job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
            train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)

        path_dict = job_utils.save_job_conf(job_id=job_id,
                                            job_dsl=job_dsl,
                                            job_runtime_conf=job_runtime_conf,
                                            train_runtime_conf=train_runtime_conf,
                                            pipeline_dsl=None)

        job = Job()
        job.f_job_id = job_id
        job.f_dsl = job_dsl
        job_runtime_conf["job_parameters"] = job_parameters.to_dict()
        job.f_runtime_conf = job_runtime_conf
        job.f_train_runtime_conf = train_runtime_conf
        job.f_roles = job_runtime_conf['role']
        job.f_work_mode = job_parameters.work_mode
        job.f_initiator_role = job_initiator['role']
        job.f_initiator_party_id = job_initiator['party_id']

        initiator_role = job_initiator['role']
        initiator_party_id = job_initiator['party_id']
        if initiator_party_id not in job_runtime_conf['role'][initiator_role]:
            schedule_logger(job_id).info("initiator party id error:{}".format(initiator_party_id))
            raise Exception("initiator party id error {}".format(initiator_party_id))

        dsl_parser = schedule_utils.get_job_dsl_parser(dsl=job_dsl,
                                                       runtime_conf=job_runtime_conf,
                                                       train_runtime_conf=train_runtime_conf)

        cls.adapt_job_parameters(job_parameters=job_parameters)

        # update runtime conf
        job_runtime_conf["job_parameters"] = job_parameters.to_dict()
        job.f_runtime_conf = job_runtime_conf

        status_code, response = FederatedScheduler.create_job(job=job)
        if status_code != FederatedSchedulingStatusCode.SUCCESS:
            raise Exception("create job failed: {}".format(response))

        if job_parameters.work_mode == WorkMode.CLUSTER:
            # Save the status information of all participants in the initiator for scheduling
            for role, party_ids in job_runtime_conf["role"].items():
                for party_id in party_ids:
                    if role == job_initiator['role'] and party_id == job_initiator['party_id']:
                        continue
                    JobController.initialize_tasks(job_id, role, party_id, False, job_initiator, job_parameters, dsl_parser)

        # push into queue
        try:
            JobQueue.create_event(job_id=job_id, initiator_role=initiator_role, initiator_party_id=initiator_party_id)
        except Exception as e:
            raise Exception(f'push job into queue failed:\n{e}')

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters.model_id))
        board_url = "http://{}:{}{}".format(
            ServiceUtils.get_item("fateboard", "host"),
            ServiceUtils.get_item("fateboard", "port"),
            FATE_BOARD_DASHBOARD_ENDPOINT).format(job_id, job_initiator['role'], job_initiator['party_id'])
        logs_directory = job_utils.get_job_log_directory(job_id)
        return job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory, \
               {'model_id': job_parameters.model_id, 'model_version': job_parameters.model_version}, board_url