    def restore(self, model_id: str, model_version: str, store_address: dict):
        """
        Restore model from Tencent COS to local cache
        :param model_id:
        :param model_version:
        :param store_address:
        :return:
        """
        store_key = self.store_key(model_id, model_version) + '.zip'
        model = PipelinedModel(model_id, model_version)
        cos = self.get_connection(store_address)

        try:
            cos.download_file(
                Bucket=store_address["Bucket"],
                Key=store_key,
                DestFilePath=model.archive_model_file_path,
                EnableCRC=True,
            )

            model.unpack_model(model.archive_model_file_path)
        except Exception as e:
            LOGGER.exception(e)
            raise Exception(
                f"Restore model {model_id} {model_version} from Tencent COS failed."
            )
        else:
            LOGGER.info(
                f"Restore model {model_id} {model_version} from Tencent COS successfully. "
                f"Archive path: {model.archive_model_file_path} Key: {store_key}"
            )
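All of the backends shown in these examples (Tencent COS, Redis, MySQL) call the same helpers: store_key, get_connection, exists, store and restore. Below is a minimal sketch of that shared interface, with names inferred from the calls in the snippets rather than taken from the real FATE Flow source:

from abc import ABC, abstractmethod


class ModelStorageBase(ABC):
    """Hypothetical base class inferred from the examples; the real one may differ."""

    def store_key(self, model_id: str, model_version: str) -> str:
        # One flat key per model; the separator here is an assumption.
        return f"{model_id}_{model_version}"

    @abstractmethod
    def get_connection(self, store_address: dict):
        """Build a backend client from the address dict."""

    @abstractmethod
    def exists(self, model_id: str, model_version: str, store_address: dict) -> bool:
        """Return True if the model is already stored in the backend."""

    @abstractmethod
    def store(self, model_id: str, model_version: str, store_address: dict,
              force_update: bool = False):
        """Upload the packaged model archive."""

    @abstractmethod
    def restore(self, model_id: str, model_version: str, store_address: dict):
        """Download the archive and unpack it into the local cache."""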
Example 2
    def restore(self, model_id: str, model_version: str, store_address: dict):
        """
        Restore model from redis to local cache
        :param model_id:
        :param model_version:
        :param store_address:
        :return:
        """
        store_key = self.store_key(model_id, model_version)
        model = PipelinedModel(model_id, model_version)
        red = self.get_connection(store_address)

        try:
            archive_data = red.get(name=store_key)
            if not archive_data:
                raise TypeError(
                    f"The key {store_key} does not exist or is empty.")

            with open(model.archive_model_file_path, "wb") as fw:
                fw.write(archive_data)
            model.unpack_model(model.archive_model_file_path)
        except Exception as e:
            LOGGER.exception(e)
            raise Exception(
                f"Restore model {model_id} {model_version} from redis failed.")
        else:
            LOGGER.info(
                f"Restore model {model_id} {model_version} from redis successfully. "
                f"Archive path: {model.archive_model_file_path} Key: {store_key}"
            )
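A hedged usage sketch for this restore method; the store_address keys below follow typical redis-py connection parameters and the concrete class name is hypothetical, since neither appears in the snippet:

store_address = {
    "host": "127.0.0.1",  # assumed keys; depends on what get_connection() expects
    "port": 6379,
    "db": 0,
    "password": None,
}

storage = RedisModelStorage()  # hypothetical concrete class name
storage.restore(
    model_id="guest#9999#arbiter-10000#guest-9999#host-10000#model",  # illustrative
    model_version="202201011200000000000",
    store_address=store_address,
)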
Example 3
 def restore(self, model_id: str, model_version: str, store_address: dict):
     """
     Restore model from redis to local cache
     :param model_id:
     :param model_version:
     :param store_address:
     :return:
     """
     try:
         red = self.get_connection(config=store_address)
         model = PipelinedModel(model_id=model_id,
                                model_version=model_version)
         redis_store_key = self.store_key(model_id=model_id,
                                          model_version=model_version)
         model_archive_data = red.get(name=redis_store_key)
         if not model_archive_data:
             raise Exception(
                 "Restore model {} {} from redis failed: {}".format(
                     model_id, model_version,
                     "cannot find model archive data"))
         with open(model.archive_model_file_path, "wb") as fw:
             fw.write(model_archive_data)
         model.unpack_model(model.archive_model_file_path)
         LOGGER.info(
             "Restore model to {} from redis successfully using key {}".
             format(model.archive_model_file_path, redis_store_key))
     except Exception as e:
         LOGGER.exception(e)
         raise Exception("Restore model {} {} from redis failed".format(
             model_id, model_version))
Example 4
 def store(self,
           model_id: str,
           model_version: str,
           store_address: dict,
           force_update: bool = False):
     """
     Store the model from local cache to redis
     :param model_id:
     :param model_version:
     :param store_address:
     :param force_update:
     :return:
     """
     try:
         red = self.get_connection(config=store_address)
         model = PipelinedModel(model_id=model_id,
                                model_version=model_version)
         redis_store_key = self.store_key(model_id=model_id,
                                          model_version=model_version)
         with open(model.packaging_model(), "rb") as fr:
             red.set(name=redis_store_key,
                     value=fr.read(),
                     ex=store_address.get("ex", None),
                      nx=not force_update)
         LOGGER.info(
             "Store model {} {} to redis successfully using key {}".format(
                 model_id, model_version, redis_store_key))
     except Exception as e:
         LOGGER.exception(e)
         raise Exception("Store model {} {} to redis failed".format(
             model_id, model_version))
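The nx flag makes Redis SET a no-op when the key already exists, which is how force_update=False avoids overwriting a stored model. A standalone redis-py demonstration of that semantics (assumes a local Redis server):

import redis

r = redis.Redis(host="127.0.0.1", port=6379, db=0)

r.delete("demo_key")
print(r.set("demo_key", b"v1", nx=True))  # True: key was absent, value written
print(r.set("demo_key", b"v2", nx=True))  # None: key exists, write skipped
print(r.get("demo_key"))                  # b'v1'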
Example 5
def query_model_info_from_file(model_id=None,
                               model_version=None,
                               role=None,
                               party_id=None,
                               query_filters=None,
                               to_dict=False,
                               **kwargs):
    res = {} if to_dict else []
    model_dir = os.path.join(get_fate_flow_directory(), 'model_local_cache')
    glob_dir = f"{model_dir}{os.sep}{role if role else '*'}#{party_id if party_id else '*'}#{model_id if model_id else '*'}{os.sep}{model_version if model_version else '*'}"
    stat_logger.info(f'glob model dir: {glob_dir}')
    model_fp_list = glob.glob(glob_dir)
    if model_fp_list:
        for fp in model_fp_list:
            pipeline_model = PipelinedModel(model_id=fp.split(os.path.sep)[-2],
                                            model_version=fp.split(
                                                os.path.sep)[-1])
            model_info = gather_model_info_data(pipeline_model,
                                                query_filters=query_filters)
            if model_info:
                _role = fp.split(os.path.sep)[-2].split('#')[0]
                _party_id = fp.split(os.path.sep)[-2].split('#')[1]
                model_info["f_role"] = _role
                model_info["f_party_id"] = _party_id
                if isinstance(res, dict):
                    res[fp] = model_info
                else:
                    res.append(model_info)

                if kwargs.get('save'):
                    try:
                        insert_info = gather_model_info_data(
                            pipeline_model).copy()
                        insert_info['role'] = _role
                        insert_info['party_id'] = _party_id
                        insert_info['job_id'] = insert_info.get(
                            'f_model_version')
                        insert_info['size'] = pipeline_model.calculate_model_file_size()
                        if compare_version(insert_info['f_fate_version'],
                                           '1.5.1') == 'lt':
                            insert_info['roles'] = insert_info.get(
                                'f_train_runtime_conf', {}).get('role', {})
                            insert_info['initiator_role'] = insert_info.get(
                                'f_train_runtime_conf',
                                {}).get('initiator', {}).get('role')
                            insert_info[
                                'initiator_party_id'] = insert_info.get(
                                    'f_train_runtime_conf',
                                    {}).get('initiator', {}).get('party_id')
                        save_model_info(insert_info)
                    except Exception as e:
                        stat_logger.exception(e)
    if res:
        return 0, 'Query model info from local model success.', res
    return 100, 'Query model info failed, cannot find model from local model files.', res
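The glob pattern above implies an on-disk layout of model_local_cache/&lt;role&gt;#&lt;party_id&gt;#&lt;model_id&gt;/&lt;model_version&gt;. A small sketch of walking that layout directly; the base directory is an assumption (the real code resolves it via get_fate_flow_directory()):

import glob
import os

model_dir = "/data/projects/fate/fateflow/model_local_cache"  # assumed path
for fp in glob.glob(os.path.join(model_dir, "*#*#*", "*")):
    # maxsplit=2 keeps model ids that themselves contain '#' intact
    role, party_id, model_id = os.path.basename(os.path.dirname(fp)).split("#", 2)
    model_version = os.path.basename(fp)
    print(role, party_id, model_id, model_version)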
Example 6
    def setUp(self):
        shutil.rmtree(TEMP_DIRECTORY, True)

        self.pipelined_model = PipelinedModel('foobar', 'v1')
        shutil.rmtree(self.pipelined_model.model_path, True)
        self.pipelined_model.create_pipelined_model()

        with open(self.pipelined_model.define_meta_path, 'w',
                  encoding='utf8') as f:
            yaml.dump(data_define_meta, f)
Example 7
def check_if_deployed(role, party_id, model_id, model_version):
    party_model_id = gen_party_model_id(model_id=model_id,
                                        role=role,
                                        party_id=party_id)
    pipeline_model = PipelinedModel(model_id=party_model_id,
                                    model_version=model_version)
    if not pipeline_model.exists():
        raise Exception(
            f"Model {party_model_id} {model_version} not exists in model local cache."
        )

    pipeline = pipeline_model.read_pipeline_model()
    if compare_version(pipeline.fate_version, '1.5.0') == 'gt':
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        if str(train_runtime_conf.get('dsl_version', '1')) != '1':
            if pipeline.parent:
                return False
    return True
Example 8
def check_before_deploy(pipeline_model: PipelinedModel):
    pipeline = pipeline_model.read_pipeline_model()

    if compare_version(pipeline.fate_version, '1.5.0') == 'gt':
        if pipeline.parent:
            return True
    elif compare_version(pipeline.fate_version, '1.5.0') == 'eq':
        return True
    return False
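Several examples branch on compare_version returning 'gt', 'eq' or 'lt'. A minimal stand-in under that assumption; the real helper lives in fate_flow's model utilities and may handle more version formats:

def compare_version(version: str, other: str) -> str:
    # Assumes plain dotted-integer versions such as "1.5.0".
    a = tuple(int(x) for x in version.split("."))
    b = tuple(int(x) for x in other.split("."))
    if a > b:
        return "gt"
    if a < b:
        return "lt"
    return "eq"


assert compare_version("1.5.1", "1.5.0") == "gt"
assert compare_version("1.5.0", "1.5.0") == "eq"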
Example 9
    def store(self,
              model_id: str,
              model_version: str,
              store_address: dict,
              force_update: bool = False):
        """
        Store the model from local cache to redis
        :param model_id:
        :param model_version:
        :param store_address:
        :param force_update:
        :return:
        """
        store_key = self.store_key(model_id, model_version)
        if not force_update and self.exists(model_id, model_version,
                                            store_address):
            raise FileExistsError(f"The key {store_key} already exists.")

        model = PipelinedModel(model_id, model_version)
        red = self.get_connection(store_address)

        try:
            model.packaging_model()

            with open(model.archive_model_file_path, "rb") as fr:
                res = red.set(store_key,
                              fr.read(),
                              nx=not force_update,
                              ex=store_address.get("ex", None))
            if res is not True:
                if not force_update:
                    raise FileExistsError(
                        f"The key {store_key} already exists.")
                raise TypeError(f"Execute command failed.")
        except Exception as e:
            LOGGER.exception(e)
            raise Exception(
                f"Store model {model_id} {model_version} to redis failed.")
        else:
            LOGGER.info(
                f"Store model {model_id} {model_version} to redis successfully."
                f"Archive path: {model.archive_model_file_path} Key: {store_key}"
            )
Example 10
def get_predict_conf():
    request_data = request.json
    required_parameters = ['model_id', 'model_version']
    check_config(request_data, required_parameters)
    model_dir = os.path.join(get_project_base_directory(), 'model_local_cache')
    model_fp_list = glob.glob(
        model_dir +
        f"/guest#*#{request_data['model_id']}/{request_data['model_version']}")
    if model_fp_list:
        fp = model_fp_list[0]
        pipeline_model = PipelinedModel(model_id=fp.split('/')[-2],
                                        model_version=fp.split('/')[-1])
        pipeline = pipeline_model.read_component_model('pipeline',
                                                       'pipeline')['Pipeline']
        predict_dsl = json_loads(pipeline.inference_dsl)

        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        parser = schedule_utils.get_dsl_parser_by_version(
            train_runtime_conf.get('dsl_version', '1'))
        predict_conf = parser.generate_predict_conf_template(
            predict_dsl=predict_dsl,
            train_conf=train_runtime_conf,
            model_id=request_data['model_id'],
            model_version=request_data['model_version'])
    else:
        predict_conf = ''
    if predict_conf:
        if request_data.get("filename"):
            os.makedirs(TEMP_DIRECTORY, exist_ok=True)
            temp_filepath = os.path.join(TEMP_DIRECTORY,
                                         request_data.get("filename"))
            with open(temp_filepath, "w") as fout:
                fout.write(json_dumps(predict_conf, indent=4))
            return send_file(open(temp_filepath, "rb"),
                             as_attachment=True,
                             attachment_filename=request_data.get("filename"))
        else:
            return get_json_result(data=predict_conf)
    return error_response(
        210,
        "No model found, please check if arguments are specified correctly.")
Example 11
 def store(self, model_id: str, model_version: str, store_address: dict, force_update: bool = False):
     """
     Store the model from local cache to mysql
     :param model_id:
     :param model_version:
     :param store_address:
     :param force_update:
     :return:
     """
     try:
         self.get_connection(config=store_address)
         DB.create_tables([MachineLearningModel])
         model = PipelinedModel(model_id=model_id, model_version=model_version)
         LOGGER.info("start store model {} {}".format(model_id, model_version))
         with DB.connection_context():
             with open(model.packaging_model(), "rb") as fr:
                 slice_index = 0
                 while True:
                     content = fr.read(SLICE_MAX_SIZE)
                     if content:
                         model_in_table = MachineLearningModel()
                         model_in_table.f_create_time = current_timestamp()
                         model_in_table.f_model_id = model_id
                         model_in_table.f_model_version = model_version
                         model_in_table.f_content = serialize_b64(content, to_str=True)
                         model_in_table.f_size = sys.getsizeof(model_in_table.f_content)
                         model_in_table.f_slice_index = slice_index
                          if force_update:
                              model_in_table.save(only=[MachineLearningModel.f_content, MachineLearningModel.f_size,
                                                        MachineLearningModel.f_update_time, MachineLearningModel.f_slice_index])
                              LOGGER.info("update model {} {} slice index {} content".format(model_id, model_version, slice_index))
                          else:
                              model_in_table.save(force_insert=True)
                              LOGGER.info("insert model {} {} slice index {} content".format(model_id, model_version, slice_index))
                          slice_index += 1
                     else:
                         break
                 LOGGER.info("Store model {} {} to mysql successfully".format(model_id,  model_version))
         self.close_connection()
     except Exception as e:
         LOGGER.exception(e)
         raise Exception("Store model {} {} to mysql failed".format(model_id, model_version))
Example 12
    def read_component_model(self):
        pipelined_model = PipelinedModel(
            gen_party_model_id(self.model_id, self.tracker.role,
                               self.tracker.party_id), self.model_version)

        component_model = pipelined_model._read_component_model(
            self.component_name, self.model_alias)
        if not component_model:
            raise ValueError('The component model is empty.')

        self.model_output = component_model
        self.tracker.set_metric_meta(
            'model_loader', f'{self.component_name}-{self.model_alias}',
            MetricMeta(
                'component_model', 'component_model_info', {
                    'model_id': self.model_id,
                    'model_version': self.model_version,
                    'component_name': self.component_name,
                    'model_alias': self.model_alias,
                }))
Example 13
def gather_model_info_data(model: PipelinedModel, query_filters=None):
    if model.exists():
        pipeline = model.read_pipeline_model()
        model_info = OrderedDict()
        if query_filters and not isinstance(query_filters, list):
            query_filters = None
        for attr, field in pipeline.ListFields():
            if query_filters and attr.name not in query_filters:
                continue
            if isinstance(field, bytes):
                model_info["f_" + attr.name] = json_loads(field, OrderedDict)
            else:
                model_info["f_" + attr.name] = field
        return model_info
    return []
Example 14
    def store(self,
              model_id: str,
              model_version: str,
              store_address: dict,
              force_update: bool = False):
        """
        Store the model from local cache to Tencent COS
        :param model_id:
        :param model_version:
        :param store_address:
        :param force_update:
        :return:
        """
        store_key = self.store_key(model_id, model_version) + '.zip'
        if not force_update and self.exists(model_id, model_version,
                                            store_address):
            raise FileExistsError(f"The object {store_key} already exists.")

        model = PipelinedModel(model_id, model_version)
        cos = self.get_connection(store_address)

        try:
            model.packaging_model()

            response = cos.upload_file(
                Bucket=store_address["Bucket"],
                LocalFilePath=model.archive_model_file_path,
                Key=store_key,
                EnableMD5=True,
            )
        except Exception as e:
            LOGGER.exception(e)
            raise Exception(
                f"Store model {model_id} {model_version} to Tencent COS failed."
            )
        else:
            LOGGER.info(
                f"Store model {model_id} {model_version} to Tencent COS successfully. "
                f"Archive path: {model.archive_model_file_path} Key: {store_key} ETag: {response['ETag']}"
            )
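For the COS backend, get_connection presumably builds a CosS3Client from the official qcloud-cos SDK; a hedged sketch of what it might look like, where the store_address key names are assumptions:

from qcloud_cos import CosConfig, CosS3Client


def get_connection(store_address: dict) -> CosS3Client:
    # Assumed keys: Region, SecretId, SecretKey (Bucket is passed per call).
    config = CosConfig(
        Region=store_address["Region"],
        SecretId=store_address["SecretId"],
        SecretKey=store_address["SecretKey"],
    )
    return CosS3Client(config)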
Example 15
 def output_model_reload(cls, job, source_job):
     source_model_id = model_utils.gen_party_model_id(
         source_job.f_runtime_conf.get("job_parameters").get("common").get(
             "model_id"), job.f_role, job.f_party_id)
     model_id = model_utils.gen_party_model_id(
         job.f_runtime_conf.get("job_parameters").get("common").get(
             "model_id"), job.f_role, job.f_party_id)
     PipelinedModel(
         model_id=model_id,
         model_version=job.f_job_id).reload_component_model(
             model_id=source_model_id,
             model_version=job.f_inheritance_info.get("job_id"),
             component_list=job.f_inheritance_info.get("component_list"))
Example 16
    def restore(self, model_id: str, model_version: str, store_address: dict):
        """
        Restore model from mysql to local cache
        :param model_id:
        :param model_version:
        :param store_address:
        :return:
        """
        model = PipelinedModel(model_id, model_version)
        self.get_connection(store_address)

        try:
            with DB.connection_context():
                models_in_tables = MachineLearningModel.select().where(
                    MachineLearningModel.f_model_id == model_id,
                    MachineLearningModel.f_model_version == model_version,
                ).order_by(MachineLearningModel.f_slice_index)
            if not models_in_tables:
                raise ValueError(f"Cannot found model in table.")

            model_archive_data = b''.join(
                deserialize_b64(models_in_table.f_content)
                for models_in_table in models_in_tables)
            if not model_archive_data:
                raise ValueError(f"Cannot get model archive data.")

            with open(model.archive_model_file_path, "wb") as fw:
                fw.write(model_archive_data)
            model.unpack_model(model.archive_model_file_path)
        except Exception as e:
            LOGGER.exception(e)
            raise Exception(
                f"Restore model {model_id} {model_version} from mysql failed.")
        else:
            LOGGER.info(
                f"Restore model to {model.archive_model_file_path} from mysql successfully."
            )
        finally:
            self.close_connection()
Example 17
 def restore(self, model_id: str, model_version: str, store_address: dict):
     """
     Restore model from mysql to local cache
     :param model_id:
     :param model_version:
     :param store_address:
     :return:
     """
     try:
         self.get_connection(config=store_address)
         model = PipelinedModel(model_id=model_id, model_version=model_version)
         with DB.connection_context():
             models_in_tables = MachineLearningModel.select().where(MachineLearningModel.f_model_id == model_id,
                                                                    MachineLearningModel.f_model_version == model_version).\
                 order_by(MachineLearningModel.f_slice_index)
             if not models_in_tables:
                 raise Exception("Restore model {} {} from mysql failed: {}".format(
                     model_id, model_version, "can not found model in table"))
             f_content = ''
             for models_in_table in models_in_tables:
                 if not f_content:
                     f_content = models_in_table.f_content
                 else:
                     f_content += models_in_table.f_content
             model_archive_data = deserialize_b64(f_content)
             if not model_archive_data:
                 raise Exception("Restore model {} {} from mysql failed: {}".format(
                     model_id, model_version, "can not get model archive data"))
             with open(model.archive_model_file_path, "wb") as fw:
                 fw.write(model_archive_data)
             model.unpack_model(model.archive_model_file_path)
             LOGGER.info("Restore model to {} from mysql successfully".format(model.archive_model_file_path))
         self.close_connection()
     except Exception as e:
         LOGGER.exception(e)
         raise Exception("Restore model {} {} from mysql failed".format(model_id, model_version))
Example 18
    def store(self,
              model_id: str,
              model_version: str,
              store_address: dict,
              force_update: bool = False):
        """
        Store the model from local cache to mysql
        :param model_id:
        :param model_version:
        :param store_address:
        :param force_update:
        :return:
        """
        if not force_update and self.exists(model_id, model_version,
                                            store_address):
            raise FileExistsError(
                f"The model {model_id} {model_version} already exists in the database."
            )

        model = PipelinedModel(model_id, model_version)
        self.get_connection(store_address)

        try:
            DB.create_tables([MachineLearningModel])

            LOGGER.info(f"Starting store model {model_id} {model_version}.")
            with open(model.packaging_model(),
                      "rb") as fr, DB.connection_context():
                slice_index = 0
                while True:
                    content = fr.read(SLICE_MAX_SIZE)
                    if not content:
                        break

                    model_in_table = MachineLearningModel()
                    model_in_table.f_create_time = current_timestamp()
                    model_in_table.f_model_id = model_id
                    model_in_table.f_model_version = model_version
                    model_in_table.f_content = serialize_b64(content,
                                                             to_str=True)
                    model_in_table.f_size = sys.getsizeof(
                        model_in_table.f_content)
                    model_in_table.f_slice_index = slice_index

                    rows = 0
                    if force_update:
                        rows = model_in_table.save(only=[
                            MachineLearningModel.f_content,
                            MachineLearningModel.f_size,
                            MachineLearningModel.f_update_time,
                            MachineLearningModel.f_slice_index,
                        ])
                    if not rows:
                        rows = model_in_table.save(force_insert=True)
                    if not rows:
                        raise Exception(
                            f"Save slice index {slice_index} failed")

                    LOGGER.info(
                        f"Saved slice index {slice_index} of model {model_id} {model_version}."
                    )
                    slice_index += 1
        except Exception as e:
            LOGGER.exception(e)
            raise Exception(
                f"Store model {model_id} {model_version} to mysql failed.")
        else:
            LOGGER.info(
                f"Store model {model_id} {model_version} to mysql successfully."
            )
        finally:
            self.close_connection()
Example 19
def deploy(config_data):
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')
    components_checkpoint = config_data.get('components_checkpoint', {})
    warning_msg = ""

    try:
        party_model_id = gen_party_model_id(model_id=model_id,
                                            role=local_role,
                                            party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id,
                               model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")

        # check whether the model is eligible for the deploy process (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from parent model and generate a child model
        deploy_model = PipelinedModel(model_id=party_model_id,
                                      model_version=child_model_version)
        shutil.copytree(src=model.model_path,
                        dst=deploy_model.model_path,
                        ignore=lambda src, names: {'checkpoint'}
                        if src == model.model_path else {})
        pipeline_model = deploy_model.read_pipeline_model()

        train_runtime_conf = json_loads(pipeline_model.train_runtime_conf)
        runtime_conf_on_party = json_loads(
            pipeline_model.runtime_conf_on_party)
        dsl_version = train_runtime_conf.get("dsl_version", "1")

        parser = get_dsl_parser_by_version(dsl_version)
        train_dsl = json_loads(pipeline_model.train_dsl)
        parent_predict_dsl = json_loads(pipeline_model.inference_dsl)

        if config_data.get('dsl') or config_data.get('predict_dsl'):
            inference_dsl = config_data.get('dsl') if config_data.get(
                'dsl') else config_data.get('predict_dsl')
            if not isinstance(inference_dsl, dict):
                inference_dsl = json_loads(inference_dsl)
        else:
            if config_data.get('cpn_list', None):
                cpn_list = config_data.pop('cpn_list')
            else:
                cpn_list = list(train_dsl.get('components', {}).keys())
            if int(dsl_version) == 1:
                # convert v1 dsl to v2 dsl
                inference_dsl, warning_msg = parser.convert_dsl_v1_to_v2(
                    parent_predict_dsl)
            else:
                parser = get_dsl_parser_by_version(dsl_version)
                inference_dsl = parser.deploy_component(cpn_list, train_dsl)

        # convert v1 conf to v2 conf
        if int(dsl_version) == 1:
            components = parser.get_components_light_weight(inference_dsl)

            from fate_flow.db.component_registry import ComponentRegistry
            job_providers = parser.get_job_providers(
                dsl=inference_dsl, provider_detail=ComponentRegistry.REGISTRY)
            cpn_role_parameters = dict()
            for cpn in components:
                cpn_name = cpn.get_name()
                role_params = parser.parse_component_role_parameters(
                    component=cpn_name,
                    dsl=inference_dsl,
                    runtime_conf=train_runtime_conf,
                    provider_detail=ComponentRegistry.REGISTRY,
                    provider_name=job_providers[cpn_name]["provider"]["name"],
                    provider_version=job_providers[cpn_name]["provider"]
                    ["version"])
                cpn_role_parameters[cpn_name] = role_params
            train_runtime_conf = parser.convert_conf_v1_to_v2(
                train_runtime_conf, cpn_role_parameters)

        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_version=deploy_model.model_version)
        pipeline_model.model_version = child_model_version
        pipeline_model.train_runtime_conf = json_dumps(train_runtime_conf,
                                                       byte=True)

        #  save inference dsl into child model file
        parser = get_dsl_parser_by_version(2)
        parser.verify_dsl(inference_dsl, "predict")
        inference_dsl = JobSaver.fill_job_inference_dsl(
            job_id=model_version,
            role=local_role,
            party_id=local_party_id,
            dsl_parser=parser,
            origin_inference_dsl=inference_dsl)
        pipeline_model.inference_dsl = json_dumps(inference_dsl, byte=True)

        if compare_version(pipeline_model.fate_version, '1.5.0') == 'gt':
            pipeline_model.parent_info = json_dumps(
                {
                    'parent_model_id': model_id,
                    'parent_model_version': model_version
                },
                byte=True)
            pipeline_model.parent = False
            runtime_conf_on_party['job_parameters'][
                'model_version'] = child_model_version
            pipeline_model.runtime_conf_on_party = json_dumps(
                runtime_conf_on_party, byte=True)

        # save model file
        deploy_model.save_pipeline(pipeline_model)
        shutil.copyfile(
            os.path.join(deploy_model.model_path, "pipeline.pb"),
            os.path.join(deploy_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))

        model_info = gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['parent'] = False if model_info.get(
            'f_inference_dsl') else True
        if compare_version(model_info['f_fate_version'], '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf',
                                                 {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator',
                                                {}).get('party_id')
        save_model_info(model_info)

        for component_name, component in train_dsl.get('components',
                                                       {}).items():
            step_index = components_checkpoint.get(component_name,
                                                   {}).get('step_index')
            step_name = components_checkpoint.get(component_name,
                                                  {}).get('step_name')
            if step_index is not None:
                step_index = int(step_index)
                step_name = None
            elif step_name is None:
                continue

            checkpoint_manager = CheckpointManager(
                role=local_role,
                party_id=local_party_id,
                model_id=model_id,
                model_version=model_version,
                component_name=component_name,
                mkdir=False,
            )
            checkpoint_manager.load_checkpoints_from_disk()
            if checkpoint_manager.latest_checkpoint is not None:
                checkpoint_manager.deploy(
                    child_model_version,
                    component['output']['model'][0] if component.get(
                        'output', {}).get('model') else 'default',
                    step_index,
                    step_name,
                )
    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        msg = f"deploy model of role {local_role} {local_party_id} success"
        if warning_msg:
            msg = msg + f", warning: {warning_msg}"
        return 0, msg
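An illustrative config_data payload for this deploy function; the structure follows the keys the code reads, while all values are placeholders:

config_data = {
    "model_id": "arbiter-10000#guest-9999#host-10000#model",  # placeholder
    "model_version": "202201011200000000000",                 # parent job id
    "local": {"role": "guest", "party_id": 9999},
    "child_model_version": "202201011300000000000",           # new job id
    "components_checkpoint": {
        "hetero_lr_0": {"step_index": 5},  # or {"step_name": "..."}
    },
    # Optional: "dsl"/"predict_dsl" to supply a custom inference DSL,
    # or "cpn_list" to deploy only a subset of components.
}

retcode, retmsg = deploy(config_data)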
Example 20
class TestPipelinedModel(unittest.TestCase):
    def setUp(self):
        shutil.rmtree(TEMP_DIRECTORY, True)

        self.pipelined_model = PipelinedModel('foobar', 'v1')
        shutil.rmtree(self.pipelined_model.model_path, True)
        self.pipelined_model.create_pipelined_model()

        with open(self.pipelined_model.define_meta_path, 'w',
                  encoding='utf8') as f:
            yaml.dump(data_define_meta, f)

    def tearDown(self):
        shutil.rmtree(TEMP_DIRECTORY, True)
        shutil.rmtree(self.pipelined_model.model_path, True)

    def test_write_read_file_same_time(self):
        fw = open(self.pipelined_model.define_meta_path, 'r+', encoding='utf8')
        self.assertEqual(yaml.safe_load(fw), data_define_meta)
        fw.seek(0)
        fw.write('foobar')

        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as fr:
            self.assertEqual(yaml.safe_load(fr), data_define_meta)

        fw.truncate()

        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as fr:
            self.assertEqual(fr.read(), 'foobar')

        fw.seek(0)
        fw.write('abc')
        fw.close()

        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as fr:
            self.assertEqual(fr.read(), 'abcbar')

    def test_update_component_meta_with_changes(self):
        with patch('ruamel.yaml.dump', side_effect=yaml.dump) as yaml_dump:
            self.pipelined_model.update_component_meta(
                'dataio_0', 'DataIO_v0', 'dataio', {
                    'DataIOMeta': 'DataIOMeta_v0',
                    'DataIOParam': 'DataIOParam_v0',
                })
        yaml_dump.assert_called_once()

        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as tmp:
            define_index = yaml.safe_load(tmp)

        _data = deepcopy(data_define_meta)
        _data['component_define']['dataio_0']['module_name'] = 'DataIO_v0'
        _data['model_proto']['dataio_0']['dataio'] = {
            'DataIOMeta': 'DataIOMeta_v0',
            'DataIOParam': 'DataIOParam_v0',
        }

        self.assertEqual(define_index, _data)

    def test_update_component_meta_without_changes(self):
        with open(self.pipelined_model.define_meta_path, 'w',
                  encoding='utf8') as f:
            yaml.dump(data_define_meta, f, Dumper=yaml.RoundTripDumper)

        with patch('ruamel.yaml.dump', side_effect=yaml.dump) as yaml_dump:
            self.pipelined_model.update_component_meta(
                *args_update_component_meta)
        yaml_dump.assert_not_called()

        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as tmp:
            define_index = yaml.safe_load(tmp)
        self.assertEqual(define_index, data_define_meta)

    def test_update_component_meta_multi_thread(self):
        with patch('ruamel.yaml.safe_load', side_effect=yaml.safe_load) as yaml_load, \
                patch('ruamel.yaml.dump', side_effect=yaml.dump) as yaml_dump, \
                concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
            for _ in range(100):
                executor.submit(self.pipelined_model.update_component_meta,
                                *args_update_component_meta)
        self.assertEqual(yaml_load.call_count, 100)
        self.assertEqual(yaml_dump.call_count, 0)

        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as tmp:
            define_index = yaml.safe_load(tmp)
        self.assertEqual(define_index, data_define_meta)

    def test_update_component_meta_empty_file(self):
        open(self.pipelined_model.define_meta_path, 'w').close()
        with self.assertRaisesRegex(ValueError, 'Invalid meta file'):
            self.pipelined_model.update_component_meta(
                *args_update_component_meta)

    def test_packaging_model(self):
        archive_file_path = self.pipelined_model.packaging_model()
        self.assertEqual(archive_file_path,
                         self.pipelined_model.archive_model_file_path)
        self.assertTrue(Path(archive_file_path).is_file())
        self.assertTrue(Path(archive_file_path + '.sha1').is_file())

        with ZipFile(archive_file_path) as z:
            with io.TextIOWrapper(z.open('define/define_meta.yaml'),
                                  encoding='utf8') as f:
                define_index = yaml.safe_load(f)
        self.assertEqual(define_index, data_define_meta)

        with open(archive_file_path,
                  'rb') as f, open(archive_file_path + '.sha1',
                                   encoding='utf8') as g:
            sha1 = hashlib.sha1(f.read()).hexdigest()
            sha1_orig = g.read().strip()
        self.assertEqual(sha1, sha1_orig)

    def test_packaging_model_not_exists(self):
        shutil.rmtree(self.pipelined_model.model_path, True)
        with self.assertRaisesRegex(
                FileNotFoundError,
                'Can not found foobar v1 model local cache'):
            self.pipelined_model.packaging_model()

    def test_unpack_model(self):
        archive_file_path = self.pipelined_model.packaging_model()
        self.assertTrue(Path(archive_file_path + '.sha1').is_file())

        shutil.rmtree(self.pipelined_model.model_path, True)
        self.assertFalse(Path(self.pipelined_model.model_path).exists())

        self.pipelined_model.unpack_model(archive_file_path)
        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as tmp:
            define_index = yaml.safe_load(tmp)
        self.assertEqual(define_index, data_define_meta)

    def test_unpack_model_local_cache_exists(self):
        archive_file_path = self.pipelined_model.packaging_model()

        with self.assertRaisesRegex(
                FileExistsError,
                'Model foobar v1 local cache already existed'):
            self.pipelined_model.unpack_model(archive_file_path)

    def test_unpack_model_no_hash_file(self):
        archive_file_path = self.pipelined_model.packaging_model()
        Path(archive_file_path + '.sha1').unlink()
        self.assertFalse(Path(archive_file_path + '.sha1').exists())

        shutil.rmtree(self.pipelined_model.model_path, True)
        self.assertFalse(os.path.exists(self.pipelined_model.model_path))

        self.pipelined_model.unpack_model(archive_file_path)
        with open(self.pipelined_model.define_meta_path,
                  encoding='utf8') as tmp:
            define_index = yaml.safe_load(tmp)
        self.assertEqual(define_index, data_define_meta)

    def test_unpack_model_hash_not_match(self):
        archive_file_path = self.pipelined_model.packaging_model()
        self.assertTrue(Path(archive_file_path + '.sha1').is_file())
        with open(archive_file_path + '.sha1', 'w', encoding='utf8') as f:
            f.write('abc123')

        shutil.rmtree(self.pipelined_model.model_path, True)
        self.assertFalse(Path(self.pipelined_model.model_path).exists())

        with self.assertRaisesRegex(ValueError, 'Hash not match.'):
            self.pipelined_model.unpack_model(archive_file_path)
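test_packaging_model and test_unpack_model_hash_not_match exercise an integrity check against a sibling .sha1 file. A minimal sketch of that pattern:

import hashlib
from pathlib import Path


def verify_sha1(archive_path: str) -> bool:
    """Compare the archive's SHA-1 digest with the recorded .sha1 file."""
    digest = hashlib.sha1(Path(archive_path).read_bytes()).hexdigest()
    recorded = Path(archive_path + ".sha1").read_text(encoding="utf8").strip()
    return digest == recorded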
Example 21
def deploy(config_data):
    model_id = config_data.get('model_id')
    model_version = config_data.get('model_version')
    local_role = config_data.get('local').get('role')
    local_party_id = config_data.get('local').get('party_id')
    child_model_version = config_data.get('child_model_version')

    try:
        party_model_id = model_utils.gen_party_model_id(
            model_id=model_id, role=local_role, party_id=local_party_id)
        model = PipelinedModel(model_id=party_model_id,
                               model_version=model_version)
        model_data = model.collect_models(in_bytes=True)
        if "pipeline.pipeline:Pipeline" not in model_data:
            raise Exception("Can not found pipeline file in model.")

        # check whether the model is eligible for the deploy process (parent/child)
        if not check_before_deploy(model):
            raise Exception('Child model could not be deployed.')

        # copy proto content from parent model and generate a child model
        deploy_model = PipelinedModel(model_id=party_model_id,
                                      model_version=child_model_version)
        shutil.copytree(src=model.model_path, dst=deploy_model.model_path)
        pipeline = deploy_model.read_component_model('pipeline',
                                                     'pipeline')['Pipeline']

        # modify two pipeline files (model version/ train_runtime_conf)
        train_runtime_conf = json_loads(pipeline.train_runtime_conf)
        adapter = JobRuntimeConfigAdapter(train_runtime_conf)
        train_runtime_conf = adapter.update_model_id_version(
            model_version=deploy_model.model_version)
        pipeline.model_version = child_model_version
        pipeline.train_runtime_conf = json_dumps(train_runtime_conf, byte=True)

        parser = get_dsl_parser_by_version(
            train_runtime_conf.get('dsl_version', '1'))
        train_dsl = json_loads(pipeline.train_dsl)
        parent_predict_dsl = json_loads(pipeline.inference_dsl)

        if str(train_runtime_conf.get('dsl_version', '1')) == '1':
            predict_dsl = json_loads(pipeline.inference_dsl)
        else:
            if config_data.get('dsl') or config_data.get('predict_dsl'):
                predict_dsl = config_data.get('dsl') if config_data.get(
                    'dsl') else config_data.get('predict_dsl')
                if not isinstance(predict_dsl, dict):
                    predict_dsl = json_loads(predict_dsl)
            else:
                if config_data.get('cpn_list', None):
                    cpn_list = config_data.pop('cpn_list')
                else:
                    cpn_list = list(train_dsl.get('components', {}).keys())
                parser_version = train_runtime_conf.get('dsl_version', '1')
                if str(parser_version) == '1':
                    predict_dsl = parent_predict_dsl
                else:
                    parser = schedule_utils.get_dsl_parser_by_version(
                        parser_version)
                    predict_dsl = parser.deploy_component(cpn_list, train_dsl)

        #  save predict dsl into child model file
        parser.verify_dsl(predict_dsl, "predict")
        inference_dsl = parser.get_predict_dsl(
            role=local_role,
            predict_dsl=predict_dsl,
            setting_conf_prefix=file_utils.
            get_federatedml_setting_conf_directory())
        pipeline.inference_dsl = json_dumps(inference_dsl, byte=True)
        if model_utils.compare_version(pipeline.fate_version, '1.5.0') == 'gt':
            pipeline.parent_info = json_dumps(
                {
                    'parent_model_id': model_id,
                    'parent_model_version': model_version
                },
                byte=True)
            pipeline.parent = False
            runtime_conf_on_party = json_loads(pipeline.runtime_conf_on_party)
            runtime_conf_on_party['job_parameters'][
                'model_version'] = child_model_version
            pipeline.runtime_conf_on_party = json_dumps(runtime_conf_on_party,
                                                        byte=True)

        # save model file
        deploy_model.save_pipeline(pipeline)
        shutil.copyfile(
            os.path.join(deploy_model.model_path, "pipeline.pb"),
            os.path.join(deploy_model.model_path, "variables", "data",
                         "pipeline", "pipeline", "Pipeline"))

        model_info = model_utils.gather_model_info_data(deploy_model)
        model_info['job_id'] = model_info['f_model_version']
        model_info['size'] = deploy_model.calculate_model_file_size()
        model_info['role'] = local_role
        model_info['party_id'] = local_party_id
        model_info['work_mode'] = adapter.get_job_work_mode()
        model_info['parent'] = False if model_info.get(
            'f_inference_dsl') else True
        if model_utils.compare_version(model_info['f_fate_version'],
                                       '1.5.0') == 'eq':
            model_info['roles'] = model_info.get('f_train_runtime_conf',
                                                 {}).get('role', {})
            model_info['initiator_role'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator', {}).get('role')
            model_info['initiator_party_id'] = model_info.get(
                'f_train_runtime_conf', {}).get('initiator',
                                                {}).get('party_id')
        model_utils.save_model_info(model_info)

    except Exception as e:
        stat_logger.exception(e)
        return 100, f"deploy model of role {local_role} {local_party_id} failed, details: {str(e)}"
    else:
        return 0, f"deploy model of role {local_role} {local_party_id} success"