Пример #1
0
    def record_step_best(self, step_best, host_mask, guest_mask, data_instances, model):
        """Publish the metadata of the best model found in the current step.

        Gathers both masks, the anonymized feature names, and the loss and
        score of ``self.models_trained[step_best]`` into one meta dict,
        updates the role-specific summary, and reports the result through
        ``model.callback_metric`` and ``model.tracker.set_metric_meta`` under
        the metric name ``stepwise_{self.n_step}``.
        """
        host_anonym = [
            anonymous_generator.generate_anonymous(fid=fid, role='host', model=model)
            for fid in range(len(host_mask))
        ]
        guest_anonym = [
            anonymous_generator.generate_anonymous(fid=fid, role='guest', model=model)
            for fid in range(len(guest_mask))
        ]

        best_model_info = self.models_trained[step_best]
        loss = best_model_info.get_loss()
        ic_val = best_model_info.get_score()

        # Key order is kept stable because the dict is logged and exported as-is.
        metas = {
            "host_mask": host_mask.tolist(),
            "guest_mask": guest_mask.tolist(),
            "score_name": self.score_name,
            "number_in": int(sum(host_mask) + sum(guest_mask)),
            "direction": self.direction,
            "n_count": int(self.n_count),
            "host_features_anonym": host_anonym,
            "guest_features_anonym": guest_anonym,
            "loss": loss,
            "current_ic_val": ic_val,
            "fit_intercept": model.fit_intercept,
        }

        model_dict = self._get_model(best_model_info.get_key())

        if self.role == consts.ARBITER:
            self.update_summary_arbiter(model, loss, ic_val)
        else:
            all_features = data_instances.schema.get('header')
            metas["all_features"] = all_features
            metas["to_enter"] = self.get_to_enter(host_mask, guest_mask, all_features)

            # Only the first stored model entry carries the parameters read here.
            first_entry = list(model_dict.get('model').values())[0]
            param_dict = MessageToDict(first_entry.get(model.model_param_name))
            metas["intercept"] = param_dict.get("intercept", None)
            metas["weight"] = param_dict.get("weight", {})
            metas["header"] = param_dict.get("header", [])
            if self.n_step == 0 and self.direction == "forward":
                # The very first forward step reports the stored intercept instead.
                metas["intercept"] = self.intercept
            self.update_summary_client(model, host_mask, guest_mask, all_features, host_anonym, guest_anonym)

        metric_name = f"stepwise_{self.n_step}"
        model.callback_metric(metric_name=metric_name,
                              metric_namespace=self.metric_namespace,
                              metric_data=[Metric(metric_name, float(self.n_step))])
        step_meta = MetricMeta(name=metric_name, metric_type=self.metric_type, extra_metas=metas)
        model.tracker.set_metric_meta(metric_name=metric_name,
                                      metric_namespace=self.metric_namespace,
                                      metric_meta=step_meta)
        LOGGER.info(f"metric_name: {metric_name}, metas: {metas}")
Пример #2
0
    def get_model_param(self):
        """Build the host-side boosting tree model parameter message.

        Returns:
            tuple: ``(param_name, model_param)`` where ``param_name`` is
            ``"HeteroSecureBoostingTreeHostParam"`` and ``model_param`` is the
            populated ``BoostingTreeModelParam``.
        """
        model_param = BoostingTreeModelParam()
        model_param.tree_num = len(self.boosting_model_list)
        model_param.tree_dim = self.booster_dim
        model_param.trees_.extend(self.boosting_model_list)

        # Map every anonymized host feature name back to its real name.
        party_id = self.component_properties.local_partyid
        anonymous_name_mapping = {
            generate_anonymous(fid, role=consts.HOST, party_id=party_id): name
            for fid, name in self.feature_name_fid_mapping.items()
        }

        # The original repeated the three statements below twice; a single
        # pass yields the same message.
        model_param.anonymous_name_mapping.update(anonymous_name_mapping)
        model_param.feature_name_fid_mapping.update(
            self.feature_name_fid_mapping)
        model_param.model_name = consts.HETERO_SBT

        # -1 marks "no best iteration" when no validation strategy is set.
        if self.validation_strategy is None:
            model_param.best_iteration = -1
        else:
            model_param.best_iteration = self.validation_strategy.best_iteration

        param_name = "HeteroSecureBoostingTreeHostParam"

        return param_name, model_param
Пример #3
0
 def encode_col_name_list(self, col_name_list: list, model):
     """Return the anonymous name of every column in ``col_name_list``.

     Each name is looked up in ``self.col_name_maps`` and the resulting
     index is handed to ``anonymous_generator.generate_anonymous``.
     """
     return [
         anonymous_generator.generate_anonymous(
             self.col_name_maps.get(col_name), model=model)
         for col_name in col_name_list
     ]
Пример #4
0
def feature_importance_with_anonymous_converter(model_meta, model_param):
    """Convert the feature importances of ``model_param`` to an isometric model.

    Guest features are reported under their real names, taken from
    ``model_param.feature_name_fid_mapping``; guest features that have no
    importance entry are appended with a value of 0. Host features are
    grouped by party id and reported under anonymous names.

    Args:
        model_meta: not used by this converter.
        model_param: object exposing ``feature_name_fid_mapping`` and
            ``feature_importances``.

    Returns:
        An ``isometric_model.IsometricModel`` holding one
        ``consts.FEATURE_IMPORTANCE`` metric.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)
    guest_cols, guest_val = [], []

    # key is int party id, value is a dict with two keys: col_name and value
    host_side_data = {}

    for feat_importance in model_param.feature_importances:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_name = feat_importance.sitename.split(':')
        if site_name[0] == consts.HOST:
            host_id = int(site_name[1])
            host_entry = host_side_data.setdefault(
                host_id, {'col_name': [], 'value': []})
            host_entry['col_name'].append(
                generate_anonymous(fid, host_id, role=consts.HOST))
            host_entry['value'].append(importance)
        else:
            guest_cols.append(fid_mapping[fid])
            guest_val.append(importance)

    # Zero-fill guest features without an importance entry. A set keeps the
    # membership test O(1) instead of rescanning the growing list each time.
    seen_guest_cols = set(guest_cols)
    for col_name in fid_mapping.values():
        if col_name not in seen_guest_cols:
            seen_guest_cols.add(col_name)
            guest_cols.append(col_name)
            guest_val.append(0)

    host_party_ids = list(host_side_data)
    host_values = [entry['value'] for entry in host_side_data.values()]
    host_col_names = [entry['col_name'] for entry in host_side_data.values()]

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(guest_val),
        col_names=guest_cols,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result
Пример #5
0
    def _get_param(self):
        """Assemble the ``FeatureSelectionParam`` message for this party.

        A guest records one ``HostColNames`` entry per host, using the sorted
        host column names of the completed selection result. Any other role
        records a single entry with the anonymous names of its own header.
        """
        selection_result = self.completed_selection_result

        LOGGER.debug(
            "curt_select_properties.left_col_name: {}, completed_selection_result: {}"
            .format(self.curt_select_properties.left_col_names,
                    selection_result.all_left_col_names))
        LOGGER.debug("Length of left cols: {}".format(
            len(selection_result.all_left_col_names)))

        final_left_cols = feature_selection_param_pb2.LeftCols(
            original_cols=selection_result.get_select_col_names(),
            left_cols=dict.fromkeys(selection_result.all_left_col_names, True))

        host_col_names = []
        if self.role == consts.GUEST:
            sorted_host_names = selection_result.get_host_sorted_col_names()
            for host_idx, this_host_name in enumerate(sorted_host_names):
                party_id = self.component_properties.host_party_idlist[host_idx]
                LOGGER.debug(
                    "In _get_param, this_host_name: {}, party_id: {}".format(
                        this_host_name, party_id))
                host_entry = feature_selection_param_pb2.HostColNames(
                    col_names=this_host_name, party_id=str(party_id))
                host_col_names.append(host_entry)
        else:
            party_id = self.component_properties.local_partyid
            anonymous_names = []
            for fid in range(len(self.header)):
                anonymous_names.append(
                    anonymous_generator.generate_anonymous(fid, model=self))
            host_entry = feature_selection_param_pb2.HostColNames(
                col_names=anonymous_names, party_id=str(party_id))
            host_col_names.append(host_entry)

        return feature_selection_param_pb2.FeatureSelectionParam(
            results=selection_result.filter_results,
            final_left_cols=final_left_cols,
            col_names=selection_result.get_sorted_col_names(),
            host_col_names=host_col_names,
            header=self.curt_select_properties.header)
Пример #6
0
    def get_model_param(self):
        """Build the guest-side boosting tree model parameter message.

        Returns:
            tuple: ``(param_name, model_param)`` where ``param_name`` is
            ``consts.HETERO_SBT_GUEST_MODEL + "Param"``.
        """
        model_param = BoostingTreeModelParam()
        model_param.tree_num = len(self.boosting_model_list)
        model_param.tree_dim = self.booster_dim
        model_param.trees_.extend(self.boosting_model_list)
        model_param.init_score.extend(self.init_score)
        model_param.losses.extend(self.history_loss)
        model_param.classes_.extend(str(label) for label in self.classes_)
        model_param.num_classes = self.num_classes

        # The model name follows the boosting strategy; an unrecognised
        # strategy leaves the name untouched.
        strategy = self.boosting_strategy
        if strategy == consts.STD_TREE:
            model_param.model_name = consts.HETERO_SBT
        elif strategy == consts.LAYERED_TREE:
            model_param.model_name = consts.HETERO_FAST_SBT_LAYERED
        elif strategy == consts.MIX_TREE:
            model_param.model_name = consts.HETERO_FAST_SBT_MIX
        model_param.best_iteration = self.callback_variables.best_iteration

        # Largest importance first.
        ranked_importances = sorted(self.feature_importances_.items(),
                                    key=itemgetter(1),
                                    reverse=True)
        importance_infos = []
        for (sitename, fid), importance in ranked_importances:
            if consts.GUEST not in sitename:
                # Non-guest sites are only known by an anonymous name.
                role_name, party_id = sitename.split(':')
                fullname = generate_anonymous(fid=fid,
                                              party_id=party_id,
                                              role=role_name)
            else:
                fullname = self.feature_name_fid_mapping[fid]

            info = FeatureImportanceInfo(
                sitename=sitename,  # sitename to distinguish sites
                fid=fid,
                importance=importance.importance,
                fullname=fullname,
                importance2=importance.importance_2,
                main=importance.main_type)
            importance_infos.append(info)

        model_param.feature_importances.extend(importance_infos)
        model_param.feature_name_fid_mapping.update(
            self.feature_name_fid_mapping)
        model_param.tree_plan.extend(plan.encode_plan(self.tree_plan))

        return consts.HETERO_SBT_GUEST_MODEL + "Param", model_param
Пример #7
0
 def _sync_select_info(self, suffix):
     """Exchange the selected columns between guest and the other parties.

     Does nothing unless ``self.select_federated`` is truthy. A guest stores
     what ``sync_select_cols`` returns in ``self.host_selection_properties``;
     any other role sends the anonymous names of its selected columns.
     """
     if not self.select_federated:
         return

     if self.role == consts.GUEST:
         assert isinstance(self.sync_obj, selection_info_sync.Guest)
         self.host_selection_properties = self.sync_obj.sync_select_cols(suffix=suffix)
         return

     encoded_names = [
         anonymous_generator.generate_anonymous(
             fid=self.selection_properties.col_name_maps[col_name],
             role=self.role,
             party_id=self.party_id)
         for col_name in self.selection_properties.select_col_names
     ]
     LOGGER.debug(f"Before send, encoded_names: {encoded_names},"
                  f"select_names: {self.selection_properties.select_col_names}")
     self.sync_obj.sync_select_cols(encoded_names, suffix=suffix)
Пример #8
0
    def get_model_param(self):
        """Build the host-side boosting tree model parameter message.

        Returns:
            tuple: ``(param_name, model_param)`` where ``param_name`` is
            ``"HeteroSecureBoostingTreeHostParam"``.
        """
        model_param = BoostingTreeModelParam()
        model_param.tree_num = len(self.boosting_model_list)
        model_param.tree_dim = self.booster_dim
        model_param.trees_.extend(self.boosting_model_list)

        # anonymous host feature name -> real feature name
        party_id = self.component_properties.local_partyid
        anonymous_name_mapping = {}
        for fid, name in self.feature_name_fid_mapping.items():
            anonymous_name = generate_anonymous(
                fid,
                role=consts.HOST,
                party_id=party_id,
            )
            anonymous_name_mapping[anonymous_name] = name

        model_param.anonymous_name_mapping.update(anonymous_name_mapping)
        model_param.feature_name_fid_mapping.update(
            self.feature_name_fid_mapping)

        # An unrecognised strategy leaves the model name untouched.
        strategy = self.boosting_strategy
        if strategy == consts.STD_TREE:
            model_param.model_name = consts.HETERO_SBT
        elif strategy == consts.LAYERED_TREE:
            model_param.model_name = consts.HETERO_FAST_SBT_LAYERED
        elif strategy == consts.MIX_TREE:
            model_param.model_name = consts.HETERO_FAST_SBT_MIX
        model_param.best_iteration = self.callback_variables.best_iteration
        model_param.tree_plan.extend(plan.encode_plan(self.tree_plan))

        # Largest importance first.
        feature_importances = sorted(self.feature_importances_.items(),
                                     key=itemgetter(1),
                                     reverse=True)
        LOGGER.debug('host feat importance is {}'.format(feature_importances))
        importance_infos = [
            FeatureImportanceInfo(
                sitename=consts.HOST_LOCAL,  # host local feat
                fid=fid,
                importance=importance.importance,
                fullname=self.feature_name_fid_mapping[fid],
                main=importance.main_type)
            for fid, importance in feature_importances
        ]
        model_param.feature_importances.extend(importance_infos)

        return "HeteroSecureBoostingTreeHostParam", model_param
Пример #9
0
    def make_readable_feature_importance(fid_mapping, feature_importances):
        """
        replace feature id by real feature name

        Keys of ``feature_importances`` are either a plain feature id or a
        ``(sitename, fid)`` tuple. Plain ids and guest tuples are translated
        through ``fid_mapping``; tuples of any other site get an anonymous
        name built from the ``role:party_id`` sitename.

        Returns a dict mapping the readable name to the ``importance``
        attribute of the corresponding entry.
        """
        new_fi = {}
        for id_, info in feature_importances.items():
            # isinstance also accepts tuple subclasses such as namedtuples,
            # which an exact type comparison would send down the wrong branch.
            if not isinstance(id_, tuple):
                new_fi[fid_mapping[id_]] = info.importance
                continue

            sitename, fid = id_[0], id_[1]
            if consts.GUEST in sitename:
                readable_name = fid_mapping[fid]
            else:
                role, party_id = sitename.split(':')
                readable_name = generate_anonymous(
                    role=role, fid=fid, party_id=party_id)
            new_fi[readable_name] = info.importance

        return new_fi
Пример #10
0
    def _get_param(self):
        """Assemble the ``PearsonModelParam`` message for this party.

        The local part holds the local correlation matrix, the feature names
        with their anonymous counterparts and, when enabled, the local VIF
        values. The global part lists shape and names of every party, plus
        the cross-party correlation matrix when ``cross_parties`` is set.
        """
        from federatedml.protobuf.generated import pearson_model_param_pb2

        def _clip(value):
            # keep the stored coefficient inside [-1.0, 1.0]
            return max(-1.0, min(float(value), 1.0))

        param_pb = pearson_model_param_pb2.PearsonModelParam()

        # local
        param_pb.party = f"({self.local_party.role},{self.local_party.party_id})"
        param_pb.shape = self.local_corr.shape[0]
        for coefficient in self.local_corr.reshape(-1):
            param_pb.local_corr.append(_clip(coefficient))
        for fid, feature_name in enumerate(self.names):
            param_pb.names.append(feature_name)
            map_entry = param_pb.anonymous_map.add()
            map_entry.name = feature_name
            map_entry.anonymous = generate_anonymous(
                fid=fid,
                party_id=self.local_party.party_id,
                role=self.local_party.role)

        if self.model_param.calc_local_vif:
            for vif_value in self.local_vif:
                param_pb.local_vif.append(vif_value)

        # global
        for shape, party in zip(self.shapes, self.parties):
            param_pb.shapes.append(shape)
            param_pb.parties.append(f"({party.role},{party.party_id})")

            party_names = param_pb.all_names.add()
            if party == self.local_party:
                # Own features are listed under their real names.
                names_to_add = self.names
            else:
                names_to_add = [
                    f"{party.role}_{party.party_id}_{i}" for i in range(shape)
                ]
            for feature_name in names_to_add:
                party_names.names.append(feature_name)

        if self.model_param.cross_parties:
            for coefficient in self.corr.reshape(-1):
                param_pb.corr.append(_clip(coefficient))

        param_pb.model_name = "HeteroPearson"

        return param_pb
Пример #11
0
    def get_model_param(self):
        """Build the guest-side boosting tree model parameter message.

        Returns:
            tuple: ``(param_name, model_param)`` where ``param_name`` is
            ``"HeteroSecureBoostingTreeGuestParam"``.
        """
        model_param = BoostingTreeModelParam()
        model_param.tree_num = len(self.boosting_model_list)
        model_param.tree_dim = self.booster_dim
        model_param.trees_.extend(self.boosting_model_list)
        model_param.init_score.extend(self.init_score)
        model_param.losses.extend(self.history_loss)
        model_param.classes_.extend(str(label) for label in self.classes_)
        model_param.num_classes = self.num_classes
        model_param.model_name = consts.HETERO_SBT

        # -1 marks "no best iteration" when no validation strategy is set.
        if self.validation_strategy is None:
            model_param.best_iteration = -1
        else:
            model_param.best_iteration = self.validation_strategy.best_iteration

        # Largest importance first.
        ranked_importances = sorted(self.feature_importances_.items(),
                                    key=itemgetter(1),
                                    reverse=True)
        importance_infos = []
        for (sitename, fid), _importance in ranked_importances:
            if consts.GUEST not in sitename:
                # Non-guest sites are only known by an anonymous name.
                role_name, party_id = sitename.split(':')
                fullname = generate_anonymous(fid=fid,
                                              party_id=party_id,
                                              role=role_name)
            else:
                fullname = self.feature_name_fid_mapping[fid]
            importance_infos.append(
                FeatureImportanceInfo(sitename=sitename,
                                      fid=fid,
                                      importance=_importance,
                                      fullname=fullname))
        model_param.feature_importances.extend(importance_infos)

        model_param.feature_name_fid_mapping.update(
            self.feature_name_fid_mapping)

        return "HeteroSecureBoostingTreeGuestParam", model_param
Пример #12
0
 def encode_col_name_dict(self, col_name_dict: dict, model):
     """Return a copy of ``col_name_dict`` keyed by anonymous column names.

     Each key is looked up in ``self.col_name_maps`` and the resulting index
     is handed to ``anonymous_generator.generate_anonymous``; values are
     carried over unchanged.
     """
     return {
         anonymous_generator.generate_anonymous(
             self.col_name_maps.get(col_name), model=model): value
         for col_name, value in col_name_dict.items()
     }
Пример #13
0
 def encode_col_name_dict(col_name, v, model, col_name_maps: dict):
     """Return ``(anonymous_name, v)`` for a single column.

     The column index is looked up in ``col_name_maps`` and handed to
     ``anonymous_generator.generate_anonymous``; ``v`` is passed through.
     """
     anonymous_name = anonymous_generator.generate_anonymous(
         col_name_maps.get(col_name), model=model)
     return anonymous_name, v
Пример #14
0
 def anonymous_format(self, string: str):
     """Rebuild an anonymous name of the form {role}_{party_id}_{idx}.

     The party id is replaced by its entry in ``self.get_mapping(role)``
     before the name is regenerated with ``generate_anonymous``.
     """
     parts = string.split('_')
     role, party_id, idx = parts
     party_mapping = self.get_mapping(role)
     return generate_anonymous(idx, party_mapping[int(party_id)], role)