def record_step_best(self, step_best, host_mask, guest_mask, data_instances, model):
    """Publish the best model of the current stepwise step as a tracked metric.

    Collects the feature masks, anonymised feature names, loss/score and
    (for non-arbiter roles) the stored model parameters into one ``metas``
    dict, updates the role-specific summary, and attaches the dict as
    ``extra_metas`` of the ``stepwise_{n_step}`` metric.

    Args:
        step_best: key into ``self.models_trained`` selecting the model info.
        host_mask: host feature mask; must support ``tolist()``, ``len()``
            and ``sum()`` (presumably a numpy array -- confirm with callers).
        guest_mask: guest feature mask, same requirements as ``host_mask``.
        data_instances: only read for non-arbiter roles, where
            ``schema.get('header')`` supplies the feature names.
        model: object providing ``fit_intercept``, ``model_param_name``,
            ``callback_metric`` and ``tracker``.
    """
    metas = {
        "host_mask": host_mask.tolist(),
        "guest_mask": guest_mask.tolist(),
        "score_name": self.score_name,
        "number_in": int(sum(host_mask) + sum(guest_mask)),
        "direction": self.direction,
        "n_count": int(self.n_count),
    }

    # One anonymous name per mask position, for each side.
    host_anonym = [
        anonymous_generator.generate_anonymous(fid=fid, role='host', model=model)
        for fid in range(len(host_mask))
    ]
    guest_anonym = [
        anonymous_generator.generate_anonymous(fid=fid, role='guest', model=model)
        for fid in range(len(guest_mask))
    ]
    metas["host_features_anonym"] = host_anonym
    metas["guest_features_anonym"] = guest_anonym

    best_info = self.models_trained[step_best]
    loss = best_info.get_loss()
    ic_val = best_info.get_score()
    metas["loss"] = loss
    metas["current_ic_val"] = ic_val
    metas["fit_intercept"] = model.fit_intercept

    # The stored model is fetched for every role, although only
    # non-arbiter roles read from it below.
    model_dict = self._get_model(best_info.get_key())

    if self.role == consts.ARBITER:
        self.update_summary_arbiter(model, loss, ic_val)
    else:
        all_features = data_instances.schema.get('header')
        metas["all_features"] = all_features
        metas["to_enter"] = self.get_to_enter(host_mask, guest_mask, all_features)

        stored_models = list(model_dict.get('model').values())
        param_dict = MessageToDict(stored_models[0].get(model.model_param_name))
        metas["intercept"] = param_dict.get("intercept", None)
        metas["weight"] = param_dict.get("weight", {})
        metas["header"] = param_dict.get("header", [])
        # On the very first forward step the intercept kept on self
        # overrides the one read from the stored model.
        if self.n_step == 0 and self.direction == "forward":
            metas["intercept"] = self.intercept
        self.update_summary_client(model, host_mask, guest_mask, all_features,
                                   host_anonym, guest_anonym)

    metric_name = f"stepwise_{self.n_step}"
    model.callback_metric(metric_name=metric_name,
                          metric_namespace=self.metric_namespace,
                          metric_data=[Metric(metric_name, float(self.n_step))])
    model.tracker.set_metric_meta(metric_name=metric_name,
                                  metric_namespace=self.metric_namespace,
                                  metric_meta=MetricMeta(name=metric_name,
                                                         metric_type=self.metric_type,
                                                         extra_metas=metas))
    LOGGER.info(f"metric_name: {metric_name}, metas: {metas}")
def get_model_param(self):
    """Build the host-side ``BoostingTreeModelParam`` for export.

    Fills in the trees, the anonymous-name -> feature-name mapping for this
    host party, the feature-name/fid mapping, the model name and the best
    iteration (``-1`` when no validation strategy is configured).

    Returns:
        tuple: ``("HeteroSecureBoostingTreeHostParam", model_param)``.
    """
    model_param = BoostingTreeModelParam()
    model_param.tree_num = len(self.boosting_model_list)
    model_param.tree_dim = self.booster_dim
    model_param.trees_.extend(self.boosting_model_list)

    party_id = self.component_properties.local_partyid
    anonymous_name_mapping = {
        generate_anonymous(fid, role=consts.HOST, party_id=party_id): name
        for fid, name in self.feature_name_fid_mapping.items()
    }

    # Each of these is applied exactly once; the earlier version repeated
    # all three statements verbatim, which had no additional effect.
    model_param.anonymous_name_mapping.update(anonymous_name_mapping)
    model_param.feature_name_fid_mapping.update(self.feature_name_fid_mapping)
    model_param.model_name = consts.HETERO_SBT

    if self.validation_strategy is None:
        model_param.best_iteration = -1
    else:
        model_param.best_iteration = self.validation_strategy.best_iteration

    param_name = "HeteroSecureBoostingTreeHostParam"
    return param_name, model_param
def encode_col_name_list(self, col_name_list: list, model):
    """Translate column names into their anonymous names, preserving order.

    Each name is looked up in ``self.col_name_maps`` (a missing name yields
    ``None`` as the index) and the index is passed to
    ``anonymous_generator.generate_anonymous`` together with ``model``.
    """
    return [
        anonymous_generator.generate_anonymous(
            self.col_name_maps.get(col_name), model=model)
        for col_name in col_name_list
    ]
def feature_importance_with_anonymous_converter(model_meta, model_param):
    """Convert a model param's feature importances into an ``IsometricModel``.

    Importances whose ``sitename`` starts with ``consts.HOST`` are grouped by
    host party id under anonymous column names; every other importance is
    treated as a guest one and reported under its real feature name. Guest
    features present in ``feature_name_fid_mapping`` but absent from the
    importance list are appended with importance ``0``.

    Args:
        model_meta: unused; kept for interface compatibility.
        model_param: object exposing ``feature_name_fid_mapping`` and
            ``feature_importances`` (entries with ``fid``, ``importance``
            and a ``"<role>:<party_id>"`` style ``sitename``).

    Returns:
        An ``IsometricModel`` holding one ``consts.FEATURE_IMPORTANCE`` metric.
    """
    fid_mapping = dict(model_param.feature_name_fid_mapping)

    guest_cols, guest_val = [], []
    # key is int party id, value is a dict with two keys: col_name and value
    host_side_data = {}

    for feat_importance in model_param.feature_importances:
        fid = feat_importance.fid
        importance = feat_importance.importance
        site_parts = feat_importance.sitename.split(':')

        if site_parts[0] == consts.HOST:
            host_id = int(site_parts[1])
            host_entry = host_side_data.setdefault(
                host_id, {'col_name': [], 'value': []})
            host_entry['col_name'].append(
                generate_anonymous(fid, host_id, role=consts.HOST))
            host_entry['value'].append(importance)
        else:
            guest_cols.append(fid_mapping[fid])
            guest_val.append(importance)

    # Pad guest features that received no importance with 0. A set keeps the
    # membership test O(1) instead of rescanning the growing list.
    seen_guest_cols = set(guest_cols)
    for col_name in fid_mapping.values():
        if col_name not in seen_guest_cols:
            seen_guest_cols.add(col_name)
            guest_cols.append(col_name)
            guest_val.append(0)

    host_party_ids = list(host_side_data)
    host_values = [entry['value'] for entry in host_side_data.values()]
    host_col_names = [entry['col_name'] for entry in host_side_data.values()]

    single_info = isometric_model.SingleMetricInfo(
        values=np.array(guest_val),
        col_names=guest_cols,
        host_party_ids=host_party_ids,
        host_values=host_values,
        host_col_names=host_col_names)
    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.FEATURE_IMPORTANCE,
                            metric_info=single_info)
    return result
def _get_param(self):
    """Assemble the ``FeatureSelectionParam`` protobuf for the selection result.

    A guest reports one ``HostColNames`` entry per host, pairing the i-th
    sorted host column list with the i-th id in ``host_party_idlist``. Any
    other role reports a single entry for itself holding one anonymous name
    per position of ``self.header``.
    """
    selection = self.completed_selection_result

    LOGGER.debug(
        "curt_select_properties.left_col_name: {}, completed_selection_result: {}"
        .format(self.curt_select_properties.left_col_names,
                selection.all_left_col_names))
    LOGGER.debug("Length of left cols: {}".format(
        len(selection.all_left_col_names)))

    # Every remaining column is flagged True.
    left_cols = dict.fromkeys(selection.all_left_col_names, True)
    final_left_cols = feature_selection_param_pb2.LeftCols(
        original_cols=selection.get_select_col_names(),
        left_cols=left_cols)

    host_col_names = []
    if self.role == consts.GUEST:
        host_names = selection.get_host_sorted_col_names()
        for host_idx, this_host_name in enumerate(host_names):
            party_id = self.component_properties.host_party_idlist[host_idx]
            LOGGER.debug(
                "In _get_param, this_host_name: {}, party_id: {}".format(
                    this_host_name, party_id))
            host_entry = feature_selection_param_pb2.HostColNames(
                col_names=this_host_name, party_id=str(party_id))
            host_col_names.append(host_entry)
    else:
        party_id = self.component_properties.local_partyid
        anonymous_names = [
            anonymous_generator.generate_anonymous(fid, model=self)
            for fid in range(len(self.header))
        ]
        host_entry = feature_selection_param_pb2.HostColNames(
            col_names=anonymous_names, party_id=str(party_id))
        host_col_names.append(host_entry)

    return feature_selection_param_pb2.FeatureSelectionParam(
        results=selection.filter_results,
        final_left_cols=final_left_cols,
        col_names=selection.get_sorted_col_names(),
        host_col_names=host_col_names,
        header=self.curt_select_properties.header)
def get_model_param(self):
    """Build the guest-side ``BoostingTreeModelParam`` for export.

    Besides trees, init score, losses and classes, the param carries the
    feature importances sorted in descending order. Guest features are
    named through ``feature_name_fid_mapping``; features of other sites get
    an anonymous name derived from their ``"<role>:<party_id>"`` sitename.

    Returns:
        tuple: ``(consts.HETERO_SBT_GUEST_MODEL + "Param", model_param)``.
    """
    model_param = BoostingTreeModelParam()
    model_param.tree_num = len(self.boosting_model_list)
    model_param.tree_dim = self.booster_dim
    model_param.trees_.extend(self.boosting_model_list)
    model_param.init_score.extend(self.init_score)
    model_param.losses.extend(self.history_loss)
    model_param.classes_.extend(map(str, self.classes_))
    model_param.num_classes = self.num_classes

    # The model name is left untouched for an unrecognised strategy.
    strategy = self.boosting_strategy
    if strategy == consts.STD_TREE:
        model_param.model_name = consts.HETERO_SBT
    elif strategy == consts.LAYERED_TREE:
        model_param.model_name = consts.HETERO_FAST_SBT_LAYERED
    elif strategy == consts.MIX_TREE:
        model_param.model_name = consts.HETERO_FAST_SBT_MIX

    model_param.best_iteration = self.callback_variables.best_iteration

    ranked = sorted(self.feature_importances_.items(),
                    key=itemgetter(1), reverse=True)
    importance_infos = []
    for (sitename, fid), importance in ranked:
        if consts.GUEST in sitename:
            fullname = self.feature_name_fid_mapping[fid]
        else:
            role_name, party_id = sitename.split(':')
            fullname = generate_anonymous(fid=fid, party_id=party_id,
                                          role=role_name)
        importance_infos.append(
            FeatureImportanceInfo(sitename=sitename,  # sitename to distinguish sites
                                  fid=fid,
                                  importance=importance.importance,
                                  fullname=fullname,
                                  importance2=importance.importance_2,
                                  main=importance.main_type))
    model_param.feature_importances.extend(importance_infos)

    model_param.feature_name_fid_mapping.update(self.feature_name_fid_mapping)
    model_param.tree_plan.extend(plan.encode_plan(self.tree_plan))

    return consts.HETERO_SBT_GUEST_MODEL + "Param", model_param
def _sync_select_info(self, suffix):
    """Exchange the selected columns between guest and the other parties.

    Does nothing unless ``self.select_federated`` is truthy. A guest stores
    the result of ``sync_obj.sync_select_cols`` in
    ``self.host_selection_properties``; any other role sends the anonymous
    names of its selected columns.
    """
    if not self.select_federated:
        return

    if self.role == consts.GUEST:
        assert isinstance(self.sync_obj, selection_info_sync.Guest)
        self.host_selection_properties = self.sync_obj.sync_select_cols(suffix=suffix)
        return

    # Non-guest side: send anonymous names only, never the real column names.
    encoded_names = [
        anonymous_generator.generate_anonymous(
            fid=self.selection_properties.col_name_maps[col_name],
            role=self.role,
            party_id=self.party_id)
        for col_name in self.selection_properties.select_col_names
    ]
    LOGGER.debug(f"Before send, encoded_names: {encoded_names},"
                 f"select_names: {self.selection_properties.select_col_names}")
    self.sync_obj.sync_select_cols(encoded_names, suffix=suffix)
def get_model_param(self):
    """Build the host-side ``BoostingTreeModelParam`` for export.

    Includes the trees, the anonymous-name -> feature-name mapping of this
    host party, the strategy-dependent model name, the tree plan and the
    host-local feature importances sorted in descending order.

    Returns:
        tuple: ``("HeteroSecureBoostingTreeHostParam", model_param)``.
    """
    model_param = BoostingTreeModelParam()
    model_param.tree_num = len(self.boosting_model_list)
    model_param.tree_dim = self.booster_dim
    model_param.trees_.extend(self.boosting_model_list)

    party_id = self.component_properties.local_partyid
    anonymous_name_mapping = {
        generate_anonymous(fid, role=consts.HOST, party_id=party_id): name
        for fid, name in self.feature_name_fid_mapping.items()
    }
    model_param.anonymous_name_mapping.update(anonymous_name_mapping)
    model_param.feature_name_fid_mapping.update(self.feature_name_fid_mapping)

    # The model name is left untouched for an unrecognised strategy.
    strategy = self.boosting_strategy
    if strategy == consts.STD_TREE:
        model_param.model_name = consts.HETERO_SBT
    elif strategy == consts.LAYERED_TREE:
        model_param.model_name = consts.HETERO_FAST_SBT_LAYERED
    elif strategy == consts.MIX_TREE:
        model_param.model_name = consts.HETERO_FAST_SBT_MIX

    model_param.best_iteration = self.callback_variables.best_iteration
    model_param.tree_plan.extend(plan.encode_plan(self.tree_plan))

    ranked = sorted(self.feature_importances_.items(),
                    key=itemgetter(1), reverse=True)
    LOGGER.debug('host feat importance is {}'.format(ranked))
    importance_infos = [
        FeatureImportanceInfo(sitename=consts.HOST_LOCAL,  # host local feat
                              fid=fid,
                              importance=importance.importance,
                              fullname=self.feature_name_fid_mapping[fid],
                              main=importance.main_type)
        for fid, importance in ranked
    ]
    model_param.feature_importances.extend(importance_infos)

    return "HeteroSecureBoostingTreeHostParam", model_param
def make_readable_feature_importance(fid_mapping, feature_importances):
    """
    replace feature id by real feature name

    Keys of ``feature_importances`` are either a plain feature id or a
    ``(sitename, fid)`` tuple. Plain ids and guest tuples are resolved
    through ``fid_mapping``; tuples of any other site are replaced by an
    anonymous name built from the ``"<role>:<party_id>"`` sitename.

    Returns a new dict mapping the readable name to the ``importance``
    attribute of the corresponding entry.
    """
    readable = {}
    for key, info in feature_importances.items():
        # isinstance (rather than an exact type comparison) also accepts
        # tuple subclasses such as namedtuples.
        if not isinstance(key, tuple):
            readable[fid_mapping[key]] = info.importance
            continue

        sitename, fid = key[0], key[1]
        if consts.GUEST in sitename:
            readable[fid_mapping[fid]] = info.importance
        else:
            role, party_id = sitename.split(':')
            anonymous_name = generate_anonymous(role=role, fid=fid,
                                                party_id=party_id)
            readable[anonymous_name] = info.importance
    return readable
def _get_param(self):
    """Serialise the Pearson correlation result into ``PearsonModelParam``.

    The local part holds the party tag, the flattened local correlation
    matrix, feature names with their anonymous counterparts and, when
    ``model_param.calc_local_vif`` is set, the local VIF values. The global
    part lists every party's shape and feature names (real names for the
    local party, ``"{role}_{party_id}_{i}"`` placeholders for the others)
    and, when ``model_param.cross_parties`` is set, the flattened
    cross-party correlation matrix.
    """
    from federatedml.protobuf.generated import pearson_model_param_pb2

    def clip(value):
        # Keep correlation values inside [-1, 1].
        return max(-1.0, min(float(value), 1.0))

    param_pb = pearson_model_param_pb2.PearsonModelParam()

    # local
    param_pb.party = f"({self.local_party.role},{self.local_party.party_id})"
    param_pb.shape = self.local_corr.shape[0]
    param_pb.local_corr.extend([clip(v) for v in self.local_corr.reshape(-1)])

    for idx, name in enumerate(self.names):
        param_pb.names.append(name)
        entry = param_pb.anonymous_map.add()
        entry.name = name
        entry.anonymous = generate_anonymous(
            fid=idx,
            party_id=self.local_party.party_id,
            role=self.local_party.role)

    if self.model_param.calc_local_vif:
        for vif_value in self.local_vif:
            param_pb.local_vif.append(vif_value)

    # global
    for shape, party in zip(self.shapes, self.parties):
        param_pb.shapes.append(shape)
        param_pb.parties.append(f"({party.role},{party.party_id})")

        party_names = param_pb.all_names.add()
        if party == self.local_party:
            for name in self.names:
                party_names.names.append(name)
        else:
            for i in range(shape):
                party_names.names.append(f"{party.role}_{party.party_id}_{i}")

    if self.model_param.cross_parties:
        param_pb.corr.extend([clip(v) for v in self.corr.reshape(-1)])

    param_pb.model_name = "HeteroPearson"
    return param_pb
def get_model_param(self):
    """Build the guest-side ``BoostingTreeModelParam`` for export.

    Carries trees, init score, losses, classes, the best iteration (``-1``
    without a validation strategy) and the feature importances sorted in
    descending order. Guest features are named through
    ``feature_name_fid_mapping``; other sites' features get an anonymous
    name derived from their ``"<role>:<party_id>"`` sitename.

    Returns:
        tuple: ``("HeteroSecureBoostingTreeGuestParam", model_param)``.
    """
    model_param = BoostingTreeModelParam()
    model_param.tree_num = len(self.boosting_model_list)
    model_param.tree_dim = self.booster_dim
    model_param.trees_.extend(self.boosting_model_list)
    model_param.init_score.extend(self.init_score)
    model_param.losses.extend(self.history_loss)
    model_param.classes_.extend(map(str, self.classes_))
    model_param.num_classes = self.num_classes
    model_param.model_name = consts.HETERO_SBT

    if self.validation_strategy is None:
        model_param.best_iteration = -1
    else:
        model_param.best_iteration = self.validation_strategy.best_iteration

    ranked = sorted(self.feature_importances_.items(),
                    key=itemgetter(1), reverse=True)
    importance_infos = []
    for (sitename, fid), importance_value in ranked:
        if consts.GUEST in sitename:
            fullname = self.feature_name_fid_mapping[fid]
        else:
            role_name, party_id = sitename.split(':')
            fullname = generate_anonymous(fid=fid, party_id=party_id,
                                          role=role_name)
        importance_infos.append(
            FeatureImportanceInfo(sitename=sitename,
                                  fid=fid,
                                  importance=importance_value,
                                  fullname=fullname))
    model_param.feature_importances.extend(importance_infos)

    model_param.feature_name_fid_mapping.update(self.feature_name_fid_mapping)

    return "HeteroSecureBoostingTreeGuestParam", model_param
def encode_col_name_dict(self, col_name_dict: dict, model):
    """Return a copy of ``col_name_dict`` keyed by anonymous names.

    Each key is looked up in ``self.col_name_maps`` (a missing name yields
    ``None`` as the index) and replaced by the result of
    ``anonymous_generator.generate_anonymous``; values are kept as they are.
    """
    return {
        anonymous_generator.generate_anonymous(
            self.col_name_maps.get(col_name), model=model): value
        for col_name, value in col_name_dict.items()
    }
def encode_col_name_dict(col_name, v, model, col_name_maps: dict):
    """Return ``(anonymous_name, v)`` for a single column.

    ``col_name`` is resolved to its index via ``col_name_maps`` (``None``
    when absent) and that index is turned into an anonymous name; ``v`` is
    passed through unchanged.
    """
    anonymous_name = anonymous_generator.generate_anonymous(
        col_name_maps.get(col_name), model=model)
    return anonymous_name, v
def anonymous_format(self, string: str):
    """Re-issue a ``{role}_{party_id}_{idx}`` name with a remapped party id.

    The party id is converted to ``int`` and looked up in
    ``self.get_mapping(role)``; ``idx`` is forwarded as the string taken
    from the input. Raises ``ValueError`` when ``string`` does not split
    into exactly three ``_``-separated parts.
    """
    role, old_party_id, idx = string.split('_')
    new_party_id = self.get_mapping(role)[int(old_party_id)]
    return generate_anonymous(idx, new_party_id, role)