def get_model_param(self):
    """Build the protobuf parameter object describing the trained model.

    Copies tree, init-score, loss and class information from ``self`` into
    a fresh ``BoostingTreeModelParam`` and attaches one
    ``FeatureImportanceInfo`` per feature, ordered from the largest
    importance to the smallest.

    Returns
    -------
    tuple
        ``(param_name, model_param)`` where ``param_name`` is
        ``consts.HETERO_SBT_GUEST_MODEL + "Param"``.
    """
    param = BoostingTreeModelParam()
    param.tree_num = len(self.boosting_model_list)
    param.tree_dim = self.booster_dim
    param.trees_.extend(self.boosting_model_list)
    param.init_score.extend(self.init_score)
    param.losses.extend(self.history_loss)
    param.classes_.extend(map(str, self.classes_))
    param.num_classes = self.num_classes
    param.model_name = consts.HETERO_SBT

    # -1 marks "no validation strategy, hence no best iteration".
    if self.validation_strategy is None:
        param.best_iteration = -1
    else:
        param.best_iteration = self.validation_strategy.best_iteration

    ranked = sorted(self.feature_importances_.items(),
                    key=itemgetter(1),
                    reverse=True)

    importance_infos = []
    for (sitename, fid), imp in ranked:
        if consts.GUEST in sitename:
            # Guest-side features are reported under their mapped name.
            fullname = self.feature_name_fid_mapping[fid]
        else:
            # Other sites are reported under a generated anonymous name.
            role_name, party_id = sitename.split(':')
            fullname = generate_anonymous(fid=fid,
                                          party_id=party_id,
                                          role=role_name)
        importance_infos.append(
            FeatureImportanceInfo(sitename=sitename,
                                  fid=fid,
                                  importance=imp.importance,
                                  fullname=fullname,
                                  importance2=imp.importance_2,
                                  main=imp.main_type))

    param.feature_importances.extend(importance_infos)
    LOGGER.debug('feat importance param {}'.format(importance_infos))
    param.feature_name_fid_mapping.update(self.feature_name_fid_mapping)

    return consts.HETERO_SBT_GUEST_MODEL + "Param", param
def run(self, data):
    """Run the repeated-id process on ``data`` and return the new table.

    The party whose role equals ``self.repeated_id_owner`` builds the id
    map, sends it to the other role, and strips the first feature column
    (and the first header entry) from its data. The other party receives
    the id map. Both then restructure their rows with
    ``__func_restructure_id`` and restore the original schema object.

    Parameters
    ----------
    data : table
        Input table; values are either ``Instance`` objects or sequences.

    Returns
    -------
    table
        The restructured table.
    """
    LOGGER.info("Start repeated id processing.")

    # Default: the guest owns the repeated ids and sends the map to host.
    id_map_federation = self.transfer_variable.id_map_from_guest
    party_role = consts.HOST
    if self.repeated_id_owner == consts.HOST:
        id_map_federation = self.transfer_variable.id_map_from_host
        party_role = consts.GUEST

    LOGGER.info("repeated_id_owner:{}".format(self.repeated_id_owner))

    original_schema = data.schema

    if self.repeated_id_owner == self.role:
        id_map = self.__generate_id_map(data)
        LOGGER.info("finish generate id_map, id_map:{}".format(id_map))

        id_map_federation.remote(id_map, role=party_role, idx=-1)

        one_feature = data.first()
        if isinstance(one_feature[1], Instance):
            # ``np.float`` was only an alias of the builtin ``float`` and
            # was removed in NumPy 1.24, so use the builtin directly.
            data = data.mapValues(
                lambda v: Instance(features=np.array(v.features[1:],
                                                     dtype=float),
                                   label=v.label,
                                   inst_id=v.inst_id,
                                   weight=v.weight))
        else:
            data = data.mapValues(lambda v: v[1:])

        data.schema = original_schema
        if data.schema.get('header') is not None:
            # The first header entry belongs to the column just removed.
            data.schema['header'] = data.schema['header'][1:]
    else:
        id_map = id_map_federation.get(idx=0)
        LOGGER.info("Get id_map from owner.")

    data = data.flatMap(
        functools.partial(self.__func_restructure_id, id_map=id_map))
    data.schema = original_schema

    LOGGER.info("Finish repeated id process for owner")

    return data
def convert(self, model_meta, model_param):
    """Convert a correlation model param into an ``IsometricModel``.

    The result carries two metrics: ``consts.VIF`` (from
    ``model_param.local_vif``) and ``consts.PEARSON`` (local correlation
    matrix plus, when ``model_param.corr`` is non-empty, the cross-party
    correlation, host column names and parties).

    ``model_meta`` is not used.
    """
    from federatedml.util import LOGGER

    col_names = list(model_param.names)
    local_vif = model_param.local_vif

    # The flat correlation values form a square matrix of side ``shape``.
    side = model_param.shape
    local_corr = np.array(model_param.local_corr).reshape(side, side)
    for idx in range(local_corr.shape[0]):
        LOGGER.debug(
            f"local_col_idx: {idx}, corr_col: {local_corr[idx, :]}")

    corr = None
    host_names = None
    parties = None
    if model_param.corr:
        corr = np.array(model_param.corr).reshape(*model_param.shapes)
        for idx in range(corr.shape[1]):
            LOGGER.debug(f"col_idx: {idx}, corr_col: {corr[:, idx]}")
        # Host names are read from the second entry of ``all_names``.
        host_names = list(list(model_param.all_names)[1].names)
        parties = list(model_param.parties)

    pearson_metric = PearsonMetricInfo(local_corr=local_corr,
                                       col_names=col_names,
                                       corr=corr,
                                       host_col_names=host_names,
                                       parties=parties)
    single_info = isometric_model.SingleMetricInfo(values=local_vif,
                                                   col_names=col_names)

    result = isometric_model.IsometricModel()
    result.add_metric_value(metric_name=consts.VIF,
                            metric_info=single_info)
    result.add_metric_value(metric_name=consts.PEARSON,
                            metric_info=pearson_metric)
    return result
def transform_data_label(data, label_encoder):
    """Replace labels in ``data`` using ``label_encoder``.

    The handling depends on ``data.schema["content_type"]``:

    * ``"cluster_result"``: every value goes through
      ``LabelTransformer.replace_predict_label_cluster``.
    * ``"predict_result"``: every value goes through
      ``LabelTransformer.replace_predict_label``, unless the predict
      detail of the first row has ``"label"`` as its only key, in which
      case the data is returned unchanged.
    * missing content type: every value goes through
      ``LabelTransformer.replace_instance_label``.

    Raises
    ------
    ValueError
        If the content type is anything else.
    """
    data_type = data.schema.get("content_type")

    if data_type == "cluster_result":
        return data.mapValues(
            lambda v: LabelTransformer.replace_predict_label_cluster(
                v, label_encoder))

    if data_type == "predict_result":
        predict_detail = data.first()[1].features[3]
        # The original compared the mapping itself with 1, which is never
        # true; the intent is a detail holding only the "label" key.
        if len(predict_detail) == 1 and next(iter(predict_detail)) == "label":
            LOGGER.info(
                "Regression prediction result provided. Original data returned."
            )
            return data
        return data.mapValues(
            lambda v: LabelTransformer.replace_predict_label(
                v, label_encoder))

    if data_type is None:
        return data.mapValues(
            lambda v: LabelTransformer.replace_instance_label(
                v, label_encoder))

    raise ValueError(
        f"unknown data type: {data_type} encountered. Label transform aborted."
    )
def backward(self, output_gradient, epoch, batch):
    """Run one backward step of the interactive layer.

    Multiplies ``output_gradient`` by the host model's activation
    gradient, updates the guest and host sides, sends the host input
    gradient to the host, and returns the guest input gradient.
    """
    LOGGER.debug(
        f"interactive layer start backward propagation of epoch {epoch} batch {batch}"
    )
    activation_gradient = (
        output_gradient * self.host_model.backward_activation()[0])

    LOGGER.debug(
        f"interactive layer update guest weight of epoch {epoch} batch {batch}"
    )
    guest_input_gradient = self.update_guest(activation_gradient)

    weight_gradient, noise = self.backward_interactive(
        activation_gradient, epoch, batch)
    host_input_gradient = self.update_host(activation_gradient,
                                           weight_gradient,
                                           noise)

    self.send_host_backward_to_host(host_input_gradient.get_obj(),
                                    epoch,
                                    batch)
    return guest_input_gradient
def mini_batch_data_generator(self, result='data'):
    """
    Generate mini-batch data or index

    Parameters
    ----------
    result : str, default: 'data'
        'index' yields the index tables, 'data' yields the batch data.
        Any other value yields ``(batch_data, index_table)`` pairs.

    Returns
    -------
    A generator over the requested items.
    """
    LOGGER.debug("Currently, batch_num is: {}".format(self.batch_nums))
    if result == 'index':
        yield from self.all_index_data
    elif result == "data":
        yield from self.all_batch_data
    else:
        # zip produces the same (batch_data, index_table) tuples.
        yield from zip(self.all_batch_data, self.all_index_data)
def check(self):
    """Validate the secure information retrieval parameters.

    Normalises the protocol/encryption names to lower case, applies the
    deprecated ``key_size`` and ``raw_retrieval`` settings when the
    deprecation helper reports them, checks ``dh_params``, and makes sure
    ``target_cols`` is a list of strings.
    """
    descr = "secure information retrieval param's "

    self.check_decimal_float(self.security_level, descr + "security_level")

    self.oblivious_transfer_protocol = self.check_and_change_lower(
        self.oblivious_transfer_protocol,
        [consts.OT_HAUCK.lower()],
        descr + "oblivious_transfer_protocol")
    self.commutative_encryption = self.check_and_change_lower(
        self.commutative_encryption,
        [consts.CE_PH.lower()],
        descr + "commutative_encryption")
    self.non_committing_encryption = self.check_and_change_lower(
        self.non_committing_encryption,
        [consts.AES.lower()],
        descr + "non_committing_encryption")

    key_size_given = self._warn_to_deprecate_param(
        "key_size", descr, "dh_param's key_length")
    if key_size_given:
        self.dh_params.key_length = self.key_size
    self.dh_params.check()

    raw_retrieval_given = self._warn_to_deprecate_param(
        "raw_retrieval", descr, "dh_param's security_level = 0")
    if raw_retrieval_given:
        self.check_boolean(self.raw_retrieval, descr)

    # A single column is accepted and wrapped into a list.
    if not isinstance(self.target_cols, list):
        self.target_cols = [self.target_cols]
    for column in self.target_cols:
        self.check_string(column, descr + "target_cols")

    if len(self.target_cols) == 0:
        LOGGER.warning(
            "Both 'target_cols' and 'target_indexes' are empty. Label will be retrieved."
        )
def __generate_id_map(self, data) -> dict:
    """Collect the repeated ids found in the first feature of ``data``.

    Returns
    -------
    dict
        Maps ``str(first feature value)`` to the list of row keys holding
        that value, keeping only values seen on at least two rows. Empty
        when ``self.repeated_id_owner`` is falsy.
    """
    if not self.repeated_id_owner:
        LOGGER.warning("Not a repeated id owner, will not generate id map")
        return {}

    sample = data.first()
    if isinstance(sample[1], Instance):
        first_values = data.mapValues(lambda v: v.features[0])
    else:
        first_values = data.mapValues(lambda v: v[0])

    keys_by_id = defaultdict(list)
    for row in first_values.collect():
        keys_by_id[str(row[1])].append(row[0])

    # Only ids shared by two or more rows count as repeated.
    return {
        repeated_id: keys
        for repeated_id, keys in keys_by_id.items()
        if len(keys) >= 2
    }
def _func(*args, **kwargs):
    """Call ``func`` and verify the match-id flag is not lost.

    The first table among the positional and keyword arguments decides
    whether the input carries an inst id. If it does and the result is a
    table of instances without one, ``EnvironmentError`` is raised.
    """
    input_with_inst_id = None
    for candidate in [*args, *kwargs.values()]:
        if is_table(candidate):
            input_with_inst_id = check_with_inst_id(candidate)
            break

    result = func(*args, **kwargs)

    if input_with_inst_id is None or not is_table(result):
        return result
    if not check_is_instance(result):
        return result

    result_with_inst_id = check_with_inst_id(result)
    LOGGER.debug(
        f"Input with match id: {input_with_inst_id} -> output with match id: {result_with_inst_id}"
    )
    if input_with_inst_id and not result_with_inst_id:
        raise EnvironmentError(
            f"Input with match id: {input_with_inst_id} -> output with match id: {result_with_inst_id},"
            f"func: {func}")
    return result
def get_intersect_doubly_encrypted_id(self, data_instances):
    """Encrypt local ids, exchange them, and re-encrypt the remote ids.

    Stores the first-round local encryption in
    ``self.id_list_local_first`` and sends the doubly encrypted remote id
    list via ``_sync_doubly_encrypted_id_list``. Returns nothing.
    """
    self._sync_commutative_cipher_public_knowledge()
    cipher = self.commutative_cipher
    cipher.init()

    # 1st ID encrypt: (Eh, (h, Instance))
    self.id_list_local_first = self._encrypt_id(
        data_instances,
        cipher,
        reserve_original_key=True,
        hash_operator=self.hash_operator,
        salt=self.salt,
        reserve_original_value=True)
    LOGGER.info("encrypted local id for the 1st time")

    # send (Eh, -1), get (Eg, -1)
    remote_first = self._exchange_id_list(self.id_list_local_first)

    # 2nd ID encrypt & send doubly encrypted guest ID list to guest
    # result: (EEg, Eg)
    remote_second = self._encrypt_id(remote_first,
                                     cipher,
                                     reserve_original_key=True)
    LOGGER.info("encrypted guest id for the 2nd time")

    self._sync_doubly_encrypted_id_list(remote_second)
def decrypt_intersect_doubly_encrypted_id(self,
                                          id_list_intersect_cipher_cipher):
    """Turn doubly encrypted intersect ids back into original ids.

    Returns the intersect ids, and also sends them out when
    ``self.sync_intersect_ids`` is set.
    """
    # EEi -> Ei from Eg
    single_cipher_ids = self.get_intersect_cipher(
        id_list_intersect_cipher_cipher)

    # find intersect ids: (Ei, original key), one entry per local list
    encrypt_intersect_ids = []
    for i, local_first in enumerate(self.id_list_local_first):
        encrypt_intersect_ids.append(
            self.extract_intersect_ids(single_cipher_ids[i], local_first))

    # map encrypted intersect ids to original ids
    intersect_ids = self.filter_intersect_ids(encrypt_intersect_ids,
                                              keep_encrypt_ids=True)
    LOGGER.info("intersection found")

    if not self.sync_intersect_ids:
        LOGGER.info("Skip sync intersect ids with Host(s).")
    else:
        self.send_intersect_ids(intersect_ids)
    return intersect_ids
def _init_model(self, boosting_param: BoostingParam):
    """Copy the boosting configuration onto ``self``.

    When ``boosting_param.random_seed`` is not None it is stored and also
    used to seed NumPy's global random state.
    """
    # (attribute on self, attribute on boosting_param), in copy order.
    attribute_pairs = (
        ("task_type", "task_type"),
        ("objective_param", "objective_param"),
        ("learning_rate", "learning_rate"),
        ("boosting_round", "num_trees"),
        ("n_iter_no_change", "n_iter_no_change"),
        ("tol", "tol"),
        ("bin_num", "bin_num"),
        ("predict_param", "predict_param"),
        ("cv_param", "cv_param"),
        ("validation_freqs", "validation_freqs"),
        ("metrics", "metrics"),
        ("subsample_feature_rate", "subsample_feature_rate"),
        ("binning_error", "binning_error"),
    )
    for target, source in attribute_pairs:
        setattr(self, target, getattr(boosting_param, source))

    if boosting_param.random_seed is None:
        return

    self.random_seed = boosting_param.random_seed
    # initialize random seed here
    LOGGER.debug('setting random seed done, random seed is {}'.format(
        self.random_seed))
    np.random.seed(self.random_seed)
def _init_model(self, params: FeatureBinningParam):
    """Store the binning params and create the matching binning object.

    ``consts.QUANTILE`` and ``consts.BUCKET`` map to their own binning
    classes. For ``consts.OPTIMAL`` the host uses quantile binning with
    ``bin_num`` overwritten by ``optimal_binning_param.init_bin_nums``,
    while other roles use ``OptimalBinning``.

    Raises
    ------
    ValueError
        If ``params.method`` is none of the supported methods.
    """
    self.model_param = params
    self.transform_type = self.model_param.transform_param.transform_type

    method = self.model_param.method
    if method == consts.QUANTILE:
        self.binning_obj = QuantileBinning(self.model_param)
    elif method == consts.BUCKET:
        self.binning_obj = BucketBinning(self.model_param)
    elif method == consts.OPTIMAL:
        if self.role == consts.HOST:
            # Note: this mutates the shared param object.
            self.model_param.bin_num = \
                self.model_param.optimal_binning_param.init_bin_nums
            self.binning_obj = QuantileBinning(self.model_param)
        else:
            self.binning_obj = OptimalBinning(self.model_param)
    else:
        raise ValueError("Binning method: {} is not supported yet".format(
            self.model_param.method))

    local_partyid = self.component_properties.local_partyid
    # Log the party id itself; the message used to be given the whole
    # component_properties object under the "local_partyid" label.
    LOGGER.debug("in _init_model, role: {}, local_partyid: {}".format(
        self.role, local_partyid))
    self.binning_obj.set_role_party(self.role, local_partyid)
def compute_best_splits(self, cur_to_split_nodes, node_map, dep, batch_idx):
    """Find the best splits for the nodes that are about to be split.

    Local (guest) splits are computed from the local histograms. Unless
    ``self.complete_secure_tree`` is set, host splits are fetched and
    merged with the guest splits before returning.
    """
    local_histograms = self.get_local_histograms(
        dep,
        self.data_with_node_assignments,
        self.grad_and_hess,
        None,
        cur_to_split_nodes,
        node_map,
        ret='tensor',
        hist_sub=False)

    guest_splits = self.splitter.find_split(local_histograms,
                                            self.valid_features,
                                            self.data_bin.partitions,
                                            self.sitename,
                                            self.use_missing,
                                            self.zero_as_missing)
    LOGGER.debug('computing local splits done')

    if self.complete_secure_tree:
        # Only local splits are used in this mode.
        return guest_splits

    self.federated_find_split(dep, batch_idx)
    host_splits = self.sync_final_split_host(dep, batch_idx)

    # compare host best split points with guest split points
    return self.merge_splitinfo(splitinfo_guest=guest_splits,
                                splitinfo_host=host_splits,
                                merge_host_split_only=False)
def convert_bin_to_real(self):
    """Replace bin ids of local split nodes by encoded real split values.

    Leaf nodes and nodes whose ``sitename`` differs from ``self.sitename``
    are left alone. The ids of the converted nodes are then passed to
    ``remove_duplicated_split_nodes``.
    """
    LOGGER.info("convert tree node bins to real value")
    converted_node_ids = []
    for node in self.tree_node:
        if node.is_leaf is True:
            continue
        if node.sitename != self.sitename:
            continue

        fid = self.decode("feature_idx",
                          node.fid,
                          split_maskdict=self.split_maskdict)
        bid = self.decode("feature_val",
                          node.bid,
                          node.id,
                          self.split_maskdict)
        LOGGER.debug("shape of bin_split_points is {}".format(
            len(self.bin_split_points[fid])))

        node.bid = self.encode("feature_val",
                               self.bin_split_points[fid][bid],
                               node.id)
        converted_node_ids.append(node.id)

    self.remove_duplicated_split_nodes(converted_node_ids)
def _get_param(self):
    """Build the ``PoissonModelParam`` protobuf for the current model.

    Without a header only ``best_iteration=-1`` is set. Otherwise the
    param carries the iteration count, loss history, convergence flag,
    per-column weights, intercept, header and best iteration.
    """
    header = self.header
    LOGGER.debug("In get_param, header: {}".format(header))
    if header is None:
        return poisson_model_param_pb2.PoissonModelParam(best_iteration=-1)

    # Column name -> coefficient, matched by position in the header.
    weight_dict = {
        column: self.model_weights.coef_[position]
        for position, column in enumerate(header)
    }
    intercept_ = self.model_weights.intercept_

    if self.validation_strategy is None:
        best_iteration = -1
    else:
        best_iteration = self.validation_strategy.best_iteration

    return poisson_model_param_pb2.PoissonModelParam(
        iters=self.n_iter_,
        loss_history=self.loss_history,
        is_converged=self.is_converged,
        weight=weight_dict,
        intercept=intercept_,
        header=header,
        best_iteration=best_iteration)
def get_default_target_framework(model_contents: dict, module_name: str):
    """
    Pick the ML framework a FATE homogeneous model can be converted to.

    :param model_contents: the model content of the FATE model; only read
                           for "HomoNN", via its 'HomoNNModelMeta' entry.
    :param module_name: The module name, typically as HomoXXXX.
    :return: "sklearn" for "HomoLR"; "pytorch" or "tf_keras" for "HomoNN"
             depending on the configured type; "lightgbm" for any casing
             of "homosecureboost"; otherwise None.
    """
    if module_name == "HomoLR":
        return "sklearn"

    if module_name == 'HomoNN':
        config_type = model_contents['HomoNNModelMeta'].params.config_type
        return "pytorch" if config_type == "pytorch" else "tf_keras"

    if module_name.lower() == 'homosecureboost':
        return 'lightgbm'

    LOGGER.debug(
        f"Module {module_name} is not a supported homogeneous model")
    return None
def predict(self, data_inst):
    """Load the boosters from the synced start round on and run prediction.

    The trees are passed to ``boosting_fast_predict``; nothing is returned.
    """
    LOGGER.info('running prediction')
    processed_data = self.data_and_header_alignment(data_inst)
    predict_start_round = self.sync_predict_start_round()

    rounds = len(self.boosting_model_list) // self.booster_dim
    # Boosters are stored round by round, ``booster_dim`` per round.
    trees = [
        self.load_booster(
            self.booster_meta,
            self.boosting_model_list[round_idx * self.booster_dim + dim_idx],
            round_idx,
            dim_idx)
        for round_idx in range(predict_start_round, rounds)
        for dim_idx in range(self.booster_dim)
    ]

    self.boosting_fast_predict(processed_data, trees=trees)
def display_cv_result(self, cv_results):
    """Log mean and std of every evaluation metric across CV folds.

    Only runs for the guest, or for a host in homo mode. List-valued
    results are split into one series per first element, named
    ``"<metric>_thres_<first element>"``, holding the second elements.

    Parameters
    ----------
    cv_results : iterable of dict
        One mapping of metric name to result per fold.
    """
    LOGGER.debug("cv_result: {}".format(cv_results))

    # ``consts.H**O`` in the original was a garbled ``consts.HOMO``.
    is_reporting_party = self.role == consts.GUEST or (
        self.role == consts.HOST and self.mode == consts.HOMO)
    if not is_reporting_party:
        return

    format_cv_result = {}
    for eval_result in cv_results:
        for eval_name, eval_r in eval_result.items():
            if not isinstance(eval_r, list):
                format_cv_result.setdefault(eval_name, []).append(eval_r)
                continue
            for e_r in eval_r:
                e_name = "{}_thres_{}".format(eval_name, e_r[0])
                format_cv_result.setdefault(e_name, []).append(e_r[1])

    for eval_name, eva_result_list in format_cv_result.items():
        mean_value = np.around(np.mean(eva_result_list), 4)
        std_value = np.around(np.std(eva_result_list), 4)
        LOGGER.info("{},evaluate name: {}, mean: {}, std: {}".format(
            self.role, eval_name, mean_value, std_value))
def fast_homo_tree_predict(self, data_inst):
    """Predict with all stored boosters and format the scores.

    Every booster is loaded, ``predict_helper`` is mapped over the aligned
    data, and the scores are converted by ``score_to_predict_result``.
    """
    LOGGER.info('running fast h**o tree predict')
    to_predict_data = self.data_and_header_alignment(data_inst)

    rounds = len(self.boosting_model_list) // self.booster_dim
    # Boosters are stored round by round, ``booster_dim`` per round.
    tree_list = [
        self.load_booster(
            self.booster_meta,
            self.boosting_model_list[round_idx * self.booster_dim + dim_idx],
            round_idx,
            dim_idx)
        for round_idx in range(0, rounds)
        for dim_idx in range(self.booster_dim)
    ]

    score_fn = functools.partial(self.predict_helper,
                                 tree_list=tree_list,
                                 init_score=self.init_score,
                                 zero_as_missing=self.zero_as_missing,
                                 use_missing=self.use_missing,
                                 learning_rate=self.learning_rate,
                                 class_num=self.booster_dim)
    predict_rs = to_predict_data.mapValues(score_fn)
    return self.score_to_predict_result(data_inst, predict_rs)
def compute_gradient_procedure(self, *args):
    """Compute gradients and, every ``update_interval_L`` steps, refresh
    the Hessian estimate.

    Positional ``args`` are read as: data instances, encrypted calculator
    list, model weights, optimizer, iteration number, batch index. All of
    them are forwarded unchanged to the wrapped gradient computer, whose
    result is returned.
    """
    data_instances, encrypted_calculator = args[0], args[1]
    model_weights, optimizer = args[2], args[3]
    self.batch_index = args[5]
    self.n_iter = args[4]
    cipher_operator = encrypted_calculator[0].encrypter

    gradient_results = self.gradient_computer.compute_gradient_procedure(
        *args)
    self._update_w_tilde(model_weights)

    if self.iter_k % self.update_interval_L != 0:
        # Not at an update boundary yet.
        return gradient_results

    self.count_t += 1
    # In-place division of the accumulated weights by the interval length.
    self.this_w_tilde /= self.update_interval_L
    if self.count_t > 0:
        LOGGER.info(
            "iter_k: {}, count_t: {}, start to update hessian".format(
                self.iter_k, self.count_t))
        self._update_hessian(data_instances, optimizer, cipher_operator)

    # Roll the window: current becomes last, current restarts from zeros.
    self.last_w_tilde = self.this_w_tilde
    self.this_w_tilde = LinearModelWeights(
        np.zeros_like(self.last_w_tilde.unboxed),
        self.last_w_tilde.fit_intercept)
    return gradient_results
def _evaluate_clustering_metrics(self, mode, data): eval_result = defaultdict(list) rs0, rs1, run_outer_metric = self._clustering_extract(data) if rs0 is None and rs1 is None: # skip evaluation computation if get this input format LOGGER.debug( 'skip computing, this clustering format is not for metric computation' ) return eval_result if not run_outer_metric: no_label = set(rs0) == {None} if no_label: LOGGER.debug( 'no label found in clustering result, skip metric computation' ) return eval_result for eval_metric in self.metrics: # if input format and required metrics matches ? XNOR if not ((not (eval_metric in self.clustering_intra_metric_list) and not run_outer_metric) + ((eval_metric in self.clustering_intra_metric_list) and run_outer_metric)): LOGGER.warning( 'input data format does not match current clustering metric: {}' .format(eval_metric)) continue LOGGER.debug('clustering_metrics is {}'.format(eval_metric)) if run_outer_metric: if eval_metric == consts.DISTANCE_MEASURE: res = getattr(self.metric_interface, eval_metric)(rs0['avg_dist'], rs1, rs0['max_radius']) else: res = getattr(self.metric_interface, eval_metric)(rs0['avg_dist'], rs1) else: res = getattr(self.metric_interface, eval_metric)(rs0, rs1) eval_result[eval_metric].append(mode) eval_result[eval_metric].append(res) return eval_result
def train_and_get_backward_gradient(self, x, y):
    """Train the top model and return the gradient w.r.t. its input.

    Without a selector the model trains directly on ``(x, y)``. With a
    selector, selected samples are cached and the model only trains once
    at least ``batch_size`` samples are cached; the consumed samples are
    then dropped from the cache.

    Returns
    -------
    tuple
        ``(selective_id, input_gradient, loss)``. ``selective_id`` lists
        the selected sample indices (empty without a selector);
        ``input_gradient`` stays ``[]`` when no training step was run.
    """
    LOGGER.debug("top model start to forward propagation")
    selective_id = []
    input_gradient = []

    if not self.selector:
        input_gradient = self._model.get_input_gradients(x, y)[0]
        self._model.train(self.data_converter.convert_data(x, y))
        loss = self._model.get_loss()[0]
        return selective_id, input_gradient, loss

    losses = self._model.get_forward_loss_from_input(x, y)
    loss = sum(losses) / len(losses)

    for idx, select in enumerate(self.selector.select_batch_sample(losses)):
        if not select:
            continue
        selective_id.append(idx)
        self.batch_data_cached_X.append(x[idx])
        self.batch_data_cached_y.append(y[idx])

    if len(self.batch_data_cached_X) >= self.batch_size:
        size = self.batch_size
        data = self.data_converter.convert_data(
            np.array(self.batch_data_cached_X[:size]),
            np.array(self.batch_data_cached_y[:size]))
        # Gradients are taken before the training step changes the model.
        input_gradient = self._model.get_input_gradients(
            np.array(self.batch_data_cached_X[:size]),
            np.array(self.batch_data_cached_y[:size]))[0]
        self._model.train(data)
        self.batch_data_cached_X = self.batch_data_cached_X[size:]
        self.batch_data_cached_y = self.batch_data_cached_y[size:]

    return selective_id, input_gradient, loss
def fit(self, data_inst, validate_data=None):
    """Train for up to ``self.epochs`` epochs.

    Each epoch trains and evaluates on every batch, runs validation when
    a strategy exists, and then reads the convergence flag. Training
    stops early when the validation strategy asks for it or the flag is
    set. Afterwards the best saved model, if any, is loaded.
    """
    self.validation_strategy = self.init_validation_strategy(
        data_inst, validate_data)
    self._build_model()
    self.prepare_batch_data(self.batch_generator, data_inst)

    for cur_epoch in range(self.epochs):
        for batch_idx, batch_x in enumerate(self.data_x):
            self.model.train(batch_x, cur_epoch, batch_idx)

            # Evaluation runs between reset_flowid and recovery_flowid.
            self.reset_flowid()
            self.model.evaluate(batch_x, cur_epoch, batch_idx)
            self.recovery_flowid()

        if self.validation_strategy:
            self.validation_strategy.validate(self, cur_epoch)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        converged = self.transfer_variable.is_converge.get(
            idx=0, suffix=(cur_epoch, ))
        if converged:
            LOGGER.debug(
                "Training process is converged in epoch {}".format(
                    cur_epoch))
            break

    strategy = self.validation_strategy
    if strategy and strategy.has_saved_best_model():
        self.load_model(strategy.cur_best_model)
def transform(self, data):
    """Append the configured columns to ``data``.

    In manual mode with an empty ``append_header`` the input is returned
    untouched. Otherwise ``self.header`` is updated and the expanded
    table is returned.
    """
    LOGGER.info("Enter Column Expand transform")

    nothing_to_append = (self.method == consts.MANUAL
                         and len(self.append_header) == 0)
    if nothing_to_append:
        LOGGER.info(
            "Finish Column Expand transform. Original data returned.")
        return data

    expanded, self.header = self._append_column(data)
    LOGGER.info("Finish Column Expand transform")
    return expanded
def compute_loss(self, data_instances, n_iter_, batch_index, loss_norm=None):
    r"""
    Compute hetero linr loss and sync it.

        loss = (1/(2N)) * \sum (wx - y)^2,  with wx = wx_h + wx_g

    where y is label, w is model weight and x is features. Expanded:

        (wx_h + wx_g - y)^2 = (wx_h)^2 + (wx_g - y)^2 + 2 * wx_h * (wx_g - y)

    ``self.half_d`` presumably holds ``wx_g - y`` and the host forwards
    ``wx_h`` -- TODO confirm against the gradient step.

    The loss is only computed for a single host; with more than one host
    an empty loss list is synced.

    Parameters
    ----------
    data_instances : table
        Only used for its row count N.
    n_iter_, batch_index : int
        Together form the transfer suffix.
    loss_norm : optional
        Guest regularisation term; when given, the host's term is fetched
        and both are added to the loss.
    """
    current_suffix = (n_iter_, batch_index)
    n = data_instances.count()
    loss_list = []

    host_wx_squares = self.get_host_loss_intermediate(current_suffix)

    if loss_norm is not None:
        host_loss_regular = self.get_host_loss_regular(
            suffix=current_suffix)
    else:
        host_loss_regular = []

    if len(self.host_forwards) > 1:
        LOGGER.info("More than one host exist, loss is not available")
    else:
        host_forward = self.host_forwards[0]
        host_wx_square = host_wx_squares[0]

        wxy_square = self.half_d.mapValues(lambda x: np.square(x)).reduce(
            reduce_add)
        # Cross term: row-wise product of guest and host parts, summed.
        loss_gh = self.half_d.join(host_forward,
                                   lambda g, h: g * h).reduce(reduce_add)
        loss = (wxy_square + host_wx_square + 2 * loss_gh) / (2 * n)
        if loss_norm is not None:
            loss = loss + loss_norm + host_loss_regular[0]
        loss_list.append(loss)

    self.sync_loss_info(loss_list, suffix=current_suffix)
def fit(self, data_inst):
    """Split ``data_inst`` into train, validate and test tables.

    The ids are first split into train vs. the rest, then the rest is
    split into validate vs. test. Count (and, when stratified, label)
    metas are reported through ``callback`` and ``set_summary``.

    Returns
    -------
    list or None
        ``[train_data, validate_data, test_data]``; ``None`` when
        ``self.need_run`` is False.
    """
    LOGGER.debug(f"Enter Hetero {self.role} Data Split fit")
    if self.need_run is False:
        return

    self.param_validator(data_inst)

    ids = self._get_ids(data_inst)
    y = self._get_y(data_inst)

    # First cut: train against everything else.
    id_train, id_rest, y_train, y_rest = self._split(
        ids,
        y,
        test_size=self.test_size + self.validate_size,
        train_size=self.train_size)

    # Second cut: validate against test, with recomputed sizes.
    validate_size, test_size = DataSplitter.get_train_test_size(
        self.validate_size, self.test_size)
    id_validate, id_test, y_validate, y_test = self._split(
        id_rest, y_rest, test_size=test_size, train_size=validate_size)

    train_data, validate_data, test_data = self.split_data(
        data_inst, id_train, id_validate, id_test)

    all_metas = self.callback_count_info(id_train, id_validate, id_test,
                                         {})
    if self.stratified:
        all_metas = self.callback_label_info(y_train, y_validate, y_test,
                                             all_metas)
    self.callback(all_metas)
    self.set_summary(all_metas)

    return [train_data, validate_data, test_data]
def predict(self, data_instances):
    """
    Prediction of Poisson

    Parameters
    ----------
    data_instances: Table of Instance, input data

    Returns
    ----------
    Table built by ``predict_score_to_output`` from the input data and the
    product of the guest and host partial predictions
    """
    LOGGER.info("Start predict ...")
    self._abnormal_detection(data_instances)

    header = data_instances.schema.get("header")
    self.exposure_index = self.get_exposure_index(header,
                                                  self.exposure_colname)
    # Copy to a local so the lambda below does not reference ``self``.
    exposure_index = self.exposure_index
    exposure = data_instances.mapValues(
        lambda v: HeteroPoissonBase.load_exposure(v, exposure_index))

    data_instances = self.align_data_header(data_instances, self.header)

    weights = self.model_weights
    pred_guest = self.compute_mu(data_instances,
                                 weights.coef_,
                                 weights.intercept_,
                                 exposure)
    pred_host = self.transfer_variable.host_partial_prediction.get(idx=0)
    LOGGER.info("Get prediction from Host")

    # Guest and host partial predictions are combined by multiplication.
    pred = pred_guest.join(pred_host, lambda g, h: g * h)
    return self.predict_score_to_output(data_instances=data_instances,
                                        predict_score=pred,
                                        classes=None)
def check(self):
    """Validate the intersect preprocess parameters.

    Checks ``false_positive_rate`` (at most 0.5), and normalises
    ``encrypt_method``, ``hash_method``, ``preprocess_method`` and
    ``filter_owner`` to lower case via ``check_and_change_lower``.

    Returns
    -------
    bool
        Always True when no check raised.

    Raises
    ------
    ValueError
        If ``false_positive_rate`` is greater than 0.5.
    """
    prefix = "intersect preprocess param's "
    hash_choices = (consts.MD5, consts.SHA1, consts.SHA224, consts.SHA256,
                    consts.SHA384, consts.SHA512, consts.SM3)

    descr = prefix + "false_positive_rate "
    self.check_decimal_float(self.false_positive_rate, descr)
    self.check_positive_number(self.false_positive_rate, descr)
    if self.false_positive_rate > 0.5:
        raise ValueError(
            f"{descr} must be positive float no greater than 0.5")

    self.encrypt_method = self.check_and_change_lower(
        self.encrypt_method, [consts.RSA], prefix + "encrypt_method ")

    # A falsy random_state (None or 0) is accepted without further checks.
    if self.random_state:
        self.check_nonnegative_number(self.random_state,
                                      prefix + "random_state ")

    self.hash_method = self.check_and_change_lower(
        self.hash_method, list(hash_choices), prefix + "hash_method ")

    self.check_string(self.preprocess_salt, prefix + "preprocess_salt ")

    self.preprocess_method = self.check_and_change_lower(
        self.preprocess_method, list(hash_choices),
        prefix + "preprocess_method ")

    self.filter_owner = self.check_and_change_lower(
        self.filter_owner, [consts.GUEST, consts.HOST],
        prefix + "filter_owner ")

    LOGGER.debug("Finish IntersectPreProcessParam parameter check!")
    return True
def load_model(self, model_dict):
    """Load whichever of the two model kinds ``model_dict`` contains.

    A ``'model'`` key triggers the selection-model loader (which gets the
    whole dict); an ``'isometric_model'`` key triggers the isometric
    loader (which gets only that entry). Both may be present.
    """
    LOGGER.debug("Start to load model")

    has_selection_model = 'model' in model_dict
    if has_selection_model:
        LOGGER.debug("Loading selection model")
        self._load_selection_model(model_dict)

    has_isometric_model = 'isometric_model' in model_dict
    if has_isometric_model:
        LOGGER.debug("Loading isometric_model")
        self._load_isometric_model(model_dict['isometric_model'])