def predict(self, data_inst, predict_param=None):
    """Run the host side of prediction over the stored trees.

    The aligned form of ``data_inst`` is cached in
    ``self.data_alignment_map`` under the key returned by
    ``self.predict_data_cache.get_data_key``. On a cache miss the data is
    passed through ``self.data_alignment`` and then through
    ``data_overview.header_alignment`` with a header built from
    ``self.feature_name_fid_mapping``, and the result is stored.

    Rounds start at the value returned by
    ``self.sync_predict_start_round()``. The return value of each tree's
    ``predict`` call is discarded and this method returns ``None``.

    Args:
        data_inst: Data set to predict on.
        predict_param: Not used by this method.
    """
    LOGGER.info("start predict")

    cache_dataset_key = self.predict_data_cache.get_data_key(data_inst)
    if cache_dataset_key not in self.data_alignment_map:
        # Cache miss: align the data, then align its header to the order
        # given by the mapping's keys (used here as list positions).
        data_inst = self.data_alignment(data_inst)
        index_to_column = self.feature_name_fid_mapping
        header = [None] * len(index_to_column)
        for position, column in index_to_column.items():
            header[position] = column
        data_inst = data_overview.header_alignment(data_inst, header)
        self.data_alignment_map[cache_dataset_key] = data_inst
    else:
        data_inst = self.data_alignment_map[cache_dataset_key]

    rounds = len(self.trees_) // self.tree_dim
    predict_start_round = self.sync_predict_start_round()

    for boost_round in range(predict_start_round, rounds):
        for tree_idx in range(self.tree_dim):
            LOGGER.info(
                "start to predict, boost round: {}, tree index: {}".format(
                    boost_round, tree_idx))
            # self.trees_ is flat: tree_dim consecutive entries per round.
            flat_pos = boost_round * self.tree_dim + tree_idx

            tree_inst = HeteroDecisionTreeHost(self.tree_param)
            tree_inst.load_model(self.tree_meta, self.trees_[flat_pos])
            tree_inst.set_flowid(self.generate_flowid(boost_round, tree_idx))
            tree_inst.set_runtime_idx(
                self.component_properties.local_partyid)
            tree_inst.predict(data_inst)

    LOGGER.info("end predict")
def fit(self, data_inst):
    """Train the host side of the boosting model.

    Builds ``self.tree_dim`` host trees for each of ``self.num_trees``
    boosting rounds and appends each tree's parameters to ``self.trees_``.
    ``self.tree_meta`` is set from the first fitted tree and left alone
    afterwards.

    When ``self.n_iter_no_change`` is ``True``, the flag returned by
    ``self.sync_stop_flag`` is checked after every round and training ends
    early once it is truthy.

    Args:
        data_inst: Training data; its ``schema`` attribute is read to
            build the feature mapping.
    """
    LOGGER.info("begin to train secureboosting host model")
    self.gen_feature_fid_mapping(data_inst.schema)
    LOGGER.debug("schema is {}".format(data_inst.schema))
    data_inst = self.data_alignment(data_inst)
    self.convert_feature_to_bin(data_inst)
    self.sync_tree_dim()

    for i in range(self.num_trees):
        for tidx in range(self.tree_dim):
            tree_inst = HeteroDecisionTreeHost(self.tree_param)
            tree_inst.set_inputinfo(
                data_bin=self.data_bin,
                bin_split_points=self.bin_split_points,
                bin_sparse_points=self.bin_sparse_points)

            valid_features = self.sample_valid_features()
            tree_inst.set_flowid(self.generate_flowid(i, tidx))
            tree_inst.set_runtime_idx(self.runtime_idx)
            tree_inst.set_valid_features(valid_features)

            tree_inst.fit()
            tree_meta, tree_param = tree_inst.get_model()
            self.trees_.append(tree_param)
            # Only the first tree's meta is kept.
            if self.tree_meta is None:
                self.tree_meta = tree_meta

        # Identity check kept on purpose: only the literal True enables
        # the per-round stop-flag sync.
        if self.n_iter_no_change is True:
            stop_flag = self.sync_stop_flag(i)
            if stop_flag:
                break

    LOGGER.info("end to train secureboosting host model")
def fit(self, data_inst, validate_data=None):
    """Train the host side of the boosting model, with optional validation.

    Builds ``self.tree_dim`` host trees for each of ``self.num_trees``
    boosting rounds and appends each tree's parameters to ``self.trees_``.
    ``self.tree_meta`` is set from the first fitted tree and left alone
    afterwards.

    After every round:
      * if ``self.validation_strategy`` is truthy, it validates this model
        for the round and training ends when it reports ``need_stop()``;
      * if ``self.n_iter_no_change`` is ``True``, training ends when
        ``self.sync_stop_flag`` returns a truthy flag.

    Once the rounds are done, the strategy's ``cur_best_model`` is loaded
    if the strategy reports that it saved one.

    Args:
        data_inst: Training data; its ``schema`` attribute is read to
            build the feature mapping.
        validate_data: Passed to ``self.init_validation_strategy`` together
            with the aligned training data. Defaults to ``None``.
    """
    LOGGER.info("begin to train secureboosting host model")
    self.gen_feature_fid_mapping(data_inst.schema)
    LOGGER.debug("schema is {}".format(data_inst.schema))
    data_inst = self.data_alignment(data_inst)
    self.convert_feature_to_bin(data_inst)
    self.sync_tree_dim()

    self.validation_strategy = self.init_validation_strategy(
        data_inst, validate_data)

    for i in range(self.num_trees):
        for tidx in range(self.tree_dim):
            LOGGER.info(
                "start to fit, boost round: {}, tree index: {}".format(
                    i, tidx))
            tree_inst = HeteroDecisionTreeHost(self.tree_param)
            tree_inst.set_inputinfo(
                data_bin=self.data_bin,
                bin_split_points=self.bin_split_points,
                bin_sparse_points=self.bin_sparse_points)

            valid_features = self.sample_valid_features()
            tree_inst.set_flowid(self.generate_flowid(i, tidx))
            tree_inst.set_runtime_idx(
                self.component_properties.local_partyid)
            tree_inst.set_valid_features(valid_features)

            tree_inst.fit()
            tree_meta, tree_param = tree_inst.get_model()
            self.trees_.append(tree_param)
            # Only the first tree's meta is kept.
            if self.tree_meta is None:
                self.tree_meta = tree_meta

        if self.validation_strategy:
            LOGGER.debug('host running validation')
            self.validation_strategy.validate(self, i)
            if self.validation_strategy.need_stop():
                LOGGER.debug('early stopping triggered')
                break

        # Identity check kept on purpose: only the literal True enables
        # the per-round stop-flag sync.
        if self.n_iter_no_change is True:
            stop_flag = self.sync_stop_flag(i)
            if stop_flag:
                break

    if (self.validation_strategy
            and self.validation_strategy.has_saved_best_model()):
        self.load_model(self.validation_strategy.cur_best_model)

    LOGGER.info("end to train secureboosting host model")
def predict_f_value(self, data_inst, predict_start_round, rounds):
    """Run host-side tree prediction for a range of boosting rounds.

    For every round in ``range(predict_start_round, rounds)`` and every
    tree index below ``self.tree_dim``, a ``HeteroDecisionTreeHost`` is
    loaded from ``self.tree_meta`` and the matching entry of
    ``self.trees_``, then asked to predict on ``data_inst``. The return
    value of each tree's ``predict`` call is discarded.

    Args:
        data_inst: Data set handed to each tree's ``predict``.
        predict_start_round: First boosting round to run (inclusive).
        rounds: Boosting round to stop at (exclusive).
    """
    for boost_round in range(predict_start_round, rounds):
        for tree_idx in range(self.tree_dim):
            # self.trees_ is flat: tree_dim consecutive entries per round.
            flat_pos = boost_round * self.tree_dim + tree_idx

            tree_inst = HeteroDecisionTreeHost(self.tree_param)
            tree_inst.load_model(self.tree_meta, self.trees_[flat_pos])
            tree_inst.set_flowid(self.generate_flowid(boost_round, tree_idx))
            tree_inst.set_runtime_idx(
                self.component_properties.local_partyid)
            tree_inst.predict(data_inst)
def predict(self, data_inst, predict_param=None):
    """Run the host side of prediction over every stored tree.

    The data is passed through ``self.data_alignment`` first. Each tree in
    ``self.trees_`` is then loaded into a ``HeteroDecisionTreeHost`` and
    asked to predict, round by round. The return value of each tree's
    ``predict`` call is discarded and this method returns ``None``.

    Args:
        data_inst: Data set to predict on.
        predict_param: Not used by this method.
    """
    LOGGER.info("start predict")

    data_inst = self.data_alignment(data_inst)
    rounds = len(self.trees_) // self.tree_dim

    for boost_round in range(rounds):
        for tree_idx in range(self.tree_dim):
            # self.trees_ is flat: tree_dim consecutive entries per round.
            flat_pos = boost_round * self.tree_dim + tree_idx

            tree_inst = HeteroDecisionTreeHost(self.tree_param)
            tree_inst.load_model(self.tree_meta, self.trees_[flat_pos])
            tree_inst.set_flowid(self.generate_flowid(boost_round, tree_idx))
            tree_inst.set_runtime_idx(self.runtime_idx)
            tree_inst.predict(data_inst)

    LOGGER.info("end predict")
def predict(self, data_inst, predict_param=None):
    """Run the host side of prediction from the synced start round.

    The data is passed through ``self.data_alignment`` first. Rounds start
    at the value returned by ``self.sync_predict_start_round()``; for each
    round, every tree is loaded into a ``HeteroDecisionTreeHost`` and asked
    to predict. The return value of each tree's ``predict`` call is
    discarded and this method returns ``None``.

    Args:
        data_inst: Data set to predict on.
        predict_param: Not used by this method.
    """
    LOGGER.info("start predict")

    data_inst = self.data_alignment(data_inst)
    rounds = len(self.trees_) // self.tree_dim
    predict_start_round = self.sync_predict_start_round()

    for boost_round in range(predict_start_round, rounds):
        for tree_idx in range(self.tree_dim):
            # self.trees_ is flat: tree_dim consecutive entries per round.
            flat_pos = boost_round * self.tree_dim + tree_idx

            tree_inst = HeteroDecisionTreeHost(self.tree_param)
            tree_inst.load_model(self.tree_meta, self.trees_[flat_pos])
            tree_inst.set_flowid(self.generate_flowid(boost_round, tree_idx))
            tree_inst.set_runtime_idx(
                self.component_properties.local_partyid)
            tree_inst.predict(data_inst)

    LOGGER.info("end predict")