def predict_f_value(self, data_inst):
    """Rebuild ``self.F`` for ``data_inst`` by running every stored tree.

    ``self.F`` is first reset so that each key of ``data_inst`` maps to
    ``self.init_score``.  The flat ``self.trees_`` list is then walked as
    ``len(self.trees_) // self.tree_dim`` rounds of ``self.tree_dim`` trees
    each.  Every tree is loaded into a fresh ``HeteroDecisionTreeGuest``,
    asked to predict on ``data_inst``, and its output is handed to
    ``self.update_f_value``.

    Because the round count uses floor division, trailing trees that do not
    fill a complete round are not evaluated.

    Args:
        data_inst: Table-like object exposing ``mapValues``; it is also
            passed unchanged to each tree's ``predict``.

    Returns:
        None.  ``self.F`` is assigned here; any further accumulation is
        whatever ``self.update_f_value`` performs.
    """
    LOGGER.info("predict tree f value, there are {} trees".format(len(self.trees_)))

    # Bind the score to a local so the lambda closes over the value only
    # (presumably to keep ``self`` out of the shipped closure -- confirm).
    init_score = self.init_score
    self.F = data_inst.mapValues(lambda v: init_score)

    rounds = len(self.trees_) // self.tree_dim
    for i in range(rounds):
        for tidx in range(self.tree_dim):
            tree_inst = HeteroDecisionTreeGuest(self.tree_param)
            # trees_ is indexed round-major: round i, then dimension tidx.
            tree_inst.load_model(self.tree_meta, self.trees_[i * self.tree_dim + tidx])
            tree_inst.set_flowid(self.generate_flowid(i, tidx))
            predict_data = tree_inst.predict(data_inst)
            self.update_f_value(new_f=predict_data, tidx=tidx)
def predict_f_value(self, data_inst, cache_dataset_key):
    """Bring ``self.predict_F`` up to date for ``data_inst``, reusing cached rounds.

    ``self.predict_data_cache`` is asked for the last round already stored
    under ``cache_dataset_key``:

    * ``-1`` is treated as "nothing cached": ``self.predict_F`` starts with
      every key of ``data_inst`` mapped to ``self.init_score``.
    * any other value is treated as a cache hit: ``self.predict_F`` is
      loaded from the cache at round ``min(rounds - 1, last_round)``.

    ``self.sync_predict_start_round`` is then called with
    ``last_round + 1``, and only rounds from that index up to
    ``len(self.trees_) // self.tree_dim`` are evaluated.  After each
    evaluated round the current ``self.predict_F`` is written back to the
    cache.

    Args:
        data_inst: Table-like object exposing ``mapValues``; it is also
            passed unchanged to each tree's ``predict``.
        cache_dataset_key: Key under which predictions for this dataset
            are looked up in, and added to, ``self.predict_data_cache``.

    Returns:
        None.  Results are left on ``self.predict_F`` and in the cache.
    """
    tree_count = len(self.trees_)
    LOGGER.info("predict tree f value, there are {} trees".format(tree_count))

    # Bind the score to a local so the lambda below closes over the value
    # only (presumably to keep ``self`` out of the closure -- confirm).
    base_score = self.init_score

    last_round = self.predict_data_cache.predict_data_last_round(cache_dataset_key)
    LOGGER.debug("jyp last_round is {}".format(last_round))

    rounds = tree_count // self.tree_dim
    final_round = rounds - 1

    if last_round != -1:
        LOGGER.debug("hit cache, cached round is {}".format(last_round))
        if final_round <= last_round:
            LOGGER.debug(
                "predict data cached, rounds is {}, total cached round is {}".format(rounds, last_round)
            )
        # The cache may hold more rounds than this model has; clamp the
        # lookup to the model's final round.
        self.predict_F = self.predict_data_cache.predict_data_at(
            cache_dataset_key, min(final_round, last_round)
        )
    else:
        self.predict_F = data_inst.mapValues(lambda _value: base_score)

    first_new_round = last_round + 1
    self.sync_predict_start_round(first_new_round)

    for round_idx in range(first_new_round, rounds):
        for dim_idx in range(self.tree_dim):
            tree = HeteroDecisionTreeGuest(self.tree_param)
            # trees_ is indexed round-major: round first, then dimension.
            tree.load_model(self.tree_meta, self.trees_[round_idx * self.tree_dim + dim_idx])
            tree.set_flowid(self.generate_flowid(round_idx, dim_idx))
            tree.set_runtime_idx(self.component_properties.local_partyid)
            tree.set_host_party_idlist(self.component_properties.host_party_idlist)
            tree.set_encrypter(self.encrypter)

            tree_output = tree.predict(data_inst)
            self.update_f_value(new_f=tree_output, tidx=dim_idx, mode="predict")

        # NOTE(review): written once per completed round, after the inner
        # loop -- confirm this matches the intended nesting.
        self.predict_data_cache.add_data(cache_dataset_key, self.predict_F)