import unittest

import numpy as np

# Imports below follow the FATE 1.x source tree; exact module paths may differ
# across versions.
from arch.api import eggroll
from federatedml.feature.instance import Instance
from federatedml.optim.gradient import HeteroLogisticGradient
from federatedml.secureprotol.encrypt import PaillierEncrypt


class TestHeteroLogisticGradient(unittest.TestCase):
    def setUp(self):
        self.paillier_encrypt = PaillierEncrypt()
        self.paillier_encrypt.generate_key()
        self.hetero_lr_gradient = HeteroLogisticGradient(self.paillier_encrypt)

        size = 10
        self.wx = eggroll.parallelize(
            [self.paillier_encrypt.encrypt(i) for i in range(size)])
        self.en_sum_wx_square = eggroll.parallelize(
            [self.paillier_encrypt.encrypt(np.square(i)) for i in range(size)])
        self.w = [i for i in range(size)]
        self.data_inst = eggroll.parallelize(
            [Instance(features=[1 for _ in range(size)], label=pow(-1, i % 2))
             for i in range(size)],
            partition=1)

        # expected fore_gradient
        self.fore_gradient_local = [-0.5, 0.75, 0, 1.25, 0.5, 1.75, 1, 2.25, 1.5, 2.75]
        # expected gradient
        self.gradient = [1.125, 1.125, 1.125, 1.125, 1.125,
                         1.125, 1.125, 1.125, 1.125, 1.125]
        self.gradient_fit_intercept = [1.125, 1.125, 1.125, 1.125, 1.125, 1.125,
                                       1.125, 1.125, 1.125, 1.125, 1.125]
        self.loss = 4.505647

    def test_compute_fore_gradient(self):
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(self.data_inst, self.wx)
        fore_gradient_local = [self.paillier_encrypt.decrypt(iterator[1])
                               for iterator in fore_gradient.collect()]
        self.assertListEqual(fore_gradient_local, self.fore_gradient_local)

    def test_compute_gradient(self):
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(self.data_inst, self.wx)

        gradient = self.hetero_lr_gradient.compute_gradient(self.data_inst, fore_gradient,
                                                            fit_intercept=False)
        de_gradient = [self.paillier_encrypt.decrypt(iterator) for iterator in gradient]
        self.assertListEqual(de_gradient, self.gradient)

        gradient = self.hetero_lr_gradient.compute_gradient(self.data_inst, fore_gradient,
                                                            fit_intercept=True)
        de_gradient = [self.paillier_encrypt.decrypt(iterator) for iterator in gradient]
        self.assertListEqual(de_gradient, self.gradient_fit_intercept)

    def test_compute_gradient_and_loss(self):
        fore_gradient = self.hetero_lr_gradient.compute_fore_gradient(self.data_inst, self.wx)
        gradient, loss = self.hetero_lr_gradient.compute_gradient_and_loss(
            self.data_inst, fore_gradient, self.wx, self.en_sum_wx_square, False)

        de_gradient = [self.paillier_encrypt.decrypt(i) for i in gradient]
        self.assertListEqual(de_gradient, self.gradient)

        diff_loss = np.abs(self.loss - self.paillier_encrypt.decrypt(loss))
        self.assertLess(diff_loss, 1e-5)


if __name__ == '__main__':
    eggroll.init("test_hetero_lr_gradient")  # assumed standalone mode; init before parallelize
    unittest.main()
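# The fixtures above can be checked by hand: hetero-LR replaces the logistic
# loss with its second-order Taylor expansion, so with labels y in {-1, +1} the
# per-instance residual is fore_gradient = 0.25 * wx - 0.5 * y and the loss is
# mean(log(2) - 0.5 * y * wx + 0.125 * wx ** 2). A minimal plaintext sketch
# (plain NumPy, no encryption; this script is ours, not part of the code base)
# that reproduces the expected values:

import numpy as np

wx = np.arange(10).astype(float)                      # wx = i, as built in setUp
y = np.array([(-1) ** (i % 2) for i in range(10)])    # labels alternate +1, -1
features = np.ones((10, 10))                          # every instance is all-ones

fore_gradient = 0.25 * wx - 0.5 * y                   # [-0.5, 0.75, 0.0, 1.25, ...]
gradient = fore_gradient @ features / 10              # [1.125] * 10
intercept_grad = fore_gradient.mean()                 # the extra 1.125 when fit_intercept=True
loss = np.mean(np.log(2) - 0.5 * y * wx + 0.125 * wx ** 2)   # 4.505647...

print(fore_gradient.tolist(), gradient.tolist(), round(loss, 6))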
class HeteroLRHost(BaseLogisticRegression):
    def __init__(self, logistic_params):
        # LogisticParamChecker.check_param(logistic_params)
        super(HeteroLRHost, self).__init__(logistic_params)
        self.transfer_variable = HeteroLRTransferVariable()
        self.batch_num = None
        self.batch_index_list = []

    def compute_forward(self, data_instances, coef_, intercept_):
        wx = self.compute_wx(data_instances, coef_, intercept_)
        encrypt_operator = self.encrypt_operator
        # encrypt wx and wx^2 for the guest
        host_forward = wx.mapValues(
            lambda v: (encrypt_operator.encrypt(v), encrypt_operator.encrypt(np.square(v))))
        return host_forward

    def fit(self, data_instances):
        LOGGER.info("Enter hetero_lr host")
        self._abnormal_detection(data_instances)
        self.header = data_instances.schema.get("header")

        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:" + str(batch_info))
        self.batch_size = batch_info["batch_size"]
        self.batch_num = batch_info["batch_num"]

        LOGGER.info("Start initialize model.")
        model_shape = data_overview.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False
        if self.fit_intercept:
            self.fit_intercept = False
        self.coef_ = self.initializer.init_model(model_shape,
                                                 init_params=self.init_param_obj)

        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                # set batch_data
                if len(self.batch_index_list) < self.batch_num:
                    batch_data_index = federation.get(
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        idx=0)
                    LOGGER.info("Get batch_index from Guest")
                    batch_size = batch_data_index.count()
                    if batch_size < consts.MIN_BATCH_SIZE and batch_size != -1:
                        raise ValueError(
                            "Batch size got from guest should not be less than {}, "
                            "unless it is -1; got {}".format(consts.MIN_BATCH_SIZE,
                                                             batch_size))
                    self.batch_index_list.append(batch_data_index)
                else:
                    batch_data_index = self.batch_index_list[batch_index]

                # Get mini-batch train data
                if len(index_data_inst_map) < self.batch_num:
                    batch_data_inst = batch_data_index.join(data_instances, lambda g, d: d)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                LOGGER.info("batch_data_inst size:{}".format(batch_data_inst.count()))

                # transform features of raw input 'batch_data_inst' into more
                # representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data_inst)

                # compute forward
                host_forward = self.compute_forward(batch_feat_inst, self.coef_,
                                                    self.intercept_)
                federation.remote(
                    host_forward,
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote host_forward to guest")

                # compute host gradient
                fore_gradient = federation.get(
                    name=self.transfer_variable.fore_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.fore_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get fore_gradient from guest")

                if self.gradient_operator is None:
                    self.gradient_operator = HeteroLogisticGradient(self.encrypt_operator)
                host_gradient = self.gradient_operator.compute_gradient(
                    batch_feat_inst, fore_gradient, fit_intercept=False)

                # regularization if necessary
                if self.updater is not None:
                    loss_regular = self.updater.loss_norm(self.coef_)
                    en_loss_regular = self.encrypt_operator.encrypt(loss_regular)
                    federation.remote(
                        en_loss_regular,
                        name=self.transfer_variable.host_loss_regular.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.host_loss_regular,
                            self.n_iter_, batch_index),
                        role=consts.GUEST,
                        idx=0)
                    LOGGER.info("Remote host_loss_regular to guest")

                federation.remote(
                    host_gradient,
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient,
                        self.n_iter_, batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote host_gradient to arbiter")

                # Get optimized host gradient and update model
                optim_host_gradient = federation.get(
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_host_gradient from arbiter")

                LOGGER.info("update_model")
                self.update_model(optim_host_gradient)

                # update local model that transforms features of raw input 'batch_data_inst'
                training_info = {"iteration": self.n_iter_,
                                 "batch_index": batch_index}
                self.update_local_model(fore_gradient, batch_data_inst, self.coef_,
                                        **training_info)

                # is converge
                batch_index += 1

            # if is_stopped:
            #     break

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info("Get stop signal from arbiter, model is converged, "
                            "iter:{}".format(self.n_iter_))
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))

    def predict(self, data_instances, predict_param=None):
        LOGGER.info("Start predict ...")
        data_features = self.transform(data_instances)
        prob_host = self.compute_wx(data_features, self.coef_, self.intercept_)
        federation.remote(prob_host,
                          name=self.transfer_variable.host_prob.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.host_prob),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("Remote probability to Guest")
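# Every federation.remote above is paired with a federation.get on the other
# side by tag. generate_transferid builds that tag from the transfer variable's
# name plus the (iteration, batch) suffixes, so each mini-batch round uses a
# distinct channel and repeated calls never collide. A rough sketch of the
# idea; the exact tag format is FATE-internal and is an assumption here:

def generate_transferid_sketch(transfer_var_name, *suffix):
    # unique per (variable, iteration, batch)
    return ".".join([transfer_var_name] + [str(s) for s in suffix])

# e.g. the fore_gradient channel for iteration 3, batch 0:
# -> 'HeteroLRTransferVariable.fore_gradient.3.0'
print(generate_transferid_sketch("HeteroLRTransferVariable.fore_gradient", 3, 0))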
class HeteroLRHost(HeteroLRBase):
    def __init__(self):
        super(HeteroLRHost, self).__init__()
        self.batch_num = None
        self.batch_index_list = []
        self.role = consts.HOST

    def compute_forward(self, data_instances, coef_, intercept_, batch_index=-1):
        """
        Compute W * X + b and (W * X + b)^2, where X is the input data, W is the
        coefficient of lr, and b is the intercept

        Parameters
        ----------
        data_instances: DTable of Instance, input data

        coef_: list, coefficient of lr

        intercept_: float, the intercept of lr
        """
        wx = self.compute_wx(data_instances, coef_, intercept_)

        en_wx = self.encrypted_calculator[batch_index].encrypt(wx)
        wx_square = wx.mapValues(lambda v: np.square(v))
        en_wx_square = self.encrypted_calculator[batch_index].encrypt(wx_square)

        host_forward = en_wx.join(en_wx_square, lambda wx, wx_square: (wx, wx_square))

        # temporary resource recovery and will be removed in the future
        rubbish_list = [wx, en_wx, wx_square, en_wx_square]
        rubbish_clear(rubbish_list)

        return host_forward

    def fit(self, data_instances):
        """
        Train lr model of role host

        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """
        LOGGER.info("Enter hetero_lr host")
        self._abnormal_detection(data_instances)

        self.header = self.get_header(data_instances)
        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:" + str(batch_info))
        self.batch_size = batch_info["batch_size"]
        self.batch_num = batch_info["batch_num"]
        if self.batch_size < consts.MIN_BATCH_SIZE and self.batch_size != -1:
            raise ValueError(
                "Batch size got from guest should not be less than {}, "
                "unless it is -1; got {}".format(consts.MIN_BATCH_SIZE, self.batch_size))

        self.encrypted_calculator = [
            EncryptModeCalculator(self.encrypt_operator,
                                  self.encrypted_mode_calculator_param.mode,
                                  self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(self.batch_num)]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False
        if self.fit_intercept:
            self.fit_intercept = False
        self.coef_ = self.initializer.init_model(model_shape,
                                                 init_params=self.init_param_obj)

        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                # set batch_data
                if len(self.batch_index_list) < self.batch_num:
                    batch_data_index = federation.get(
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        idx=0)
                    LOGGER.info("Get batch_index from Guest")
                    self.batch_index_list.append(batch_data_index)
                else:
                    batch_data_index = self.batch_index_list[batch_index]

                # Get mini-batch train data
                if len(index_data_inst_map) < self.batch_num:
                    batch_data_inst = batch_data_index.join(data_instances, lambda g, d: d)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                LOGGER.info("batch_data_inst size:{}".format(batch_data_inst.count()))

                # transform features of raw input 'batch_data_inst' into more
                # representative features 'batch_feat_inst'
                batch_feat_inst = self.transform(batch_data_inst)

                # compute forward
                host_forward = self.compute_forward(batch_feat_inst, self.coef_,
                                                    self.intercept_, batch_index)
                federation.remote(
                    host_forward,
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote host_forward to guest")

                # compute host gradient
                fore_gradient = federation.get(
                    name=self.transfer_variable.fore_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.fore_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get fore_gradient from guest")

                if self.gradient_operator is None:
                    self.gradient_operator = HeteroLogisticGradient(self.encrypt_operator)
                host_gradient = self.gradient_operator.compute_gradient(
                    batch_feat_inst, fore_gradient, fit_intercept=False)

                # regularization if necessary
                if self.updater is not None:
                    loss_regular = self.updater.loss_norm(self.coef_)
                    en_loss_regular = self.encrypt_operator.encrypt(loss_regular)
                    federation.remote(
                        en_loss_regular,
                        name=self.transfer_variable.host_loss_regular.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.host_loss_regular,
                            self.n_iter_, batch_index),
                        role=consts.GUEST,
                        idx=0)
                    LOGGER.info("Remote host_loss_regular to guest")

                federation.remote(
                    host_gradient,
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient,
                        self.n_iter_, batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote host_gradient to arbiter")

                # Get optimized host gradient and update model
                optim_host_gradient = federation.get(
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_host_gradient from arbiter")

                LOGGER.info("update_model")
                self.update_model(optim_host_gradient)

                # update local model that transforms features of raw input 'batch_data_inst'
                training_info = {"iteration": self.n_iter_,
                                 "batch_index": batch_index}
                self.update_local_model(fore_gradient, batch_data_inst, self.coef_,
                                        **training_info)

                batch_index += 1

                # temporary resource recovery and will be removed in the future
                rubbish_list = [host_forward, fore_gradient]
                data_overview.rubbish_clear(rubbish_list)

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info("Get stop signal from arbiter, model is converged, "
                            "iter:{}".format(self.n_iter_))
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))

    def predict(self, data_instances):
        """
        Prediction of lr

        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """
        LOGGER.info("Start predict ...")

        data_features = self.transform(data_instances)
        prob_host = self.compute_wx(data_features, self.coef_, self.intercept_)
        federation.remote(prob_host,
                          name=self.transfer_variable.host_prob.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.host_prob),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("Remote probability to Guest")
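# Mini-batch selection above is an id-join: the guest sends only the keys of
# the sampled batch (batch_data_index), and the host joins them back onto its
# own DTable, so feature values never leave the host. A toy dict-based stand-in
# for DTable.join(other, lambda g, d: d); the helper below is ours, not the
# eggroll API:

def id_join_sketch(batch_index_ids, host_table):
    # inner join on keys, keeping the host-side value (the 'd' of lambda g, d: d)
    return {k: host_table[k] for k in batch_index_ids if k in host_table}

host_table = {"id1": [0.2, 0.7], "id2": [0.5, 0.1], "id3": [0.9, 0.3]}
batch_ids = {"id1": None, "id3": None}         # guest reveals only membership
print(id_join_sketch(batch_ids, host_table))   # {'id1': [0.2, 0.7], 'id3': [0.9, 0.3]}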
class HeteroLRHost(BaseLogisticRegression):
    def __init__(self, logistic_params):
        super(HeteroLRHost, self).__init__(logistic_params)
        self.transfer_variable = HeteroLRTransferVariable()
        self.batch_num = None
        self.batch_index_list = []

    def compute_forward(self, data_instances, coef_, intercept_):
        wx = self.compute_wx(data_instances, coef_, intercept_)
        encrypt_operator = self.encrypt_operator
        # encrypt wx and wx^2 for the guest
        host_forward = wx.mapValues(
            lambda v: (encrypt_operator.encrypt(v), encrypt_operator.encrypt(np.square(v))))
        return host_forward

    def fit(self, data_instances):
        LOGGER.info("Enter hetero_lr host")
        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:" + str(batch_info))
        self.batch_size = batch_info["batch_size"]
        self.batch_num = batch_info["batch_num"]

        LOGGER.info("Start initialize model.")
        model_shape = self.get_features_shape(data_instances)
        if self.init_param_obj.fit_intercept:
            self.init_param_obj.fit_intercept = False
        if self.fit_intercept:
            self.fit_intercept = False
        self.coef_ = self.initializer.init_model(model_shape,
                                                 init_params=self.init_param_obj)

        is_stopped = False
        self.n_iter_ = 0
        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:" + str(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                # set batch_data
                if len(self.batch_index_list) < self.batch_num:
                    batch_data_index = federation.get(
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        idx=0)
                    LOGGER.info("Get batch_index from Guest")
                    self.batch_index_list.append(batch_data_index)
                else:
                    batch_data_index = self.batch_index_list[batch_index]

                # Get mini-batch train data
                batch_data_inst = batch_data_index.join(data_instances, lambda g, d: d)

                # compute forward
                host_forward = self.compute_forward(batch_data_inst, self.coef_,
                                                    self.intercept_)
                federation.remote(
                    host_forward,
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote host_forward to guest")

                # compute host gradient
                fore_gradient = federation.get(
                    name=self.transfer_variable.fore_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.fore_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get fore_gradient from guest")

                if self.gradient_operator is None:
                    self.gradient_operator = HeteroLogisticGradient(self.encrypt_operator)
                # the gradient must be computed on the same mini-batch the
                # fore_gradient refers to (was 'data_instances', which mismatches
                # the batch)
                host_gradient = self.gradient_operator.compute_gradient(
                    batch_data_inst, fore_gradient, fit_intercept=False)

                # regularization if necessary
                if self.updater is not None:
                    loss_regular = self.updater.loss_norm(self.coef_)
                    en_loss_regular = self.encrypt_operator.encrypt(loss_regular)
                    federation.remote(
                        en_loss_regular,
                        name=self.transfer_variable.host_loss_regular.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.host_loss_regular,
                            self.n_iter_, batch_index),
                        role=consts.GUEST,
                        idx=0)
                    LOGGER.info("Remote host_loss_regular to guest")

                federation.remote(
                    host_gradient,
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient,
                        self.n_iter_, batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote host_gradient to arbiter")

                # Get optimized host gradient and update model
                optim_host_gradient = federation.get(
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_host_gradient from arbiter")

                LOGGER.info("update_model")
                self.update_model(optim_host_gradient)

                # is converge
                is_stopped = federation.get(
                    name=self.transfer_variable.is_stopped.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.is_stopped,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

                batch_index += 1
                if is_stopped:
                    LOGGER.info("Get stop signal from arbiter, model is converged, "
                                "iter:{}".format(self.n_iter_))
                    break

            self.n_iter_ += 1
            if is_stopped:
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))

    def predict(self, data_instances, predict_param=None):
        LOGGER.info("Start predict ...")
        prob_host = self.compute_wx(data_instances, self.coef_, self.intercept_)
        federation.remote(prob_host,
                          name=self.transfer_variable.host_prob.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.host_prob),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("Remote probability to Guest")
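# All three HeteroLRHost versions above run the same arbiter-driven loop; they
# differ mainly in where the stop flag is polled (per batch here, per iteration
# in the earlier versions) and in batch/feature caching. A condensed sketch of
# the host's control flow with federation stubbed out; the stub signatures
# below are ours, not the FATE API:

def host_fit_sketch(get, remote, batch_num, max_iter):
    n_iter = 0
    while n_iter < max_iter:
        for batch_index in range(batch_num):
            get("batch_data_index", n_iter, batch_index)      # ids -> join local batch
            remote("host_forward", n_iter, batch_index)       # [[wx]], [[wx^2]] -> guest
            get("fore_gradient", n_iter, batch_index)         # encrypted residual <- guest
            remote("host_gradient", n_iter, batch_index)      # encrypted gradient -> arbiter
            get("host_optim_gradient", n_iter, batch_index)   # decrypted step <- arbiter
            # ... update the local model with the optimized gradient ...
        if get("is_stopped", n_iter, batch_num):              # arbiter decides convergence
            break
        n_iter += 1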