def compute_gradient(self, data_instance, fore_gradient, fit_intercept):
    feat_join_grad = data_instance.join(fore_gradient,
                                        lambda d, g: (d.features, g))
    f = functools.partial(self.__compute_gradient,
                          fit_intercept=fit_intercept)
    gradient_partition = feat_join_grad.mapPartitions(f)
    gradient = HeteroFederatedAggregator.aggregate_mean(gradient_partition)

    return gradient
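# A minimal sketch of what the private __compute_gradient partition helper
# could look like. The helper name, the (features, fore_gradient) tuple layout
# (set up by the join above), and the (sum, count) return shape consumed by
# aggregate_mean are all assumptions, not the project's confirmed API.
def __compute_gradient_sketch(kv_iterator, fit_intercept=True):
    grad_sum = None
    counter = 0
    for _, (features, fore_grad) in kv_iterator:
        # per-row LR gradient contribution: fore_gradient * features
        grad = np.array(features) * fore_grad
        if fit_intercept:
            # the bias column is implicitly 1, so its gradient is fore_gradient
            grad = np.append(grad, fore_grad)
        grad_sum = grad if grad_sum is None else grad_sum + grad
        counter += 1
    return grad_sum, counter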
def test_aggregate_add_square(self):
    res = HeteroFederatedAggregator.aggregate_add_square(
        self.table_list_a, self.table_list_b,
        self.table_list_a_square, self.table_list_b_square).collect()
    res_to_list = []
    for iterator in res:
        res_to_list.append(iterator[1])
    res = list(np.sort(np.array(res_to_list)))
    self.assertListEqual(self.list_add_square_result, res)
def test_aggregate_add(self):
    table_add_res = HeteroFederatedAggregator.aggregate_add(
        self.table_a, self.table_b)
    res = []
    for iterator in table_add_res.collect():
        res.append(iterator[1])
    res = np.sort(np.array(res))
    self.assertListEqual(self.add_a_b, list(res))
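# A plain-dict sketch of the aggregate_add semantics exercised by the test
# above (assumption: it joins the two tables on key and adds the values;
# keeping only keys present in both mirrors an inner join).
def aggregate_add_sketch(dict_a, dict_b):
    return {k: dict_a[k] + dict_b[k] for k in dict_a.keys() & dict_b.keys()}

assert aggregate_add_sketch({"x": 1, "y": 2}, {"x": 10, "y": 20}) == {"x": 11, "y": 22}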
def compute_gradient_and_loss(self, data_instance, fore_gradient,
                              encrypted_wx, en_sum_wx_square, fit_intercept):
    # compute gradient
    gradient = self.compute_gradient(data_instance, fore_gradient,
                                     fit_intercept)

    # compute loss
    half_ywx = encrypted_wx.join(data_instance,
                                 lambda wx, d: 0.5 * wx * int(d.label))
    half_ywx_join_en_sum_wx_square = half_ywx.join(en_sum_wx_square,
                                                   lambda yz, ez: (yz, ez))
    # __compute_loss takes no extra arguments, so it can be passed directly
    loss_partition = half_ywx_join_en_sum_wx_square.mapPartitions(
        self.__compute_loss)
    loss = HeteroFederatedAggregator.aggregate_mean(loss_partition)

    return gradient, loss
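# A minimal sketch of the private __compute_loss partition helper, assuming
# the usual second-order Taylor approximation of the logistic loss around
# wx = 0, which keeps the arithmetic additive and therefore Paillier-friendly:
#     log(1 + exp(-y * wx)) ~= log(2) - 0.5 * y * wx + 0.125 * (wx)^2
# The helper name and the (half_ywx, wx_square) tuple layout follow the join
# in compute_gradient_and_loss above; both are assumptions.
def __compute_loss_sketch(kv_iterator):
    loss_sum = None
    counter = 0
    for _, (half_ywx, wx_square) in kv_iterator:
        # half_ywx is 0.5 * y * wx (encrypted); wx_square is (wx)^2 (encrypted)
        row_loss = np.log(2) - half_ywx + 0.125 * wx_square
        loss_sum = row_loss if loss_sum is None else loss_sum + row_loss
        counter += 1
    return loss_sum, counter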
def fit(self, data_instances=None):
    """
    Train the hetero-LR model in the arbiter role.

    Parameters
    ----------
    data_instances: DTable of Instance, input data
    """
    LOGGER.info("Enter hetero_lr_arbiter fit")
    if data_instances:
        self.header = self.get_header(data_instances)
    else:
        self.header = []

    # Generate encryption key pair; only the arbiter holds the private key
    self.encrypt_operator.generate_key(self.key_length)
    public_key = self.encrypt_operator.get_public_key()
    LOGGER.info("public_key:{}".format(public_key))

    # remote sends an object to another party
    federation.remote(public_key,
                      name=self.transfer_variable.paillier_pubkey.name,
                      tag=self.transfer_variable.generate_transferid(
                          self.transfer_variable.paillier_pubkey),
                      role=consts.HOST,
                      idx=0)
    LOGGER.info("Remote public_key to host")
    federation.remote(public_key,
                      name=self.transfer_variable.paillier_pubkey.name,
                      tag=self.transfer_variable.generate_transferid(
                          self.transfer_variable.paillier_pubkey),
                      role=consts.GUEST,
                      idx=0)
    LOGGER.info("Remote public_key to guest")

    # get blocks until the remote object has been fetched
    batch_info = federation.get(
        name=self.transfer_variable.batch_info.name,
        tag=self.transfer_variable.generate_transferid(
            self.transfer_variable.batch_info),
        idx=0)
    LOGGER.info("Get batch_info from guest:{}".format(batch_info))
    self.batch_num = batch_info["batch_num"]

    is_stop = False
    self.n_iter_ = 0
    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:{}".format(self.n_iter_))
        batch_index = 0
        iter_loss = 0
        while batch_index < self.batch_num:
            LOGGER.info("batch:{}".format(batch_index))
            host_gradient = federation.get(
                name=self.transfer_variable.host_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_gradient,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get host_gradient from Host")
            guest_gradient = federation.get(
                name=self.transfer_variable.guest_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_gradient,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get guest_gradient from Guest")

            # aggregate gradients
            host_gradient = np.array(host_gradient)
            guest_gradient = np.array(guest_gradient)
            gradient = np.hstack((host_gradient, guest_gradient))
            LOGGER.info("gradient shape={}".format(gradient.shape))

            # decrypt gradient
            for i in range(gradient.shape[0]):
                gradient[i] = self.encrypt_operator.decrypt(gradient[i])

            # optimization
            optim_gradient = self.optimizer.apply_gradients(gradient)

            # separate optim_gradient according to the gradient sizes of
            # Host and Guest
            separate_optim_gradient = HeteroFederatedAggregator.separate(
                optim_gradient,
                [host_gradient.shape[0], guest_gradient.shape[0]])
            host_optim_gradient = separate_optim_gradient[0]
            guest_optim_gradient = separate_optim_gradient[1]
            LOGGER.info("host data feature dims:{}".format(
                np.array(host_optim_gradient).shape[0]))
            LOGGER.info("guest data feature dims:{}".format(
                np.array(guest_optim_gradient).shape[0]))

            federation.remote(
                host_optim_gradient,
                name=self.transfer_variable.host_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_optim_gradient,
                    self.n_iter_, batch_index),
                role=consts.HOST,
                idx=0)
            LOGGER.info("Remote host_optim_gradient to Host")
            federation.remote(
                guest_optim_gradient,
                name=self.transfer_variable.guest_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_optim_gradient,
                    self.n_iter_, batch_index),
                role=consts.GUEST,
                idx=0)
            LOGGER.info("Remote guest_optim_gradient to Guest")
            training_info = {
                "iteration": self.n_iter_,
                "batch_index": batch_index
            }
            self.perform_subtasks(**training_info)

            loss = federation.get(
                name=self.transfer_variable.loss.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.loss, self.n_iter_, batch_index),
                idx=0)
            de_loss = self.encrypt_operator.decrypt(loss)
            iter_loss += de_loss
            # LOGGER.info("Get loss from guest:{}".format(de_loss))

            batch_index += 1

        # check convergence on the mean loss of this iteration
        loss = iter_loss / self.batch_num
        LOGGER.info("iter loss:{}".format(loss))
        if self.converge_func.is_converge(loss):
            is_stop = True

        federation.remote(is_stop,
                          name=self.transfer_variable.is_stopped.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.is_stopped,
                              self.n_iter_, batch_index),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Remote is_stop to host:{}".format(is_stop))
        federation.remote(is_stop,
                          name=self.transfer_variable.is_stopped.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.is_stopped,
                              self.n_iter_, batch_index),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

        self.n_iter_ += 1
        if is_stop:
            LOGGER.info("Model converged, iter:{}".format(self.n_iter_))
            break

    LOGGER.info("Reached max iter {} or converged; training finished!".format(
        self.max_iter))
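# A minimal sketch of the loss-based convergence check the arbiter calls via
# self.converge_func.is_converge(loss). The class name, the eps default, and
# the absolute-difference criterion are illustrative assumptions; the project
# may use a relative or weight-based criterion instead.
class DiffConvergeSketch(object):
    def __init__(self, eps=1e-4):
        self.eps = eps
        self.pre_loss = None

    def is_converge(self, loss):
        # converged once two successive iteration losses differ by < eps
        converged = (self.pre_loss is not None
                     and abs(self.pre_loss - loss) < self.eps)
        self.pre_loss = loss
        return converged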
def fit(self, data_instance=None):
    # Generate encryption key pair; only the arbiter holds the private key
    self.encrypt_operator.generate_key(self.key_length)
    public_key = self.encrypt_operator.get_public_key()
    LOGGER.info("public_key:{}".format(public_key))

    federation.remote(public_key,
                      name=self.transfer_variable.paillier_pubkey.name,
                      tag=self.transfer_variable.generate_transferid(
                          self.transfer_variable.paillier_pubkey),
                      role=consts.HOST,
                      idx=0)
    LOGGER.info("Remote public_key to host")
    federation.remote(public_key,
                      name=self.transfer_variable.paillier_pubkey.name,
                      tag=self.transfer_variable.generate_transferid(
                          self.transfer_variable.paillier_pubkey),
                      role=consts.GUEST,
                      idx=0)
    LOGGER.info("Remote public_key to guest")

    batch_info = federation.get(
        name=self.transfer_variable.batch_info.name,
        tag=self.transfer_variable.generate_transferid(
            self.transfer_variable.batch_info),
        idx=0)
    LOGGER.info("Get batch_info from guest:{}".format(batch_info))
    self.batch_num = batch_info["batch_num"]

    is_stop = False
    self.n_iter_ = 0
    while self.n_iter_ < self.max_iter:
        LOGGER.info("iter:{}".format(self.n_iter_))
        batch_index = 0
        while batch_index < self.batch_num:
            LOGGER.info("batch:{}".format(batch_index))
            host_gradient = federation.get(
                name=self.transfer_variable.host_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_gradient,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get host_gradient from Host")
            guest_gradient = federation.get(
                name=self.transfer_variable.guest_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_gradient,
                    self.n_iter_, batch_index),
                idx=0)
            LOGGER.info("Get guest_gradient from Guest")

            # aggregate gradients
            host_gradient = np.array(host_gradient)
            guest_gradient = np.array(guest_gradient)
            gradient = np.hstack((host_gradient, guest_gradient))

            # decrypt gradient
            for i in range(gradient.shape[0]):
                gradient[i] = self.encrypt_operator.decrypt(gradient[i])

            # optimization
            optim_gradient = self.optimizer.apply_gradients(gradient)

            # separate optim_gradient according to the gradient sizes of
            # Host and Guest
            separate_optim_gradient = HeteroFederatedAggregator.separate(
                optim_gradient,
                [host_gradient.shape[0], guest_gradient.shape[0]])
            host_optim_gradient = separate_optim_gradient[0]
            guest_optim_gradient = separate_optim_gradient[1]

            federation.remote(
                host_optim_gradient,
                name=self.transfer_variable.host_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_optim_gradient,
                    self.n_iter_, batch_index),
                role=consts.HOST,
                idx=0)
            LOGGER.info("Remote host_optim_gradient to Host")
            federation.remote(
                guest_optim_gradient,
                name=self.transfer_variable.guest_optim_gradient.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_optim_gradient,
                    self.n_iter_, batch_index),
                role=consts.GUEST,
                idx=0)
            LOGGER.info("Remote guest_optim_gradient to Guest")

            loss = federation.get(
                name=self.transfer_variable.loss.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.loss, self.n_iter_, batch_index),
                idx=0)
            de_loss = self.encrypt_operator.decrypt(loss)
            LOGGER.info("Get loss from guest:{}".format(de_loss))

            # if converged, tell both parties to stop
            if self.converge_func.is_converge(de_loss):
                is_stop = True
            federation.remote(
                is_stop,
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped,
                    self.n_iter_, batch_index),
                role=consts.HOST,
                idx=0)
            LOGGER.info("Remote is_stop to host:{}".format(is_stop))
            federation.remote(
                is_stop,
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped,
                    self.n_iter_, batch_index),
                role=consts.GUEST,
                idx=0)
            LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

            batch_index += 1
            if is_stop:
                LOGGER.info("Model converged, iter:{}".format(self.n_iter_))
                break

        self.n_iter_ += 1
        if is_stop:
            break

    LOGGER.info("Reached max iter {}, training finished!".format(
        self.max_iter))
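# A sketch of the key handling the two fit() methods above rely on, using the
# python-paillier (phe) package as a stand-in for self.encrypt_operator (an
# assumption; the project's operator may wrap a different implementation).
# Host and Guest receive only the public key, so they can encrypt values and
# add ciphertexts, while decryption stays with the arbiter.
from phe import paillier

public_key, private_key = paillier.generate_paillier_keypair(n_length=1024)
enc_a, enc_b = public_key.encrypt(1.5), public_key.encrypt(2.5)
assert abs(private_key.decrypt(enc_a + enc_b) - 4.0) < 1e-9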
def test_separate(self):
    res = HeteroFederatedAggregator.separate(self.separate_data,
                                             self.separate_size_list)
    self.assertListEqual(res, self.separate_result)
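# A plain-Python sketch of the separate semantics tested above (assumption:
# it slices a flat gradient vector back into per-party pieces whose lengths
# are given by size_list, in order -- the inverse of the np.hstack in fit()).
def separate_sketch(value, size_list):
    pieces, offset = [], 0
    for size in size_list:
        pieces.append(value[offset:offset + size])
        offset += size
    return pieces

assert separate_sketch([1, 2, 3, 4, 5], [2, 3]) == [[1, 2], [3, 4, 5]]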
def test_aggregate_mean(self):
    res = HeteroFederatedAggregator.aggregate_mean(self.table_a)
    self.assertEqual(res, self.reduce_a)

    res = HeteroFederatedAggregator.aggregate_mean(self.table_d_tuple)
    self.assertListEqual(list(res), self.reduce_d_tuple)
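# A NumPy sketch of the aggregate_mean behavior the test above exercises
# (assumption: scalar values reduce to their mean, tuple values reduce to an
# element-wise mean; the DTable plumbing is omitted).
def aggregate_mean_sketch(values):
    arr = np.array(list(values))
    return arr.mean(axis=0) if arr.ndim > 1 else arr.mean()

assert aggregate_mean_sketch([1.0, 2.0, 3.0]) == 2.0
assert list(aggregate_mean_sketch([(1, 4), (3, 6)])) == [2.0, 5.0]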