def __init__(self, params: LogisticParam):
    super(HomoLRHost, self).__init__(params)

    self.learning_rate = params.learning_rate
    self.batch_size = params.batch_size
    self.encrypt_params = params.encrypt_param

    if self.encrypt_params.method in [consts.PAILLIER]:
        self.use_encrypt = True
    else:
        self.use_encrypt = False

    if self.use_encrypt and params.penalty != consts.L2_PENALTY:
        raise RuntimeError("Encrypted homo-lr supports L2 penalty only")

    if self.use_encrypt:
        self.gradient_operator = TaylorLogisticGradient()
        self.re_encrypt_batches = params.re_encrypt_batches
    else:
        self.gradient_operator = LogisticGradient()

    self.aggregator = HomoFederatedAggregator()
    self.party_weight = params.party_weight
    self.optimizer = Optimizer(learning_rate=self.learning_rate,
                               opt_method_name=params.optimizer)
    self.transfer_variable = HomoLRTransferVariable()
    self.initializer = Initializer()
    self.mini_batch_obj = None
    self.evaluator = Evaluation(classi_type=consts.BINARY)
    self.classes_ = [0, 1]
    self.has_sychronized_encryption = False
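# Why TaylorLogisticGradient is selected above: Paillier ciphertexts only support
# addition and scalar multiplication, so the sigmoid in the exact logistic gradient
# cannot be evaluated on encrypted values. A first-order Taylor expansion
# sigmoid(z) ~= 0.5 + 0.25 * z keeps the gradient linear in the encrypted terms.
# The sketch below is an illustrative plaintext version for labels y in {-1, +1};
# it is NOT FATE's TaylorLogisticGradient, and the function name is hypothetical.
import numpy as np

def taylor_logistic_gradient(X, y, coef, intercept=0.0):
    wx = X.dot(coef) + intercept      # linear scores, shape (n,)
    d = 0.25 * wx - 0.5 * y           # Taylor-approximated residual per sample
    grad_w = X.T.dot(d) / len(y)      # gradient w.r.t. weights
    grad_b = d.mean()                 # gradient w.r.t. intercept
    return grad_w, grad_b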
def __init__(self, network_embedding_params: NetworkEmbeddingParam):
    super(HeteroNEArbiter, self).__init__(network_embedding_params)
    self.converge_func = DiffConverge(network_embedding_params.eps)

    # attribute
    self.pre_loss = None
    self.batch_num = None
    self.transfer_variable = HeteroNETransferVariable()
    self.optimizer = Optimizer(network_embedding_params.learning_rate,
                               network_embedding_params.optimizer)
    self.key_length = network_embedding_params.encrypt_param.key_length
def _init_model(self, params):
    self.model_param = params
    self.alpha = params.alpha
    self.init_param_obj = params.init_param
    self.fit_intercept = self.init_param_obj.fit_intercept
    self.learning_rate = params.learning_rate
    self.encrypted_mode_calculator_param = params.encrypted_mode_calculator_param
    self.encrypted_calculator = None

    if params.penalty == consts.L1_PENALTY:
        self.updater = L1Updater(self.alpha, self.learning_rate)
    elif params.penalty == consts.L2_PENALTY:
        self.updater = L2Updater(self.alpha, self.learning_rate)
    else:
        self.updater = None

    self.eps = params.eps
    self.batch_size = params.batch_size
    self.max_iter = params.max_iter
    self.party_weight = params.party_weight
    self.penalty = params.penalty

    if params.encrypt_param.method == consts.PAILLIER:
        self.encrypt_operator = PaillierEncrypt()
    else:
        self.encrypt_operator = FakeEncrypt()

    if params.converge_func == 'diff':
        self.converge_func = convergence.DiffConverge(eps=self.eps)
    elif params.converge_func == 'weight_diff':
        self.converge_func = convergence.WeightDiffConverge(eps=self.eps)
    else:
        self.converge_func = convergence.AbsConverge(eps=self.eps)

    self.re_encrypt_batches = params.re_encrypt_batches
    self.predict_param = params.predict_param
    self.optimizer = Optimizer(params.learning_rate, params.optimizer)
    self.key_length = params.encrypt_param.key_length
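# The converge_func choices selected above differ only in what they compare.
# Minimal sketch of the assumed behaviour (illustrative, not the convergence
# module's implementation; class names are hypothetical):
class _DiffConverge:
    """Stop when the absolute change of the loss between iterations is below eps."""
    def __init__(self, eps=1e-5):
        self.eps = eps
        self.pre_loss = None

    def is_converge(self, loss):
        if self.pre_loss is None:
            self.pre_loss = loss
            return False
        converged = abs(self.pre_loss - loss) < self.eps
        self.pre_loss = loss
        return converged


class _AbsConverge:
    """Stop as soon as the loss itself drops to eps or below."""
    def __init__(self, eps=1e-5):
        self.eps = eps

    def is_converge(self, loss):
        return loss <= self.eps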
class TestHomoLR(unittest.TestCase):
    def setUp(self):
        self.guest_X = np.array([[1, 2, 3, 4, 5],
                                 [3, 2, 4, 5, 1],
                                 [2, 2, 3, 1, 1]]) / 10
        self.guest_Y = np.array([[1], [1], [-1]])
        self.values = []
        for idx, x in enumerate(self.guest_X):
            inst = Instance(inst_id=idx, features=x, label=self.guest_Y[idx])
            self.values.append((idx, inst))

        self.host_X = np.array([[1, 1.2, 3.1, 4, 5],
                                [2.3, 2, 4, 5.3, 1],
                                [2, 2.2, 1.3, 1, 1.6]]) / 10
        self.host_Y = np.array([[-1], [1], [-1]])
        self.host_values = []
        for idx, x in enumerate(self.host_X):
            inst = Instance(inst_id=idx, features=x, label=self.host_Y[idx])
            # note: host instances are appended to self.values here,
            # so self.host_values stays empty
            self.values.append((idx, inst))

        self.max_iter = 10
        self.alpha = 0.01
        self.learning_rate = 0.01
        optimizer = 'SGD'
        self.gradient_operator = LogisticGradient()
        self.initializer = Initializer()
        self.fit_intercept = True
        self.init_param_obj = InitParam(fit_intercept=self.fit_intercept)
        self.updater = L2Updater(self.alpha, self.learning_rate)
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=optimizer)
        self.__init_model()

    def __init_model(self):
        model_shape = self.guest_X.shape[1]
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        return w

    def __init_host_model(self):
        model_shape = self.host_X.shape[1]
        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.host_coef_ = w[:-1]
            self.host_intercept_ = w[-1]
        else:
            self.host_coef_ = w
            self.host_intercept_ = 0
        return w

    def test_one_iter(self):
        w = self.__init_model()
        print("before training, coef: {}, intercept: {}".format(
            self.coef_, self.intercept_))
        self.assertEqual(self.coef_.shape[0], self.guest_X.shape[1])
        grad, loss = self.gradient_operator.compute(
            self.values,
            coef=self.coef_,
            intercept=self.intercept_,
            fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.coef_)
        loss = loss + loss_norm
        delta_grad = self.optimizer.apply_gradients(grad)
        self.update_model(delta_grad)
        print("After training, coef: {}, intercept: {}, loss: {}".format(
            self.coef_, self.intercept_, loss))

    def test_multi_iter(self):
        w = self.__init_model()
        loss_hist = [100]
        for iter_num in range(self.max_iter):
            grad, loss = self.gradient_operator.compute(
                self.values,
                coef=self.coef_,
                intercept=self.intercept_,
                fit_intercept=self.fit_intercept)
            loss_norm = self.updater.loss_norm(self.coef_)
            loss = loss + loss_norm
            delta_grad = self.optimizer.apply_gradients(grad)
            self.update_model(delta_grad)
            self.assertTrue(loss <= loss_hist[-1])
            loss_hist.append(loss)
        print(loss_hist)

    def test_host_iter(self):
        w = self.__init_host_model()
        print("before training, coef: {}, intercept: {}".format(
            self.coef_, self.intercept_))
        self.assertEqual(self.host_coef_.shape[0], self.host_X.shape[1])
        grad, loss = self.gradient_operator.compute(
            self.host_values,
            coef=self.host_coef_,
            intercept=self.intercept_,
            fit_intercept=self.fit_intercept)
        loss_norm = self.updater.loss_norm(self.coef_)
        # print("***********************************************")
        # print(loss, loss_norm)
        self.assertTrue(loss is None)

    def update_model(self, gradient):
        LOGGER.debug(
            "In update_model function, shape of coef: {}, shape of gradient: {}"
            .format(np.shape(self.coef_), np.shape(gradient)))
        if self.fit_intercept:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient[:-1])
            else:
                self.coef_ = self.coef_ - gradient[:-1]
            self.intercept_ -= gradient[-1]
        else:
            if self.updater is not None:
                self.coef_ = self.updater.update_coef(self.coef_, gradient)
            else:
                self.coef_ = self.coef_ - gradient
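# Conventional unittest entry point (assumed, not part of the original snippet)
# so the TestHomoLR cases above can be run directly with `python <this file>`.
if __name__ == '__main__':
    unittest.main()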
class HeteroLRArbiter(BaseLogisticRegression):
    def __init__(self, logistic_params):
        # LogisticParamChecker.check_param(logistic_params)
        super(HeteroLRArbiter, self).__init__(logistic_params)
        self.converge_func = DiffConverge(logistic_params.eps)

        # attribute
        self.pre_loss = None
        self.batch_num = None
        self.transfer_variable = HeteroLRTransferVariable()
        self.optimizer = Optimizer(logistic_params.learning_rate,
                                   logistic_params.optimizer)
        self.key_length = logistic_params.encrypt_param.key_length

    def perform_subtasks(self, **training_info):
        """
        Perform any task that the arbiter is responsible for.

        This 'perform_subtasks' function serves as a handler for conducting any
        task that the arbiter is responsible for. For example, the
        'perform_subtasks' function of the 'HeteroDNNLRArbiter' class located in
        'hetero_dnn_lr_arbiter.py' performs work related to updating/training
        the local neural networks of the guest or host.

        For this particular class (i.e., 'HeteroLRArbiter'), which serves as the
        base arbiter class for the neural-networks-based hetero-logistic-regression
        model, the 'perform_subtasks' function does nothing. In other words, no
        subtask is performed by this arbiter.

        :param training_info: a dictionary holding training information
        """
        pass

    def fit(self, data_instances=None):
        """
        Train the LR model for the arbiter role.

        Parameters
        ----------
        data_instances: DTable of Instance, input data
        """
        LOGGER.info("Enter hetero_lr_arbiter fit")
        if data_instances:
            # self.header = data_instance.schema.get('header')
            self.header = self.get_header(data_instances)
        else:
            self.header = []

        # Generate encryption keys
        self.encrypt_operator.generate_key(self.key_length)
        public_key = self.encrypt_operator.get_public_key()
        LOGGER.info("public_key:{}".format(public_key))

        # remote() sends an object to another party
        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("remote public_key to host")

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("remote public_key to guest")

        # get() blocks until the remote object is fetched
        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:{}".format(batch_info))
        self.batch_num = batch_info["batch_num"]

        is_stop = False
        self.n_iter_ = 0
        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_index = 0
            iter_loss = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                host_gradient = federation.get(
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_gradient from Host")

                guest_gradient = federation.get(
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get guest_gradient from Guest")

                # aggregate gradients
                host_gradient = np.array(host_gradient)
                guest_gradient = np.array(guest_gradient)
                gradient = np.hstack((host_gradient, guest_gradient))
                LOGGER.info("gradient shape={}".format(gradient.shape))

                # decrypt gradient
                for i in range(gradient.shape[0]):
                    gradient[i] = self.encrypt_operator.decrypt(gradient[i])

                # optimization
                optim_gradient = self.optimizer.apply_gradients(gradient)

                # separate optim_gradient according to the gradient sizes of Host and Guest
                separate_optim_gradient = HeteroFederatedAggregator.separate(
                    optim_gradient,
                    [host_gradient.shape[0], guest_gradient.shape[0]])
                host_optim_gradient = separate_optim_gradient[0]
                guest_optim_gradient = separate_optim_gradient[1]
                LOGGER.info("host data feature dims:{}".format(
                    np.array(host_optim_gradient).shape[0]))
                LOGGER.info("guest data feature dims:{}".format(
                    np.array(guest_optim_gradient).shape[0]))

                federation.remote(
                    host_optim_gradient,
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote host_optim_gradient to Host")

                federation.remote(
                    guest_optim_gradient,
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote guest_optim_gradient to Guest")

                training_info = {
                    "iteration": self.n_iter_,
                    "batch_index": batch_index
                }
                self.perform_subtasks(**training_info)

                loss = federation.get(
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_, batch_index),
                    idx=0)
                de_loss = self.encrypt_operator.decrypt(loss)
                iter_loss += de_loss
                # LOGGER.info("Get loss from guest:{}".format(de_loss))

                batch_index += 1

            # check convergence
            loss = iter_loss / self.batch_num
            LOGGER.info("iter loss:{}".format(loss))
            if self.converge_func.is_converge(loss):
                is_stop = True

            federation.remote(is_stop,
                              name=self.transfer_variable.is_stopped.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.is_stopped,
                                  self.n_iter_, batch_index),
                              role=consts.HOST,
                              idx=0)
            LOGGER.info("Remote is_stop to host:{}".format(is_stop))

            federation.remote(is_stop,
                              name=self.transfer_variable.is_stopped.name,
                              tag=self.transfer_variable.generate_transferid(
                                  self.transfer_variable.is_stopped,
                                  self.n_iter_, batch_index),
                              role=consts.GUEST,
                              idx=0)
            LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

            self.n_iter_ += 1
            if is_stop:
                LOGGER.info("Model is converged, iter:{}".format(self.n_iter_))
                break

        LOGGER.info(
            "Reach max iter {} or converge, train model finish!".format(
                self.max_iter))
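# Minimal sketch of what HeteroFederatedAggregator.separate is assumed to do in the
# arbiter loop above: split the concatenated, optimized gradient back into its host
# and guest parts by their original sizes (illustrative only; function name here is
# hypothetical, not FATE's API).
def separate(gradient, sizes):
    parts, start = [], 0
    for size in sizes:
        parts.append(gradient[start:start + size])
        start += size
    return parts

# usage: host_grad, guest_grad = separate(optim_gradient, [host_dim, guest_dim])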
class HeteroNEGuest(BaseNetworkEmbeddig):
    def __init__(self, network_embedding_params):
        super(HeteroNEGuest, self).__init__(network_embedding_params)
        self.transfer_variable = HeteroNETransferVariable()
        self.data_batch_count = []
        self.encrypted_calculator = None
        self.guest_forward = None
        self.local_optimizer = Optimizer(network_embedding_params.learning_rate,
                                         network_embedding_params.optimizer)

    def aggregate_forward(self, host_forward):
        """
        Compute e_guest.dot(e_host)

        Parameters
        ----------
        host_forward: DTable. key, en_e(host)
        """
        aggregate_forward_res = self.guest_forward.join(
            host_forward,
            lambda e1, e2: (fate_operator.dot(e1[1], e2[1]),
                            math.pow(fate_operator.dot(e1[1], e2[1]), 2)))
        return aggregate_forward_res

    @staticmethod
    def load_data(data_instance):
        """
        Transform pair data to an Instance

        Parameters
        ----------
        data_instance: tuple (node, label)
        """
        return Instance(features=data_instance[0], label=data_instance[1])

    def fit(self, data_instances, node2id, local_instances=None, common_nodes=None):
        """
        Train node embeddings for the guest role

        Parameters
        ----------
        data_instances: DTable of target node and label, input data
        node2id: a dict which maps node name to id
        """
        LOGGER.info("samples number:{}".format(data_instances.count()))
        LOGGER.info("Enter network embedding procedure:")
        self.n_node = len(node2id)
        LOGGER.info("Bank A has {} nodes".format(self.n_node))

        data_instances = data_instances.mapValues(HeteroNEGuest.load_data)
        LOGGER.info("Transform input data to train instance")

        public_key = federation.get(
            name=self.transfer_variable.paillier_pubkey.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey),
            idx=0)
        LOGGER.info("Get public_key from arbiter:{}".format(public_key))
        self.encrypt_operator.set_public_key(public_key)

        # hetero network embedding
        LOGGER.info("Generate mini-batch from input data")
        mini_batch_obj = MiniBatch(data_instances, batch_size=self.batch_size)
        batch_num = mini_batch_obj.batch_nums
        LOGGER.info("samples number:{}".format(data_instances.count()))
        if self.batch_size == -1:
            LOGGER.info(
                "batch size is -1, set it to the number of data in data_instances")
            self.batch_size = data_instances.count()

        # horizontal federated learning
        LOGGER.info("Generate mini-batch for local instances in guest")
        mini_batch_obj_local = MiniBatch(local_instances, batch_size=self.batch_size)
        local_batch_num = mini_batch_obj_local.batch_nums
        common_node_instances = eggroll.parallelize(
            ((node, node) for node in common_nodes),
            include_key=True,
            name='common_nodes')

        batch_info = {'batch_size': self.batch_size, "batch_num": batch_num}
        LOGGER.info("batch_info:{}".format(batch_info))
        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Remote batch_info to Host")

        federation.remote(batch_info,
                          name=self.transfer_variable.batch_info.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.batch_info),
                          role=consts.ARBITER,
                          idx=0)
        LOGGER.info("Remote batch_info to Arbiter")

        self.encrypted_calculator = [
            EncryptModeCalculator(
                self.encrypt_operator,
                self.encrypted_mode_calculator_param.mode,
                self.encrypted_mode_calculator_param.re_encrypted_rate)
            for _ in range(batch_num)
        ]

        LOGGER.info("Start initialize model.")
        self.embedding_ = self.initializer.init_model((self.n_node, self.dim),
                                                      self.init_param_obj)
        LOGGER.info("Embedding shape={}".format(self.embedding_.shape))

        is_send_all_batch_index = False
        self.n_iter_ = 0
        index_data_inst_map = {}

        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))

            # horizontally federated learning
            local_batch_data_generator = mini_batch_obj_local.mini_batch_data_generator()
            total_loss = 0
            local_batch_num = 0
            LOGGER.info("Enter the horizontally federated learning procedure:")
            for local_batch_data in local_batch_data_generator:
                n = local_batch_data.count()
                # LOGGER.info("Local batch data count:{}".format(n))
                E_Y = self.compute_local_embedding(local_batch_data,
                                                   self.embedding_, node2id)
                local_grads_e1, local_grads_e2, local_loss = \
                    self.local_gradient_operator.compute(E_Y, 'E_1')
                local_grads_e1 = local_grads_e1.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                local_grads_e2 = local_grads_e2.mapValues(
                    lambda g: self.local_optimizer.apply_gradients(g / n))
                e1id_join_grads = local_batch_data.join(
                    local_grads_e1, lambda v, g: (node2id[v[0]], g))
                e2id_join_grads = local_batch_data.join(
                    local_grads_e2, lambda v, g: (node2id[v[1]], g))
                self.update_model(e1id_join_grads)
                self.update_model(e2id_join_grads)

                local_loss = local_loss / n
                local_batch_num += 1
                total_loss += local_loss
                # LOGGER.info("gradient count:{}".format(e1id_join_grads.count()))

            guest_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                guest_common_embedding,
                name=self.transfer_variable.guest_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_common_embedding,
                    self.n_iter_, 0),
                role=consts.ARBITER,
                idx=0)
            LOGGER.info("Remote the embedding of common node to arbiter!")

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 0),
                idx=0)
            LOGGER.info("Get the aggregated embedding of common node from arbiter!")

            self.update_common_nodes(common_embedding, common_nodes, node2id)

            total_loss /= local_batch_num
            LOGGER.info("Iter {}, horizontally federated learning loss: {}".format(
                self.n_iter_, total_loss))

            # vertically federated learning
            # each iter will get the same batch_data_generator
            LOGGER.info("Enter the vertically federated learning:")
            batch_data_generator = mini_batch_obj.mini_batch_data_generator(
                result='index')

            batch_index = 0
            for batch_data_index in batch_data_generator:
                LOGGER.info("batch:{}".format(batch_index))

                # only need to send once
                if not is_send_all_batch_index:
                    LOGGER.info("remote mini-batch index to Host")
                    federation.remote(
                        batch_data_index,
                        name=self.transfer_variable.batch_data_index.name,
                        tag=self.transfer_variable.generate_transferid(
                            self.transfer_variable.batch_data_index,
                            self.n_iter_, batch_index),
                        role=consts.HOST,
                        idx=0)
                    if batch_index >= mini_batch_obj.batch_nums - 1:
                        is_send_all_batch_index = True

                # Get mini-batch train data; cache it to avoid joining in the next iteration
                if len(index_data_inst_map) < batch_num:
                    batch_data_inst = data_instances.join(
                        batch_data_index, lambda data_inst, index: data_inst)
                    index_data_inst_map[batch_index] = batch_data_inst
                else:
                    batch_data_inst = index_data_inst_map[batch_index]

                # For inductive learning: transform node attributes to node embedding
                # self.transform(batch_data_inst)
                self.guest_forward = self.compute_forward(
                    batch_data_inst, self.embedding_, node2id, batch_index)

                host_forward = federation.get(
                    name=self.transfer_variable.host_forward_dict.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_forward_dict,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get host_forward from host")

                aggregate_forward_res = self.aggregate_forward(host_forward)
                en_aggregate_ee = aggregate_forward_res.mapValues(lambda v: v[0])
                en_aggregate_ee_square = aggregate_forward_res.mapValues(lambda v: v[1])

                # compute [[d]]
                if self.gradient_operator is None:
                    self.gradient_operator = HeteroNetworkEmbeddingGradient(
                        self.encrypt_operator)
                fore_gradient = self.gradient_operator.compute_fore_gradient(
                    batch_data_inst, en_aggregate_ee)

                host_gradient = self.gradient_operator.compute_gradient(
                    self.guest_forward.mapValues(lambda v: Instance(features=v[1])),
                    fore_gradient)
                federation.remote(
                    host_gradient,
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote host_gradient to arbiter")

                composed_data_inst = host_forward.join(
                    batch_data_inst,
                    lambda hf, d: Instance(features=hf[1], label=d.label))
                guest_gradient, loss = self.gradient_operator.compute_gradient_and_loss(
                    composed_data_inst, fore_gradient, en_aggregate_ee,
                    en_aggregate_ee_square)
                federation.remote(
                    guest_gradient,
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote guest_gradient to arbiter")

                optim_guest_gradient = federation.get(
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    idx=0)
                LOGGER.info("Get optim_guest_gradient from arbiter")

                # update node embedding
                LOGGER.info("Update node embedding")
                nodeid_join_gradient = batch_data_inst.join(
                    optim_guest_gradient,
                    lambda instance, gradient: (node2id[instance.features], gradient))
                self.update_model(nodeid_join_gradient)

                # update local model that transforms attributes to node embedding
                training_info = {
                    'iteration': self.n_iter_,
                    'batch_index': batch_index
                }
                self.update_local_model(fore_gradient, batch_data_inst,
                                        self.embedding_, **training_info)

                # the loss needs to be encrypted
                federation.remote(
                    loss,
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_, batch_index),
                    role=consts.ARBITER,
                    idx=0)
                LOGGER.info("Remote loss to arbiter")

                # convergence of the loss is checked in the arbiter
                batch_index += 1

                # remove temporary resources
                rubbish_list = [
                    host_forward, aggregate_forward_res, en_aggregate_ee,
                    en_aggregate_ee_square, fore_gradient, self.guest_forward
                ]
                rubbish_clear(rubbish_list)

            guest_common_embedding = common_node_instances.mapValues(
                lambda node: self.embedding_[node2id[node]])
            federation.remote(
                guest_common_embedding,
                name=self.transfer_variable.guest_common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.guest_common_embedding,
                    self.n_iter_, 1),
                role=consts.ARBITER,
                idx=0)

            common_embedding = federation.get(
                name=self.transfer_variable.common_embedding.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.common_embedding, self.n_iter_, 1),
                idx=0)
            self.update_common_nodes(common_embedding, common_nodes, node2id)

            is_stopped = federation.get(
                name=self.transfer_variable.is_stopped.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.is_stopped, self.n_iter_),
                idx=0)
            LOGGER.info("Get is_stop flag from arbiter:{}".format(is_stopped))

            self.n_iter_ += 1
            if is_stopped:
                LOGGER.info(
                    "Get stop signal from arbiter, model is converged, iter:{}"
                    .format(self.n_iter_))
                break

        embedding_table = eggroll.table(name='guest',
                                        namespace='node_embedding',
                                        partition=10)
        id2node = dict(zip(node2id.values(), node2id.keys()))
        for id, embedding in enumerate(self.embedding_):
            embedding_table.put(id2node[id], embedding)
        embedding_table.save_as(name='guest',
                                namespace='node_embedding',
                                partition=10)

        LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))
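# Plain-numpy illustration of the per-sample quantity aggregate_forward() builds in
# the loop above: the guest/host embedding inner product and its square (shown here
# in plaintext; in the real flow the host side is Paillier-encrypted). The function
# name is hypothetical, for illustration only.
import numpy as np

def aggregate_forward_local(e_guest, e_host):
    ee = float(np.dot(e_guest, e_host))
    return ee, ee ** 2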
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]
        self.evaluator = Evaluation()
        self.header = []
        self.penalty = params.penalty
        self.loss_history = []
        self.is_converged = False

    def fit(self, data_instances):
        self._abnormal_detection(data_instances)

        self.header = data_instances.schema.get('header')  # ['x1', 'x2', 'x3' ... ]
        self.__init_parameters()
        self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0

            for batch_data in batch_data_generator:
                n = batch_data.count()
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                delta_grad = self.optimizer.apply_gradients(grad)
                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            self.loss_history.append(total_loss)
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))

            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))
            if converge_flag:
                self.is_converged = True
                break

        self.show_meta()
        self.show_model()
        LOGGER.debug("in fit self coef: {}".format(self.coef_))
        return data_instances

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = data_overview.get_features_shape(data_instances)
        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        LOGGER.debug("coef: {}, intercept: {}".format(self.coef_, self.intercept_))

        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x: (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
class HeteroLRArbiter(BaseLogisticRegression):
    def __init__(self, logistic_params):
        super(HeteroLRArbiter, self).__init__(logistic_params)
        self.converge_func = DiffConverge(logistic_params.eps)

        # attribute
        self.pre_loss = None
        self.batch_num = None
        self.transfer_variable = HeteroLRTransferVariable()
        self.optimizer = Optimizer(logistic_params.learning_rate,
                                   logistic_params.optimizer)
        self.key_length = logistic_params.encrypt_param.key_length

    def fit(self, data_instance=None):
        # Generate encryption keys
        self.encrypt_operator.generate_key(self.key_length)
        public_key = self.encrypt_operator.get_public_key()
        LOGGER.info("public_key:{}".format(public_key))

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("remote public_key to host")

        federation.remote(public_key,
                          name=self.transfer_variable.paillier_pubkey.name,
                          tag=self.transfer_variable.generate_transferid(
                              self.transfer_variable.paillier_pubkey),
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("remote public_key to guest")

        batch_info = federation.get(
            name=self.transfer_variable.batch_info.name,
            tag=self.transfer_variable.generate_transferid(
                self.transfer_variable.batch_info),
            idx=0)
        LOGGER.info("Get batch_info from guest:{}".format(batch_info))
        self.batch_num = batch_info["batch_num"]

        is_stop = False
        self.n_iter_ = 0
        while self.n_iter_ < self.max_iter:
            LOGGER.info("iter:{}".format(self.n_iter_))
            batch_index = 0
            while batch_index < self.batch_num:
                LOGGER.info("batch:{}".format(batch_index))
                host_gradient = federation.get(
                    name=self.transfer_variable.host_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get host_gradient from Host")

                guest_gradient = federation.get(
                    name=self.transfer_variable.guest_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_gradient, self.n_iter_,
                        batch_index),
                    idx=0)
                LOGGER.info("Get guest_gradient from Guest")

                # aggregate gradients
                host_gradient = np.array(host_gradient)
                guest_gradient = np.array(guest_gradient)
                gradient = np.hstack((host_gradient, guest_gradient))

                # decrypt gradient
                for i in range(gradient.shape[0]):
                    gradient[i] = self.encrypt_operator.decrypt(gradient[i])

                # optimization
                optim_gradient = self.optimizer.apply_gradients(gradient)

                # separate optim_gradient according to the gradient sizes of Host and Guest
                separate_optim_gradient = HeteroFederatedAggregator.separate(
                    optim_gradient,
                    [host_gradient.shape[0], guest_gradient.shape[0]])
                host_optim_gradient = separate_optim_gradient[0]
                guest_optim_gradient = separate_optim_gradient[1]

                federation.remote(
                    host_optim_gradient,
                    name=self.transfer_variable.host_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote host_optim_gradient to Host")

                federation.remote(
                    guest_optim_gradient,
                    name=self.transfer_variable.guest_optim_gradient.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.guest_optim_gradient,
                        self.n_iter_, batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote guest_optim_gradient to Guest")

                loss = federation.get(
                    name=self.transfer_variable.loss.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.loss, self.n_iter_, batch_index),
                    idx=0)
                de_loss = self.encrypt_operator.decrypt(loss)
                LOGGER.info("Get loss from guest:{}".format(de_loss))

                # check convergence
                if self.converge_func.is_converge(de_loss):
                    is_stop = True

                federation.remote(
                    is_stop,
                    name=self.transfer_variable.is_stopped.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.is_stopped, self.n_iter_,
                        batch_index),
                    role=consts.HOST,
                    idx=0)
                LOGGER.info("Remote is_stop to host:{}".format(is_stop))

                federation.remote(
                    is_stop,
                    name=self.transfer_variable.is_stopped.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.is_stopped, self.n_iter_,
                        batch_index),
                    role=consts.GUEST,
                    idx=0)
                LOGGER.info("Remote is_stop to guest:{}".format(is_stop))

                batch_index += 1
                if is_stop:
                    LOGGER.info("Model is converged, iter:{}".format(self.n_iter_))
                    break

            self.n_iter_ += 1
            if is_stop:
                break

        LOGGER.info("Reach max iter {}, train model finish!".format(self.max_iter))
class HomoLRGuest(BaseLogisticRegression):
    def __init__(self, params: LogisticParam):
        """
        :param penalty: l1 or l2
        :param alpha:
        :param lr:
        :param eps:
        :param max_iter:
        :param optim_method: must be in ['sgd', 'RMSProp', 'Adam', 'AdaGrad']
        :param batch_size: only used when optim_method is mini-batch; the mini-batch size
        """
        super(HomoLRGuest, self).__init__(params)
        self.learning_rate = params.learning_rate
        self.aggregator = HomoFederatedAggregator
        self.gradient_operator = LogisticGradient()
        self.party_weight = params.party_weight
        self.optimizer = Optimizer(learning_rate=self.learning_rate,
                                   opt_method_name=params.optimizer)
        self.transfer_variable = HomoLRTransferVariable()
        self.initializer = Initializer()
        self.classes_ = [0, 1]
        self.evaluator = Evaluation()

    def fit(self, data_instances):
        LOGGER.info("parameters: alpha: {}, eps: {}, max_iter: {}, "
                    "batch_size: {}".format(self.alpha, self.eps,
                                            self.max_iter, self.batch_size))
        self.__init_parameters()

        w = self.__init_model(data_instances)

        mini_batch_obj = MiniBatch(data_inst=data_instances, batch_size=self.batch_size)
        for iter_num in range(self.max_iter):
            # mini-batch
            # LOGGER.debug("Enter iter_num: {}".format(iter_num))
            batch_data_generator = mini_batch_obj.mini_batch_data_generator()
            total_loss = 0
            batch_num = 0
            for batch_data in batch_data_generator:
                f = functools.partial(self.gradient_operator.compute,
                                      coef=self.coef_,
                                      intercept=self.intercept_,
                                      fit_intercept=self.fit_intercept)
                grad_loss = batch_data.mapPartitions(f)
                n = grad_loss.count()
                grad, loss = grad_loss.reduce(self.aggregator.aggregate_grad_loss)
                grad /= n
                loss /= n
                if self.updater is not None:
                    loss_norm = self.updater.loss_norm(self.coef_)
                    total_loss += (loss + loss_norm)
                # LOGGER.debug("before update: {}".format(grad))
                delta_grad = self.optimizer.apply_gradients(grad)
                # LOGGER.debug("after apply: {}".format(delta_grad))
                self.update_model(delta_grad)
                batch_num += 1

            total_loss /= batch_num
            w = self.merge_model()
            LOGGER.info("iter: {}, loss: {}".format(iter_num, total_loss))

            # send model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_model, iter_num)
            federation.remote(w,
                              name=self.transfer_variable.guest_model.name,
                              tag=model_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # send loss
            loss_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.guest_loss, iter_num)
            federation.remote(total_loss,
                              name=self.transfer_variable.guest_loss.name,
                              tag=loss_transfer_id,
                              role=consts.ARBITER,
                              idx=0)

            # recv model
            model_transfer_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.final_model, iter_num)
            w = federation.get(name=self.transfer_variable.final_model.name,
                               tag=model_transfer_id,
                               idx=0)
            w = np.array(w)
            # LOGGER.debug("Received final model: {}".format(w))
            self.set_coef_(w)

            # recv converge flag
            converge_flag_id = self.transfer_variable.generate_transferid(
                self.transfer_variable.converge_flag, iter_num)
            converge_flag = federation.get(
                name=self.transfer_variable.converge_flag.name,
                tag=converge_flag_id,
                idx=0)

            self.n_iter_ = iter_num
            LOGGER.debug("converge flag is :{}".format(converge_flag))
            if converge_flag:
                # self.save_model(w)
                break

        # LOGGER.info("training finish, final coef: {}, final intercept: {}".format(
        #     self.coef_, self.intercept_))

    def __init_parameters(self):
        party_weight_id = self.transfer_variable.generate_transferid(
            self.transfer_variable.guest_party_weight)
        federation.remote(self.party_weight,
                          name=self.transfer_variable.guest_party_weight.name,
                          tag=party_weight_id,
                          role=consts.ARBITER,
                          idx=0)
        # LOGGER.debug("party weight sent")
        LOGGER.info("Finish initialize parameters")

    def __init_model(self, data_instances):
        model_shape = self.get_features_shape(data_instances)
        LOGGER.info("Initialized model shape is {}".format(model_shape))

        w = self.initializer.init_model(model_shape, init_params=self.init_param_obj)
        if self.fit_intercept:
            self.coef_ = w[:-1]
            self.intercept_ = w[-1]
        else:
            self.coef_ = w
            self.intercept_ = 0
        # LOGGER.debug("Initialed model")
        return w

    def predict(self, data_instances, predict_param):
        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
        pred_prob = wx.mapValues(lambda x: activation.sigmoid(x))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            predict_result = data_instances.mapValues(lambda x: x.label)
            predict_result = predict_result.join(pred_prob, lambda x, y: (x, y))
        else:
            predict_result = data_instances.mapValues(lambda x: (x.label, None))

        predict_result = predict_result.join(pred_label, lambda x, y: (x[0], x[1], y))
        return predict_result

    def set_flowid(self, flowid=0):
        self.transfer_variable.set_flowid(flowid)
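# Local, plain-numpy illustration of the predict() flow above (assumed behaviour,
# without DTable/eggroll): linear score -> sigmoid -> threshold. The function name
# is hypothetical, for illustration only.
import numpy as np

def predict_local(X, coef, intercept, threshold=0.5):
    wx = X.dot(coef) + intercept            # linear scores
    prob = 1.0 / (1.0 + np.exp(-wx))        # sigmoid probabilities
    label = (prob > threshold).astype(int)  # thresholded class labels
    return prob, label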