def aggregate_loss(self, transfer_variable, iter_num, party_weights, host_use_encryption):
    """
    Collect the per-party losses of one iteration and return their weighted mean.

    Parameters
    ----------
    transfer_variable: object exposing ``guest_loss``, ``host_loss`` and
        ``generate_transferid``.
    iter_num: iteration number, used to build the transfer tags.
    party_weights: list-like of weights; slot 0 is the guest and slot
        ``i + 1`` is the i-th host.
    host_use_encryption: one flag per host. A host whose flag is truthy is
        skipped and its weight is excluded from the average.

    Returns
    -------
    The weighted sum of the received losses divided by the sum of the
    weights that took part.
    """
    guest_loss_id = transfer_variable.generate_transferid(
        transfer_variable.guest_loss, iter_num)
    guest_loss = federation.get(name=transfer_variable.guest_loss.name,
                                tag=guest_loss_id,
                                idx=0)
    LOGGER.info("Received guest loss")

    host_loss_id = transfer_variable.generate_transferid(
        transfer_variable.host_loss, iter_num)
    # Work on a copy so zeroing skipped hosts never touches the caller's weights.
    loss_party_weight = party_weights.copy()

    total_loss = loss_party_weight[0] * guest_loss
    for idx, use_encryption in enumerate(host_use_encryption):
        # Slot 0 belongs to the guest, so host ``idx`` is stored at ``idx + 1``.
        # Indexing with ``idx`` would clear/apply the wrong party's weight.
        weight_idx = idx + 1
        if use_encryption:
            loss_party_weight[weight_idx] = 0
            continue
        host_loss = federation.get(name=transfer_variable.host_loss.name,
                                   tag=host_loss_id,
                                   idx=idx)
        LOGGER.info("Received loss from {}th host".format(idx))
        total_loss += loss_party_weight[weight_idx] * host_loss

    total_loss /= sum(loss_party_weight)
    return total_loss
def aggregate_model(self, transfer_variable, iter_num, party_weights, host_encrypter):
    """
    Receive the guest model and all host models of one iteration and return
    their weighted sum.

    Parameters
    ----------
    transfer_variable: object exposing ``guest_model``, ``host_model`` and
        ``generate_transferid``.
    iter_num: iteration number, used to build the transfer tags.
    party_weights: indexable weights; slot 0 weights the guest model and
        slot ``i + 1`` weights the i-th host model.
    host_encrypter: indexable collection with one encrypter per host; its
        ``decrypt_list`` is applied to that host's model.

    Returns
    -------
    numpy array holding the weighted sum of all models.
    """
    # Step 1: receive the guest model.
    guest_tag = transfer_variable.generate_transferid(
        transfer_variable.guest_model, iter_num)
    guest_weights = federation.get(name=transfer_variable.guest_model.name,
                                   tag=guest_tag,
                                   idx=0)
    guest_weights = np.array(guest_weights)
    LOGGER.info("received guest model")

    # Step 2: receive the models of all hosts in one call (idx=-1).
    host_tag = transfer_variable.generate_transferid(
        transfer_variable.host_model, iter_num)
    received_host_models = federation.get(name=transfer_variable.host_model.name,
                                          tag=host_tag,
                                          idx=-1)
    LOGGER.info("recevied host model")

    # Step 3: accumulate the weighted models.
    aggregated = guest_weights * party_weights[0]
    for host_idx, raw_model in enumerate(received_host_models):
        decrypted = host_encrypter[host_idx].decrypt_list(raw_model)
        aggregated += party_weights[host_idx + 1] * np.array(decrypted)
    return aggregated
def run(self, data_instances):
    """
    Run the host-side steps of the RSA intersection against guest 0.

    A new RSA key pair is generated and ``e``/``n`` are sent to the guest.
    Every local key is hashed, passed through ``powmod`` with ``self.d`` and
    ``self.n`` and hashed again before being sent. The ids received from the
    guest go through the same ``powmod`` and are sent back. When
    ``self.get_intersect_ids_flag`` is set, the intersection received from
    the guest is mapped back to the original local keys.

    Parameters
    ----------
    data_instances: table whose keys are the local ids.

    Returns
    -------
    ``None`` when ``self.get_intersect_ids_flag`` is falsy. Otherwise a table
    keyed by the original ids, passed through ``self._get_value_from_data``
    unless ``self.only_output_key`` is set.
    """
    LOGGER.info("Start rsa intersection")

    rsa_operator = RsaEncrypt()
    rsa_operator.generate_key(rsa_bit=1024)
    self.e, self.d, self.n = rsa_operator.get_key_pair()
    LOGGER.info("Generate rsa keys.")

    public_key = {"e": self.e, "n": self.n}
    remote(public_key,
           name=self.transfer_variable.rsa_pubkey.name,
           tag=self.transfer_variable.generate_transferid(self.transfer_variable.rsa_pubkey),
           role=consts.GUEST,
           idx=0)
    LOGGER.info("Remote public key to Guest.")

    def process_host_id(k, v):
        # hash -> powmod with (d, n) -> hash; keep the original key as value
        hashed_key = int(RsaIntersectionHost.hash(k), 16)
        powered_key = gmpy_math.powmod(hashed_key, self.d, self.n)
        return RsaIntersectionHost.hash(powered_key), k

    # (host_id_process, original_id)
    host_ids_process_pair = data_instances.map(process_host_id)
    # (host_id_process, 1): only the processed ids are sent out
    host_ids_process = host_ids_process_pair.mapValues(lambda _: 1)
    remote(host_ids_process,
           name=self.transfer_variable.intersect_host_ids_process.name,
           tag=self.transfer_variable.generate_transferid(
               self.transfer_variable.intersect_host_ids_process),
           role=consts.GUEST,
           idx=0)
    LOGGER.info("Remote host_ids_process to Guest.")

    # Receive the guest ids
    guest_ids = get(name=self.transfer_variable.intersect_guest_ids.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.intersect_guest_ids),
                    idx=0)
    LOGGER.info("Get guest_ids from guest")

    # Process the guest ids and send them back
    guest_ids_process = guest_ids.map(
        lambda gid, _: (gid, gmpy_math.powmod(int(gid), self.d, self.n)))
    remote(guest_ids_process,
           name=self.transfer_variable.intersect_guest_ids_process.name,
           tag=self.transfer_variable.generate_transferid(
               self.transfer_variable.intersect_guest_ids_process),
           role=consts.GUEST,
           idx=0)
    LOGGER.info("Remote guest_ids_process to Guest.")

    # Receive the intersection, if requested
    if not self.get_intersect_ids_flag:
        return None

    encrypt_intersect_ids = get(name=self.transfer_variable.intersect_ids.name,
                                tag=self.transfer_variable.generate_transferid(
                                    self.transfer_variable.intersect_ids),
                                idx=0)
    # Joining on the processed id recovers the original local key.
    intersect_ids_pair = encrypt_intersect_ids.join(host_ids_process_pair,
                                                    lambda _, original: original)
    intersect_ids = intersect_ids_pair.map(lambda _, original: (original, "intersect_id"))
    LOGGER.info("Get intersect ids from Guest")

    if not self.only_output_key:
        intersect_ids = self._get_value_from_data(intersect_ids, data_instances)
    return intersect_ids
def run(self, data_instances):
    """
    Run the host-side steps of the RSA intersection against guest 0.

    A new RSA key pair is generated and ``e``/``n`` are sent to the guest.
    Every local key is hashed, passed through ``powmod`` with ``self.d`` and
    ``self.n`` and hashed again before being sent. The ids received from the
    guest go through the same ``powmod`` and are sent back.

    Parameters
    ----------
    data_instances: table whose keys are the local ids.

    Returns
    -------
    The object received from the guest under ``intersect_ids`` when
    ``self.get_intersect_ids_flag`` is set, otherwise ``None``.
    """
    LOGGER.info("Start rsa intersection")

    encrypt_operator = RsaEncrypt()
    # 1024-bit modulus; the previous literal 1028 is not a conventional RSA
    # key size and looks like a typo.
    encrypt_operator.generate_key(rsa_bit=1024)
    self.e, self.d, self.n = encrypt_operator.get_key_pair()
    LOGGER.info("Generate rsa keys.")

    public_key = {"e": self.e, "n": self.n}
    remote(public_key,
           name=self.transfer_variable.rsa_pubkey.name,
           tag=self.transfer_variable.generate_transferid(self.transfer_variable.rsa_pubkey),
           role=consts.GUEST,
           idx=0)
    LOGGER.info("Remote public key to Guest.")

    # (host_id_process, 1)
    table_host_ids_process = data_instances.map(
        lambda k, v: (
            RsaIntersectionHost.hash(
                gmpy_math.powmod(int(RsaIntersectionHost.hash(k), 16), self.d, self.n)),
            1)
    )
    remote(table_host_ids_process,
           name=self.transfer_variable.intersect_host_ids_process.name,
           tag=self.transfer_variable.generate_transferid(
               self.transfer_variable.intersect_host_ids_process),
           role=consts.GUEST,
           idx=0)
    LOGGER.info("Remote host_ids_process to Guest.")

    # Receive the guest ids
    guest_ids = get(name=self.transfer_variable.intersect_guest_ids.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.intersect_guest_ids),
                    idx=0)
    LOGGER.info("Get guest_ids from guest")

    # Process the guest ids and return them to the guest
    guest_ids_process = guest_ids.map(
        lambda k, v: (k, gmpy_math.powmod(int(k), self.d, self.n)))
    remote(guest_ids_process,
           name=self.transfer_variable.intersect_guest_ids_process.name,
           tag=self.transfer_variable.generate_transferid(
               self.transfer_variable.intersect_guest_ids_process),
           role=consts.GUEST,
           idx=0)
    LOGGER.info("Remote guest_ids_process to Guest.")

    # Receive the intersection, if requested
    intersect_ids = None
    if self.get_intersect_ids_flag:
        intersect_ids = get(name=self.transfer_variable.intersect_ids.name,
                            tag=self.transfer_variable.generate_transferid(
                                self.transfer_variable.intersect_ids),
                            idx=0)
        LOGGER.info("Get intersect ids from Guest")

    return intersect_ids
def predict(self, data=None, predict_param=None):
    """
    Serve prediction requests of the hosts that use encryption.

    Encryption information is synchronized first if that has not happened
    yet. Then, for each host flagged in ``self.host_use_encryption``, the
    received ``predict_wx`` is decrypted with that host's encrypter, passed
    through sigmoid, classified with ``predict_param.threshold`` and the
    labels are sent back to the same host.

    Parameters
    ----------
    data: not used by this method.
    predict_param: object whose ``threshold`` is used for classification.

    Returns
    -------
    None
    """
    # synchronize encryption information
    if not self.has_sychronized_encryption:
        self.__synchronize_encryption()
        self.__send_host_mode()

    for host_idx, encrypted in enumerate(self.host_use_encryption):
        if not encrypted:
            # Nothing is exchanged with hosts that do not use encryption.
            continue

        host_encrypter = self.host_encrypter[host_idx]
        wx_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.predict_wx)
        encrypted_wx = federation.get(name=self.transfer_variable.predict_wx.name,
                                      tag=wx_tag,
                                      idx=host_idx)

        plain_wx = host_encrypter.distribute_decrypt(encrypted_wx)
        probabilities = plain_wx.mapValues(lambda wx: activation.sigmoid(wx))
        labels = self.classified(probabilities, predict_param.threshold)

        result_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.predict_result)
        federation.remote(labels,
                          name=self.transfer_variable.predict_result.name,
                          tag=result_tag,
                          role=consts.HOST,
                          idx=host_idx)

    LOGGER.info("Finish predicting, result has been sent back")
    return
def __synchronize_encryption(self):
    """
    Learn which hosts use encryption and hand out public keys.

    The ``use_encrypt`` flags of all hosts are fetched in one call and stored
    in ``self.host_use_encryption``. For each host one encrypter is appended
    to ``self.host_encrypter``: a ``PaillierEncrypt`` with a freshly generated
    key (whose public key is sent to that host) when the host uses
    encryption, a ``FakeEncrypt`` otherwise. Finally
    ``self.has_sychronized_encryption`` is set to True.
    """
    # 1. Use Encrypt: find out which hosts use encryption
    use_encrypt_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.use_encrypt)
    self.host_use_encryption = federation.get(
        name=self.transfer_variable.use_encrypt.name,
        tag=use_encrypt_tag,
        idx=-1)
    LOGGER.info("host use encryption: {}".format(self.host_use_encryption))

    # 2. Send a public key to every host that uses encryption
    for host_idx, encrypted in enumerate(self.host_use_encryption):
        if encrypted:
            host_encrypter = PaillierEncrypt()
            host_encrypter.generate_key(self.encrypt_param.key_length)
            public_key = host_encrypter.get_public_key()
            pubkey_tag = self.transfer_variable.generate_transferid(
                self.transfer_variable.paillier_pubkey)
            federation.remote(public_key,
                              name=self.transfer_variable.paillier_pubkey.name,
                              tag=pubkey_tag,
                              role=consts.HOST,
                              idx=host_idx)
        else:
            host_encrypter = FakeEncrypt()

        # One entry per host, so the list can be indexed by host index.
        self.host_encrypter.append(host_encrypter)

    self.has_sychronized_encryption = True
def sync_encrypted_splitinfo_host(self, dep=-1, batch=-1):
    """
    Fetch ``encrypted_splitinfo_host`` from the party at index 0.

    Parameters
    ----------
    dep: depth, used in the log message and the transfer tag.
    batch: batch number, used in the log message and the transfer tag.

    Returns
    -------
    The received object, unchanged.
    """
    LOGGER.info("get encrypted splitinfo of depth {}, batch {}".format(dep, batch))
    splitinfo_var = self.transfer_inst.encrypted_splitinfo_host
    transfer_tag = self.transfer_inst.generate_transferid(splitinfo_var, dep, batch)
    return federation.get(name=splitinfo_var.name, tag=transfer_tag, idx=0)
def sync_data_predicted_by_host(self, send_times):
    """
    Fetch ``predict_data_by_host`` from the party at index 0.

    Parameters
    ----------
    send_times: counter used in the log message and the transfer tag.

    Returns
    -------
    The received object, unchanged.
    """
    LOGGER.info("get predicted data by host, recv times is {}".format(send_times))
    predict_var = self.transfer_inst.predict_data_by_host
    transfer_tag = self.transfer_inst.generate_transferid(predict_var, send_times)
    return federation.get(name=predict_var.name, tag=transfer_tag, idx=0)
def predict(self, data_instances, predict_param):
    """
    Predict labels for ``data_instances``.

    With ``self.use_encrypt`` set, ``wx`` is sent to the arbiter and the
    labels it returns are joined with the input labels. Otherwise sigmoid and
    classification are applied locally.

    Parameters
    ----------
    data_instances: table of instances exposing a ``label`` attribute.
    predict_param: object providing ``threshold`` and ``with_proba``.

    Returns
    -------
    Table of ``(label, probability_or_None, predicted_label)`` tuples. The
    probability is only filled in on the local path when
    ``predict_param.with_proba`` is truthy.
    """
    if self.has_sychronized_encryption:
        LOGGER.info("in predict, has synchronize encryption information")
    else:
        self.__synchronize_encryption()
        self.__load_arbiter_model()

    wx = self.compute_wx(data_instances, self.coef_, self.intercept_)

    if self.use_encrypt:
        # Remote path: the arbiter turns wx into labels.
        wx_tag = self.transfer_variable.generate_transferid(self.transfer_variable.predict_wx)
        federation.remote(wx,
                          name=self.transfer_variable.predict_wx.name,
                          tag=wx_tag,
                          role=consts.ARBITER,
                          idx=0)

        result_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.predict_result)
        arbiter_labels = federation.get(name=self.transfer_variable.predict_result.name,
                                        tag=result_tag,
                                        idx=0)
        # No probability is available on this path.
        return arbiter_labels.join(data_instances,
                                   lambda pred, inst: (inst.label, None, pred))

    # Local path: compute probability and label here.
    pred_prob = wx.mapValues(lambda value: activation.sigmoid(value))
    pred_label = self.classified(pred_prob, predict_param.threshold)
    if predict_param.with_proba:
        true_labels = data_instances.mapValues(lambda inst: inst.label)
        label_and_prob = true_labels.join(pred_prob, lambda label, prob: (label, prob))
    else:
        label_and_prob = data_instances.mapValues(lambda inst: (inst.label, None))
    return label_and_prob.join(pred_label, lambda pair, pred: (pair[0], pair[1], pred))
def transform(self, data_instances):
    """
    Bin the local data with the stored split points, combine the bins with
    the encrypted labels received from the guest and send the result back.

    Parameters
    ----------
    data_instances: table of instances to be binned.

    Returns
    -------
    None; the encrypted bin sums are sent to guest 0.
    """
    self._abnormal_detection(data_instances)
    self._parse_cols(data_instances)

    # 1. Synchronize encryption information
    self.__synchronize_encryption()

    # 2. Bin the data using the split points held by each iv attribute
    split_points = [list(iv_attr.split_points) for iv_attr in self.iv_attrs]
    data_bin_table = self.binning_obj.transform(data_instances, split_points, self.cols)

    # 3. Receive the encrypted labels
    label_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.encrypted_label)
    encrypted_label_table = federation.get(name=self.transfer_variable.encrypted_label.name,
                                           tag=label_tag,
                                           idx=0)
    LOGGER.info("Get encrypted_label_table from guest")

    # 4. Aggregate per bin and send the result to the guest
    encrypted_bin_sum = self.__static_encrypted_bin_label(data_bin_table,
                                                          encrypted_label_table,
                                                          self.cols)
    bin_sum_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.encrypted_bin_sum)
    federation.remote(encrypted_bin_sum,
                      name=self.transfer_variable.encrypted_bin_sum.name,
                      tag=bin_sum_tag,
                      role=consts.GUEST,
                      idx=0)
    LOGGER.info("Sent encrypted_bin_sum to guest")
def __synchronize_encryption(self, mode='train'):
    """
    Tell the arbiter whether this party uses encryption and, if it does,
    receive the public key and install it on ``self.encrypt_operator``.

    Parameters
    ----------
    mode: extra value passed to ``generate_transferid`` when building the tags.
    """
    # Report whether this party uses encryption
    use_encrypt_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.use_encrypt, mode)
    LOGGER.debug("Start to remote use_encrypt: {}, transfer_id: {}".format(
        self.use_encrypt, use_encrypt_tag))
    federation.remote(self.use_encrypt,
                      name=self.transfer_variable.use_encrypt.name,
                      tag=use_encrypt_tag,
                      role=consts.ARBITER,
                      idx=0)

    # A public key is only exchanged when encryption is in use
    if self.use_encrypt:
        pubkey_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.paillier_pubkey, mode)
        received_pubkey = federation.get(name=self.transfer_variable.paillier_pubkey.name,
                                         tag=pubkey_tag,
                                         idx=0)
        LOGGER.debug("Received pubkey")
        self.encrypt_operator.set_public_key(received_pubkey)

    LOGGER.info("Finish synchronized ecryption")
    self.has_sychronized_encryption = True
def intersect_join_id(self, data_instances):
    """
    Intersect the local ids with the ids received from the other party.

    When encoding is enabled the local ids are encoded before the join and
    decoded again in the returned table. If ``self.send_intersect_id_flag``
    is set, the (possibly encoded) intersection is also sent to the other
    party.

    Parameters
    ----------
    data_instances: table whose keys are the local ids.

    Returns
    -------
    Table of the intersecting ids with the value ``"intersect_id"``.

    Raises
    ------
    ValueError: on an unsupported encode method or an unknown role.
    """
    LOGGER.info("Join id role is {}".format(self.role))

    sid_encode_pair = None
    if self.with_encode and self.encode_params.encode_method != "none":
        if not Encode.is_support(self.encode_params.encode_method):
            raise ValueError("Unknown encode_method, please check the configure of encode_param")
        encode_operator = Encode(self.encode_params.encode_method, self.encode_params.base64)
        # (encoded_id, original_id): kept so the result can be decoded later
        sid_encode_pair = data_instances.map(
            lambda sid, _: (encode_operator.compute(sid, postfit_salt=self.encode_params.salt),
                            sid))
        data_sid = sid_encode_pair.mapValues(lambda _: 1)
    else:
        data_sid = data_instances.mapValues(lambda _: 1)

    # Each role listens on the variable the opposite role sends on.
    if self.role == consts.HOST:
        recv_var = self.transfer_variable.send_ids_guest
    elif self.role == consts.GUEST:
        recv_var = self.transfer_variable.send_ids_host
    else:
        raise ValueError("Unknown intersect role, please check the code")
    send_ids_name = recv_var.name
    send_ids_tag = self.transfer_variable.generate_transferid(recv_var)

    recv_ids = get(name=send_ids_name, tag=send_ids_tag, idx=0)
    LOGGER.info("Get intersect_host_ids from role-send")

    send_intersect_ids = recv_ids.join(data_sid, lambda _recv, _local: "intersect_id")
    LOGGER.info("Finish intersect_ids computing")

    if self.send_intersect_id_flag:
        if self.role == consts.GUEST:
            result_var = self.transfer_variable.intersect_ids_guest
            recv_role = consts.HOST
        elif self.role == consts.HOST:
            result_var = self.transfer_variable.intersect_ids_host
            recv_role = consts.GUEST
        else:
            raise ValueError("Unknown intersect role, please check the code")

        remote(send_intersect_ids,
               name=result_var.name,
               tag=self.transfer_variable.generate_transferid(result_var),
               role=recv_role,
               idx=0)
        LOGGER.info("Remote intersect ids to role-send")

    if sid_encode_pair:
        # Translate the encoded ids back to the original ones.
        decoded = send_intersect_ids.join(sid_encode_pair, lambda _flag, original: original)
        return decoded.map(lambda _encoded, original: (original, 'intersect_id'))
    return send_intersect_ids
def _received_result_cols(self, filter_name):
    """
    Receive the left-column decision for one filter and record it.

    The received mapping is applied through ``self._renew_final_left_cols``,
    then a ``FeatureSelectionFilterParam`` describing the outcome is appended
    to ``self.results``.

    Parameters
    ----------
    filter_name: name of the filter; used in the transfer tag and stored in
        the result object.
    """
    transfer_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.result_left_cols, filter_name)
    received_cols = federation.get(name=self.transfer_variable.result_left_cols.name,
                                   tag=transfer_tag,
                                   idx=0)
    LOGGER.info(
        "Received left columns from guest, received left_cols: {}".format(received_cols))

    LOGGER.debug("Before renew: self.left_cols: {}".format(self.left_cols))
    self._renew_final_left_cols(received_cols)
    LOGGER.debug("After renew: self.left_cols: {}".format(self.left_cols))

    # Column name -> kept flag, for every column currently tracked
    left_col_result = {
        self.header[col_idx]: is_left
        for col_idx, is_left in self.left_cols.items()
    }
    # Names of the columns this filter reported on
    original_cols = [self.header[col_idx] for col_idx in list(received_cols.keys())]

    left_col_obj = feature_selection_param_pb2.LeftCols(original_cols=original_cols,
                                                        left_cols=left_col_result)
    result_obj = feature_selection_param_pb2.FeatureSelectionFilterParam(
        feature_values={},
        left_cols=left_col_obj,
        filter_name=filter_name)
    self.results.append(result_obj)
    LOGGER.info("Received Left cols are {}".format(self.left_cols))
def __synchronize_aggregate_classed_list(self):
    """
    Distribute the aggregated class list from the guest to the other roles.

    The guest sends ``self.classes`` to host 0 and, when ``self.has_arbiter``
    is set, to arbiter 0. Host and arbiter replace ``self.classes`` with the
    received value.

    Raises
    ------
    ValueError: if ``self.role`` is not guest, host or arbiter.
    """
    classes_var = self.transfer_variable.aggregate_classes

    if self.role == consts.GUEST:
        receivers = [consts.HOST]
        if self.has_arbiter:
            receivers.append(consts.ARBITER)
        for receiver in receivers:
            federation.remote(self.classes,
                              name=classes_var.name,
                              tag=self.transfer_variable.generate_transferid(classes_var),
                              role=receiver,
                              idx=0)
    elif self.role in (consts.HOST, consts.ARBITER):
        self.classes = federation.get(
            name=classes_var.name,
            tag=self.transfer_variable.generate_transferid(classes_var),
            idx=0)
    else:
        raise ValueError("Unknown role:{}".format(self.role))
def sync_stop_flag(self, num_round):
    """
    Fetch ``stop_flag`` for the given boosting round from the party at index 0.

    Parameters
    ----------
    num_round: boosting round, used in the log message and the transfer tag.

    Returns
    -------
    The received stop flag, unchanged.
    """
    LOGGER.info("sync stop flag from guest, boosting round is {}".format(num_round))
    stop_var = self.transfer_inst.stop_flag
    transfer_tag = self.transfer_inst.generate_transferid(stop_var, num_round)
    return federation.get(name=stop_var.name, tag=transfer_tag, idx=0)
def recv_host_sum_from_host(self):
    """
    Fetch ``host_sum`` from the party at index 0 and return it unchanged.
    """
    host_sum_var = self.transfer_inst.host_sum
    transfer_tag = self.transfer_inst.generate_transferid(host_sum_var)
    return federation.get(name=host_sum_var.name, tag=transfer_tag, idx=0)
def predict(self, data_instances, predict_param):
    """
    Predict labels by combining the local ``wx`` with the value received
    from host 0.

    Parameters
    ----------
    data_instances: table of instances exposing a ``label`` attribute.
    predict_param: object providing ``threshold`` and ``with_proba``.

    Returns
    -------
    Table of ``(label, probability_or_None, predicted_label)`` tuples; the
    probability is only filled in when ``predict_param.with_proba`` is truthy.
    """
    LOGGER.info("Start predict ...")

    guest_wx = self.compute_wx(data_instances, self.coef_, self.intercept_)
    host_prob_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.host_prob)
    host_wx = federation.get(name=self.transfer_variable.host_prob.name,
                             tag=host_prob_tag,
                             idx=0)
    LOGGER.info("Get probability from Host")

    # Sigmoid is applied to the sum of both parties' contributions.
    pred_prob = guest_wx.join(host_wx, lambda g, h: activation.sigmoid(g + h))
    pred_label = self.classified(pred_prob, predict_param.threshold)

    if predict_param.with_proba:
        true_labels = data_instances.mapValues(lambda inst: inst.label)
        label_and_prob = true_labels.join(pred_prob, lambda label, prob: (label, prob))
    else:
        label_and_prob = data_instances.mapValues(lambda inst: (inst.label, None))

    return label_and_prob.join(pred_label, lambda pair, pred: (pair[0], pair[1], pred))
def __re_encrypt(self, iter_num):
    """
    Re-encrypt model weights for the hosts during one iteration.

    ``self.curt_re_encrypt_times`` starts as a copy of
    ``self.re_encrypt_times``. In each round the batch number advances by
    ``self.re_encrypt_batches``; every host with a positive remaining count
    sends a model, which is decrypted and encrypted again with that host's
    encrypter and sent back, and the host's count is decremented. The loop
    stops once the counts sum to zero.

    Parameters
    ----------
    iter_num: iteration number, used in the transfer tags.
    """
    # If encryption is used, model weights must be re-encrypted every several batches.
    self.curt_re_encrypt_times = self.re_encrypt_times.copy()
    remaining = self.curt_re_encrypt_times  # same object, updated in place

    # Part2: re-encrypt the model weights coming from each host
    batch_num = 0
    while True:
        batch_num += self.re_encrypt_batches

        incoming_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.to_encrypt_model, iter_num, batch_num)
        outgoing_tag = self.transfer_variable.generate_transferid(
            self.transfer_variable.re_encrypted_model, iter_num, batch_num)

        for host_idx, left_times in enumerate(remaining):
            if left_times <= 0:
                continue

            received_model = federation.get(
                name=self.transfer_variable.to_encrypt_model.name,
                tag=incoming_tag,
                idx=host_idx)
            host_encrypter = self.host_encrypter[host_idx]
            plain_model = host_encrypter.decrypt_list(received_model)
            fresh_model = host_encrypter.encrypt_list(plain_model)
            federation.remote(fresh_model,
                              name=self.transfer_variable.re_encrypted_model.name,
                              tag=outgoing_tag,
                              role=consts.HOST,
                              idx=host_idx)

            remaining[host_idx] = left_times - 1

        if sum(remaining) == 0:
            break
def sync_tree_node_queue(self, dep=-1):
    """
    Fetch ``tree_node_queue`` from the party at index 0 and store it on
    ``self.tree_node_queue``.

    Parameters
    ----------
    dep: depth, used in the log message and the transfer tag.
    """
    LOGGER.info("get tree node queue of depth {}".format(dep))
    queue_var = self.transfer_inst.tree_node_queue
    transfer_tag = self.transfer_inst.generate_transferid(queue_var, dep)
    self.tree_node_queue = federation.get(name=queue_var.name, tag=transfer_tag, idx=0)
def sync_node_positions(self, dep=-1):
    """
    Fetch ``node_positions`` from the party at index 0.

    Parameters
    ----------
    dep: depth, used in the log message and the transfer tag.

    Returns
    -------
    The received node positions, unchanged.
    """
    # The message used to say "tree node queue", copied from a sibling
    # method; it now names what is actually fetched.
    LOGGER.info("get node positions of depth {}".format(dep))
    node_positions = federation.get(
        name=self.transfer_inst.node_positions.name,
        tag=self.transfer_inst.generate_transferid(self.transfer_inst.node_positions, dep),
        idx=0)
    return node_positions
def predict(self, data_instances, predict_param):
    """
    Prediction of lr.

    The input is first passed through ``self.transform``; the local ``wx`` is
    then combined with the value received from host 0.

    Parameters
    ----------
    data_instances: DTable of Instance, input data
    predict_param: PredictParam, the setting of prediction.

    Returns
    -------
    DTable of ``(label, probability_or_None, predicted_label)`` tuples; the
    probability is only filled in when ``predict_param.with_proba`` is truthy.
    """
    LOGGER.info("Start predict ...")

    features = self.transform(data_instances)
    guest_wx = self.compute_wx(features, self.coef_, self.intercept_)

    host_prob_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.host_prob)
    host_wx = federation.get(name=self.transfer_variable.host_prob.name,
                             tag=host_prob_tag,
                             idx=0)
    LOGGER.info("Get probability from Host")

    # Sigmoid is applied to the sum of both parties' contributions.
    pred_prob = guest_wx.join(host_wx, lambda g, h: activation.sigmoid(g + h))
    pred_label = self.classified(pred_prob, predict_param.threshold)

    # Labels are read from the untransformed input.
    if predict_param.with_proba:
        true_labels = data_instances.mapValues(lambda inst: inst.label)
        label_and_prob = true_labels.join(pred_prob, lambda label, prob: (label, prob))
    else:
        label_and_prob = data_instances.mapValues(lambda inst: (inst.label, None))

    return label_and_prob.join(pred_label, lambda pair, pred: (pair[0], pair[1], pred))
def __synchronize_classes_list(self):
    """
    Merge class lists across parties, then distribute the aggregate.

    In homo mode the guest receives the classes of host 0 and adds them to
    ``self.classes``, while the host sends its ``self.classes`` to the guest.
    In every mode ``__synchronize_aggregate_classed_list`` is called
    afterwards to share the aggregated classes.
    """
    # ``consts.HOMO`` had been garbled into ``consts.H**O``, which parses as
    # a power expression and fails at runtime.
    if self.mode == consts.HOMO:
        if self.role == consts.GUEST:
            host_classes_list = federation.get(
                name=self.transfer_variable.host_classes.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_classes),
                idx=0)
            for host_class in host_classes_list:
                self.classes.add(host_class)
        elif self.role == consts.HOST:
            federation.remote(
                self.classes,
                name=self.transfer_variable.host_classes.name,
                tag=self.transfer_variable.generate_transferid(
                    self.transfer_variable.host_classes),
                role=consts.GUEST,
                idx=0)

    self.__synchronize_aggregate_classed_list()
def sync_dispatch_node_host(self, dep):
    """
    Fetch ``dispatch_node_host`` from the party at index 0.

    Parameters
    ----------
    dep: depth, used in the log message and the transfer tag.

    Returns
    -------
    The received object, unchanged.
    """
    LOGGER.info("get node from host to dispath, depth is {}".format(dep))
    dispatch_var = self.transfer_inst.dispatch_node_host
    transfer_tag = self.transfer_inst.generate_transferid(dispatch_var, dep)
    return federation.get(name=dispatch_var.name, tag=transfer_tag, idx=0)
def sync_encrypted_grad_and_hess(self):
    """
    Fetch ``encrypted_grad_and_hess`` from the party at index 0 and store it
    on ``self.grad_and_hess``.
    """
    LOGGER.info("get encrypted grad and hess")
    grad_hess_var = self.transfer_inst.encrypted_grad_and_hess
    transfer_tag = self.transfer_inst.generate_transferid(grad_hess_var)
    self.grad_and_hess = federation.get(name=grad_hess_var.name, tag=transfer_tag, idx=0)
def sync_predict_finish_tag(self, recv_times):
    """
    Fetch ``predict_finish_tag`` from the party at index 0.

    Parameters
    ----------
    recv_times: counter used in the log message and the transfer tag.

    Returns
    -------
    The received finish tag, unchanged.
    """
    LOGGER.info("get the {}-th predict finish tag from guest".format(recv_times))
    finish_var = self.transfer_inst.predict_finish_tag
    transfer_tag = self.transfer_inst.generate_transferid(finish_var, recv_times)
    return federation.get(name=finish_var.name, tag=transfer_tag, idx=0)
def _synchronous_data(self, data_instance, flowid, data_application=None):
    """
    Align the host's data with the ids held by the guest.

    The guest sends its keys (values replaced by 1) to host 0. The host
    receives them and keeps only the instances whose keys were received.

    Parameters
    ----------
    data_instance: local table of instances.
    flowid: value mixed into the transfer tag.
    data_application: ``consts.TRAIN_DATA`` or ``consts.TEST_DATA``; selects
        which transfer variable is used.

    Returns
    -------
    The joined table on the host. ``None`` on the guest, for any other role,
    or when ``data_application`` is missing or unknown (a warning is logged
    in the latter two cases).
    """
    if data_application is None:
        LOGGER.warning("not data_application!")
        return

    transfer_variable = HeteroWorkFlowTransferVariable()
    if data_application == consts.TRAIN_DATA:
        data_var = transfer_variable.train_data
    elif data_application == consts.TEST_DATA:
        data_var = transfer_variable.test_data
    else:
        LOGGER.warning("data_application error!")
        return

    if self.role == consts.GUEST:
        # Only the keys matter to the receiver, so the values are dropped.
        guest_sid = data_instance.mapValues(lambda _: 1)
        federation.remote(guest_sid,
                          name=data_var.name,
                          tag=transfer_variable.generate_transferid(data_var, flowid),
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("remote {} to host".format(data_application))
        return None

    if self.role == consts.HOST:
        received_sid = federation.get(name=data_var.name,
                                      tag=transfer_variable.generate_transferid(data_var, flowid),
                                      idx=0)
        LOGGER.info("get {} from guest".format(data_application))
        return received_sid.join(data_instance, lambda _, inst: inst)
def _distributed_negative_sampling_dst(self, adj_instances, src=consts.HOST, dst=consts.GUEST):
    """
    Draw one negative sample for every id received from ``src``.

    Parameters
    ----------
    adj_instances: input passed to ``NeighborsSampling.generate_nega_distribution``.
    src: ``consts.HOST`` or ``consts.GUEST``; selects the transfer variable
        the ids are received on.
    dst: must be the opposite role of ``src``; also used as the prefix of the
        result table name.

    Returns
    -------
    A non-persistent eggroll table mapping each received id to
    ``(sampled_item, -1)``.

    Raises
    ------
    NameError: if ``src`` is neither host nor guest, or ``dst`` is not the
        opposite role of ``src``.
    """
    if src == consts.HOST:
        if dst != consts.GUEST:
            raise NameError("if src is host, then dst should be guest!!!")
        nega_ids_transfer = self.transfer_variable.host_neg_samp_ids
    elif src == consts.GUEST:
        if dst != consts.HOST:
            raise NameError("if src is guest, then dst should be host!!!")
        nega_ids_transfer = self.transfer_variable.guest_neg_samp_ids
    else:
        raise NameError("src should be choose from {host, guest}")

    distributed_negative_ids = federation.get(
        name=nega_ids_transfer.name,
        tag=self.transfer_variable.generate_transferid(nega_ids_transfer),
        idx=0)
    LOGGER.info("Get distributed nagative samples from {}".format(src))

    # Log at most the first 10 ids. Indexing 0..9 unconditionally raised
    # IndexError when fewer than 10 ids were received.
    for position, sample_id in enumerate(distributed_negative_ids):
        if position >= 10:
            break
        LOGGER.info("id:{}".format(sample_id))

    # Sample some negative samples.
    # NOTE(review): each distribution entry looks like (item, weight) — confirm
    # against NeighborsSampling.generate_nega_distribution.
    distribution = NeighborsSampling.generate_nega_distribution(adj_instances)
    sampler = DiscreteDistributionSampler([data[1] for data in distribution])

    distributed_negative_instances_dst = eggroll.table(
        name=dst + eggroll.generateUniqueId(),
        namespace='neighbors_sampling/distributed_sampling',
        persistent=False)
    for sample_id in distributed_negative_ids:
        index = sampler.sampling()
        distributed_negative_instances_dst.put(sample_id, (distribution[index][0], -1))

    logDtableInstances(LOGGER, distributed_negative_instances_dst, isInstance=False)
    return distributed_negative_instances_dst
def fit(self, data_instances):
    """
    Fit the binning locally and compute iv/woe for the host's features from
    the encrypted bin sums it returns.

    Iv is only computed for binary-labelled data; for any other input a
    warning is logged and ``data_instances`` is returned unchanged.

    Parameters
    ----------
    data_instances: table of instances with labels.

    Returns
    -------
    ``data_instances`` for non-binary labels, otherwise ``self.data_output``
    after ``self.transform`` has run.
    """
    LOGGER.info("Start feature binning fit and transform")
    self._abnormal_detection(data_instances)
    self._parse_cols(data_instances)
    self.binning_obj.fit_split_points(data_instances)

    if not data_overview.is_binary_labels(data_instances):
        LOGGER.warning("Iv is not supported for Multiple-label data.")
        return data_instances

    # 1. Synchronize encryption information
    self.__synchronize_encryption()

    # 2. Prepare labels
    data_instances = data_instances.mapValues(self.load_data)
    self.set_schema(data_instances)
    label_table = data_instances.mapValues(lambda inst: inst.label)

    # 3. Transfer encrypted label
    encrypt_label = functools.partial(self.encrypt, encryptor=self.encryptor)
    encrypted_label_table = label_table.mapValues(encrypt_label)
    label_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.encrypted_label)
    federation.remote(encrypted_label_table,
                      name=self.transfer_variable.encrypted_label.name,
                      tag=label_tag,
                      role=consts.HOST,
                      idx=0)
    LOGGER.info("Sent encrypted_label_table to host")

    # 4. Calculate the local binning now, since the other party may need
    # time to compute its own data.
    data_instances = self.fit_local(data_instances, label_table)

    # 5. Receive the host result and calculate the iv value
    bin_sum_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.encrypted_bin_sum)
    encrypted_bin_sum = federation.get(name=self.transfer_variable.encrypted_bin_sum.name,
                                       tag=bin_sum_tag,
                                       idx=0)
    LOGGER.info("Get encrypted_bin_sum from host")

    result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
    host_iv_attrs = self.binning_obj.cal_iv_woe(result_counts,
                                                self.model_param.adjustment_factor)

    # Support one host only in this version. Multiple host will be supported in the future.
    self.host_results[consts.HOST] = host_iv_attrs

    self.set_schema(data_instances)
    self.transform(data_instances)
    LOGGER.info("Finish feature binning fit and transform")
    return self.data_output
def fit(self, data_instances):
    """
    Compute iv for the local features and, from the encrypted bin sums the
    host returns, for the host's features.

    Parameters
    ----------
    data_instances: table of instances with labels.

    Returns
    -------
    dict with key ``'local'`` (result of ``self.fit_local``) and key
    ``'remote'`` (iv attributes computed for the host; also stored on
    ``self.host_iv_attrs``).
    """
    self._abnormal_detection(data_instances)
    self._parse_cols(data_instances)

    # 1. Synchronize encryption information
    self.__synchronize_encryption()

    # 2. Prepare labels
    data_instances = data_instances.mapValues(self.load_data)
    label_table = data_instances.mapValues(lambda inst: inst.label)

    # 3. Transfer encrypted label
    encrypt_label = functools.partial(self.encrypt, encryptor=self.encryptor)
    encrypted_label_table = label_table.mapValues(encrypt_label)
    label_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.encrypted_label)
    federation.remote(encrypted_label_table,
                      name=self.transfer_variable.encrypted_label.name,
                      tag=label_tag,
                      role=consts.HOST,
                      idx=0)
    LOGGER.info("Sent encrypted_label_table to host")

    # 4. Calculate the local binning now, since the other party may need
    # time to compute its own data.
    local_iv = self.fit_local(data_instances, label_table)

    # 5. Receive the host result and calculate the iv value
    bin_sum_tag = self.transfer_variable.generate_transferid(
        self.transfer_variable.encrypted_bin_sum)
    encrypted_bin_sum = federation.get(name=self.transfer_variable.encrypted_bin_sum.name,
                                       tag=bin_sum_tag,
                                       idx=0)
    LOGGER.info("Get encrypted_bin_sum from host")

    result_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
    remote_iv = self.binning_obj.cal_iv_woe(result_counts, self.bin_param.adjustment_factor)
    self.host_iv_attrs = remote_iv

    for feature_idx, iv_attr in enumerate(remote_iv):
        LOGGER.info("The remote iv of {}th measured feature is {}".format(
            feature_idx, iv_attr.iv))

    return {'local': local_iv, 'remote': remote_iv}
def sync_tree_dim(self):
    """
    Fetch ``tree_dim`` from the party at index 0, store it on
    ``self.tree_dim`` and log it.
    """
    LOGGER.info("sync tree dim from guest")
    tree_dim_var = self.transfer_inst.tree_dim
    transfer_tag = self.transfer_inst.generate_transferid(tree_dim_var)
    self.tree_dim = federation.get(name=tree_dim_var.name, tag=transfer_tag, idx=0)
    LOGGER.info("tree dim is %d" % self.tree_dim)