Exemplo n.º 1
0
    def aggregate_loss(self, transfer_variable, iter_num, party_weights,
                       host_use_encryption):
        """
        Compute the weighted average loss of the guest and of every host that
        does not use encryption.

        Parameters
        ----------
        transfer_variable: object providing ``guest_loss``, ``host_loss`` and
            ``generate_transferid``.
        iter_num: iteration number used to build the transfer tags.
        party_weights: weights of all parties; index 0 is the guest weight and
            the i-th host is expected at index ``i + 1``.
        host_use_encryption: one flag per host. A host whose flag is truthy is
            skipped: no loss is fetched from it and its weight is excluded.

        Returns
        -------
        The weighted loss sum divided by the sum of the weights of the parties
        that contributed.
        """
        guest_loss_id = transfer_variable.generate_transferid(
            transfer_variable.guest_loss, iter_num)
        guest_loss = federation.get(name=transfer_variable.guest_loss.name,
                                    tag=guest_loss_id,
                                    idx=0)
        LOGGER.info("Received guest loss")

        host_loss_id = transfer_variable.generate_transferid(
            transfer_variable.host_loss, iter_num)
        # Work on a copy: weights of skipped hosts are zeroed below and the
        # caller's sequence must stay untouched.
        loss_party_weight = party_weights.copy()

        total_loss = loss_party_weight[0] * guest_loss
        for idx, use_encryption in enumerate(host_use_encryption):
            # Slot 0 belongs to the guest, so host ``idx`` lives at ``idx + 1``.
            # Using ``idx`` directly would reuse (or zero) the guest weight
            # for the first host and shift every other host by one.
            weight_idx = idx + 1
            if use_encryption:
                loss_party_weight[weight_idx] = 0
                continue
            host_loss = federation.get(name=transfer_variable.host_loss.name,
                                       tag=host_loss_id,
                                       idx=idx)
            LOGGER.info("Received loss from {}th host".format(idx))
            total_loss += loss_party_weight[weight_idx] * host_loss

        # Normalise by the weights of the parties that actually contributed.
        total_loss /= sum(loss_party_weight)
        return total_loss
Exemplo n.º 2
0
    def aggregate_model(self, transfer_variable, iter_num, party_weights,
                        host_encrypter):
        """
        Combine the guest model and every host model into one weighted sum.

        Parameters
        ----------
        transfer_variable: object providing ``guest_model``, ``host_model``
            and ``generate_transferid``.
        iter_num: iteration number used to build the transfer tags.
        party_weights: weights of all parties; index 0 is applied to the guest
            model and index ``i + 1`` to the i-th host model.
        host_encrypter: per-host objects exposing ``decrypt_list``.

        Returns
        -------
        ``party_weights[0] * guest + sum(party_weights[i + 1] * host_i)``.
        No normalisation is applied here.
        """
        guest_tag = transfer_variable.generate_transferid(
            transfer_variable.guest_model, iter_num)
        guest_weights = np.array(
            federation.get(name=transfer_variable.guest_model.name,
                           tag=guest_tag,
                           idx=0))
        LOGGER.info("received guest model")

        host_tag = transfer_variable.generate_transferid(
            transfer_variable.host_model, iter_num)
        # idx=-1: the result is iterated below, one entry per host.
        received_host_models = federation.get(
            name=transfer_variable.host_model.name,
            tag=host_tag,
            idx=-1)
        LOGGER.info("recevied host model")

        aggregated = guest_weights * party_weights[0]
        for weight_pos, raw_model in enumerate(received_host_models, start=1):
            # Every host model is passed through that host's own encrypter.
            decrypter = host_encrypter[weight_pos - 1]
            plain_model = np.array(decrypter.decrypt_list(raw_model))
            aggregated += party_weights[weight_pos] * plain_model
        return aggregated
Exemplo n.º 3
0
    def run(self, data_instances):
        """
        Host side of the RSA based intersection.

        Steps, in order:
        1. generate an RSA key pair and send the public part to the guest;
        2. send the guest the hashed RSA-processed host ids;
        3. receive the guest ids, process them with the private exponent and
           send them back;
        4. if ``self.get_intersect_ids_flag`` is set, receive the intersection
           and map it back to the original host ids.

        Returns
        -------
        ``None`` when the intersection is not requested; otherwise a table
        keyed by the original host ids (passed through
        ``self._get_value_from_data`` unless ``self.only_output_key`` is set).
        """
        LOGGER.info("Start rsa intersection")

        rsa_operator = RsaEncrypt()
        rsa_operator.generate_key(rsa_bit=1024)
        self.e, self.d, self.n = rsa_operator.get_key_pair()
        LOGGER.info("Generate rsa keys.")

        transfer = self.transfer_variable
        remote({"e": self.e, "n": self.n},
               name=transfer.rsa_pubkey.name,
               tag=transfer.generate_transferid(transfer.rsa_pubkey),
               role=consts.GUEST,
               idx=0)
        LOGGER.info("Remote public key to Guest.")

        # Table of (processed_host_id, original_host_id).
        host_ids_process_pair = data_instances.map(
            lambda sid, _inst: (
                RsaIntersectionHost.hash(
                    gmpy_math.powmod(int(RsaIntersectionHost.hash(sid), 16), self.d, self.n)),
                sid)
        )

        # Only the processed ids leave this party; values become the constant 1.
        processed_host_ids = host_ids_process_pair.mapValues(lambda _sid: 1)
        remote(processed_host_ids,
               name=transfer.intersect_host_ids_process.name,
               tag=transfer.generate_transferid(transfer.intersect_host_ids_process),
               role=consts.GUEST,
               idx=0)
        LOGGER.info("Remote host_ids_process to Guest.")

        # Receive the guest ids.
        guest_ids = get(name=transfer.intersect_guest_ids.name,
                        tag=transfer.generate_transferid(transfer.intersect_guest_ids),
                        idx=0)
        LOGGER.info("Get guest_ids from guest")

        # Apply the private exponent to every guest id and hand the result back.
        processed_guest_ids = guest_ids.map(
            lambda gid, _val: (gid, gmpy_math.powmod(int(gid), self.d, self.n)))
        remote(processed_guest_ids,
               name=transfer.intersect_guest_ids_process.name,
               tag=transfer.generate_transferid(transfer.intersect_guest_ids_process),
               role=consts.GUEST,
               idx=0)
        LOGGER.info("Remote guest_ids_process to Guest.")

        if not self.get_intersect_ids_flag:
            return None

        # Receive the intersection, keyed by processed ids.
        encrypt_intersect_ids = get(name=transfer.intersect_ids.name,
                                    tag=transfer.generate_transferid(transfer.intersect_ids),
                                    idx=0)

        # Join against the local pair table to recover the original ids.
        matched_pairs = encrypt_intersect_ids.join(host_ids_process_pair,
                                                   lambda _enc, host_id: host_id)
        intersect_ids = matched_pairs.map(lambda _key, host_id: (host_id, "intersect_id"))
        LOGGER.info("Get intersect ids from Guest")

        if self.only_output_key:
            return intersect_ids
        return self._get_value_from_data(intersect_ids, data_instances)
Exemplo n.º 4
0
    def run(self, data_instances):
        """
        Host side of the RSA based intersection.

        Generates an RSA key pair, sends the public key and the hashed
        RSA-processed host ids to the guest, processes the ids received from
        the guest with the private exponent and sends them back.

        Parameters
        ----------
        data_instances: table whose keys are the host ids.

        Returns
        -------
        The intersection table received from the guest when
        ``self.get_intersect_ids_flag`` is set, otherwise ``None``.
        """
        LOGGER.info("Start rsa intersection")

        encrypt_operator = RsaEncrypt()
        # 1024 is the standard modulus size; the previous 1028 looks like a typo.
        encrypt_operator.generate_key(rsa_bit=1024)
        self.e, self.d, self.n = encrypt_operator.get_key_pair()
        LOGGER.info("Generate rsa keys.")
        public_key = {"e": self.e, "n": self.n}
        remote(public_key,
               name=self.transfer_variable.rsa_pubkey.name,
               tag=self.transfer_variable.generate_transferid(self.transfer_variable.rsa_pubkey),
               role=consts.GUEST,
               idx=0)
        LOGGER.info("Remote public key to Guest.")

        # Table of (processed_host_id, 1): only the processed ids are shared.
        table_host_ids_process = data_instances.map(
            lambda k, v: (
                RsaIntersectionHost.hash(gmpy_math.powmod(int(RsaIntersectionHost.hash(k), 16), self.d, self.n)), 1)
        )
        remote(table_host_ids_process,
               name=self.transfer_variable.intersect_host_ids_process.name,
               tag=self.transfer_variable.generate_transferid(self.transfer_variable.intersect_host_ids_process),
               role=consts.GUEST,
               idx=0)
        LOGGER.info("Remote host_ids_process to Guest.")

        # Receive the guest ids.
        guest_ids = get(name=self.transfer_variable.intersect_guest_ids.name,
                        tag=self.transfer_variable.generate_transferid(self.transfer_variable.intersect_guest_ids),
                        idx=0)
        LOGGER.info("Get guest_ids from guest")

        # Apply the private exponent to every guest id and return them.
        guest_ids_process = guest_ids.map(lambda k, v: (k, gmpy_math.powmod(int(k), self.d, self.n)))
        remote(guest_ids_process,
               name=self.transfer_variable.intersect_guest_ids_process.name,
               tag=self.transfer_variable.generate_transferid(self.transfer_variable.intersect_guest_ids_process),
               role=consts.GUEST,
               idx=0)
        LOGGER.info("Remote guest_ids_process to Guest.")

        # Receive the intersection only when it was requested.
        intersect_ids = None
        if self.get_intersect_ids_flag:
            intersect_ids = get(name=self.transfer_variable.intersect_ids.name,
                                tag=self.transfer_variable.generate_transferid(self.transfer_variable.intersect_ids),
                                idx=0)
            LOGGER.info("Get intersect ids from Guest")
        return intersect_ids
Exemplo n.º 5
0
    def predict(self, data=None, predict_param=None):
        """
        Serve prediction requests of the hosts that use encryption.

        For every such host the encrypted ``wx`` is fetched, decrypted with
        that host's encrypter, passed through the sigmoid, classified with
        ``predict_param.threshold`` and the labels are sent back to the host.
        ``data`` is not used by this method. Returns ``None``.
        """
        # Make sure the encryption information has been exchanged first.
        if not self.has_sychronized_encryption:
            self.__synchronize_encryption()
            self.__send_host_mode()

        transfer = self.transfer_variable
        for host_idx, encrypted in enumerate(self.host_use_encryption):
            if not encrypted:
                # Hosts without encryption are not served here.
                continue

            host_encrypter = self.host_encrypter[host_idx]
            wx_tag = transfer.generate_transferid(transfer.predict_wx)
            encrypted_wx = federation.get(name=transfer.predict_wx.name,
                                          tag=wx_tag,
                                          idx=host_idx)

            plain_wx = host_encrypter.distribute_decrypt(encrypted_wx)
            probabilities = plain_wx.mapValues(
                lambda score: activation.sigmoid(score))
            labels = self.classified(probabilities, predict_param.threshold)

            result_tag = transfer.generate_transferid(transfer.predict_result)
            federation.remote(labels,
                              name=transfer.predict_result.name,
                              tag=result_tag,
                              role=consts.HOST,
                              idx=host_idx)
        LOGGER.info("Finish predicting, result has been sent back")
Exemplo n.º 6
0
    def __synchronize_encryption(self):
        """
        Learn which hosts use encryption and hand each of them a public key.

        The per-host flags are stored in ``self.host_use_encryption``. One
        encrypter per host is appended to ``self.host_encrypter``: a
        ``PaillierEncrypt`` (whose public key is sent to the host) when the
        host uses encryption, a ``FakeEncrypt`` otherwise. Finally
        ``self.has_sychronized_encryption`` is set to ``True``.
        """
        transfer = self.transfer_variable

        # 1. Receive the use-encryption flags (idx=-1; iterated per host below).
        flags_tag = transfer.generate_transferid(transfer.use_encrypt)
        self.host_use_encryption = federation.get(
            name=transfer.use_encrypt.name,
            tag=flags_tag,
            idx=-1)
        LOGGER.info("host use encryption: {}".format(self.host_use_encryption))

        # 2. Create one encrypter per host; only encrypting hosts get a key.
        for host_idx, encrypted in enumerate(self.host_use_encryption):
            if encrypted:
                encrypter = PaillierEncrypt()
                encrypter.generate_key(self.encrypt_param.key_length)
                public_key = encrypter.get_public_key()
                key_tag = transfer.generate_transferid(transfer.paillier_pubkey)
                federation.remote(public_key,
                                  name=transfer.paillier_pubkey.name,
                                  tag=key_tag,
                                  role=consts.HOST,
                                  idx=host_idx)
            else:
                encrypter = FakeEncrypt()
            self.host_encrypter.append(encrypter)

        self.has_sychronized_encryption = True
Exemplo n.º 7
0
 def sync_encrypted_splitinfo_host(self, dep=-1, batch=-1):
     """Fetch and return the encrypted split info tagged with depth ``dep`` and batch ``batch``."""
     LOGGER.info("get encrypted splitinfo of depth {}, batch {}".format(dep, batch))
     splitinfo_var = self.transfer_inst.encrypted_splitinfo_host
     transfer_tag = self.transfer_inst.generate_transferid(splitinfo_var, dep, batch)
     return federation.get(name=splitinfo_var.name,
                           tag=transfer_tag,
                           idx=0)
Exemplo n.º 8
0
 def sync_data_predicted_by_host(self, send_times):
     """Fetch and return the host-predicted data tagged with ``send_times``."""
     LOGGER.info("get predicted data by host, recv times is {}".format(send_times))
     predict_var = self.transfer_inst.predict_data_by_host
     transfer_tag = self.transfer_inst.generate_transferid(predict_var, send_times)
     return federation.get(name=predict_var.name,
                           tag=transfer_tag,
                           idx=0)
Exemplo n.º 9
0
    def predict(self, data_instances, predict_param):
        """
        Predict labels for ``data_instances``.

        With ``self.use_encrypt`` set, ``wx`` is sent to the arbiter and the
        result received from it is joined with the input labels. Otherwise
        probabilities and labels are computed locally.

        Returns
        -------
        Table whose values are ``(label, probability_or_None, prediction)``.
        The probability slot is ``None`` in the encrypted branch and whenever
        ``predict_param.with_proba`` is falsy.
        """
        if self.has_sychronized_encryption:
            LOGGER.info("in predict, has synchronize encryption information")
        else:
            self.__synchronize_encryption()
            self.__load_arbiter_model()

        wx = self.compute_wx(data_instances, self.coef_, self.intercept_)

        if not self.use_encrypt:
            # Plain path: everything is computed on this party.
            pred_prob = wx.mapValues(lambda score: activation.sigmoid(score))
            pred_label = self.classified(pred_prob, predict_param.threshold)
            if predict_param.with_proba:
                labels = data_instances.mapValues(lambda inst: inst.label)
                label_and_prob = labels.join(pred_prob,
                                             lambda label, prob: (label, prob))
            else:
                label_and_prob = data_instances.mapValues(
                    lambda inst: (inst.label, None))
            return label_and_prob.join(
                pred_label, lambda pair, pred: (pair[0], pair[1], pred))

        # Encrypted path: the arbiter turns wx into the prediction result.
        transfer = self.transfer_variable
        wx_tag = transfer.generate_transferid(transfer.predict_wx)
        federation.remote(wx,
                          name=transfer.predict_wx.name,
                          tag=wx_tag,
                          role=consts.ARBITER,
                          idx=0)
        result_tag = transfer.generate_transferid(transfer.predict_result)
        arbiter_result = federation.get(name=transfer.predict_result.name,
                                        tag=result_tag,
                                        idx=0)
        return arbiter_result.join(
            data_instances, lambda pred, inst: (inst.label, None, pred))
Exemplo n.º 10
0
    def transform(self, data_instances):
        """
        Bin ``data_instances`` with the stored split points, combine the bins
        with the encrypted labels received from the guest and send the
        resulting encrypted per-bin sums back to the guest. Returns ``None``.
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # 1. Synchronize encryption information
        self.__synchronize_encryption()

        # One list of split points per entry of self.iv_attrs.
        split_points = [list(iv_attr.split_points) for iv_attr in self.iv_attrs]
        data_bin_table = self.binning_obj.transform(data_instances, split_points, self.cols)

        transfer = self.transfer_variable
        label_tag = transfer.generate_transferid(transfer.encrypted_label)
        encrypted_label_table = federation.get(name=transfer.encrypted_label.name,
                                               tag=label_tag,
                                               idx=0)
        LOGGER.info("Get encrypted_label_table from guest")

        encrypted_bin_sum = self.__static_encrypted_bin_label(
            data_bin_table, encrypted_label_table, self.cols)
        bin_sum_tag = transfer.generate_transferid(transfer.encrypted_bin_sum)
        federation.remote(encrypted_bin_sum,
                          name=transfer.encrypted_bin_sum.name,
                          tag=bin_sum_tag,
                          role=consts.GUEST,
                          idx=0)
        LOGGER.info("Sent encrypted_bin_sum to guest")
Exemplo n.º 11
0
    def __synchronize_encryption(self, mode='train'):
        """
        Tell the arbiter whether this party uses encryption and, if it does,
        receive the public key and install it on ``self.encrypt_operator``.

        ``mode`` is forwarded to ``generate_transferid`` when building the
        transfer tags. Sets ``self.has_sychronized_encryption`` to ``True``.
        """
        transfer = self.transfer_variable

        # Announce the use-encryption flag.
        flag_tag = transfer.generate_transferid(transfer.use_encrypt, mode)
        LOGGER.debug("Start to remote use_encrypt: {}, transfer_id: {}".format(
            self.use_encrypt, flag_tag))
        federation.remote(self.use_encrypt,
                          name=transfer.use_encrypt.name,
                          tag=flag_tag,
                          role=consts.ARBITER,
                          idx=0)

        # A public key is only exchanged when encryption is enabled.
        if self.use_encrypt:
            key_tag = transfer.generate_transferid(transfer.paillier_pubkey, mode)
            received_key = federation.get(name=transfer.paillier_pubkey.name,
                                          tag=key_tag,
                                          idx=0)
            LOGGER.debug("Received pubkey")
            self.encrypt_operator.set_public_key(received_key)

        LOGGER.info("Finish synchronized ecryption")
        self.has_sychronized_encryption = True
Exemplo n.º 12
0
    def intersect_join_id(self, data_instances):
        """
        Join the ids received from the other party with the local ids.

        When encoding is enabled the local ids are encoded before the join and
        decoded again for the returned table. If
        ``self.send_intersect_id_flag`` is set, the (possibly encoded)
        intersection is sent to the other party.

        Returns
        -------
        Table of the intersecting ids with the value ``"intersect_id"``.

        Raises
        ------
        ValueError
            If the encode method is not supported or the role is neither host
            nor guest.
        """
        LOGGER.info("Join id role is {}".format(self.role))

        # Build the local id table, encoded when requested.
        sid_encode_pair = None
        needs_encoding = self.with_encode and self.encode_params.encode_method != "none"
        if not needs_encoding:
            data_sid = data_instances.mapValues(lambda _inst: 1)
        elif not Encode.is_support(self.encode_params.encode_method):
            raise ValueError("Unknown encode_method, please check the configure of encode_param")
        else:
            encode_operator = Encode(self.encode_params.encode_method, self.encode_params.base64)
            # Table of (encoded_id, original_id).
            sid_encode_pair = data_instances.map(
                lambda sid, _inst: (encode_operator.compute(sid, postfit_salt=self.encode_params.salt), sid))
            data_sid = sid_encode_pair.mapValues(lambda _sid: 1)

        # Select the transfer variable carrying the other party's ids.
        transfer = self.transfer_variable
        if self.role == consts.HOST:
            incoming_var = transfer.send_ids_guest
        elif self.role == consts.GUEST:
            incoming_var = transfer.send_ids_host
        else:
            raise ValueError("Unknown intersect role, please check the code")

        recv_ids = get(name=incoming_var.name,
                       tag=transfer.generate_transferid(incoming_var),
                       idx=0)

        LOGGER.info("Get intersect_host_ids from role-send")
        send_intersect_ids = recv_ids.join(data_sid, lambda _recv, _local: "intersect_id")
        LOGGER.info("Finish intersect_ids computing")

        if self.send_intersect_id_flag:
            if self.role == consts.GUEST:
                outgoing_var = transfer.intersect_ids_guest
                recv_role = consts.HOST
            elif self.role == consts.HOST:
                outgoing_var = transfer.intersect_ids_host
                recv_role = consts.GUEST
            else:
                raise ValueError("Unknown intersect role, please check the code")

            remote(send_intersect_ids,
                   name=outgoing_var.name,
                   tag=transfer.generate_transferid(outgoing_var),
                   role=recv_role,
                   idx=0)
            LOGGER.info("Remote intersect ids to role-send")

        if not sid_encode_pair:
            return send_intersect_ids

        # Map the encoded intersection back to the original ids.
        decoded_pairs = send_intersect_ids.join(sid_encode_pair, lambda _flag, sid: sid)
        return decoded_pairs.map(lambda _encoded, sid: (sid, 'intersect_id'))
Exemplo n.º 13
0
    def _received_result_cols(self, filter_name):
        """
        Receive the left-column result of filter ``filter_name``, update
        ``self.left_cols`` through ``_renew_final_left_cols`` and append a
        ``FeatureSelectionFilterParam`` describing the outcome to
        ``self.results``.
        """
        transfer = self.transfer_variable
        cols_tag = transfer.generate_transferid(transfer.result_left_cols, filter_name)
        left_cols = federation.get(name=transfer.result_left_cols.name,
                                   tag=cols_tag,
                                   idx=0)
        LOGGER.info(
            "Received left columns from guest, received left_cols: {}".format(
                left_cols))

        LOGGER.debug("Before renew: self.left_cols: {}".format(self.left_cols))
        self._renew_final_left_cols(left_cols)
        LOGGER.debug("After renew: self.left_cols: {}".format(self.left_cols))

        # Column name -> is_left flag for every column tracked locally.
        left_flag_by_name = {
            self.header[col_idx]: is_left
            for col_idx, is_left in self.left_cols.items()
        }
        # Names of the columns the received result refers to.
        received_col_names = [self.header[col_idx]
                              for col_idx in list(left_cols.keys())]

        left_col_obj = feature_selection_param_pb2.LeftCols(
            original_cols=received_col_names, left_cols=left_flag_by_name)
        self.results.append(
            feature_selection_param_pb2.FeatureSelectionFilterParam(
                feature_values={}, left_cols=left_col_obj, filter_name=filter_name))
        LOGGER.info("Received Left cols are {}".format(self.left_cols))
Exemplo n.º 14
0
    def __synchronize_aggregate_classed_list(self):
        """
        Distribute the aggregated class list.

        The guest sends ``self.classes`` to the host and, when
        ``self.has_arbiter`` is set, to the arbiter. Host and arbiter store
        the received list in ``self.classes``.

        Raises
        ------
        ValueError
            If ``self.role`` is not guest, host or arbiter.
        """
        transfer = self.transfer_variable

        def send_classes_to(receiver_role):
            # One transfer per receiver, all using the same variable.
            federation.remote(
                self.classes,
                name=transfer.aggregate_classes.name,
                tag=transfer.generate_transferid(transfer.aggregate_classes),
                role=receiver_role,
                idx=0)

        if self.role == consts.GUEST:
            send_classes_to(consts.HOST)
            if self.has_arbiter:
                send_classes_to(consts.ARBITER)
            return

        if self.role != consts.HOST and self.role != consts.ARBITER:
            raise ValueError("Unknown role:{}".format(self.role))

        self.classes = federation.get(
            name=transfer.aggregate_classes.name,
            tag=transfer.generate_transferid(transfer.aggregate_classes),
            idx=0)
Exemplo n.º 15
0
    def sync_stop_flag(self, num_round):
        """Fetch and return the stop flag tagged with boosting round ``num_round``."""
        LOGGER.info("sync stop flag from guest, boosting round is {}".format(num_round))
        flag_var = self.transfer_inst.stop_flag
        transfer_tag = self.transfer_inst.generate_transferid(flag_var, num_round)
        return federation.get(name=flag_var.name,
                              tag=transfer_tag,
                              idx=0)
Exemplo n.º 16
0
    def recv_host_sum_from_host(self):
        """Fetch and return the value transferred under ``host_sum``."""
        sum_var = self.transfer_inst.host_sum
        transfer_tag = self.transfer_inst.generate_transferid(sum_var)
        return federation.get(name=sum_var.name,
                              tag=transfer_tag,
                              idx=0)
Exemplo n.º 17
0
    def predict(self, data_instances, predict_param):
        """
        Predict labels by combining the local ``wx`` with the host's part.

        Returns
        -------
        Table whose values are ``(label, probability_or_None, prediction)``.
        The probability is only filled in when ``predict_param.with_proba``
        is truthy.
        """
        LOGGER.info("Start predict ...")
        guest_wx = self.compute_wx(data_instances, self.coef_,
                                   self.intercept_)

        host_prob_var = self.transfer_variable.host_prob
        host_wx = federation.get(
            name=host_prob_var.name,
            tag=self.transfer_variable.generate_transferid(host_prob_var),
            idx=0)
        LOGGER.info("Get probability from Host")

        # Sum both parts per key, then squash with the sigmoid.
        pred_prob = guest_wx.join(
            host_wx, lambda own, other: activation.sigmoid(own + other))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if predict_param.with_proba:
            true_labels = data_instances.mapValues(lambda inst: inst.label)
            label_and_prob = true_labels.join(
                pred_prob, lambda label, prob: (label, prob))
        else:
            label_and_prob = data_instances.mapValues(
                lambda inst: (inst.label, None))

        return label_and_prob.join(
            pred_label, lambda pair, pred: (pair[0], pair[1], pred))
Exemplo n.º 18
0
    def __re_encrypt(self, iter_num):
        """
        Re-encrypt the model weights of the hosts for iteration ``iter_num``.

        ``self.curt_re_encrypt_times`` starts as a copy of
        ``self.re_encrypt_times`` and holds how many rounds each host still
        needs. In every round each host with rounds left sends its model,
        which is decrypted, encrypted again with that host's encrypter and
        returned. The loop ends once the remaining counts sum to zero.
        """
        self.curt_re_encrypt_times = self.re_encrypt_times.copy()
        transfer = self.transfer_variable

        batch_num = 0
        all_done = False
        while not all_done:
            # Tags advance by re_encrypt_batches per round.
            batch_num += self.re_encrypt_batches
            incoming_tag = transfer.generate_transferid(
                transfer.to_encrypt_model, iter_num, batch_num)
            outgoing_tag = transfer.generate_transferid(
                transfer.re_encrypted_model, iter_num, batch_num)

            for host_idx, remaining in enumerate(self.curt_re_encrypt_times):
                if remaining <= 0:
                    # This host has no re-encryption rounds left.
                    continue

                stale_model = federation.get(
                    name=transfer.to_encrypt_model.name,
                    tag=incoming_tag,
                    idx=host_idx)
                host_encrypter = self.host_encrypter[host_idx]
                plain_model = host_encrypter.decrypt_list(stale_model)
                fresh_model = host_encrypter.encrypt_list(plain_model)
                federation.remote(
                    fresh_model,
                    name=transfer.re_encrypted_model.name,
                    tag=outgoing_tag,
                    role=consts.HOST,
                    idx=host_idx)

                self.curt_re_encrypt_times[host_idx] = remaining - 1

            all_done = sum(self.curt_re_encrypt_times) == 0
Exemplo n.º 19
0
 def sync_tree_node_queue(self, dep=-1):
     """Fetch the tree node queue tagged with depth ``dep`` and store it in ``self.tree_node_queue``."""
     LOGGER.info("get tree node queue of depth {}".format(dep))
     queue_var = self.transfer_inst.tree_node_queue
     transfer_tag = self.transfer_inst.generate_transferid(queue_var, dep)
     self.tree_node_queue = federation.get(name=queue_var.name,
                                           tag=transfer_tag,
                                           idx=0)
Exemplo n.º 20
0
 def sync_node_positions(self, dep=-1):
     """
     Fetch and return the node positions tagged with depth ``dep``.

     Parameters
     ----------
     dep: depth forwarded to ``generate_transferid`` to build the tag.
     """
     # The message used to say "tree node queue" (copied from the sibling
     # method), which misreported what is being fetched.
     LOGGER.info("get node positions of depth {}".format(dep))
     node_positions = federation.get(name=self.transfer_inst.node_positions.name,
                                     tag=self.transfer_inst.generate_transferid(self.transfer_inst.node_positions,
                                                                                dep),
                                     idx=0)
     return node_positions
Exemplo n.º 21
0
    def predict(self, data_instances, predict_param):
        """
        Prediction of lr

        Parameters
        ----------
        data_instances: table of instances; each value exposes ``label``.
        predict_param: provides ``threshold`` and ``with_proba``.

        Returns
        ----------
        Table whose values are ``(label, probability_or_None, prediction)``.
        The probability is ``None`` unless ``predict_param.with_proba`` is
        truthy.
        """
        LOGGER.info("Start predict ...")

        # wx is computed on the transformed features; labels are read from
        # the untouched input further down.
        features = self.transform(data_instances)
        guest_wx = self.compute_wx(features, self.coef_, self.intercept_)

        transfer = self.transfer_variable
        host_wx = federation.get(name=transfer.host_prob.name,
                                 tag=transfer.generate_transferid(transfer.host_prob),
                                 idx=0)
        LOGGER.info("Get probability from Host")

        # Sum both parts per key, then squash with the sigmoid.
        pred_prob = guest_wx.join(host_wx, lambda own, other: activation.sigmoid(own + other))
        pred_label = self.classified(pred_prob, predict_param.threshold)

        if not predict_param.with_proba:
            label_and_prob = data_instances.mapValues(lambda inst: (inst.label, None))
        else:
            true_labels = data_instances.mapValues(lambda inst: inst.label)
            label_and_prob = true_labels.join(pred_prob, lambda label, prob: (label, prob))

        return label_and_prob.join(pred_label, lambda pair, pred: (pair[0], pair[1], pred))
Exemplo n.º 22
0
    def __synchronize_classes_list(self):
        """
        Merge the class lists of guest and host, then distribute the result.

        In homo mode the host sends its classes to the guest, and the guest
        adds every received class to ``self.classes``. In every mode the
        aggregated classes are then synchronised through
        ``__synchronize_aggregate_classed_list``.
        """
        # The mode constant was garbled to ``consts.H**O``, which parses as
        # ``consts.H ** O`` and fails at runtime.
        if self.mode == consts.HOMO:
            if self.role == consts.GUEST:
                host_classes_list = federation.get(
                    name=self.transfer_variable.host_classes.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_classes),
                    idx=0)

                # Fold the host's classes into the local collection.
                for host_class in host_classes_list:
                    self.classes.add(host_class)

            elif self.role == consts.HOST:
                federation.remote(
                    self.classes,
                    name=self.transfer_variable.host_classes.name,
                    tag=self.transfer_variable.generate_transferid(
                        self.transfer_variable.host_classes),
                    role=consts.GUEST,
                    idx=0)

        self.__synchronize_aggregate_classed_list()
Exemplo n.º 23
0
 def sync_dispatch_node_host(self, dep):
     """Fetch and return the nodes to dispatch tagged with depth ``dep``."""
     LOGGER.info("get node from host to dispath, depth is {}".format(dep))
     dispatch_var = self.transfer_inst.dispatch_node_host
     transfer_tag = self.transfer_inst.generate_transferid(dispatch_var, dep)
     return federation.get(name=dispatch_var.name,
                           tag=transfer_tag,
                           idx=0)
Exemplo n.º 24
0
 def sync_encrypted_grad_and_hess(self):
     """Fetch the encrypted gradients and hessians and store them in ``self.grad_and_hess``."""
     LOGGER.info("get encrypted grad and hess")
     grad_hess_var = self.transfer_inst.encrypted_grad_and_hess
     transfer_tag = self.transfer_inst.generate_transferid(grad_hess_var)
     self.grad_and_hess = federation.get(name=grad_hess_var.name,
                                         tag=transfer_tag,
                                         idx=0)
Exemplo n.º 25
0
 def sync_predict_finish_tag(self, recv_times):
     """Fetch and return the predict-finish tag tagged with ``recv_times``."""
     LOGGER.info("get the {}-th predict finish tag from guest".format(recv_times))
     finish_var = self.transfer_inst.predict_finish_tag
     transfer_tag = self.transfer_inst.generate_transferid(finish_var, recv_times)
     return federation.get(name=finish_var.name,
                           tag=transfer_tag,
                           idx=0)
Exemplo n.º 26
0
    def _synchronous_data(self, data_instance, flowid, data_application=None):
        """
        Align the host's data with the ids held by the guest.

        The guest sends its ids (values replaced by 1) to the host and returns
        ``None``. The host joins the received ids with its own
        ``data_instance`` and returns the joined table. ``None`` is also
        returned, after a warning, when ``data_application`` is missing or
        not one of the train/test constants, and for any other role.
        """
        if data_application is None:
            LOGGER.warning("not data_application!")
            return None

        transfer_variable = HeteroWorkFlowTransferVariable()
        if data_application == consts.TRAIN_DATA:
            data_var = transfer_variable.train_data
        elif data_application == consts.TEST_DATA:
            data_var = transfer_variable.test_data
        else:
            LOGGER.warning("data_application error!")
            return None

        if self.role == consts.GUEST:
            # Only the keys matter to the receiver; values become 1.
            guest_sids = data_instance.mapValues(lambda _inst: 1)
            federation.remote(
                guest_sids,
                name=data_var.name,
                tag=transfer_variable.generate_transferid(data_var, flowid),
                role=consts.HOST,
                idx=0)
            LOGGER.info("remote {} to host".format(data_application))
            return None

        if self.role == consts.HOST:
            received_sids = federation.get(
                name=data_var.name,
                tag=transfer_variable.generate_transferid(data_var, flowid),
                idx=0)
            LOGGER.info("get {} from guest".format(data_application))
            # Keep the local instance for every id the guest also holds.
            return received_sids.join(data_instance, lambda _sid, inst: inst)

        return None
Exemplo n.º 27
0
    def _distributed_negative_sampling_dst(self, adj_instances, src=consts.HOST, dst=consts.GUEST):
        """Build the destination-side table of negative samples.

        The ids to sample for are received from the ``src`` party. For each
        id, one entry is drawn from the negative distribution generated from
        ``adj_instances`` and stored in a fresh non-persistent table as
        ``id -> (sampled_entry, -1)``.

        :param adj_instances: input handed to
            ``NeighborsSampling.generate_nega_distribution``.
        :param src: party that produced the ids; ``consts.HOST`` or
            ``consts.GUEST``.
        :param dst: the opposite party of ``src``; also used as the prefix of
            the created table's name.
        :return: the table filled with the negative samples.
        :raises NameError: if ``src`` is not host/guest or ``dst`` is not the
            opposite party. (The exception type is kept as-is for existing
            callers.)
        """
        if src == consts.HOST:
            if dst != consts.GUEST:
                raise NameError("if src is host, then dst should be guest!!!")
            nega_ids_transfer = self.transfer_variable.host_neg_samp_ids
        elif src == consts.GUEST:
            if dst != consts.HOST:
                raise NameError("if src is guest, then dst should be host!!!")
            nega_ids_transfer = self.transfer_variable.guest_neg_samp_ids
        else:
            raise NameError("src should be choose from {host, guest}")

        distributed_negative_ids = federation.get(name=nega_ids_transfer.name,
                                                  tag=self.transfer_variable.generate_transferid(nega_ids_transfer),
                                                  idx=0)
        LOGGER.info("Get distributed nagative samples from {}".format(src))

        # Log a preview of at most 10 ids. The bound is clamped to the number
        # of ids actually received so a short list no longer raises IndexError.
        preview_count = min(10, len(distributed_negative_ids))
        for i in range(preview_count):
            LOGGER.info("id:{}".format(distributed_negative_ids[i]))

        # Sample some negative samples.
        # Each distribution entry is read as entry[0] (value stored in the
        # table) and entry[1] (weight given to the sampler) -- presumably
        # (node, probability) pairs; confirm against NeighborsSampling.
        distribution = NeighborsSampling.generate_nega_distribution(adj_instances)
        sampler = DiscreteDistributionSampler([data[1] for data in distribution])

        distributed_negative_instances_dst = eggroll.table(name=dst + eggroll.generateUniqueId(),
                                                           namespace='neighbors_sampling/distributed_sampling',
                                                           persistent=False)

        for sample_id in distributed_negative_ids:
            index = sampler.sampling()
            distributed_negative_instances_dst.put(sample_id, (distribution[index][0], -1))

        logDtableInstances(LOGGER, distributed_negative_instances_dst, isInstance=False)

        return distributed_negative_instances_dst
Exemplo n.º 28
0
    def fit(self, data_instances):
        """
        Fit split points locally, obtain the host's iv/woe attributes through an
        encrypted-label exchange, then transform the data.

        When the labels are not binary, iv is not supported: only the split
        points are fitted and ``data_instances`` is returned unchanged.
        Otherwise ``self.data_output`` is returned after ``self.transform`` ran.
        """
        LOGGER.info("Start feature binning fit and transform")
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)
        self.binning_obj.fit_split_points(data_instances)

        if not data_overview.is_binary_labels(data_instances):
            LOGGER.warning("Iv is not supported for Multiple-label data.")
            return data_instances

        # Step 1: synchronize encryption information.
        self.__synchronize_encryption()

        # Step 2: load the instances and extract their labels.
        data_instances = data_instances.mapValues(self.load_data)
        self.set_schema(data_instances)
        labels = data_instances.mapValues(lambda inst: inst.label)

        # Step 3: encrypt the labels and ship them to the host.
        encrypt_label = functools.partial(self.encrypt, encryptor=self.encryptor)
        encrypted_labels = labels.mapValues(encrypt_label)

        label_var = self.transfer_variable.encrypted_label
        label_tag = self.transfer_variable.generate_transferid(label_var)
        federation.remote(encrypted_labels,
                          name=label_var.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Sent encrypted_label_table to host")

        # Step 4: run the local binning now, while the other party may still
        # be busy computing on its own data.
        data_instances = self.fit_local(data_instances, labels)

        # Step 5: receive the host's encrypted bin sums and derive iv/woe.
        bin_sum_var = self.transfer_variable.encrypted_bin_sum
        bin_sum_tag = self.transfer_variable.generate_transferid(bin_sum_var)
        encrypted_bin_sum = federation.get(name=bin_sum_var.name,
                                           tag=bin_sum_tag,
                                           idx=0)
        LOGGER.info("Get encrypted_bin_sum from host")

        decrypted_counts = self.__decrypt_bin_sum(encrypted_bin_sum)

        # Only a single host is supported here, hence the fixed key.
        self.host_results[consts.HOST] = self.binning_obj.cal_iv_woe(
            decrypted_counts, self.model_param.adjustment_factor)

        self.set_schema(data_instances)
        self.transform(data_instances)
        LOGGER.info("Finish feature binning fit and transform")
        return self.data_output
Exemplo n.º 29
0
    def fit(self, data_instances):
        """
        Compute iv attributes for the local features and, through an
        encrypted-label exchange, for the host's features.

        The host attributes are also stored in ``self.host_iv_attrs``.

        :return: dict with key ``'local'`` (result of ``self.fit_local``) and
            key ``'remote'`` (iv attributes computed from the host's bin sums).
        """
        self._abnormal_detection(data_instances)
        self._parse_cols(data_instances)

        # Step 1: synchronize encryption information.
        self.__synchronize_encryption()

        # Step 2: load the instances and extract their labels.
        data_instances = data_instances.mapValues(self.load_data)
        labels = data_instances.mapValues(lambda inst: inst.label)

        # Step 3: encrypt the labels and ship them to the host.
        encrypt_label = functools.partial(self.encrypt, encryptor=self.encryptor)
        encrypted_labels = labels.mapValues(encrypt_label)

        label_var = self.transfer_variable.encrypted_label
        label_tag = self.transfer_variable.generate_transferid(label_var)
        federation.remote(encrypted_labels,
                          name=label_var.name,
                          tag=label_tag,
                          role=consts.HOST,
                          idx=0)
        LOGGER.info("Sent encrypted_label_table to host")

        # Step 4: run the local binning now, while the other party may still
        # be busy computing on its own data.
        local_iv = self.fit_local(data_instances, labels)

        # Step 5: receive the host's encrypted bin sums and derive iv/woe.
        bin_sum_var = self.transfer_variable.encrypted_bin_sum
        bin_sum_tag = self.transfer_variable.generate_transferid(bin_sum_var)
        encrypted_bin_sum = federation.get(name=bin_sum_var.name,
                                           tag=bin_sum_tag,
                                           idx=0)
        LOGGER.info("Get encrypted_bin_sum from host")

        decrypted_counts = self.__decrypt_bin_sum(encrypted_bin_sum)
        remote_iv = self.binning_obj.cal_iv_woe(decrypted_counts,
                                                self.bin_param.adjustment_factor)
        self.host_iv_attrs = remote_iv

        for position, attr in enumerate(remote_iv):
            LOGGER.info("The remote iv of {}th measured feature is {}".format(
                position, attr.iv))

        return {'local': local_iv, 'remote': remote_iv}
Exemplo n.º 30
0
 def sync_tree_dim(self):
     """Fetch the tree dimension and store it in ``self.tree_dim``.

     The ``tree_dim`` transfer variable is read through ``federation.get``
     (party index 0); the received value is logged with ``%d`` formatting.
     Nothing is returned.
     """
     LOGGER.info("sync tree dim from guest")

     tree_dim_var = self.transfer_inst.tree_dim
     var_name = tree_dim_var.name
     transfer_tag = self.transfer_inst.generate_transferid(tree_dim_var)

     self.tree_dim = federation.get(name=var_name, tag=transfer_tag, idx=0)
     LOGGER.info("tree dim is %d" % (self.tree_dim))