Example No. 1: cross-party Pearson correlation fit using SPDZ fixed-point tensors
    def fit(self, data_instance):
        # local
        data = self._select_columns(data_instance)
        n, normed = self._standardized(data)
        self.local_corr = table_dot(normed, normed)
        self.local_corr /= n

        if self.model_param.cross_parties:
            with SPDZ("pearson",
                      local_party=self.local_party,
                      all_parties=self.parties,
                      use_mix_rand=self.model_param.use_mix_rand) as spdz:
                source = [normed, self.other_party]
                if self.local_party.role == "guest":
                    x = FixedPointTensor.from_source("x", source[0])
                    y = FixedPointTensor.from_source("y", source[1])
                else:
                    y = FixedPointTensor.from_source("y", source[0])
                    x = FixedPointTensor.from_source("x", source[1])
                m1 = len(x.value.first()[1])
                m2 = len(y.value.first()[1])
                self.shapes.append(m1)
                self.shapes.append(m2)

                self.corr = spdz.dot(x, y, "corr").get() / n
        else:
            self.shapes.append(self.local_corr.shape[0])
            self.parties = [self.local_party]

        self._callback()
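Behind this fit, the SPDZ dot product is doing ordinary Pearson algebra: once every column is standardized to zero mean and unit variance, the correlation matrix is just a normalized matrix product. A plaintext sketch of that identity (numpy only; X and Y are hypothetical stand-ins for the two parties' selected columns):

import numpy as np

def standardize(a):
    # per-column zero mean, unit variance, mirroring _standardized above
    return (a - a.mean(axis=0)) / a.std(axis=0)

n = 100
X = np.random.rand(n, 3)   # stand-in for the guest's selected columns
Y = np.random.rand(n, 2)   # stand-in for the host's selected columns

nx, ny = standardize(X), standardize(Y)
local_corr = nx.T @ nx / n   # plaintext analogue of table_dot(normed, normed) / n
cross_corr = nx.T @ ny / n   # the quantity spdz.dot(x, y, "corr") computes on shares

# agreement with numpy's own Pearson matrix on the pooled data
full = np.corrcoef(np.hstack([X, Y]), rowvar=False)
assert np.allclose(local_corr, full[:3, :3])
assert np.allclose(cross_corr, full[:3, 3:])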
Example No. 2: sharing a tensor from one party and reconstructing it with get()
def create_and_get(job_id, idx, data):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data)
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
        return x.get()
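Conceptually, from_source splits the tensor into additive shares (the owning party keeps one, the peer receives the other), and get() reconstructs by summing the shares inside the field. A minimal single-process sketch of that round trip, with an illustrative modulus q (not necessarily FATE's default):

import numpy as np

q = 2 ** 61 - 1                        # illustrative field modulus
rng = np.random.default_rng(0)

def share(x):
    # split an integer tensor into two additive shares mod q
    r = rng.integers(0, q, size=x.shape)
    return r, (x - r) % q

x = np.array([1, 2, 3], dtype=np.int64)
s0, s1 = share(x)                      # party 0 keeps s0, party 1 receives s1
assert np.array_equal((s0 + s1) % q, x)   # what x.get() reconstructs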
Example No. 3: multiplying a shared tensor by a plaintext, in both operand orders
def mul_plaintext(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
        y = data_list[1]
        return (x * y).get(), (y * x).get()
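Multiplying by a plaintext needs no communication and no triples: each party simply scales its own share, which is why both x * y and y * x are cheap and symmetric. A toy sketch under the additive-sharing assumptions above:

q = 2 ** 61 - 1
x0, x1 = 4, 6        # additive shares of x = 10
y = 3                # public multiplier
z0, z1 = (x0 * y) % q, (x1 * y) % q   # each party scales its own share
assert (z0 + z1) % q == 30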
Example No. 4: broadcasting a local reconstruction share to the other parties
    def broadcast_reconstruct_share(self, tensor_name=None):
        from federatedml.secureprotol.spdz import SPDZ
        spdz = SPDZ.get_instance()
        share_val = self.value.copy()
        name = tensor_name or self.tensor_name
        if name is None:
            raise ValueError("name not specified")
        # remote share to other parties
        # (note: "rescontruct" is the communicator method's actual spelling)
        spdz.communicator.broadcast_rescontruct_share(share_val, name)
        return share_val
Example No. 5: secure einsum between two shared tensors
def einsum(job_id, idx, einsum_expr, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
            y = FixedPointTensor.from_source("y", all_parties[1])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
            y = FixedPointTensor.from_source("y", data_list[1])
        return x.einsum(y, einsum_expr).get()
Example No. 6: secure matrix multiplication of two shared tensors
def mat_mul(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
            y = FixedPointTensor.from_source("y", all_parties[1])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
            y = FixedPointTensor.from_source("y", data_list[1])
        return (x @ y).get()
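Unlike plaintext multiplication, a product of two shared tensors cannot be computed locally; SPDZ-style protocols consume precomputed multiplication ("Beaver") triples. A scalar sketch of the idea (illustration only, not FATE's actual implementation):

import numpy as np

q = 2 ** 61 - 1
rng = np.random.default_rng(1)

def share(v):
    r = int(rng.integers(0, q))
    return r, (v - r) % q

# a dealer precomputes shares of a random triple with a * b = c
a, b = 3, 7
a0, a1 = share(a); b0, b1 = share(b); c0, c1 = share(a * b)

# the actual secrets, also shared
x, y = 5, 9
x0, x1 = share(x); y0, y1 = share(y)

# parties jointly open the masked values e = x - a and f = y - b
e = (x0 - a0 + x1 - a1) % q
f = (y0 - b0 + y1 - b1) % q

# purely local recombination; only party 0 adds the public e * f term
z0 = (c0 + e * b0 + f * a0 + e * f) % q
z1 = (c1 + e * b1 + f * a1) % q
assert (z0 + z1) % q == x * y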
Example No. 7: addition and subtraction of two shared tensors
def add_and_sub(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
            y = FixedPointTensor.from_source("y", all_parties[1])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
            y = FixedPointTensor.from_source("y", data_list[1])
        a = (x + y).get()
        b = (x - y).get()
        return a, b
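Addition and subtraction of two shared tensors are purely local: each party combines its own shares, and the results still sum to x + y and x - y modulo the field. In miniature:

q = 2 ** 61 - 1
x0, x1 = 4, 6    # shares of x = 10
y0, y1 = 1, 1    # shares of y = 2
assert ((x0 + y0) + (x1 + y1)) % q == 12   # x + y
assert ((x0 - y0) + (x1 - y1)) % q == 8    # x - y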
Example No. 8: addition and subtraction between a shared tensor and a plaintext
def add_and_sub_plaintext(job_id, idx, data_list):
    _, all_parties = session_init(job_id, idx)
    with SPDZ():
        if idx == 0:
            x = FixedPointTensor.from_source("x", data_list[0])
        else:
            x = FixedPointTensor.from_source("x", all_parties[0])
        y = data_list[1]
        a = (x + y).get()
        a1 = (y + x).get()
        b = (x - y).get()
        b1 = (y - x).get()
        return a, a1, b, b1
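Mixing in a plaintext works the same local way, except the public constant is folded into exactly one party's share (adding it to both shares would double it). A toy sketch:

q = 2 ** 61 - 1
x0, x1 = 4, 6                        # additive shares of x = 10
y = 3                                # public plaintext

z0, z1 = (x0 + y) % q, x1            # x + y: one party folds the constant in
assert (z0 + z1) % q == 13

w0, w1 = (y - x0) % q, (-x1) % q     # y - x: negate shares, one party adds y
assert (w0 + w1) % q == (y - 10) % q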
Example No. 9: reconstructing a secret by summing all parties' shares (table variant)
    def rescontruct(self, tensor_name=None):
        from federatedml.secureprotol.spdz import SPDZ
        spdz = SPDZ.get_instance()
        share_val = self.value
        name = tensor_name or self.tensor_name

        if name is None:
            raise ValueError("name not specified")

        # remote share to other parties
        spdz.communicator.broadcast_rescontruct_share(share_val, name)

        # get shares from other parties
        for other_share in spdz.communicator.get_rescontruct_shares(name):
            share_val = _table_binary_op(share_val, other_share, self.q_field, operator.add)
        return share_val
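The modular reduction matters: shares only sum to the secret modulo q_field, and negative fixed-point values come back as residues in the upper half of the field that must be recentred. A minimal sketch:

q = 2 ** 61 - 1

def recentre(v):
    # residues above q // 2 represent negative numbers
    return v - q if v > q // 2 else v

x = -42
x0 = 123456789
x1 = (x - x0) % q
assert recentre((x0 + x1) % q) == -42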
Example No. 10: Pearson correlation fit with local VIF and summary reporting
    def fit(self, data_instance):
        # local
        data = self._select_columns(data_instance)
        n, normed = self._standardized(data)
        self.local_corr = table_dot(normed, normed)
        self.local_corr /= n
        if self.model_param.calc_local_vif:
            self.local_vif = self._vif_from_pearson_matrix(self.local_corr)
        self._summary["local_corr"] = self.local_corr.tolist()
        self._summary["num_local_features"] = n

        if self.model_param.cross_parties:
            with SPDZ(
                    "pearson",
                    local_party=self.local_party,
                    all_parties=self.parties,
                    use_mix_rand=self.model_param.use_mix_rand,
            ) as spdz:
                source = [normed, self.other_party]
                if self.local_party.role == "guest":
                    x, y = (
                        FixedPointTensor.from_source("x", source[0]),
                        FixedPointTensor.from_source("y", source[1]),
                    )
                else:
                    y, x = (
                        FixedPointTensor.from_source("y", source[0]),
                        FixedPointTensor.from_source("x", source[1]),
                    )
                m1 = len(x.value.first()[1])
                m2 = len(y.value.first()[1])
                self.shapes.append(m1)
                self.shapes.append(m2)

                self.corr = spdz.dot(x, y, "corr").get() / n
                self._summary["corr"] = self.corr.tolist()
                self._summary["num_remote_features"] = (
                    m2 if self.local_party.role == "guest" else m1)

        else:
            self.shapes.append(self.local_corr.shape[0])
            self.parties = [self.local_party]

        self._callback()
        self.set_summary(self._summary)
Example No. 11: an earlier, simpler variant of the Pearson correlation fit
    def fit(self, data_instance):
        data = self._select_columns(data_instance)
        n, normed = self._standardized(data)
        self.local_corr = table_dot(normed, normed)

        with SPDZ("pearson") as spdz:
            source = [normed, self._other_party]
            if self._local_party.role == "guest":
                x = FixedPointTensor.from_source("x", source[0])
                y = FixedPointTensor.from_source("y", source[1])
            else:
                y = FixedPointTensor.from_source("y", source[0])
                x = FixedPointTensor.from_source("x", source[1])
            m1 = len(x.value.first()[1])
            m2 = len(y.value.first()[1])
            self.shapes.append(m1)
            self.shapes.append(m2)

            self.corr = spdz.dot(x, y, "corr").get() / n
            self.local_corr /= n
        self._callback()
Example No. 12: reconstruct with optional broadcast (numpy-tensor variant)
    def reconstruct(self, tensor_name=None, broadcast=True):
        from federatedml.secureprotol.spdz import SPDZ
        spdz = SPDZ.get_instance()
        share_val = self.value.copy()
        LOGGER.debug(f"share_val: {share_val}")

        name = tensor_name or self.tensor_name

        if name is None:
            raise ValueError("name not specified")

        # remote share to other parties
        if broadcast:
            spdz.communicator.broadcast_rescontruct_share(share_val, name)

        # get shares from other parties and sum them inside the field
        for other_share in spdz.communicator.get_rescontruct_shares(name):
            share_val += other_share
        share_val %= self.q_field
        return share_val
Example No. 13: helper returning the active SPDZ instance
    @classmethod
    def get_spdz(cls):
        from federatedml.secureprotol.spdz import SPDZ
        return SPDZ.get_instance()
Example No. 14: hetero SSHE logistic regression training loop built on SPDZ
    def fit_binary(self, data_instances, validate_data=None):
        LOGGER.info("Starting to hetero_sshe_logistic_regression")
        self.callback_list.on_train_begin(data_instances, validate_data)

        model_shape = self.get_features_shape(data_instances)
        instances_count = data_instances.count()

        if not self.component_properties.is_warm_start:
            w = self._init_weights(model_shape)
            self.model_weights = LinearModelWeights(
                l=w, fit_intercept=self.model_param.init_param.fit_intercept)
            last_models = copy.deepcopy(self.model_weights)
        else:
            last_models = copy.deepcopy(self.model_weights)
            w = last_models.unboxed
            self.callback_warm_start_init_iter(self.n_iter_)

        self.batch_generator.initialize_batch_generator(
            data_instances, batch_size=self.batch_size)

        with SPDZ(
                "sshe_lr",
                local_party=self.local_party,
                all_parties=self.parties,
                q_field=self.q_field,
                use_mix_rand=self.model_param.use_mix_rand,
        ) as spdz:
            spdz.set_flowid(self.flowid)
            self.secure_matrix_obj.set_flowid(self.flowid)
            if self.role == consts.GUEST:
                self.labels = data_instances.mapValues(
                    lambda x: np.array([x.label], dtype=int))

            w_self, w_remote = self.share_model(w, suffix="init")
            last_w_self, last_w_remote = w_self, w_remote
            LOGGER.debug(
                f"first_w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}"
            )

            batch_data_generator = self.batch_generator.generate_batch_data()

            self.cipher_tool = []
            encoded_batch_data = []
            for batch_data in batch_data_generator:
                if self.fit_intercept:
                    batch_features = batch_data.mapValues(lambda x: np.hstack(
                        (x.features, 1.0)))
                else:
                    batch_features = batch_data.mapValues(lambda x: x.features)
                self.batch_num.append(batch_data.count())

                encoded_batch_data.append(
                    fixedpoint_table.FixedPointTensor(
                        self.fixedpoint_encoder.encode(batch_features),
                        q_field=self.fixedpoint_encoder.n,
                        endec=self.fixedpoint_encoder))

                self.cipher_tool.append(
                    EncryptModeCalculator(
                        self.cipher, self.encrypted_mode_calculator_param.mode,
                        self.encrypted_mode_calculator_param.re_encrypted_rate)
                )

            while self.n_iter_ < self.max_iter:
                self.callback_list.on_epoch_begin(self.n_iter_)
                LOGGER.info(f"start to n_iter: {self.n_iter_}")

                loss_list = []

                self.optimizer.set_iters(self.n_iter_)
                if not self.reveal_every_iter:
                    self.self_optimizer.set_iters(self.n_iter_)
                    self.remote_optimizer.set_iters(self.n_iter_)

                for batch_idx, batch_data in enumerate(encoded_batch_data):
                    current_suffix = (str(self.n_iter_), str(batch_idx))

                    if self.reveal_every_iter:
                        y = self.forward(weights=self.model_weights,
                                         features=batch_data,
                                         suffix=current_suffix,
                                         cipher=self.cipher_tool[batch_idx])
                    else:
                        y = self.forward(weights=(w_self, w_remote),
                                         features=batch_data,
                                         suffix=current_suffix,
                                         cipher=self.cipher_tool[batch_idx])

                    if self.role == consts.GUEST:
                        error = y - self.labels

                        self_g, remote_g = self.backward(
                            error=error,
                            features=batch_data,
                            suffix=current_suffix,
                            cipher=self.cipher_tool[batch_idx])
                    else:
                        self_g, remote_g = self.backward(
                            error=y,
                            features=batch_data,
                            suffix=current_suffix,
                            cipher=self.cipher_tool[batch_idx])

                    # loss computing;
                    suffix = ("loss", ) + current_suffix
                    if self.reveal_every_iter:
                        batch_loss = self.compute_loss(
                            weights=self.model_weights,
                            suffix=suffix,
                            cipher=self.cipher_tool[batch_idx])
                    else:
                        batch_loss = self.compute_loss(
                            weights=(w_self, w_remote),
                            suffix=suffix,
                            cipher=self.cipher_tool[batch_idx])

                    if batch_loss is not None:
                        batch_loss = batch_loss * self.batch_num[batch_idx]
                    loss_list.append(batch_loss)

                    if self.reveal_every_iter:
                        # LOGGER.debug(f"before reveal: self_g shape: {self_g.shape}, remote_g_shape: {remote_g},"
                        #              f"self_g: {self_g}")

                        new_g = self.reveal_models(self_g,
                                                   remote_g,
                                                   suffix=current_suffix)

                        # LOGGER.debug(f"after reveal: new_g shape: {new_g.shape}, new_g: {new_g}"
                        #              f"self.model_param.reveal_strategy: {self.model_param.reveal_strategy}")

                        if new_g is not None:
                            self.model_weights = self.optimizer.update_model(
                                self.model_weights, new_g, has_applied=False)

                        else:
                            self.model_weights = LinearModelWeights(
                                l=np.zeros(self_g.shape),
                                fit_intercept=self.model_param.init_param.
                                fit_intercept)
                    else:
                        if self.optimizer.penalty == consts.L2_PENALTY:
                            self_g = self_g + self.self_optimizer.alpha * w_self
                            remote_g = remote_g + self.remote_optimizer.alpha * w_remote

                        # LOGGER.debug(f"before optimizer: {self_g}, {remote_g}")

                        self_g = self.self_optimizer.apply_gradients(self_g)
                        remote_g = self.remote_optimizer.apply_gradients(
                            remote_g)

                        # LOGGER.debug(f"after optimizer: {self_g}, {remote_g}")
                        w_self -= self_g
                        w_remote -= remote_g

                    LOGGER.debug(
                        f"w_self shape: {w_self.shape}, w_remote_shape: {w_remote.shape}"
                    )

                if self.role == consts.GUEST:
                    loss = np.sum(loss_list) / instances_count
                    self.loss_history.append(loss)
                    if self.need_call_back_loss:
                        self.callback_loss(self.n_iter_, loss)
                else:
                    loss = None

                if self.converge_func_name in ["diff", "abs"]:
                    self.is_converged = self.check_converge_by_loss(
                        loss, suffix=(str(self.n_iter_), ))
                elif self.converge_func_name == "weight_diff":
                    if self.reveal_every_iter:
                        self.is_converged = self.check_converge_by_weights(
                            last_w=last_models.unboxed,
                            new_w=self.model_weights.unboxed,
                            suffix=(str(self.n_iter_), ))
                        last_models = copy.deepcopy(self.model_weights)
                    else:
                        self.is_converged = self.check_converge_by_weights(
                            last_w=(last_w_self, last_w_remote),
                            new_w=(w_self, w_remote),
                            suffix=(str(self.n_iter_), ))
                        last_w_self, last_w_remote = copy.deepcopy(
                            w_self), copy.deepcopy(w_remote)
                else:
                    raise ValueError(
                        f"Cannot recognize early_stop function: {self.converge_func_name}"
                    )

                LOGGER.info("iter: {},  is_converged: {}".format(
                    self.n_iter_, self.is_converged))
                self.callback_list.on_epoch_end(self.n_iter_)
                self.n_iter_ += 1

                if self.stop_training:
                    break

                if self.is_converged:
                    break

            # Finally reconstruct
            if not self.reveal_every_iter:
                new_w = self.reveal_models(w_self,
                                           w_remote,
                                           suffix=("final", ))
                if new_w is not None:
                    self.model_weights = LinearModelWeights(
                        l=new_w,
                        fit_intercept=self.model_param.init_param.fit_intercept
                    )

        LOGGER.debug(f"loss_history: {self.loss_history}")
        self.set_summary(self.get_model_summary())
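Stripped of the secret sharing and encryption machinery, the loop above is ordinary mini-batch gradient descent for logistic regression. A plaintext reference sketch (fit_plain is a hypothetical helper, numpy only) that mirrors the forward/backward/update/convergence structure:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def fit_plain(X, labels, lr=0.1, max_iter=100, tol=1e-6):
    # X: (n, d) features with a bias column appended; labels: (n,) in {0, 1}
    w = np.zeros(X.shape[1])
    last_loss = None
    for _ in range(max_iter):
        y = sigmoid(X @ w)                 # the forward() step
        error = y - labels                 # the guest-side residual
        g = X.T @ error / len(labels)      # the backward() step
        w -= lr * g                        # the optimizer update
        loss = -np.mean(labels * np.log(y) + (1 - labels) * np.log(1 - y))
        if last_loss is not None and abs(last_loss - loss) < tol:
            break                          # the "diff" convergence check
        last_loss = loss
    return w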
Example No. 15: benchmarking SPDZ operations over numpy and table tensors
    def _test_spdz(self):
        table_list = []
        table_int_data_x, table_float_data_x = None, None
        table_int_data_y, table_float_data_y = None, None
        if self.local_party.role == "guest":
            table_int_data_x = session.parallelize(
                self.int_data_x,
                include_key=False,
                partition=self.data_partition)
            table_int_data_x = table_int_data_x.mapValues(
                lambda x: np.array([x]))
            table_float_data_x = session.parallelize(
                self.float_data_x,
                include_key=False,
                partition=self.data_partition)
            table_float_data_x = table_float_data_x.mapValues(
                lambda x: np.array([x]))
        else:
            table_int_data_y = session.parallelize(
                self.int_data_y,
                include_key=False,
                partition=self.data_partition)
            table_int_data_y = table_int_data_y.mapValues(
                lambda y: np.array([y]))
            table_float_data_y = session.parallelize(
                self.float_data_y,
                include_key=False,
                partition=self.data_partition)
            table_float_data_y = table_float_data_y.mapValues(
                lambda y: np.array([y]))

        for tensor_type in ["numpy", "table"]:
            table = PrettyTable()
            table.set_style(ORGMODE)
            field_name = [
                "DataType", "One time consumption",
                f"{self.data_num} times consumption", "relative acc",
                "log2 acc", "operations per second"
            ]
            self._summary["field_name"] = field_name
            table.field_names = field_name

            with SPDZ(local_party=self.local_party,
                      all_parties=self.parties) as spdz:
                for op_type in self.op_test_list:
                    start_time = time.time()
                    for epoch in range(self.test_round):
                        LOGGER.info(
                            f"test spdz, tensor_type: {tensor_type}, op_type: {op_type}, epoch: {epoch}"
                        )
                        tag = "_".join([tensor_type, op_type, str(epoch)])
                        spdz.set_flowid(tag)
                        if self.local_party.role == "guest":
                            if tensor_type == "table":
                                if op_type.startswith("int"):
                                    fixed_point_x = TableTensor.from_source(
                                        "int_x_" + tag, table_int_data_x)
                                    fixed_point_y = TableTensor.from_source(
                                        "int_y_" + tag, self.other_party)
                                else:
                                    fixed_point_x = TableTensor.from_source(
                                        "float_x_" + tag, table_float_data_x)
                                    fixed_point_y = TableTensor.from_source(
                                        "float_y_" + tag, self.other_party)
                            else:
                                if op_type.startswith("int"):
                                    fixed_point_x = NumpyTensor.from_source(
                                        "int_x_" + tag, self.int_data_x)
                                    fixed_point_y = NumpyTensor.from_source(
                                        "int_y_" + tag, self.other_party)
                                else:
                                    fixed_point_x = NumpyTensor.from_source(
                                        "float_x_" + tag, self.float_data_x)
                                    fixed_point_y = NumpyTensor.from_source(
                                        "float_y_" + tag, self.other_party)
                        else:
                            if tensor_type == "table":
                                if op_type.startswith("int"):
                                    fixed_point_y = TableTensor.from_source(
                                        "int_y_" + tag, table_int_data_y)
                                    fixed_point_x = TableTensor.from_source(
                                        "int_x_" + tag, self.other_party)
                                else:
                                    fixed_point_y = TableTensor.from_source(
                                        "float_y_" + tag, table_float_data_y)
                                    fixed_point_x = TableTensor.from_source(
                                        "float_x_" + tag, self.other_party)
                            else:
                                if op_type.startswith("int"):
                                    fixed_point_y = NumpyTensor.from_source(
                                        "int_y_" + tag, self.int_data_y)
                                    fixed_point_x = NumpyTensor.from_source(
                                        "int_x_" + tag, self.other_party)
                                else:
                                    fixed_point_y = NumpyTensor.from_source(
                                        "float_y_" + tag, self.float_data_y)
                                    fixed_point_x = NumpyTensor.from_source(
                                        "float_x_" + tag, self.other_party)

                        ret = self.calculate_ret(op_type, tensor_type,
                                                 fixed_point_x, fixed_point_y)

                    total_time = time.time() - start_time
                    self.output_table(op_type, table, tensor_type, total_time,
                                      ret)

            table_list.append(table)

        self.tracker.log_component_summary(self._summary)
        for table in table_list:
            LOGGER.info(table)