示例#1
0
 def compute_half_d(self, data_instances, w, cipher, batch_index,
                    current_suffix):
     """Compute the per-sample residual ``w.x + b - y`` and cache it.

     Every instance is mapped to
     ``vec_dot(features, w.coef_) + w.intercept_ - label``. When
     ``self.use_sample_weight`` is truthy the residual is additionally
     multiplied by the instance's ``weight``.

     ``cipher``, ``batch_index`` and ``current_suffix`` are accepted but
     not read by this method.

     Returns the mapped table, which is also stored on ``self.half_d``.
     """

     def _residual(inst):
         return vec_dot(inst.features, w.coef_) + w.intercept_ - inst.label

     def _weighted_residual(inst):
         return _residual(inst) * inst.weight

     per_sample = _weighted_residual if self.use_sample_weight else _residual
     self.half_d = data_instances.mapValues(per_sample)
     return self.half_d
 def compute_forwards(self, data_instances, model_weights):
     """Return the per-sample linear score ``w.x + b``.

     Each instance is mapped to
     ``vec_dot(features, model_weights.coef_) + model_weights.intercept_``;
     the score is multiplied by the instance's ``weight`` when
     ``self.use_sample_weight`` is truthy.
     """

     def _score(inst):
         return (vec_dot(inst.features, model_weights.coef_) +
                 model_weights.intercept_)

     if not self.use_sample_weight:
         return data_instances.mapValues(_score)

     return data_instances.mapValues(lambda inst: _score(inst) * inst.weight)
 def compute_half_g(self, data_instances, w, cipher, batch_index):
     """Compute ``w.x + b`` per sample and an encrypted copy of it.

     The plain table holds ``vec_dot(features, w.coef_) + w.intercept_``
     for each instance, multiplied by the instance's ``weight`` when
     ``self.use_sample_weight`` is truthy. The encrypted table is produced
     by ``cipher[batch_index].encrypt`` applied to the plain table.

     Returns a ``(plain_table, encrypted_table)`` tuple.
     """

     def _wx(inst):
         return vec_dot(inst.features, w.coef_) + w.intercept_

     if self.use_sample_weight:
         def _per_sample(inst):
             return _wx(inst) * inst.weight
     else:
         _per_sample = _wx

     plain = data_instances.mapValues(_per_sample)
     return plain, cipher[batch_index].encrypt(plain)
示例#4
0
 def compute_forwards(self, data_instances, model_weights):
     """Compute and cache ``0.25 * w.x`` for every sample.

     Only ``model_weights.coef_`` is used; the intercept is not added.
     When ``self.use_sample_weight`` is truthy the quarter score is further
     multiplied by the instance's ``weight``.

     Returns the mapped table, which is also stored on ``self.forwards``.
     """

     def _quarter_wx(inst):
         return 0.25 * vec_dot(inst.features, model_weights.coef_)

     def _weighted_quarter_wx(inst):
         return _quarter_wx(inst) * inst.weight

     mapper = _weighted_quarter_wx if self.use_sample_weight else _quarter_wx
     self.forwards = data_instances.mapValues(mapper)
     return self.forwards
示例#5
0
 def compute_half_d(self, data_instances, w, cipher, batch_index,
                    current_suffix):
     """Compute and cache ``0.25 * (w.x + b) - 0.5 * y`` for every sample.

     When ``self.use_sample_weight`` is truthy both terms are multiplied by
     the instance's ``weight``.

     ``cipher``, ``batch_index`` and ``current_suffix`` are accepted but
     not read here; nothing is encrypted or sent by this method.

     Returns the mapped table, which is also stored on ``self.half_d``.
     """

     def _quarter_wx(inst):
         return 0.25 * (vec_dot(inst.features, w.coef_) + w.intercept_)

     if self.use_sample_weight:
         def _half_d(inst):
             return (_quarter_wx(inst) * inst.weight -
                     0.5 * inst.label * inst.weight)
     else:
         def _half_d(inst):
             return _quarter_wx(inst) - 0.5 * inst.label

     self.half_d = data_instances.mapValues(_half_d)
     return self.half_d
示例#6
0
    def compute_loss(self,
                     data_instances,
                     w,
                     n_iter_,
                     batch_index,
                     loss_norm=None):
        """
        Compute the Taylor-approximated hetero-lr loss and sync it.

        loss = (1/N)*∑(log2 - 1/2*ywx + 1/8*(wx)^2), where y is label, w is model weight and x is features
        where (wx)^2 = (Wg * Xg + Wh * Xh)^2 = (Wg*Xg)^2 + (Wh*Xh)^2 + 2 * Wg*Xg * Wh*Xh

        Then loss = log2 - (1/N)*0.5*∑ywx + (1/N)*0.125*[∑(Wg*Xg)^2 + ∑(Wh*Xh)^2 + 2 * ∑(Wg*Xg * Wh*Xh)]

        where Wh*Xh is a table obtained from host and ∑(Wh*Xh)^2 is a sum received from host.

        Parameters
        ----------
        data_instances: table of instances (``features`` and ``label`` are read)
        w: weights object, ``coef_`` and ``intercept_`` are read
        n_iter_, batch_index: combined into the transfer suffix
        loss_norm: optional guest penalty term; when given, the host penalty
            is fetched as well and both are added to the loss

        The resulting list (empty when more than one host is present) is
        passed to ``self.sync_loss_info``; nothing is returned.
        """
        current_suffix = (n_iter_, batch_index)
        n = data_instances.count()

        # host_forward + half_d is treated as 0.25*wx - 0.5*y here:
        # (0.25*wx - 0.5*y) * 4y + 2 == y*wx for y in {1, -1}.
        quarter_wx = self.host_forwards[0].join(self.half_d,
                                                lambda x, y: x + y)
        ywx = quarter_wx.join(data_instances, lambda wx, d: wx *
                              (4 * d.label) + 2).reduce(reduce_add)

        # Guest-side Wg*Xg, computed once and reused for the square and the
        # cross term.
        half_wx = data_instances.mapValues(
            lambda v: vec_dot(v.features, w.coef_) + w.intercept_)
        self_wx_square = half_wx.mapValues(lambda v: np.square(v)).reduce(
            reduce_add)

        loss_list = []
        wx_squares = self.get_host_loss_intermediate(suffix=current_suffix)

        if loss_norm is not None:
            host_loss_regular = self.get_host_loss_regular(
                suffix=current_suffix)
        else:
            host_loss_regular = []

        if len(self.host_forwards) > 1:
            LOGGER.info("More than one host exist, loss is not available")
        else:
            host_forward = self.host_forwards[0]
            wx_square = wx_squares[0]
            wxg_wxh = half_wx.join(
                host_forward, lambda wxg, wxh: wxg * wxh).reduce(reduce_add)
            # The ywx formula above implies host_forward carries
            # 0.25 * Wh*Xh, so the cross term 2 * Wg*Xg * Wh*Xh equals
            # 8 * wxg_wxh (a factor of 2 would under-weight it by 4x).
            # NOTE(review): assumes wx_square is the unscaled ∑(Wh*Xh)^2 as
            # the docstring states - confirm against the host side.
            loss = np.log(2) - 0.5 * (1 / n) * ywx + 0.125 * (1 / n) * \
                   (self_wx_square + wx_square + 8 * wxg_wxh)
            if loss_norm is not None:
                loss += loss_norm
                loss += host_loss_regular[0]
            loss_list.append(loss)
        LOGGER.debug("In compute_loss, loss list are: {}".format(loss_list))
        self.sync_loss_info(loss_list, suffix=current_suffix)
    def compute_mu(self, data_instances, coef_, intercept_=0, exposure=None):
        """Return ``exp(w.x + b)`` per sample, optionally offset by exposure.

        Without ``exposure`` each instance is mapped to
        ``np.exp(vec_dot(features, coef_) + intercept_)``. With ``exposure``
        every exposure value is first passed through
        ``BasePoissonRegression.safe_log`` and the result is added inside
        the exponent via a join with ``data_instances``.
        """
        if exposure is not None:
            log_exposure = exposure.mapValues(
                lambda e: BasePoissonRegression.safe_log(e))

            def _mu_with_offset(inst, off):
                return np.exp(vec_dot(inst.features, coef_) + intercept_ + off)

            return data_instances.join(log_exposure, _mu_with_offset)

        def _mu(inst):
            return np.exp(vec_dot(inst.features, coef_) + intercept_)

        return data_instances.mapValues(_mu)
示例#8
0
    def predict(self, data_instances):
        """Predict on ``data_instances`` with the fitted weights.

        The input is checked with ``_abnormal_detection``, the schema is
        initialised and the header aligned to ``self.header``. When
        ``self.need_one_vs_rest`` is truthy the work is delegated to
        ``self.one_vs_rest_obj.predict``. Otherwise each instance is scored
        with ``sigmoid(w.x + b)`` and the scores are converted by
        ``predict_score_to_output`` using classes ``[0, 1]`` and the
        configured threshold.
        """
        self._abnormal_detection(data_instances)
        self.init_schema(data_instances)
        aligned = self.align_data_header(data_instances, self.header)

        LOGGER.info("Start predict is a one_vs_rest task: {}".format(
            self.need_one_vs_rest))
        if self.need_one_vs_rest:
            return self.one_vs_rest_obj.predict(aligned)

        def _probability(inst):
            wx = vec_dot(inst.features, self.model_weights.coef_)
            return activation.sigmoid(wx + self.model_weights.intercept_)

        probabilities = aligned.mapValues(_probability)
        return self.predict_score_to_output(
            aligned,
            probabilities,
            classes=[0, 1],
            threshold=self.model_param.predict_param.threshold)
    def compute_and_aggregate_forwards(self,
                                       data_instances,
                                       model_weights,
                                       encrypted_calculator,
                                       batch_index,
                                       offset=None):
        """
        Build the fore gradient ``0.25 * wx - 0.5 * y`` from all parties.

        gradient = (1/N)*∑(0.25 * wx - 0.5 * y) * x, where y = 1 or -1
        wx is the sum of the local forward and every host forward.

        The local ``w.x + b`` table is stored on ``self.forwards``; its
        encryption by ``encrypted_calculator[batch_index]`` is then joined
        with each table in ``self.host_forwards`` (element-wise addition)
        and kept on ``self.aggregated_forwards``. ``offset`` is not read.

        Returns the fore-gradient table.
        """

        def _local_wx(inst):
            return (vec_dot(inst.features, model_weights.coef_) +
                    model_weights.intercept_)

        local_forward = data_instances.mapValues(_local_wx)
        self.forwards = local_forward
        self.aggregated_forwards = encrypted_calculator[batch_index].encrypt(
            local_forward)

        for remote_forward in self.host_forwards:
            self.aggregated_forwards = self.aggregated_forwards.join(
                remote_forward, lambda g, h: g + h)

        return self.aggregated_forwards.join(
            data_instances, lambda total, inst: 0.25 * total - 0.5 * inst.label)
 def compute_forwards(self, data_instances, model_weights):
     """Return the table of ``w.x + b`` values, one per instance."""

     def _wx(inst):
         dot = vec_dot(inst.features, model_weights.coef_)
         return dot + model_weights.intercept_

     return data_instances.mapValues(_wx)
    def compute_and_aggregate_forwards(self,
                                       data_instances,
                                       model_weights,
                                       encrypted_calculator,
                                       batch_index,
                                       current_suffix,
                                       offset=None):
        """
        Compute the Poisson fore gradient ``mu - y``.

        gradient = (1/N) * sum(exp(wx) - y) * x

        The local ``mu = exp(w.x + b + offset)`` is stored on
        ``self.forwards``. Host forwards are fetched with
        ``get_host_forward(suffix=current_suffix)`` and kept on
        ``self.host_forwards``; only the first one is multiplied into the
        local ``mu`` to give ``self.aggregated_forwards``.
        ``encrypted_calculator`` and ``batch_index`` are not read.

        Returns the table of ``aggregated_mu - label``.

        Raises ValueError when ``offset`` is None.
        """
        if offset is None:
            raise ValueError(
                "Offset should be provided when compute poisson forwards")

        def _local_mu(inst, log_offset):
            exponent = (vec_dot(inst.features, model_weights.coef_) +
                        model_weights.intercept_ + log_offset)
            return np.exp(exponent)

        self.forwards = data_instances.join(offset, _local_mu)
        self.host_forwards = self.get_host_forward(suffix=current_suffix)

        first_host = self.host_forwards[0]
        self.aggregated_forwards = self.forwards.join(first_host,
                                                      lambda g, h: g * h)
        return self.aggregated_forwards.join(
            data_instances, lambda total_mu, inst: total_mu - inst.label)
    def compute_loss(self, data_instances, model_weights, encrypted_calculator,
                     optimizer, n_iter_, batch_index, cipher_operator):
        """
        Send this party's loss intermediates for hetero poisson regression.

        The local ``w.x + b`` table is encrypted with
        ``encrypted_calculator[batch_index]`` and sent through
        ``remote_loss_intermediate``. If ``optimizer.loss_norm`` yields a
        penalty term, it is encrypted with ``cipher_operator`` and sent
        through ``remote_loss_regular``. Both transfers use the suffix
        ``(n_iter_, batch_index)``. Nothing is returned.

        Parameters:
        ___________
        data_instances: DTable, input data

        model_weights: model weight object, stores intercept_ and coef_

        encrypted_calculator: encrypted calculator object

        optimizer: optimizer object

        n_iter_: int, current number of iter.

        batch_index: int, use to obtain current encrypted_calculator index

        cipher_operator: cipher for encrypt loss_regular
        """
        suffix = (n_iter_, batch_index)

        def _wx(inst):
            return (vec_dot(inst.features, model_weights.coef_) +
                    model_weights.intercept_)

        local_wx = data_instances.mapValues(_wx)
        encrypted_wx = encrypted_calculator[batch_index].encrypt(local_wx)
        self.remote_loss_intermediate(encrypted_wx, suffix=suffix)

        penalty = optimizer.loss_norm(model_weights)
        if penalty is None:
            return
        self.remote_loss_regular(cipher_operator.encrypt(penalty),
                                 suffix=suffix)
    def compute_loss(self,
                     data_instances,
                     model_weights,
                     n_iter_,
                     batch_index,
                     offset,
                     loss_norm=None):
        """
        Compute the hetero poisson loss and sync it.

        loss = (sum(mu_g * mu_h) - sum(y * (wx_g + offset + wx_h))) / N

        where ``mu_g`` is ``self.forwards``, ``mu_h`` is the first entry of
        ``self.host_forwards`` and ``wx_h`` is the first table returned by
        ``get_host_loss_intermediate``. When ``loss_norm`` is given, it and
        the first host penalty are added. The single-element loss list is
        passed to ``sync_loss_info``; nothing is returned.

        Parameters:
        ___________
        data_instances: DTable, input data

        model_weights: model weight object, stores intercept_ and coef_

        n_iter_: int, current number of iter.

        batch_index: int, combined with n_iter_ into the transfer suffix

        offset: table joined with data_instances and added to wx_g
            (log(exposure))

        loss_norm: penalty term, default to None

        Raises ValueError when more than one host forward is present.
        """
        suffix = (n_iter_, batch_index)
        sample_count = data_instances.count()

        def _score_and_label(inst, log_exposure):
            score = (vec_dot(inst.features, model_weights.coef_) +
                     model_weights.intercept_ + log_exposure)
            return score, inst.label

        guest_wx_y = data_instances.join(offset, _score_and_label)

        # Remote fetches stay in this order: intermediates, then penalty.
        host_wxs = self.get_host_loss_intermediate(suffix)
        host_loss_regular = (self.get_host_loss_regular(suffix=suffix)
                             if loss_norm is not None else [])

        if len(self.host_forwards) > 1:
            raise ValueError(
                "More than one host exists. Poisson regression does not support multi-host."
            )

        host_mu, host_wx = self.host_forwards[0], host_wxs[0]
        loss_wx = guest_wx_y.join(
            host_wx, lambda pair, h: pair[1] * (pair[0] + h)).reduce(reduce_add)
        loss_mu = self.forwards.join(host_mu,
                                     lambda g, h: g * h).reduce(reduce_add)

        loss = (loss_mu - loss_wx) / sample_count
        if loss_norm is not None:
            loss = loss + loss_norm + host_loss_regular[0]
        self.sync_loss_info([loss], suffix=suffix)
 def compute_sqn_forwards(self, data_instances, delta_s, cipher_operator):
     """
     Return the encrypted per-sample projection ``x . s + b_s``.

     To compute Hessian matrix, y, s are needed.
     g = (1/N)*∑(0.25 * wx - 0.5 * y) * x
     y = ∇2^F(w_t)s_t = g' * s = (1/N)*∑(0.25 * x * s) * x

     Each instance is mapped to ``cipher_operator.encrypt`` of
     ``vec_dot(features, delta_s.coef_) + delta_s.intercept_``; no scaling
     or summation is applied here.
     """

     def _encrypted_projection(inst):
         projection = (fate_operator.vec_dot(inst.features, delta_s.coef_) +
                       delta_s.intercept_)
         return cipher_operator.encrypt(projection)

     return data_instances.mapValues(_encrypted_projection)
示例#15
0
    def predict(self, data_instances):
        """Score ``data_instances`` with ``sigmoid(w.x + b)``.

        The input is checked with ``_abnormal_detection``, the schema is
        initialised and the header aligned to ``self.header``. The scores
        are then converted by ``predict_score_to_output`` using classes
        ``[0, 1]`` and the configured threshold.
        """
        self._abnormal_detection(data_instances)
        self.init_schema(data_instances)
        aligned = self.align_data_header(data_instances, self.header)

        def _probability(inst):
            wx = vec_dot(inst.features, self.model_weights.coef_)
            return activation.sigmoid(wx + self.model_weights.intercept_)

        scores = aligned.mapValues(_probability)
        return self.predict_score_to_output(
            aligned,
            scores,
            classes=[0, 1],
            threshold=self.model_param.predict_param.threshold)
示例#16
0
    def predict(self, data_instances):
        """Predict locally, or through the arbiter when encryption is on.

        After input checks and header alignment, a Paillier public key is
        requested from ``self.cipher`` if an arbiter is present.

        * Without ``self.use_encrypt``: each instance is scored locally with
          ``sigmoid(w.x + b)`` and converted by ``predict_score_to_output``.
        * With ``self.use_encrypt``: the public key is installed, the
          aggregated model is fetched, ``w.x + b`` is sent to the arbiter,
          and the returned table is joined with the data into ``Instance``
          objects whose features are ``[label, p, None, {"0": None,
          "1": None}]``.

        Raises ValueError when encryption is requested without an arbiter.
        """
        LOGGER.info('Start predict task')
        self._abnormal_detection(data_instances)
        self.init_schema(data_instances)
        data_instances = self.align_data_header(data_instances, self.header)
        suffix = ('predict', )

        if self.component_properties.has_arbiter:
            pubkey = self.cipher.gen_paillier_pubkey(enable=self.use_encrypt,
                                                     suffix=suffix)
        elif self.use_encrypt:
            raise ValueError("In use_encrypt case, arbiter should be set")
        else:
            pubkey = None

        if not self.use_encrypt:
            def _probability(inst):
                wx = vec_dot(inst.features, self.model_weights.coef_)
                return activation.sigmoid(wx + self.model_weights.intercept_)

            return self.predict_score_to_output(
                data_instances,
                data_instances.mapValues(_probability),
                classes=[0, 1],
                threshold=self.model_param.predict_param.threshold)

        self.cipher_operator.set_public_key(pubkey)
        final_model = self.transfer_variable.aggregated_model.get(
            idx=0, suffix=suffix)
        model_weights = LogisticRegressionWeights(final_model.unboxed,
                                                  self.fit_intercept)
        wx = self.compute_wx(data_instances, model_weights.coef_,
                             model_weights.intercept_)
        self.transfer_variable.predict_wx.remote(wx,
                                                 consts.ARBITER,
                                                 0,
                                                 suffix=suffix)
        arbiter_result = self.transfer_variable.predict_result.get(
            idx=0, suffix=suffix)

        def _to_instance(p, d):
            return Instance(features=[d.label, p, None, {"0": None, "1": None}],
                            inst_id=d.inst_id)

        return arbiter_result.join(data_instances, _to_instance)
示例#17
0
 def compute_forward_hess(self, data_instances, delta_s, host_forwards):
     """
     Compute the Hessian-vector forwards and the resulting vector.

     To compute Hessian matrix, y, s are needed.
     g = (1/N)*∑(0.25 * wx - 0.5 * y) * x
     y = ∇2^F(w_t)s_t = g' * s = (1/N)*∑(0.25 * x * s) * x

     The local forward is ``0.25 * (x . s + b_s)``; every host forward is
     added after being scaled by 0.25. The summed table is passed to
     ``self.compute_gradient`` together with ``delta_s.fit_intercept``.

     Returns ``(forwards_table, np.array(hess_vector))``.
     """

     def _local_quarter(inst):
         return (vec_dot(inst.features, delta_s.coef_) +
                 delta_s.intercept_) * 0.25

     total = data_instances.mapValues(_local_quarter)
     for remote in host_forwards:
         total = total.join(remote, lambda g, h: g + (h * 0.25))

     hess_vector = self.compute_gradient(data_instances, total,
                                         delta_s.fit_intercept)
     return total, np.array(hess_vector)
 def compute_forward_hess(self, data_instances, delta_s, host_forwards):
     """
     Compute the Hessian-vector forwards and the resulting vector.

     To compute Hessian matrix, y, s are needed.
     g = (1/N)*∑(wx - y) * x
     y = ∇2^F(w_t)s_t = g' * s = (1/N)*∑(x * s) * x

     The local forward ``x . s + b_s`` is summed with every host forward;
     when ``self.use_sample_weight`` is truthy the sum is multiplied by
     each instance's ``weight``. The table is then passed to
     ``self.compute_gradient`` together with ``delta_s.fit_intercept``.

     Returns ``(forwards_table, np.array(hess_vector))``.
     """

     def _local_projection(inst):
         return vec_dot(inst.features, delta_s.coef_) + delta_s.intercept_

     total = data_instances.mapValues(_local_projection)
     for remote in host_forwards:
         total = total.join(remote, lambda g, h: g + h)

     if self.use_sample_weight:
         total = total.join(data_instances,
                            lambda value, inst: value * inst.weight)

     hess_vector = self.compute_gradient(data_instances, total,
                                         delta_s.fit_intercept)
     return total, np.array(hess_vector)
    def compute_and_aggregate_forwards(self,
                                       data_instances,
                                       model_weights,
                                       encrypted_calculator,
                                       batch_index,
                                       current_suffix,
                                       offset=None):
        """
        Compute the linear-regression fore gradient ``wx - y``.

        gradient = (1/N) * sum(wx - y) * x

        The local ``w.x + b`` table is stored on ``self.forwards``; its
        encryption is joined (element-wise addition) with every host
        forward fetched by ``get_host_forward`` and kept on
        ``self.aggregated_forwards``.

        Parameters
        ----------
        data_instances: DTable of Instance, input data

        model_weights: LinearRegressionWeights
            Stores coef_ and intercept_ of model

        encrypted_calculator: indexed by batch_index; its ``encrypt`` is
            applied to the local forward table

        batch_index: int, use to obtain current encrypted_calculator index

        current_suffix: tuple or string. Used in transfer_variable

        offset: not read here (used in Poisson only)

        Returns
        -------
        Table of ``aggregated_wx - label``.
        """

        def _local_wx(inst):
            return (vec_dot(inst.features, model_weights.coef_) +
                    model_weights.intercept_)

        local_forward = data_instances.mapValues(_local_wx)
        self.forwards = local_forward
        self.aggregated_forwards = encrypted_calculator[batch_index].encrypt(
            local_forward)

        self.host_forwards = self.get_host_forward(suffix=current_suffix)
        for remote_forward in self.host_forwards:
            self.aggregated_forwards = self.aggregated_forwards.join(
                remote_forward, lambda g, h: g + h)

        return self.aggregated_forwards.join(
            data_instances, lambda total, inst: total - inst.label)
示例#20
0
 def compute_wx(self, data_instances, coef_, intercept_=0):
     """Map every instance to ``vec_dot(features, coef_) + intercept_``."""

     def _wx(inst):
         return vec_dot(inst.features, coef_) + intercept_

     return data_instances.mapValues(_wx)
示例#21
0
 def _vec_dot(v, coef, intercept):
     """Return ``fate_operator.vec_dot(v.features, coef)`` plus ``intercept``."""
     weighted_sum = fate_operator.vec_dot(v.features, coef)
     return weighted_sum + intercept
示例#22
0
 def compute_half_g(self, data_instances, w, cipher, batch_index):
     """Compute ``0.25 * w.x + b`` per sample and an encrypted copy.

     Only the dot product is scaled by 0.25; ``w.intercept_`` is added
     unscaled. The encrypted table comes from
     ``cipher[batch_index].encrypt`` applied to the plain table.

     Returns a ``(plain_table, encrypted_table)`` tuple.
     """

     def _quarter_wx_plus_b(inst):
         return vec_dot(inst.features, w.coef_) * 0.25 + w.intercept_

     plain = data_instances.mapValues(_quarter_wx_plus_b)
     return plain, cipher[batch_index].encrypt(plain)
示例#23
0
    def compute_loss(self,
                     data_instances,
                     w,
                     n_iter_,
                     batch_index,
                     loss_norm=None,
                     batch_masked=False):
        """
        Compute the Taylor-approximated hetero-lr loss and sync it.

        loss = (1/N)*∑(log2 - 1/2*ywx + 1/8*(wx)^2), where y is label, w is model weight and x is features
        where (wx)^2 = (Wg * Xg + Wh * Xh)^2 = (Wg*Xg)^2 + (Wh*Xh)^2 + 2 * Wg*Xg * Wh*Xh

        Then loss = log2 - (1/N)*0.5*∑ywx + (1/N)*0.125*[∑(Wg*Xg)^2 + ∑(Wh*Xh)^2 + 2 * ∑(Wg*Xg * Wh*Xh)]

        where Wh*Xh is a table obtained from host and ∑(Wh*Xh)^2 is a sum received from host.

        Parameters
        ----------
        data_instances: table of instances (``features`` and ``label`` are read)
        w: weights object, ``coef_`` and ``intercept_`` are read
        n_iter_, batch_index: combined into the transfer suffix
        loss_norm: optional guest penalty term; when given, the host penalty
            is fetched as well and both are added to the loss
        batch_masked: when truthy, each host intermediate is treated as a
            table that is restricted to ``data_instances`` and summed here

        The resulting list (empty when more than one host is present) is
        passed to ``self.sync_loss_info``; nothing is returned.
        """
        current_suffix = (n_iter_, batch_index)
        n = data_instances.count()

        # Pair each forward value with the sample's label so the signed sum
        # can be taken partition by partition.
        # host_wx_y = self.host_forwards[0].join(data_instances, lambda x, y: (x, y.label))
        host_wx_y = data_instances.join(self.host_forwards[0], lambda y, x:
                                        (x, y.label))
        self_wx_y = self.half_d.join(data_instances, lambda x, y: (x, y.label))

        # Signed sum over one partition: +x where the label equals 1, -x for
        # any other label value.
        def _sum_ywx(wx_y):
            sum1, sum2 = 0, 0
            for _, (x, y) in wx_y:
                if y == 1:
                    sum1 += x
                else:
                    sum2 -= x
            return sum1 + sum2

        ywx = host_wx_y.applyPartitions(_sum_ywx).reduce(reduce_add) + \
            self_wx_y.applyPartitions(_sum_ywx).reduce(reduce_add)
        # NOTE(review): the rescaling below presumably undoes a 0.25 scaling of
        # the forwards and a -0.5*y offset in half_d (with y in {1, -1});
        # neither is visible in this method - confirm against the producers.
        ywx = ywx * 4 + 2 * n

        # quarter_wx = self.host_forwards[0].join(self.half_d, lambda x, y: x + y)
        # ywx = quarter_wx.join(data_instances, lambda wx, d: wx * (4 * d.label) + 2).reduce(reduce_add)

        # Guest-side w.x + b, reused for the squared term and the cross term.
        half_wx = data_instances.mapValues(
            lambda v: vec_dot(v.features, w.coef_) + w.intercept_)
        self_wx_square = half_wx.mapValues(lambda v: np.square(v)).reduce(
            reduce_add)

        # self_wx_square = data_instances.mapValues(
        #    lambda v: np.square(vec_dot(v.features, w.coef_) + w.intercept_)).reduce(reduce_add)

        loss_list = []

        wx_squares = self.get_host_loss_intermediate(suffix=current_suffix)
        if batch_masked:
            # Each host entry is a table here: keep only keys present in
            # data_instances (the join keeps the host-side value) and sum.
            wx_squares_sum = []
            for square_table in wx_squares:
                square_sum = data_instances.join(
                    square_table,
                    lambda inst, enc_h_squares: enc_h_squares).reduce(
                        lambda x, y: x + y)
                wx_squares_sum.append(square_sum)

            wx_squares = wx_squares_sum

        if loss_norm is not None:
            host_loss_regular = self.get_host_loss_regular(
                suffix=current_suffix)
        else:
            host_loss_regular = []

        # Loss is only assembled for the single-host case; with several hosts
        # an empty list is synced.
        # for host_idx, host_forward in enumerate(self.host_forwards):
        if len(self.host_forwards) > 1:
            LOGGER.info("More than one host exist, loss is not available")
        else:
            host_forward = self.host_forwards[0]
            wx_square = wx_squares[0]
            wxg_wxh = half_wx.join(
                host_forward, lambda wxg, wxh: wxg * wxh).reduce(reduce_add)
            # NOTE(review): the factor 8 on the cross term (instead of the 2
            # in the docstring formula) is consistent with host_forward
            # holding 0.25 * Wh*Xh - confirm against the host side.
            loss = np.log(2) - 0.5 * (1 / n) * ywx + 0.125 * (1 / n) * \
                (self_wx_square + wx_square + 8 * wxg_wxh)
            if loss_norm is not None:
                loss += loss_norm
                loss += host_loss_regular[0]
            loss_list.append(loss)
        LOGGER.debug("In compute_loss, loss list are: {}".format(loss_list))
        self.sync_loss_info(loss_list, suffix=current_suffix)
 def compute_forwards(self, data_instances, model_weights):
     """Return the table of ``exp(w.x + b)`` values, one per instance."""

     def _mu(inst):
         exponent = (vec_dot(inst.features, model_weights.coef_) +
                     model_weights.intercept_)
         return np.exp(exponent)

     return data_instances.mapValues(_mu)