Example #1
            def subsample_func(x):  # linear interpolation upper bound
                # This function implements the RDP at alpha = x by linearly
                # interpolating the integer-order CGF subsample_func_int
                # (defined in the enclosing scope)

                if np.isinf(func(x)):
                    return np.inf
                if prob == 1.0:
                    return func(x)
                if prob == 0:
                    return 0

                epsinf, tmp = subsample_epsdelta(func(np.inf), 0, prob)

                if np.isinf(x):
                    return epsinf
                if (x >= 1.0) and (x <= 2.0):
                    return np.minimum(epsinf,
                                      subsample_func_int(2.0) / (2.0 - 1))
                if np.equal(np.mod(x, 1), 0):
                    return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.minimum(epsinf,
                                  ((x - xf) * subsample_func_int(xc) +
                                   (1 - (x - xf)) * subsample_func_int(xf)) /
                                  (x - 1))
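
To make the interpolation rule above concrete in isolation, here is a minimal, self-contained sketch. It substitutes a toy convex CGF for subsample_func_int (the CGF of a Gaussian mechanism with sigma = 1, an illustrative assumption, since the real subsample_func_int depends on the enclosing accountant) and checks that the interpolated RDP at a fractional order lands between the values at the two neighboring integer orders.

import math
import numpy as np

def toy_cgf(alpha):
    # stand-in for subsample_func_int: the CGF (alpha - 1) * RDP(alpha) of a
    # Gaussian mechanism with sigma = 1, where RDP(alpha) = alpha / (2 * sigma^2)
    return alpha * (alpha - 1) / 2.0

def interp_rdp(x, epsinf):
    # the same linear-interpolation rule as subsample_func above
    if np.equal(np.mod(x, 1), 0):
        return np.minimum(epsinf, toy_cgf(x) / (x - 1))
    xc = math.ceil(x)
    xf = math.floor(x)
    return np.minimum(epsinf,
                      ((x - xf) * toy_cgf(xc) +
                       (1 - (x - xf)) * toy_cgf(xf)) / (x - 1))

epsinf = 100.0  # a deliberately large cap so the interpolation term binds
print(interp_rdp(2.0, epsinf), interp_rdp(2.5, epsinf), interp_rdp(3.0, epsinf))
# prints 1.0, ~1.333, 1.5: the fractional order sits between its integer neighbors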
Example #2
    def compose_poisson_subsampled_mechanisms1(self, func, prob, coeff=1.0):
        # This function implements the general amplification bounds for Poisson sampling.
        # No additional assumptions are needed.

        # At the moment, we do not support mixing Poisson subsampling and standard subsampling.
        #
        self.flag = False
        self.flag_subsample = True
        if (func, prob) in self.idxhash:
            idx = self.idxhash[(func, prob)]
            # update the coefficients of each function
            self.coeffs[idx] += coeff
            # also update the integer CGFs
            self.RDPs_int += self.cache[(func, prob)] * coeff
        else:  # compute an easy-to-compute upper bound of it.

            def cgf(x):
                # CGF of the base mechanism at lamb = x: cgf(x) = x * eps(x + 1)
                return x * func(x + 1)

            def subsample_func_int(x):
                # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1
                if np.isinf(func(x)):
                    return np.inf
                if prob == 1.0:
                    return func(x)

                mm = int(x)

                if x <= self.alphas[-1]:  # compute the bound exactly
                    moments = [cgf(1) + 2*np.log(prob) + (mm-2)*np.log(1-prob) + self.logBinomC[mm, 2]]
                    moments = moments + [cgf(j) + j*np.log(prob) + (mm-j)*np.log(1-prob)
                                         + self.logBinomC[mm, j] for j in range(3, mm+1)]
                    return utils.stable_logsumexp([(mm-1)*np.log(1-prob) + np.log(1+(mm-1)*prob)] + moments)
                elif mm <= self.m_lin_max:  # use the Stirling approximation of the log binomial coefficients
                    moments = [cgf(1) + 2*np.log(prob) + (mm-2)*np.log(1-prob) + utils.logcomb(mm, 2)]
                    moments = moments + [cgf(j) + j*np.log(prob) + (mm-j)*np.log(1-prob)
                                         + utils.logcomb(mm, j) for j in range(3, mm+1)]
                    return utils.stable_logsumexp([(mm-1)*np.log(1-prob) + np.log(1+(mm-1)*prob)] + moments)
                else:  # fall back to the O(1) upper bound
                    return fast_poission_subsampled_cgf_upperbound(func, mm, prob)


            def subsample_func(x): # linear interpolation upper bound
                epsinf, tmp = subsample_epsdelta(func(np.inf),0,prob)

                if np.isinf(x):
                    return epsinf
                if (x >= 1.0) and (x <= 2.0):
                    return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
                if np.equal(np.mod(x, 1), 0):
                    return np.minimum(epsinf, subsample_func_int(x) / (x-1) )
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.minimum(
                    epsinf,
                    ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)
                )

            # bookkeeping
            self.idxhash[(func, prob)] = self.n # save the index
            self.n += 1 # increment the number of unique mechanisms
            self.coeffs.append(coeff) # Update the coefficient
            self.RDPs.append(subsample_func) # update the analytical functions

            # also update the integer results
            if (func,prob) in self.cache:
                results = self.cache[(func,prob)]
            else:
                results = np.zeros_like(self.RDPs_int, float)
                mm = np.max(self.alphas)  # evaluate the RDP up to order mm

                for alpha in range(2, mm+1):
                    results[alpha-1] = subsample_func_int(alpha)
                results[0] = results[1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
                self.cache[(func,prob)] = results # save in cache
            self.RDPs_int += results * coeff
        # update the pure DP tracker
        eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
        self.RDP_inf += eps * coeff


# TODO: 1. Modularize the several Poisson sampling versions.  2. Support both sampling schemes together.
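
A usage sketch may help place this method in context. The snippets appear to come from autodp's anaRDPacct accountant (https://github.com/yuxiangw/autodp); assuming that, the following composes a Gaussian mechanism under Poisson sampling and converts the accumulated RDP to an (eps, delta)-DP guarantee. The sigma, prob, and niter values are illustrative, not prescriptive.

from autodp import rdp_acct, rdp_bank

sigma, prob, niter = 5.0, 0.01, 1000
gaussian_rdp = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)

acct = rdp_acct.anaRDPacct()
acct.compose_poisson_subsampled_mechanisms1(gaussian_rdp, prob, coeff=niter)
print(acct.get_eps(1e-6))  # eps of the (eps, 1e-6)-DP guarantee after niter rounds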
Example #3
    def compose_poisson_subsampled_mechanisms(self, func, prob, coeff=1.0):
        # This function implements the lower bound for subsampled RDP.
        # It is also the exact formula of Poisson-subsampled RDP for many mechanisms, including the Gaussian mechanism.
        #
        # At the moment, we do not support mixing Poisson subsampling and standard subsampling.
        # TODO: modify the caching identifiers so that we can distinguish different types of subsampling.
        #
        #
        self.flag = False
        self.flag_subsample = True
        if (func, prob) in self.idxhash:
            idx = self.idxhash[(func, prob)] # TODO: this is really where it needs to be changed.
            # update the coefficients of each function
            self.coeffs[idx] += coeff
            # also update the integer CGFs
            self.RDPs_int += self.cache[(func, prob)] * coeff
        else:  # compute an easy-to-compute upper bound of it.

            def cgf(x):
                return x * func(x+1)

            def subsample_func_int(x):
                # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1

                if np.isinf(func(x)):
                    return np.inf

                mm = int(x)

                if x <= self.alphas[-1]:  # compute the bound exactly
                    moments = [cgf(j-1) + j*np.log(prob) + (mm-j)*np.log(1-prob)
                               + self.logBinomC[mm, j] for j in range(2, mm+1)]
                    return utils.stable_logsumexp([(mm-1)*np.log(1-prob) + np.log(1+(mm-1)*prob)] + moments)
                elif mm <= self.m_lin_max:  # use the Stirling approximation of the log binomial coefficients
                    moments = [cgf(j-1) + j*np.log(prob) + (mm-j)*np.log(1-prob)
                               + utils.logcomb(mm, j) for j in range(2, mm+1)]
                    return utils.stable_logsumexp([(mm-1)*np.log(1-prob) + np.log(1+(mm-1)*prob)] + moments)
                else:  # fall back to the O(1) upper bound
                    return fast_poission_subsampled_cgf_upperbound(func, mm, prob)

            def subsample_func(x): # linear interpolation upper bound
                # This function implements the RDP at alpha = x

                if np.isinf(func(x)):
                    return np.inf
                if prob == 1.0:
                    return func(x)

                epsinf, tmp = subsample_epsdelta(func(np.inf),0,prob)

                if np.isinf(x):
                    return epsinf
                if (x >= 1.0) and (x <= 2.0):
                    return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
                if np.equal(np.mod(x, 1), 0):
                    return np.minimum(epsinf, subsample_func_int(x) / (x-1) )
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.minimum(
                    epsinf,
                    ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)
                )

            # bookkeeping
            self.idxhash[(func, prob)] = self.n # save the index
            self.n += 1 # increment the number of unique mechanisms
            self.coeffs.append(coeff) # Update the coefficient
            self.RDPs.append(subsample_func) # update the analytical functions

            # also update the integer results, with a vectorized computation.
            # TODO: pre-computing the subsampled RDP at integer orders is error-prone (it implements
            # TODO: the same bound twice), and its benefits are not clear. We should consider removing
            # TODO: it and simply calling the lambda function.
            #
            if (func,prob) in self.cache:
                results = self.cache[(func,prob)]
            else:
                results = np.zeros_like(self.RDPs_int, float)
                mm = np.max(self.alphas)  # evaluate the RDP up to order mm
                jvec = np.arange(2, mm + 1)
                logterm3plus = np.zeros_like(results)  # saves cgf(j-1) + j*log(prob) for j = 2 to mm
                for j in jvec:
                    logterm3plus[j-2] = cgf(j-1) + j * np.log(prob)

                for alpha in range(2, mm+1):
                    if np.isinf(logterm3plus[alpha-2]):  # the largest term used for this alpha (j = alpha)
                        results[alpha-1] = np.inf
                    else:
                        # vectorized form of the exact integer-order formula above:
                        # sum_{j=2}^{alpha} C(alpha, j) * prob^j * (1-prob)^(alpha-j) * exp(cgf(j-1))
                        tmp = utils.stable_logsumexp(logterm3plus[0:alpha-1] + self.logBinomC[alpha, 2:(alpha + 1)]
                                                     + (alpha - jvec[0:alpha-1])*np.log(1-prob))
                        results[alpha-1] = utils.stable_logsumexp_two((alpha-1)*np.log(1-prob)
                                                                      + np.log(1+(alpha-1)*prob), tmp) / (1.0*alpha-1)

                results[0] = results[1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
                self.cache[(func,prob)] = results # save in cache
            self.RDPs_int += results * coeff
        # update the pure DP tracker
        eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
        self.RDP_inf += eps * coeff
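
Since the TODO above notes that precomputing the integer orders implements the same formula twice, a standalone reference for the scalar branch is useful as a consistency check. The sketch below re-derives the exact integer-order Poisson-subsampled RDP with scipy standing in for the module's utils helpers; the Gaussian-mechanism RDP and the sigma = 2, prob = 0.05 values are illustrative assumptions.

import numpy as np
from scipy.special import gammaln, logsumexp

sigma, prob = 2.0, 0.05
func = lambda alpha: alpha / (2 * sigma ** 2)  # RDP of the Gaussian mechanism
cgf = lambda x: x * func(x + 1)

def logcomb(n, k):
    # log of the binomial coefficient C(n, k)
    return gammaln(n + 1) - gammaln(k + 1) - gammaln(n - k + 1)

def poisson_rdp_int(alpha):
    # exact integer-order formula: the j = 0 and j = 1 terms of the binomial
    # expansion collapse into the leading term, then j = 2..alpha follow
    lead = (alpha - 1) * np.log(1 - prob) + np.log(1 + (alpha - 1) * prob)
    moments = [cgf(j - 1) + j * np.log(prob) + (alpha - j) * np.log(1 - prob)
               + logcomb(alpha, j) for j in range(2, alpha + 1)]
    return logsumexp([lead] + moments) / (alpha - 1)

print([float(np.round(poisson_rdp_int(a), 6)) for a in (2, 4, 8, 16)])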
Example #4
    def compose_subsampled_mechanism(self, func, prob, coeff=1.0):
        # This function is for subsampling without replacement.
        self.flag = False
        self.flag_subsample = True
        if (func, prob) in self.idxhash:
            idx = self.idxhash[(func, prob)]
            # update the coefficients of each function
            self.coeffs[idx] += coeff
            # also update the integer CGFs
            self.RDPs_int += self.cache[(func, prob)] * coeff
        else:

            def cgf(x):
                return x * func(x+1)

            # we need forward differences of exp(cgf)
            # The following line is the numerically stable way of implementing it.
            # The output is in polar form with logarithmic magnitude
            deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m)

            self.deltas_cache[(func, prob)] = [deltas, signs_deltas]

            def subsample_func_int(x):
                # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1
                deltas_local, signs_deltas_local = self.deltas_cache[(func,prob)]
                if np.isinf(func(x)):
                    return np.inf

                mm = int(x)

                fastupperbound = fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local)

                if mm <= self.alphas[-1]: # compute the bound exactly. Requires bookkeeping of O(x^2)

                    moments = [ np.minimum(np.minimum((j)*np.log(np.exp(func(np.inf))-1) + np.minimum(cgf(j-1),np.log(4)),
                                                      np.log(2) + cgf(j-1)),
                                           np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                                           + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1]) + j*np.log(prob)
                                +self.logBinomC[int(mm), j] for j in range(2,int(mm+1),1)]

                    return np.minimum(fastupperbound, utils.stable_logsumexp([0]+moments))
                elif mm <= self.m_lin_max:  # compute the bound with the Stirling approximation. Everything is O(x) now.
                    moment_bound = lambda j: np.minimum(j * np.log(np.exp(func(np.inf)) - 1)
                                                        + np.minimum(cgf(j - 1), np.log(4)), np.log(2)
                                                        + cgf(j - 1)) + j * np.log(prob) + utils.logcomb(mm, j)
                    moments = [moment_bound(j) for j in range(2,mm+1,1)]
                    return np.minimum(fastupperbound, utils.stable_logsumexp([0]+ moments))
                else: # Compute the O(1) upper bound
                    return fastupperbound



            def subsample_func(x):
                # This function returns the RDP at alpha = x
                # RDP with the linear interpolation upper bound of the CGF

                epsinf, tmp = subsample_epsdelta(func(np.inf),0,prob)

                if np.isinf(x):
                    return epsinf
                if prob == 1.0:
                    return func(x)

                if (x >= 1.0) and (x <= 2.0):
                    return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
                if np.equal(np.mod(x, 1), 0):
                    return np.minimum(epsinf, subsample_func_int(x) / (x-1) )
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.minimum(
                    epsinf,
                    ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)
                )


            # bookkeeping
            self.idxhash[(func, prob)] = self.n # save the index
            self.n += 1 # increment the number of unique mechanisms
            self.coeffs.append(coeff) # Update the coefficient
            self.RDPs.append(subsample_func) # update the analytical functions

            # also update the integer results up to m_max.
            if (func,prob) in self.cache:
                results = self.cache[(func,prob)]
            else:
                results = np.zeros_like(self.RDPs_int, float)
                mm = np.max(self.alphas)  # evaluate the RDP up to order mm
                for alpha in range(2, mm+1):
                    results[alpha-1] = subsample_func(alpha)
                results[0] = results[1] # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
                self.cache[(func,prob)] = results # save in cache

            self.RDPs_int += results * coeff
        # update the pure DP
        eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
        self.RDP_inf += eps * coeff
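
The epsinf cap that every variant applies comes from the classical privacy-amplification-by-subsampling lemma for pure DP: an (eps, delta)-DP mechanism run on a prob-fraction of the data satisfies (log(1 + prob*(e^eps - 1)), prob*delta)-DP. Below is a minimal sketch of what subsample_epsdelta computes under that standard formula; the module's actual implementation may handle edge cases differently.

import numpy as np

def subsample_epsdelta_sketch(eps, delta, prob):
    # amplification by subsampling for an (eps, delta)-DP mechanism
    if prob == 0:
        return 0.0, 0.0
    return np.log(1 + prob * (np.exp(eps) - 1)), prob * delta

# even a mechanism with eps = 2 becomes cheap at a 1% sampling rate
print(subsample_epsdelta_sketch(2.0, 0.0, 0.01))  # ~ (0.0619, 0.0)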
Example #5
    def compose_subsampled_mechanism(self,
                                     func,
                                     prob,
                                     coeff=1.0,
                                     improved_bound_flag=False):
        """
            # This function is for subsample without replacements
        :param func:  RDP function of the mechanism before amplification by sampling
        :param prob:  proportion of the data to sample
        :param coeff: number of times the subsampled mechanism is being composed.
        :param improved_bound_flag:
            - If True, then it uses Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf
            - If False (default value), it uses Theorem 9 of https://arxiv.org/pdf/1808.00087.pdf
            To qualify for the improved bound, the mechanism needs to have a pair of neighboring
            datasets that is worst for all Renyi-divergence and Pearson-Vajda divergence;
            Also, the RDP bound needs to be tight (see Definition 26 from the same paper).
            Gaussian mechanism, Laplace mechanism and many others satisfy this condition.

        :return:  nothing  (updates to the RDP accountant's attribute)
        """

        # (find a random subset of proportion prob)
        self.flag = False
        self.flag_subsample = True
        if (func, prob) in self.idxhash:
            idx = self.idxhash[(func, prob)]
            # update the coefficients of each function
            self.coeffs[idx] += coeff
            # also update the integer CGFs
            self.RDPs_int += self.cache[(func, prob)] * coeff
        else:

            def cgf(x):
                return x * func(x + 1)

            if not improved_bound_flag:

                def subsample_func_int(x):
                    # output the cgf of the subsampled mechanism
                    mm = int(x)
                    eps_inf = func(np.inf)

                    moments_two = 2 * np.log(prob) + utils.logcomb(mm,2) \
                                  + np.minimum(np.log(4) + func(2.0) + np.log(1-np.exp(-func(2.0))),
                                               func(2.0) + np.minimum(np.log(2),
                                                            2 * (eps_inf+np.log(1-np.exp(-eps_inf)))))
                    moment_bound = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                        np.log(2)) + cgf(j - 1) \
                                             + j * np.log(prob) + utils.logcomb(mm, j)
                    moments = [moment_bound(j) for j in range(3, mm + 1, 1)]
                    return np.minimum(
                        (x - 1) * func(x),
                        utils.stable_logsumexp([0, moments_two] + moments))
            else:
                # we need forward differences of exp(cgf)
                # The following line is the numerically stable way of implementing it.
                # The output is in polar form with logarithmic magnitude
                deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m)

                self.deltas_cache[(func, prob)] = [deltas, signs_deltas]

                def subsample_func_int(x):
                    # This function evaluates the CGF at alpha = x, i.e., lamb = x - 1
                    deltas_local, signs_deltas_local = self.deltas_cache[(func, prob)]
                    if np.isinf(func(x)):
                        return np.inf

                    mm = int(x)
                    eps_inf = func(np.inf)

                    moments_two = 2 * np.log(prob) + utils.logcomb(mm, 2) \
                                  + np.minimum(
                        np.log(4) + func(2.0) + np.log(1 - np.exp(-func(2.0))),
                        func(2.0) + np.minimum(np.log(2),
                                               2 * (eps_inf + np.log(1 - np.exp(-eps_inf)))))

                    moment_bound = lambda j: np.minimum(np.log(4) + 0.5*deltas_local[int(2*np.floor(j/2.0))-1]
                                                        + 0.5*deltas_local[int(2*np.ceil(j/2.0))-1],
                                                        np.minimum(j * (eps_inf + np.log(1 - np.exp(-eps_inf))),
                                                                   np.log(2))
                                                        + cgf(j - 1)) \
                                             + j * np.log(prob) + utils.logcomb(mm, j)

                    moment_bound_linear = lambda j: np.minimum(j * (eps_inf + np.log(1-np.exp(-eps_inf))),
                                                        np.log(2)) + cgf(j - 1) \
                                             + j * np.log(prob) + utils.logcomb(mm, j)

                    fastupperbound = fast_subsampled_cgf_upperbound(
                        func, mm, prob, deltas_local)

                    if mm <= self.alphas[-1]:  # compute the bound exactly. Requires bookkeeping of O(x^2)
                        moments = [moment_bound(j) for j in range(3, mm + 1)]

                        return np.minimum(
                            fastupperbound,
                            utils.stable_logsumexp([0, moments_two] + moments))
                    elif mm <= self.m_lin_max:  # compute the bound with the Stirling approximation. Everything is O(x) now.
                        moments = [moment_bound_linear(j) for j in range(3, mm + 1)]

                        return np.minimum(
                            fastupperbound,
                            utils.stable_logsumexp([0, moments_two] + moments))
                    else:  # Compute the O(1) upper bound
                        return fastupperbound

            def subsample_func(x):
                # This function returns the RDP at alpha = x
                # RDP with the linear interpolation upper bound of the CGF

                epsinf, tmp = subsample_epsdelta(func(np.inf), 0, prob)

                if np.isinf(x):
                    return epsinf
                if prob == 1.0:
                    return func(x)

                if (x >= 1.0) and (x <= 2.0):
                    return np.minimum(epsinf,
                                      subsample_func_int(2.0) / (2.0 - 1))
                if np.equal(np.mod(x, 1), 0):
                    return np.minimum(epsinf, subsample_func_int(x) / (x - 1))
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.min([
                    epsinf,
                    func(x),
                    ((x - xf) * subsample_func_int(xc) +
                     (1 - (x - xf)) * subsample_func_int(xf)) / (x - 1)
                ])

            # bookkeeping
            self.idxhash[(func, prob)] = self.n  # save the index
            self.n += 1  # increment the number of unique mechanisms
            self.coeffs.append(coeff)  # Update the coefficient
            self.RDPs.append(subsample_func)  # update the analytical functions

            # also update the integer results up to m_max.
            if (func, prob) in self.cache:
                results = self.cache[(func, prob)]
            else:
                results = np.zeros_like(self.RDPs_int, float)
                mm = np.max(self.alphas)  # evaluate the RDP up to order mm
                for alpha in range(2, mm + 1):
                    results[alpha - 1] = subsample_func(alpha)
                results[0] = results[1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
                self.cache[(func, prob)] = results  # save in cache

            self.RDPs_int += results * coeff
        # update the pure DP
        eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
        self.RDP_inf += eps * coeff
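
To see improved_bound_flag in action, and again assuming these methods belong to autodp's anaRDPacct, the sketch below composes the same subsampled Gaussian mechanism under both bounds. The Gaussian mechanism qualifies for the improved bound (Theorem 27), so its eps should be no worse than under the default Theorem 9 bound; all parameter values are illustrative.

from autodp import rdp_acct, rdp_bank

gaussian_rdp = lambda x: rdp_bank.RDP_gaussian({'sigma': 3.0}, x)

acct_thm9 = rdp_acct.anaRDPacct()
acct_thm9.compose_subsampled_mechanism(gaussian_rdp, 0.02, coeff=500)

acct_thm27 = rdp_acct.anaRDPacct()
acct_thm27.compose_subsampled_mechanism(gaussian_rdp, 0.02, coeff=500,
                                        improved_bound_flag=True)

print(acct_thm9.get_eps(1e-6), acct_thm27.get_eps(1e-6))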