Example #1
0
    def __init__(self, m=100, tol=0.1, m_max=500, m_lin_max=1000, verbose=False):
        # Binomial coefficients are tabulated exactly up to m_max; beyond
        # that a Stirling approximation is used instead.

        # ----- configuration -----
        self.m = m                   # number of integer orders to precompute
        self.m_max = m_max           # upper bound of the quadratic dependence
        self.m_lin_max = m_lin_max   # upper bound of the linear dependence
        self.tol = tol
        self.verbose = verbose

        # Integer grid 1..m; entry i corresponds to RDP order \alpha = i + 1.
        self.lambs = np.arange(1, self.m + 1)
        self.alphas = np.arange(1, self.m + 1)
        self.RDPs_int = np.zeros(self.m, dtype=float)

        # ----- accumulated state -----
        self.n = 0
        self.RDPs = []               # analytical CGFs
        self.coeffs = []
        self.RDP_inf = .0            # effectively handles the pure-DP part
        # Log binomial-coefficient table; only needed for subsampled mechanisms.
        self.logBinomC = utils.get_binom_coeffs(self.m + 1)
        self.idxhash = {}            # index of previously used algorithms
        self.cache = {}              # results from previously seen algorithms
        self.deltas_cache = {}       # results of all discrete-derivative paths
        self.evalRDP = lambda _: 0
        self.flag = True             # whether evalCGF is out of date
        self.flag_subsample = False  # whether logBinomC must be expanded
Example #2
0
def fast_k_subsample_upperbound(func, mm, prob, k):
    """Fast k-term approximate upper bound of the RDP of a subsampled
    mechanism (Proposition 8).

    Parameters
    ----------
    func : callable
        Evaluates the RDP ``eps(alpha)`` of the base mechanism.
    mm : int
        The RDP order alpha (mm >= 1).
    prob : float
        The subsampling probability gamma, in [0, 1].
    k : int
        Number of terms kept in the approximation (k >= 1).

    Returns
    -------
    float
        An upper bound on the RDP of the subsampled mechanism at order mm.

    Notes
    -----
    Computes ``(1/(alpha-1)) * log(T)`` with

        T = (1 - gamma + alpha*gamma) (1 - gamma)^(alpha - 1)
            + sum_{l=2}^k C(alpha, l) (1 - gamma)^(alpha - l) gamma^l e^{(l-1) eps(l)}
            + eta,
        eta = C(alpha, k+1) gamma^{k+1} e^{k eps(alpha)}
              * (1 - gamma + gamma e^{eps(alpha)})^{alpha - k - 1}.
    """
    # scipy.special gives O(1) log-binomial coefficients and a numerically
    # stable logsumexp; the original rebuilt an O(mm^2) Pascal-triangle table
    # (utils.get_binom_coeffs) on every call.
    from scipy.special import gammaln, logsumexp

    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    def cgf(x):
        # (x - 1) * eps(x): the CGF of the privacy loss at order x.
        return (x - 1) * func(x)

    def log_binom(n, j):
        # log C(n, j) via log-gamma; also yields -inf (i.e. a zero term)
        # when j > n, instead of an out-of-range table lookup.
        return gammaln(n + 1) - gammaln(j + 1) - gammaln(n - j + 1)

    # l = 0, 1 terms combined: log[(1 - gamma + alpha*gamma)(1 - gamma)^(alpha-1)].
    log_terms = [(mm - 1) * np.log(1 - prob) + np.log(1 - prob + mm * prob)]
    # l = 2..k terms.
    for j in range(2, k + 1):
        log_terms.append(log_binom(mm, j) + j * np.log(prob)
                         + (mm - j) * np.log(1 - prob) + cgf(j))
    # Remainder term eta bounding the tail l = k+1..alpha.
    log_terms.append(log_binom(mm, k + 1) + (k + 1) * np.log(prob)
                     + k * func(mm)
                     + (mm - k - 1) * np.log(1 - prob + prob * np.exp(func(mm))))
    return logsumexp(log_terms) / (mm - 1)
Example #3
0
    def get_eps(self, delta): # minimize over \lambda
        """Convert the tracked RDP to (eps, delta)-DP: return the smallest
        eps obtained by minimizing log(1/delta)/(alpha-1) + RDP(alpha) over
        the order alpha > 1.

        Parameters
        ----------
        delta : float
            Target failure probability; must lie in [0, 1].

        Returns
        -------
        float or None
            The optimal eps; ``self.RDP_inf`` when delta == 0; ``None`` or
            ``-1`` when the bounded scalar minimization fails (e.g. the
            requested delta is infeasible).
        """
        if not self.flag:
            self.build_zeroth_oracle()
            self.flag = True

        if delta < 0 or delta > 1:
            # NOTE(review): this only warns and falls through rather than
            # raising; kept as-is to preserve caller-visible behavior.
            print("Error! delta is a probability and must be between 0 and 1")
        if delta == 0:
            return self.RDP_inf
        else:
            def fun(x):  # objective at a continuous RDP order \alpha = x
                if x <= 1:
                    return np.inf
                else:
                    return np.log(1 / delta) / (x - 1) + self.evalRDP(x)

            def fun_int(i):  # objective at an integer RDP order \alpha = i
                # BUG FIX: the original guard was `i <= 1 | i >= len(...)`.
                # Bitwise `|` binds tighter than comparisons, so it parsed as
                # the chained comparison `i <= (1 | i) >= len(...)`, not as
                # the intended disjunction.
                if i <= 1 or i >= len(self.RDPs_int):
                    return np.inf
                else:
                    return np.log(1 / delta) / (i - 1) + self.RDPs_int[i - 1]

            # Computational constraints only arise with subsampled mechanisms.
            # Keep doubling m while the forward difference at the end of the
            # current integer grid is still negative (the minimum has not been
            # bracketed yet) and the objective there is finite.
            while (self.m < self.m_max) and (not np.isposinf(fun(self.m))) and (fun_int(self.m - 1) - fun_int(self.m - 2) < 0):
                if self.flag_subsample:
                    # O(m^2): rebuild the log-binomial table at the new size.
                    self.logBinomC = utils.get_binom_coeffs(self.m * 2 + 1)

                    # Recompute cached forward differences on the larger grid.
                    for key, val in self.deltas_cache.items():
                        if type(key) is tuple:
                            func_tmp = key[0]
                        else:
                            func_tmp = key
                        cgf = lambda x: x * func_tmp(x + 1)
                        deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m * 2)

                        self.deltas_cache[key] = [deltas, signs_deltas]

                new_alphas = range(self.m + 1, self.m * 2 + 1, 1)
                self.alphas = np.concatenate((self.alphas, np.array(new_alphas)))  # array of integers
                self.m = self.m * 2

                mm = np.max(self.alphas)

                rdp_int_new = np.zeros_like(self.alphas, float)

                # Re-evaluate every tracked mechanism on the enlarged grid and
                # rebuild the integer-order RDP accumulator.
                for key, val in self.cache.items():
                    idx = self.idxhash[key]
                    rdp = self.RDPs[idx]
                    newarray = np.zeros_like(self.alphas, float)
                    for j in range(2, mm + 1, 1):
                        newarray[j - 1] = rdp(1.0 * j)
                    newarray[0] = newarray[1]
                    coeff = self.coeffs[idx]
                    rdp_int_new += newarray * coeff
                    self.cache[key] = newarray

                self.RDPs_int = rdp_int_new

            # Best integer order on the current grid; the +1 offset is because
            # the alpha = 1 slot is excluded from the argmin.
            bestint = np.argmin(np.log(1 / delta) / (self.alphas[1:] - 1) + self.RDPs_int[1:]) + 1

            if bestint == self.m - 1:
                if self.verbose:
                    print('Warning: Reach quadratic upper bound: m_max.')
                # The minimum sits at the edge of the quadratic-cost grid.
                # Switch to O(1) evaluations of `fun` and search by doubling
                # until the forward difference turns non-negative (or inf).
                cur = fun(bestint)
                while (not np.isposinf(cur)) and fun(bestint - 1) - fun(bestint - 2) < -1e-8:
                    bestint = bestint * 2
                    cur = fun(bestint)

                results = minimize_scalar(fun, method='Bounded', bounds=[self.m - 1, bestint + 2],
                                          options={'disp': False})
                if results.success:
                    return results.fun
                else:
                    return None

            if bestint == 0:
                if self.verbose:
                    print('Warning: Smallest alpha = 1.')

            # Refine around the best integer order with a bounded 1-D search.
            bestalpha = self.alphas[bestint]

            results = minimize_scalar(fun, method='Bounded', bounds=[bestalpha - 1, bestalpha + 1],
                                      options={'disp': False})
            # The loop above ensures bestint + 2 is at most m and bestint >= 0.
            if results.success:
                return results.fun
            else:
                # Certain deltas are infeasible: e.g. for p, q uniform the
                # privacy RV is either 0 or infinity, and unless all infinite
                # events are covered by delta, eps cannot be finite.
                return -1