import numpy as np
from scipy.optimize import minimize_scalar

from autodp import utils  # assumed import path for the helper module used below
                          # (get_binom_coeffs, stable_logsumexp, get_forward_diffs)


def __init__(self, m=100, tol=0.1, m_max=500, m_lin_max=1000, verbose=False):
    # m_max is the largest order up to which we compute binomial coefficients exactly;
    # beyond that we use the Stirling approximation.

    # ------ Class Attributes -----------
    self.m = m  # default number of binomial coefficients to precompute
    self.m_max = m_max  # upper bound on m for the quadratic-time (exact binomial coefficient) computation
    self.m_lin_max = m_lin_max  # upper bound on m for the linear-time computation
    self.verbose = verbose

    self.lambs = np.linspace(1, self.m, self.m).astype(int)  # Corresponds to \alpha = 2, 3, 4, 5, ... for RDP

    self.alphas = np.linspace(1, self.m, self.m).astype(int)
    self.RDPs_int = np.zeros_like(self.alphas, float)

    self.n = 0
    self.RDPs = []  # analytical CGFs
    self.coeffs = []
    self.RDP_inf = .0  # This is effectively for pure DP.
    self.logBinomC = utils.get_binom_coeffs(self.m + 1)  # logBinomC is only needed for subsampled mechanisms.
    self.idxhash = {}  # saves the index of previously used algorithms
    self.cache = {}  # dictionary to save results from previously seen algorithms
    self.deltas_cache = {}  # dictionary to save results of all discrete derivative paths
    self.evalRDP = lambda x: 0
    self.flag = True  # a flag indicating whether evalCGF is out of date
    self.flag_subsample = False  # a flag indicating whether we need to expand logBinomC
    self.tol = tol
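# A brief usage sketch (hypothetical driver code, not part of the original module):
# the class name `anaRDPacct` and the `compose_mechanism` method are assumptions
# about the enclosing accountant class, which is not shown in this excerpt.
#
#     sigma = 5.0
#     gaussian_rdp = lambda alpha: alpha / (2 * sigma ** 2)  # analytical RDP of the Gaussian mechanism
#     acct = anaRDPacct(m=100, m_max=500)   # precompute 100 integer orders; cap doubling at m_max
#     acct.compose_mechanism(gaussian_rdp)  # assumed API for registering one mechanism's RDP curve
#     eps = acct.get_eps(delta=1e-6)        # convert the accumulated RDP to (eps, delta)-DP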
def fast_k_subsample_upperbound(func, mm, prob, k):
    """Evaluate the fast k-term approximate upper bound on the RDP of the
    subsampled mechanism (Proposition 8).

    func: evaluates the RDP \eps(\alpha) of the base mechanism
    mm:   the RDP order \alpha
    prob: the subsampling probability \gamma
    k:    the number of terms kept in the approximation

    The bound is
        (1 / (\alpha - 1)) * log( (1 - \gamma + \alpha \gamma) (1 - \gamma)^{\alpha - 1}
            + \sum_{l=2}^{k} {\alpha \choose l} (1 - \gamma)^{\alpha - l} \gamma^{l} e^{(l-1)\eps(l)}
            + \eta(\eps(\alpha), \alpha, \gamma) ),
    where the tail term is
        \eta(\eps(\alpha), \alpha, \gamma) = {\alpha \choose k+1} \gamma^{k+1} e^{k \eps(\alpha)}
            * (1 - \gamma + \gamma e^{\eps(\alpha)})^{\alpha - k - 1}.
    """
    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    def cgf(x):
        return (x - 1) * func(x)

    log_term_1 = (mm - 1) * np.log(1 - prob) + np.log(1 - prob + mm * prob)
    logBinomC = utils.get_binom_coeffs(mm)
    log_term_2 = [(logBinomC[int(mm), j] + j * np.log(prob)
                   + (mm - j) * np.log(1 - prob) + cgf(j)) for j in range(2, k + 1)]
    log_term_3 = (logBinomC[int(mm), k + 1] + (k + 1) * np.log(prob) + k * func(mm)
                  + (mm - k - 1) * np.log(1 - prob + prob * np.exp(func(mm))))
    log_term_2.append(log_term_1)
    log_term_2.append(log_term_3)
    bound = utils.stable_logsumexp(log_term_2) / (mm - 1)
    return bound
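# The helper below is a self-contained sanity-check sketch (not part of the original
# module): it reproduces the k-term computation above for a Gaussian base mechanism,
# with illustrative values of sigma, alpha, gamma, k, and with log_binom / scipy's
# logsumexp standing in for utils.get_binom_coeffs / utils.stable_logsumexp.
def _example_k_term_bound():
    from scipy.special import gammaln, logsumexp

    def log_binom(n, j):  # log of the binomial coefficient C(n, j)
        return gammaln(n + 1) - gammaln(j + 1) - gammaln(n - j + 1)

    sigma, alpha, gamma, k = 5.0, 32, 0.01, 3
    eps = lambda a: a / (2 * sigma ** 2)  # analytical RDP of the Gaussian mechanism
    cgf = lambda a: (a - 1) * eps(a)

    terms = [(alpha - 1) * np.log(1 - gamma) + np.log(1 - gamma + alpha * gamma)]
    terms += [log_binom(alpha, j) + j * np.log(gamma)
              + (alpha - j) * np.log(1 - gamma) + cgf(j) for j in range(2, k + 1)]
    terms += [log_binom(alpha, k + 1) + (k + 1) * np.log(gamma) + k * eps(alpha)
              + (alpha - k - 1) * np.log(1 - gamma + gamma * np.exp(eps(alpha)))]
    return logsumexp(terms) / (alpha - 1)  # upper bound on the subsampled RDP at order alpha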
def get_eps(self, delta):  # minimize over \lambda
    if not self.flag:
        self.build_zeroth_oracle()
        self.flag = True

    if delta < 0 or delta > 1:
        print("Error! delta is a probability and must be between 0 and 1")
    if delta == 0:
        return self.RDP_inf
    else:
        def fun(x):  # the input x is the RDP order \alpha
            if x <= 1:
                return np.inf
            else:
                return np.log(1 / delta) / (x - 1) + self.evalRDP(x)

        def fun_int(i):  # the input i is the RDP order \alpha, restricted to integers
            if i <= 1 or i >= len(self.RDPs_int):
                return np.inf
            else:
                return np.log(1 / delta) / (i - 1) + self.RDPs_int[i - 1]

        # When do we have computational constraints?
        # Only when we have subsampled items.

        # First check if the forward difference is positive at self.m, or if it is infinite
        while (self.m < self.m_max) and (not np.isposinf(fun(self.m))) \
                and (fun_int(self.m - 1) - fun_int(self.m - 2) < 0):
            # If so, double m and expand logBinomC until the forward difference is positive
            if self.flag_subsample:

                # The following line takes O(m^2) time.
                self.logBinomC = utils.get_binom_coeffs(self.m * 2 + 1)

                # Update deltas_cache
                for key, val in self.deltas_cache.items():
                    if type(key) is tuple:
                        func_tmp = key[0]
                    else:
                        func_tmp = key
                    cgf = lambda x: x * func_tmp(x + 1)
                    deltas, signs_deltas = utils.get_forward_diffs(cgf, self.m * 2)
                    self.deltas_cache[key] = [deltas, signs_deltas]

            new_alphas = range(self.m + 1, self.m * 2 + 1, 1)
            self.alphas = np.concatenate((self.alphas, np.array(new_alphas)))  # array of integers
            self.m = self.m * 2

            mm = np.max(self.alphas)

            rdp_int_new = np.zeros_like(self.alphas, float)

            for key, val in self.cache.items():
                idx = self.idxhash[key]
                rdp = self.RDPs[idx]
                newarray = np.zeros_like(self.alphas, float)
                for j in range(2, mm + 1, 1):
                    newarray[j - 1] = rdp(1.0 * j)
                newarray[0] = newarray[1]
                coeff = self.coeffs[idx]
                rdp_int_new += newarray * coeff
                self.cache[key] = newarray

            self.RDPs_int = rdp_int_new

            # # update the integer CGF and the cache for each function
            # rdp_int_new = np.zeros_like(self.RDPs_int)
            # for key, val in self.cache.items():
            #     idx = self.idxhash[key]
            #     rdp = self.RDPs[idx]
            #     newarray = np.zeros_like(self.RDPs_int)
            #     for j in range(self.m):
            #         newarray[j] = rdp(1.0 * (j + self.m + 1))
            #     coeff = self.coeffs[idx]
            #     rdp_int_new += newarray * coeff
            #     self.cache[key] = np.concatenate((val, newarray))
            #
            # # update the corresponding quantities
            # self.RDPs_int = np.concatenate((self.RDPs_int, rdp_int_new))

            # self.m = self.m * 2

        bestint = np.argmin(np.log(1 / delta) / (self.alphas[1:] - 1) + self.RDPs_int[1:]) + 1

        if bestint == self.m - 1:
            if self.verbose:
                print('Warning: Reached the quadratic upper bound: m_max.')
            # In this case we hit the maximum quadratic upper bound.
            # Fix it by calling the O(1) upper bound and doing a logarithmic search.
            cur = fun(bestint)
            while (not np.isposinf(cur)) and fun(bestint - 1) - fun(bestint - 2) < -1e-8:
                bestint = bestint * 2
                cur = fun(bestint)
                # if bestint > self.m_lin_max:
                #     print('Warning: Reached the linear upper bound: m_lin_max.')
                #     return cur

            results = minimize_scalar(fun, method='Bounded', bounds=[self.m - 1, bestint + 2],
                                      options={'disp': False})
            if results.success:
                return results.fun
            else:
                return None
            # return fun(bestint)

        if bestint == 0:
            if self.verbose:
                print('Warning: Smallest alpha = 1.')

        # find the best integer alpha.
        bestalpha = self.alphas[bestint]

        results = minimize_scalar(fun, method='Bounded', bounds=[bestalpha - 1, bestalpha + 1],
                                  options={'disp': False})
        # The while loop above ensures that bestint + 2 is at most m, and that bestint is at least 0.
        if results.success:
            return results.fun
        else:
            # There are cases when a certain \delta is not feasible. For example, let p and q be
            # uniform: the privacy R.V. is either 0 or \infty, and unless all \infty events are
            # taken care of by \delta, \epsilon cannot be < \infty.
            return -1
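# A minimal standalone sketch (hypothetical helper, not part of the original module)
# of the conversion get_eps performs: for an RDP curve rdp(alpha), it returns
#     eps(delta) = min over alpha > 1 of  log(1/delta) / (alpha - 1) + rdp(alpha),
# via a coarse integer grid search followed by a bounded 1-D refinement.
def _rdp_to_eps_sketch(rdp, delta, alpha_max=256):
    fun = lambda a: np.inf if a <= 1 else np.log(1 / delta) / (a - 1) + rdp(a)
    best = min(range(2, alpha_max), key=fun)  # coarse integer search over orders
    res = minimize_scalar(fun, method='Bounded',
                          bounds=(max(best - 1, 1 + 1e-6), best + 1))
    return res.fun if res.success else fun(best)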