Exemplo n.º 1
0
def fast_poission_subsampled_cgf_upperbound(func, mm, prob):
    """
    Evaluate the fast CGF upper bound for the Poisson-subsampled mechanism.

    :param func: callable evaluating the RDP of the base mechanism at an order
    :param mm: the order alpha (NOT lambda)
    :param prob: the subsampling probability gamma
    :return: the smaller of two log-domain bounds; ``np.inf`` when ``func(mm)``
        is infinite and ``0`` when ``mm == 1``
    """
    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    log_keep = np.log(1 - prob)  # log(1 - gamma)
    log_pick = np.log(prob)  # log(gamma)

    # Bound #1: log[(1 - gamma + gamma * e^{func(mm)})^mm]
    first_bound = mm * utils.stable_logsumexp_two(log_keep, log_pick + func(mm))

    # Bound #2 is a binomial expansion in gamma / (1 - gamma): the first three
    # orders are written out and all orders >= 3 are bounded together through
    # e^{func(mm)}.  It is only evaluated for mm >= 3; otherwise bound #1 is
    # used on its own.
    if mm < 3:
        return np.minimum(first_bound, first_bound)

    order_zero = mm * log_keep
    order_one = (mm - 1) * log_keep + np.log(mm) + log_pick
    # NOTE(review): the derivation this follows carries alpha(alpha-1)/2 for
    # this term, but no log(2) is subtracted here -- confirm whether the extra
    # slack is intentional.
    order_two = ((mm - 2) * log_keep + 2 * log_pick + np.log(mm)
                 + np.log(mm - 1) + func(2))
    order_three_plus = (
        np.log(mm) + np.log(mm - 1) + np.log(mm - 2) - np.log(3 * 2)
        + 3 * log_pick + (mm - 3) * log_keep + 2 * func(mm)
        + (mm - 3) * utils.stable_logsumexp_two(
            0, log_pick - log_keep + func(mm)))
    second_bound = utils.stable_logsumexp(
        [order_zero, order_one, order_two, order_three_plus])

    return np.minimum(first_bound, second_bound)
Exemplo n.º 2
0
            def fun(y):
                """
                Root-finding objective in ``y`` for fixed ``x``, ``alpha``, ``rho``.

                Both directional log-moments between the two-point
                distributions are computed and ``rho * (alpha - 1)`` is
                subtracted from each.  The larger difference is returned when
                ``alpha > 1`` and the smaller one otherwise.  The endpoints
                ``y == 0`` and ``y == 1`` are handled explicitly.
                """
                # Degenerate endpoints: zero only for the matching x.
                if y == 0:
                    return 0 if x == 1 else np.inf
                if y == 1:
                    return 0 if x == 0 else np.inf

                offset = rho * (alpha - 1)
                log_x, log_not_x = np.log(x), np.log(1 - x)
                log_y, log_not_y = np.log(y), np.log(1 - y)

                forward = utils.stable_logsumexp_two(
                    alpha * log_x + (1 - alpha) * log_not_y,
                    alpha * log_not_x + (1 - alpha) * log_y) - offset
                backward = utils.stable_logsumexp_two(
                    alpha * log_y + (1 - alpha) * log_not_x,
                    alpha * log_not_y + (1 - alpha) * log_x) - offset

                if alpha > 1:
                    return np.maximum(forward, backward)
                # alpha < 1: the sign of the inequality is toggled.
                return np.minimum(forward, backward)
Exemplo n.º 3
0
def fast_k_subsample_upperbound(func, mm, prob, k):
    """
    Evaluate the k-term approximate upper bound for a subsampled mechanism.

    :param func: callable returning the RDP of the base mechanism at an order
    :param mm: the order at which the bound is evaluated
    :param prob: sample probability
    :param k: number of approximation terms (capped at ``mm - 1``)
    :return: k-term approximate upper bound in theorem 11 in ICML-19, built
        from log-domain terms; ``np.inf`` when ``func(mm)`` is infinite and
        ``0`` when ``mm == 1``
    """
    def cgf(order):
        return (order - 1) * func(order)

    if np.isinf(func(mm)):
        return np.inf
    if mm == 1:
        return 0

    cur_k = np.minimum(k, mm - 1)

    if (2 * cur_k) >= mm:
        # The approximation would cover every order, so sum all terms exactly.
        exact_terms = []
        for l in range(2, mm + 1):
            exact_terms.append(
                np.log(scipy.special.comb(mm, l))
                + (mm - l) * np.log(1 - prob) + l * np.log(prob) + cgf(l))
        exact_terms.append(
            (mm - 1) * np.log(1 - prob) + np.log(mm * prob - prob + 1))
        return utils.stable_logsumexp(exact_terms)

    anchor = mm - cur_k  # order at which func is evaluated for the middle terms

    _, log_gap = utils.stable_log_diff_exp(0, -func(anchor))
    head_term = np.log(1 - prob) * mm + log_gap
    closed_form_term = -func(anchor) + mm * utils.stable_logsumexp_two(
        np.log(1 - prob),
        np.log(prob) + func(anchor))

    # Low-order corrections (l = 2 .. cur_k); these are subtracted at the end.
    low_order = []
    for l in range(2, cur_k + 1):
        low_order.append(
            np.log(scipy.special.comb(mm, l)) + (mm - l) * np.log(1 - prob)
            + l * np.log(prob) + utils.stable_log_diff_exp(
                (l - 1) * func(anchor), cgf(l))[1])
    if not low_order:
        return utils.stable_logsumexp_two(head_term, closed_form_term)
    log_subtract = utils.stable_logsumexp(low_order)

    # High-order corrections (l = mm - cur_k + 1 .. mm); these are added.
    high_order = []
    for l in range(mm - cur_k + 1, mm + 1):
        high_order.append(
            np.log(scipy.special.comb(mm, mm - l))
            + (mm - l) * np.log(1 - prob) + l * np.log(prob)
            + utils.stable_log_diff_exp(cgf(l), (l - 1) * func(anchor))[1])
    high_order.append(head_term)
    high_order.append(closed_form_term)
    log_add = utils.stable_logsumexp(high_order)

    return utils.stable_log_diff_exp(log_add, log_subtract)[1]
Exemplo n.º 4
0
    def qua(y):
        """
        Integrand on (-1, 1) after the substitution ``new_y = y / (1 - y**2)``.

        Returns the un-normalised two-component Gaussian mixture density at
        ``new_y`` (weights ``1 - gamma`` and ``gamma``, means 0 and 1)
        multiplied by ``exp(1j * t * log_ratio)``, where ``log_ratio`` is
        ``log(gamma * exp((2 * new_y - 1) / (2 * sigma**2)) + 1 - gamma)``.
        """
        mapped = y * 1.0 / (1 - y**2)
        two_var = 2 * sigma**2

        log_ratio = utils.stable_logsumexp_two(
            np.log(gamma) + (2 * mapped - 1) / two_var,
            np.log(1 - gamma))
        log_density = utils.stable_logsumexp_two(
            -mapped**2 / two_var + np.log(1 - gamma),
            -(mapped - 1)**2 / two_var + np.log(gamma))

        return np.exp(log_density) * np.exp(1.0j * log_ratio * t)
Exemplo n.º 5
0
 def fun(logx):
     """
     Return a ``(low, high)`` pair of log-domain values at ``logx``.

     When ``logx`` is ``-inf`` both entries equal
     ``log(delta) - fun1(logx)``.  Otherwise each entry is
     ``logsumexp(log_neg_grad + logx, log(delta)) - fun1(logx)``, using the
     lower and upper values returned by ``fun2(logx)`` respectively.
     """
     log_delta = np.log(delta)

     if np.isneginf(logx):
         value = log_delta - fun1(logx)
         return value, value

     grad_low, grad_high = fun2(logx)
     # presumably fun1 returns log(1 - f), going by the original local name
     # -- confirm against its definition
     log_one_minus_f = fun1(logx)
     lower = utils.stable_logsumexp_two(grad_low + logx,
                                        log_delta) - log_one_minus_f
     upper = utils.stable_logsumexp_two(grad_high + logx,
                                        log_delta) - log_one_minus_f
     return lower, upper
Exemplo n.º 6
0
def RDP_independent_noisy_screen(params, alpha):
    """
    return the approximation of data-independent RDP of ``Noisy Screening" (Theorem 7 in Private-kNN)
    The exact data-independent bound requires searching a max_count from [k/c, k] to maximize RDP_noisy_screening for any alpha

    Only four configurations are evaluated here: ``adjacent`` in {+1, -1}
    combined with ``max_count`` in {int(k / 10), threshold}.  For finite
    ``alpha != 1`` the largest log-moment over these configurations is
    returned, divided by ``alpha - 1``.

    :param params: dict with keys 'thresh' (screening threshold), 'k' and
        'sigma' (scale of the Gaussian noise)
    :param alpha: the order of the Renyi divergence
    :return: the approximate data-independent RDP at order ``alpha``
    """
    import scipy.stats

    threshold = params['thresh']
    k = params['k']
    sigma = params['sigma']
    rdp = []

    for adjacent in [+1, -1]:
        for max_count in [int(k / 10), threshold]:

            logp = scipy.stats.norm.logsf(threshold - max_count, scale=sigma)
            logq = scipy.stats.norm.logsf(threshold - max_count + adjacent,
                                          scale=sigma)
            log1q = _log1mexp(logq)
            log1p = _log1mexp(logp)
            # NOTE(review): the two special cases below return on the first
            # configuration instead of maximising over all four -- confirm
            # that this is intended.
            if alpha == 1:
                return np.exp(logp) * (logp - logq) + (1 - np.exp(logp)) * (
                    log1p - log1q)
            elif np.isinf(alpha):
                return np.abs(np.exp(logp - logq) * 1.0)

            term1 = alpha * logp - (alpha - 1) * logq
            term2 = alpha * log1p - (alpha - 1) * log1q
            rdp.append(utils.stable_logsumexp_two(term1, term2))

    # Take the worst case over every configuration collected above.  The
    # previous code took np.max of the last scalar only, which silently
    # discarded the other three configurations.
    log_term = np.max(rdp)
    return 1.0 * log_term / (alpha - 1)
Exemplo n.º 7
0
def RDP_gaussian_svt_c1(params, alpha):
    """
    The RDP of the gaussian-based SVT with the cut-off parameter c=1.

    The detailed algorithm is described in Theorem 8 in
    https://papers.nips.cc/paper/2020/file/e9bf14a419d77534105016f5ec122d62-Paper.pdf/

    Args:
        params: dict with keys 'sigma' (noise added to the threshold),
            'k' (the maximum length before svt stops) and 'margin'.
        alpha: the order of the Renyi divergence.

    Returns:
        The smaller of the log(k)-based bound and the moment-based bound.
        The cut-off c is fixed to 1 inside the function.
    """
    sigma, k, margin = params['sigma'], params['k'], params['margin']
    c = 1
    variance = sigma**2

    rdp_rho = 0.5 / variance * alpha
    ret_rdp = np.log(k) / (alpha - 1) + rdp_rho * 2
    if alpha == 1:
        # NOTE: the division by (alpha - 1) above has already been evaluated
        # with a zero denominator when this branch is taken.
        return ret_rdp * c

    # Moment-based bound: corollary 15 in NeurIPS-20.
    inside_part = np.log(
        2 * np.sqrt(3) * math.pi * (1 + 9 * margin**2 / variance))
    log_moment = utils.stable_logsumexp_two(
        0, inside_part + margin**2 * 1.0 / variance)
    moment_based = log_moment / (2.0 * (alpha - 1)) + rdp_rho * 2

    return min(moment_based, ret_rdp) * c
Exemplo n.º 8
0
def RDP_gaussian_svt_c1(params, alpha):
    """
    RDP of gaussian-svt with the cut-off fixed to c=1.

    :param params: dict with keys 'sigma', 'k' (the maximum length before svt
        stops) and 'margin'
    :param alpha: the order of the Renyi divergence
    :return: the minimum of the log(k)-based bound and the moment-based bound
    """
    sigma = params['sigma']
    k = params['k']
    margin = params['margin']
    c = 1

    sigma_sq = sigma**2
    twice_rho = (0.5 / sigma_sq * alpha) * 2

    log_k_bound = np.log(k) / (alpha - 1) + twice_rho
    if alpha == 1:
        # NOTE: log_k_bound was computed above with a zero denominator here.
        return log_k_bound * c

    # Implement corollary 15 in NeurIPS-20.
    log_inside = np.log(2 * np.sqrt(3) * math.pi * (1 + 9 * margin**2 /
                                                    sigma_sq))
    log_moment = utils.stable_logsumexp_two(
        0, log_inside + margin**2 * 1.0 / sigma_sq)
    moment_bound = log_moment / (2.0 * (alpha - 1)) + twice_rho

    return min(moment_bound, log_k_bound) * c
Exemplo n.º 9
0
 def qua(y):
     """
     Integrand on (-1, 1) after the substitution ``new_y = y / (1 - y**2)``.

     Returns ``exp(1j * t * loss) * exp(-new_y**2 / (2 * sigma**2))`` where
     ``loss`` is the negated
     ``log(gamma * exp((2 * new_y - 1) / (2 * sigma**2)) + 1 - gamma)``.
     """
     mapped = y * 1.0 / (1 - y**2)
     two_var = 2 * sigma**2

     neg_log_ratio = -1.0 * utils.stable_logsumexp_two(
         np.log(gamma) + (2 * mapped - 1) / two_var, np.log(1 - gamma))
     oscillation = np.exp(neg_log_ratio * 1.0j * t)
     gaussian_weight = np.exp(-mapped**2 / two_var)
     return oscillation * gaussian_weight
Exemplo n.º 10
0
def phi_laplace(params, t):
    """
    The closed-form log phi-function of the Laplace mechanism.

    Args:
        params: dict holding 'b', the parameter for the Laplace mechanism.
        t: the order of the characteristic function.

    Return:
        The log phi-function of the Laplace mechanism (complex-valued).
    """
    b = params['b']

    two_it = 2 * t * 1.j
    shift_up = 1.j * t / b
    shift_down = -(1.j * t + 1) / b

    # First summand: log(1 + 1 / (2it + 1)) + it / b
    first = utils.stable_logsumexp_two(0, np.log(1. / (two_it + 1))) + shift_up
    # Second summand: log(2it / (2it + 1)) - (it + 1) / b
    second = np.log(two_it) - np.log(two_it + 1) + shift_down

    return utils.stable_logsumexp_two(first, second) + np.log(0.5)
Exemplo n.º 11
0
    def rdp_int(x):
        """
        Evaluate the bound at order ``x``, using ``eps``, ``eps2`` and ``prob``
        from the enclosing scope.

        Returns ``eps`` for ``x == inf``.  Otherwise the first-order and
        second-order correction terms are subtracted, in the log domain, from
        ``x * log(1 - prob + prob * e^eps)``, and the result is divided by
        ``x - 1``.
        """
        if x == np.inf:
            return eps

        log_pick = np.log(prob)
        _, mag = utils.stable_log_diff_exp(eps, 0)  # log(e^eps - 1)
        _, mag2 = utils.stable_log_diff_exp(eps2, 0)  # log(e^eps2 - 1)

        full_moment = x * utils.stable_logsumexp_two(np.log(1 - prob),
                                                     log_pick + eps)
        first_order = np.log(x) + log_pick + mag
        _, mag3 = utils.stable_log_diff_exp(full_moment, first_order)

        second_order = (np.log(1.0 * x / 2) + np.log(x - 1) + 2 * log_pick
                        + np.log(np.exp(2 * mag)
                                 - np.exp(np.min([mag, 2 * mag, mag2]))))
        _, mag4 = utils.stable_log_diff_exp(mag3, second_order)

        return 1 / (x - 1) * mag4
Exemplo n.º 12
0
def general_upperbound(func, mm, prob):
    """
    Evaluate the general upper bound for a subsampled mechanism.

    :param func: callable returning the RDP of the base mechanism at an order
    :param mm: alpha in RDP
    :param prob: sample probability
    :return: the upperbound in theorem 1 in 2019 ICML, could be applied for
        general case (including poisson distribution); ``np.inf`` when
        ``func(mm)`` is infinite and ``0`` when ``mm`` is 0 or 1.

    A k-term approximation with k = min(50, mm - 1) is applied here.
    """
    def cgf(order):
        return (order - 1) * func(order)

    if np.isinf(func(mm)):
        return np.inf
    if mm == 1 or mm == 0:
        return 0

    # A small k keeps scipy's binomial coefficients accurate.
    cur_k = np.minimum(50, mm - 1)

    log_keep = np.log(1 - prob)
    log_pick = np.log(prob)
    log_three = np.log(3)

    # Terms that enter with a positive sign.
    positive = [
        mm * log_keep,
        log_three - func(mm) + mm * utils.stable_logsumexp_two(
            log_keep, log_pick + func(mm)),
    ]

    # Terms that enter with a negative sign.
    negative = []
    for l in range(3, cur_k + 1):
        negative.append(
            np.log(scipy.special.comb(mm, l)) + log_three
            + (mm - l) * log_keep + l * log_pick
            + utils.stable_log_diff_exp((l - 1) * func(mm), cgf(l))[1])
    negative.append(
        np.log(mm * (mm - 1) / 2) + 2 * log_pick + (mm - 2) * log_keep
        + utils.stable_log_diff_exp(log_three + func(mm), func(2))[1])
    negative.append(np.log(2) + log_pick + np.log(mm) + (mm - 1) * log_keep)
    negative.append(mm * log_keep + log_three - func(mm))

    log_positive = utils.stable_logsumexp(positive)
    log_negative = utils.stable_logsumexp(negative)
    return utils.stable_log_diff_exp(log_positive, log_negative)[1]
Exemplo n.º 13
0
def RDP_laplace(params, alpha):
    """
    Evaluate the RDP epsilon of the Laplace mechanism.

    :param params:
        'b' --- is the ratio of the scale parameter and L1 sensitivity
    :param alpha: The order of the Renyi Divergence
    :return: Evaluation of the RDP's epsilon.  Orders ``alpha <= 1`` all
        return the same closed-form value and ``alpha = inf`` returns ``1/b``.
    """
    b = params['b']
    alpha = 1.0 * alpha

    if alpha <= 1:
        return (1 / b + np.exp(-1 / b) - 1)
    if np.isinf(alpha):
        return 1 / b

    # 1 < alpha < inf: log of a two-term mixture, evaluated stably.
    normaliser = 2.0 * alpha - 1
    growing = (alpha - 1.0) / b + np.log(alpha / normaliser)
    decaying = -1.0 * alpha / b + np.log((alpha - 1.0) / normaliser)
    return utils.stable_logsumexp_two(growing, decaying) / (alpha - 1)
Exemplo n.º 14
0
                def fun(x):
                    """
                    Epsilon obtained from the RDP curve at order ``x`` (alpha).

                    Returns ``np.inf`` for ``x <= 1``.  With ``naive`` set, the
                    result is ``log(1 / delta) / (x - 1) + rdp(x)``.  Otherwise
                    it is the smaller of the two conversions computed below.
                    """
                    if x <= 1:
                        return np.inf
                    if naive:
                        return np.log(1 / delta) / (x - 1) + rdp(x)

                    # First conversion, clipped at zero from below.
                    bbghs = np.maximum(
                        rdp(x) + np.log((x - 1) / x) -
                        (np.log(delta) + np.log(x)) / (x - 1), 0)

                    # Second ("optimal") conversion:
                    #   1/(alpha-1) * log((e^{(alpha-1)*rdp} - 1)/(alpha*delta) + 1)
                    _, log_moment_minus_one = utils.stable_log_diff_exp(
                        (x - 1) * rdp(x), 0)
                    log_inner = utils.stable_logsumexp_two(
                        log_moment_minus_one - np.log(x) - np.log(delta), 0)

                    return min(log_inner * 1.0 / (x - 1), bbghs)
Exemplo n.º 15
0
def RDP_randresponse(params, alpha):
    """
    Evaluate the RDP epsilon of randomized response.

    :param params:
        'p' --- is the Bernoulli probability p of outputting the truth
    :param alpha: The order of the Renyi Divergence
    :return: Evaluation of the RDP's epsilon; ``np.inf`` when ``p`` is exactly
        0 or 1
    """
    p = params['p']
    assert 0 <= p <= 1
    if p in (1, 0):
        return np.inf

    if alpha <= 1:
        return (2 * p - 1) * np.log(p / (1 - p))
    if np.isinf(alpha):
        return np.abs(np.log((1.0 * p / (1 - p))))

    # 1 < alpha < inf
    log_p = np.log(p)
    log_not_p = np.log(1 - p)
    log_moment = utils.stable_logsumexp_two(
        alpha * log_p + (1 - alpha) * log_not_p,
        alpha * log_not_p + (1 - alpha) * log_p)
    return log_moment / (alpha - 1)
Exemplo n.º 16
0
def RDP_noisy_screen(params, alpha):
    """
    return the data-dependent RDP of ``Noisy Screening" (Theorem 7 in Private-kNN)

    :param params: dict with 'logp' and 'logq', the log of p and q, where p is
        the probability P(max_vote + noise > Threshold)
    :param alpha: the order of the Renyi divergence
    :return: the RDP value at order ``alpha``
    """
    logp, logq = params['logp'], params['logq']
    log1q = _log1mexp(logq)
    log1p = _log1mexp(logp)

    if alpha == 1:
        p = np.exp(logp)
        return p * (logp - logq) + (1 - p) * (log1p - log1q)
    if np.isinf(alpha):
        return np.abs(np.exp(logp - logq) * 1.0)

    # Log-moments of the two outcomes, combined with a stable log-sum-exp.
    first_outcome = alpha * logp - (alpha - 1) * logq
    second_outcome = alpha * log1p - (alpha - 1) * log1q
    log_moment = utils.stable_logsumexp_two(first_outcome, second_outcome)

    return 1.0 * log_moment / (alpha - 1)
Exemplo n.º 17
0
def RDP_independent_noisy_screen(params, alpha):
    """
    The data-independent RDP of ``Noisy Screening" (Theorem 7 in Private-kNN).

    The method is described in https://openaccess.thecvf.com/content_CVPR_2020/html/Zhu_Private-kNN_Practical_Differential_Privacy_for_Computer_Vision_CVPR_2020_paper.html)
    The exact data-independent bound requires searching a max_count from [k/c, k] to maximize RDP_noisy_screening for any alpha

    This implementation approximates that search with four configurations:
    ``adjacent`` in {+1, -1} combined with ``max_count`` in
    {int(k / 10), threshold}.

    Args:
        params: contains three parameters. params['thresh'] is the threshold for noisy screening,
        k is the number of neighbors in Private-kNN, sigma is the noisy scale.
        alpha: the order of the Renyi divergence.

    Returns:
        The RDP of data-independent noisy screening.  For finite
        ``alpha != 1`` this is the largest log-moment over the four
        configurations divided by ``alpha - 1``.
    """
    import scipy.stats

    threshold = params['thresh']
    k = params['k']
    sigma = params['sigma']
    rdp = []

    for adjacent in [+1, -1]:
        for max_count in [int(k / 10), threshold]:

            logp = scipy.stats.norm.logsf(threshold - max_count, scale=sigma)
            logq = scipy.stats.norm.logsf(threshold - max_count + adjacent,
                                          scale=sigma)
            log1q = _log1mexp(logq)
            log1p = _log1mexp(logp)
            # NOTE(review): these two special cases return on the first
            # configuration rather than maximising over all of them --
            # confirm that this is intended.
            if alpha == 1:
                return np.exp(logp) * (logp - logq) + (1 - np.exp(logp)) * (
                    log1p - log1q)
            elif np.isinf(alpha):
                return np.abs(np.exp(logp - logq) * 1.0)

            term1 = alpha * logp - (alpha - 1) * logq
            term2 = alpha * log1p - (alpha - 1) * log1q
            rdp.append(utils.stable_logsumexp_two(term1, term2))

    # Maximise over every configuration gathered in ``rdp``.  Taking np.max of
    # the last scalar, as before, ignored all but the final configuration.
    log_term = np.max(rdp)
    return 1.0 * log_term / (alpha - 1)
Exemplo n.º 18
0
def RDP_svt_laplace(params, alpha):
    """
    The RDP of Laplace-based SVT.

    Args:
        params['b']: the Laplace noise scale divide the sensitivity.
        params['k']: the SVT algorithm stops either k queries is achieved
        or the cut-off c is achieved.
        params['c']: the cut-off; values below 1 are raised to 1.
        alpha: the order of the Renyi divergence.

    Returns:
        The smallest of the bounds evaluated below.  The sinh-based bound is
        only considered when ``alpha`` is finite and different from 1.
    """
    b = params['b']
    k = params['k']
    c = max(params['c'], 1)
    alpha = 1.0 * alpha

    # eps_1: RDP of a single Laplace mechanism at order alpha.
    if alpha <= 1:
        eps_1 = (1 / b + np.exp(-1 / b) - 1)
    elif np.isinf(alpha):
        eps_1 = 1 / b
    else:
        normaliser = 2.0 * alpha - 1
        eps_1 = utils.stable_logsumexp_two(
            (alpha - 1.0) / b + np.log(alpha / normaliser),
            -1.0 * alpha / b + np.log(
                (alpha - 1.0) / normaliser)) / (alpha - 1)

    eps_2 = 1 / b  # infinity rdp on nu
    tilde_eps = eps_2 * (c + 1)  # eps_infinity

    ret_rdp = min(
        c * eps_2 + eps_1,
        c * np.log(k / c) * 1.0 / (alpha - 1) + eps_1 * (c + 1),
        0.5 * alpha * tilde_eps**2)
    if np.isinf(alpha) or alpha == 1:
        return ret_rdp

    # The following is sinh-based method.
    sinh_ratio = (np.sinh(alpha * tilde_eps) - np.sinh(
        (alpha - 1) * tilde_eps)) / np.sinh(tilde_eps)
    cdp_bound = 1.0 / (alpha - 1) * np.log(sinh_ratio)
    return min(ret_rdp, cdp_bound)
Exemplo n.º 19
0
def RDP_svt_laplace(params, alpha):
    """
    Laplace-SVT (via RDP), used in NeurIPS-20

    :param params: dict with 'b' (the noise scale for rho), 'k' (the
        algorithm stops either k is achieved or c is achieved) and 'c' (the
        cut-off, raised to at least 1)
    :param alpha: the order of the Renyi divergence
    :return: the smallest of the bounds computed below
    """
    b, k = params['b'], params['k']
    c = max(params['c'], 1)
    alpha = 1.0 * alpha

    def laplace_rdp():
        # RDP of one Laplace mechanism at the (float) order alpha.
        if alpha <= 1:
            return (1 / b + np.exp(-1 / b) - 1)
        if np.isinf(alpha):
            return 1 / b
        return utils.stable_logsumexp_two(
            (alpha - 1.0) / b + np.log(alpha / (2.0 * alpha - 1)),
            -1.0 * alpha / b + np.log(
                (alpha - 1.0) / (2.0 * alpha - 1))) / (alpha - 1)

    eps_1 = laplace_rdp()
    eps_2 = 1 / b  # infinity rdp on nu
    c_log_n_c = c * np.log(k / c)
    tilde_eps = eps_2 * (c + 1)  # eps_infinity

    composition_bound = c * eps_2 + eps_1
    counting_bound = c_log_n_c * 1.0 / (alpha - 1) + eps_1 * (c + 1)
    quadratic_bound = 0.5 * alpha * tilde_eps**2
    ret_rdp = min(min(composition_bound, counting_bound), quadratic_bound)

    if np.isinf(alpha) or alpha == 1:
        return ret_rdp

    # The following is sinh-based method.
    numerator = np.sinh(alpha * tilde_eps) - np.sinh((alpha - 1) * tilde_eps)
    cdp_bound = 1.0 / (alpha - 1) * np.log(numerator / np.sinh(tilde_eps))
    return min(ret_rdp, cdp_bound)
Exemplo n.º 20
0
 def diff2_general(logx, u):
     """
     Log-moment of order ``alpha`` minus ``rho * (alpha - 1)``.

     Both arguments are log-probabilities.  The two summands pair
     ``1 - exp(u)`` with ``1 - exp(logx)`` and ``exp(u)`` with ``exp(logx)``.
     ``alpha`` and ``rho`` come from the enclosing scope.
     """
     complement_term = (alpha * np.log(1 - np.exp(u)) +
                        (1 - alpha) * np.log(1 - np.exp(logx)))
     direct_term = alpha * u + (1 - alpha) * logx
     log_moment = utils.stable_logsumexp_two(complement_term, direct_term)
     return (log_moment - rho * (alpha - 1))
Exemplo n.º 21
0
def phi_subsample_gaussian_p(params,
                             t,
                             phi_min=False,
                             phi_max=False,
                             L=20,
                             N=1e4):
    """
    The log phi-function of the poisson subsample Gaussian mechanism.

    The phi-function of the subsample Gaussian is not symmetric. This function implements the log phi-function
    phi(t) := E_p e^{t log(p)/log(q)}.
    We provide two approaches to approximate phi-function.
    Approach 1: we provide valid upper and lower bounds by setting phi_max or phi_min to be True. This discretization-based
    approach will truncate and discretize the output space.
    Approach 2: (both phi_min = False and phi_max = False), we compute the phi-function using Gaussian
    quadrature directly. We recommend the second approach as it's more efficient.

    Args:
        params: dict with 'gamma' (the sampling ratio) and 'sigma' (the std of
        the noise divide by the l2 sensitivity).
        t: the order at which the phi-function is evaluated.
        phi_min: if True, the function provide the lower bound approximation of delta(epsilon).
        phi_max: if True, the function provide the upper bound approximation of delta(epsilon).
        if not phi_min and not phi_max, the function provide gaussian quadrature approximation.
        L, N: used in Approach 1, truncates the output space into [-L, L] and discretize it into N bins.
        N is converted with int(), so a float such as the default 1e4 is accepted.

    Returns:
        The (complex) log phi-function evaluated at t.
    """

    sigma = params['sigma']
    gamma = params['gamma']

    # The qua function (used for the Double Quadrature method) computes the
    # integrand e^{it * loss} times the density.  Gaussian quadrature requires
    # the integration interval to be [-1, 1]; to integrate over [-inf, inf], y
    # (the quadrature variable) is first mapped to new_y = y / (1 - y**2).
    # The privacy loss is
    # log(gamma * e^((2 * new_y - 1) / (2 * sigma**2)) + 1 - gamma).
    def qua(y):

        new_y = y * 1.0 / (1 - y**2)
        stable = utils.stable_logsumexp_two(
            np.log(gamma) + (2 * new_y - 1) / (2 * sigma**2),
            np.log(1 - gamma))
        exp_term = np.exp(1.0j * stable * t)
        density_term = utils.stable_logsumexp_two(
            -new_y**2 / (2 * sigma**2) + np.log(1 - gamma),
            -(new_y - 1)**2 / (2 * sigma**2) + np.log(gamma))
        inte_function = np.exp(density_term) * exp_term
        return inte_function

    # inte_f implements the integration over an infinite interval.
    # int_-infty^infty f(x)dx = int_-1^1 f(y/(1-y^2)) * (1 + y**2) / ((1 - y ** 2) ** 2).
    inte_f = lambda y: qua(y) * (1 + y**2) / ((1 - y**2)**2)

    if not phi_min and not phi_max:
        # Double quadrature: res computes the phi-function using Gaussian quadrature.
        res = integrate.quadrature(inte_f,
                                   -1.0,
                                   1.0,
                                   tol=1e-15,
                                   rtol=1e-15,
                                   maxiter=100)
        result = np.log(res[0]) - np.log(np.sqrt(2 * np.pi) * sigma)
        return result

    # Discretisation path: return the lower / upper bound approximation.
    N = int(N)
    dx = 2.0 * L / N
    y = np.linspace(-L, L - dx, N, dtype=np.complex128)  # represent y
    stable = utils.stable_logsumexp_two(
        np.log(gamma) + (2 * y - 1) / (2 * sigma**2), np.log(1 - gamma))

    # Replace each grid value by the min (or max) of itself and its left
    # neighbour.  The comprehension yields a Python list, so it is converted
    # back to an ndarray: multiplying a list by the complex scalar 1.0j below
    # would otherwise raise TypeError.
    if phi_min:
        # return the left riemann stable (lower bound of the privacy guarantee).
        stable = np.array([
            min(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])
    elif phi_max:
        stable = np.array([
            max(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])

    # Log of the Riemann sum under each mixture component (weights gamma and
    # 1 - gamma), then combined.
    stable_1 = utils.stable_logsumexp(1.0j * stable * t - (y - 1)**2 /
                                      (2 * sigma**2)) + np.log(gamma) - np.log(
                                          np.sqrt(2 * np.pi) * sigma)
    stable_2 = utils.stable_logsumexp(1.0j * stable * t - y**2 /
                                      (2 * sigma**2)) + np.log(
                                          1 - gamma) - np.log(
                                              np.sqrt(2 * np.pi) * sigma)
    p_y = utils.stable_logsumexp_two(stable_1, stable_2)
    result = p_y + np.log(dx)

    return result
Exemplo n.º 22
0
def phi_subsample_gaussian_q(params,
                             t,
                             phi_min=False,
                             phi_max=False,
                             L=20,
                             N=1e4):
    """
    The phi-function of the privacy loss R.V. log(q)/log(p), i.e., phi(t) := E_q e^{t log(q)/log(p)}.

    We provide two approaches to approximate the phi-function.
    In the first approach, we provide valid upper and lower bounds by setting phi_max or phi_min to be True.
    In the second approach (both phi_min = False and phi_max = False), we compute the phi-function using Gaussian
    quadrature directly. We recommend the second approach as it's more efficient.

    Args:
        params: dict with 'gamma' (the sampling ratio) and 'sigma' (the std of
        the noise divide by the l2 sensitivity).
        t: the order at which the phi-function is evaluated.
        phi_min: if True, the function provide the lower bound approximation of delta(epsilon).
        phi_max: if True, the function provide the upper bound approximation of delta(epsilon).
        if not phi_min and not phi_max, the function provide gaussian quadrature approximation.
        L, N: used in the first approach, truncates the output space into [-L, L] and discretize it into N bins.
        N is converted with int(), so a float such as the default 1e4 is accepted.

    Returns:
        The (complex) log phi-function evaluated at the t-th order.
    """

    sigma = params['sigma']
    gamma = params['gamma']

    # The qua function (used for the Double Quadrature method) computes the
    # integrand e^{it * loss} times the Gaussian weight.  Gaussian quadrature
    # requires the integration interval to be [-1, 1]; to integrate over
    # [-inf, inf], y (the quadrature variable) is first mapped to
    # new_y = y / (1 - y**2).  The privacy loss is
    # -log(gamma * e^((2 * new_y - 1) / (2 * sigma**2)) + 1 - gamma).
    def qua(y):
        new_y = y * 1.0 / (1 - y**2)
        phi_result = -1.0 * utils.stable_logsumexp_two(
            np.log(gamma) + (2 * new_y - 1) /
            (2 * sigma**2), np.log(1 - gamma))
        phi_result = np.exp(phi_result * 1.0j * t)
        inte_function = phi_result * np.exp(-new_y**2 / (2 * sigma**2))
        return inte_function

    # inte_f implements the integration over an infinite interval.
    # int_-infty^infty f(x)dx = int_-1^1 f(y/(1-y^2)) * (1 + y**2) / ((1 - y ** 2) ** 2).
    inte_f = lambda y: qua(y) * (1 + y**2) / ((1 - y**2)**2)

    if not phi_max and not phi_min:
        # Double quadrature: res computes the phi-function using Gaussian quadrature.
        res = integrate.quadrature(inte_f,
                                   -1.0,
                                   1.0,
                                   tol=1e-15,
                                   rtol=1e-15,
                                   maxiter=100)
        result = np.log(res[0]) - np.log(np.sqrt(2 * np.pi) * sigma)
        return result

    # np.linspace needs an integer sample count; the default N=1e4 is a float.
    N = int(N)
    dx = 2.0 * L / N  # discretisation interval \Delta x
    y = np.linspace(-L, L - dx, N, dtype=np.complex128)
    stable = -1.0 * utils.stable_logsumexp_two(
        np.log(gamma) + (2 * y - 1) / (2 * sigma**2), np.log(1 - gamma))

    # Replace each grid value by the min (or max) of itself and its left
    # neighbour.  The result is converted back to an ndarray so that the
    # complex scaling below is element-wise instead of failing on a list.
    if phi_min:
        # return the left riemann stable
        stable = np.array([
            min(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])
    elif phi_max:
        stable = np.array([
            max(stable[max(i - 1, 0)], stable[i]) for i in range(len(stable))
        ])
    exp_term = 1.0j * t * stable
    result = utils.stable_logsumexp(exp_term - y**2 / (2 * sigma**2)) - np.log(
        np.sqrt(2 * np.pi) * sigma)
    new_result = result + np.log(dx)
    return new_result
Exemplo n.º 23
0
    def compose_poisson_subsampled_mechanisms(self, func, prob, coeff=1.0):
        """
        Compose a Poisson-subsampled mechanism into this accountant.

        :param func: callable returning the RDP of the base mechanism at a
            given order; it is also called with ``np.inf``
        :param prob: the subsampling probability
        :param coeff: how many times the mechanism is composed (the weight
            added to its coefficient)

        Side effects visible in this method: sets ``self.flag`` and
        ``self.flag_subsample``; updates ``self.coeffs``, ``self.RDPs_int``
        and ``self.RDP_inf``; and, for a ``(func, prob)`` pair seen for the
        first time, registers it in ``self.idxhash``, ``self.RDPs`` and
        ``self.cache`` and increments ``self.n``.
        """
        # This function implements the lower bound for subsampled RDP.
        # It is also the exact formula of poisson-subsampled RDP for many mechanisms including Gaussian mech.
        #
        # At the moment, we do not support mixing poisson subsampling and standard subsampling.
        # TODO: modify the caching identifies so that we can distinguish different types of subsampling
        #
        self.flag = False
        self.flag_subsample = True
        if (func, prob) in self.idxhash:
            # Already registered: only the weights need updating.
            idx = self.idxhash[(func, prob)] # TODO: this is really where it needs to be changed.
            # update the coefficients of each function
            self.coeffs[idx] += coeff
            # also update the integer CGFs
            self.RDPs_int += self.cache[(func, prob)] * coeff
        else: # compute an easy to compute upper bound of it.

            def cgf(x):
                # CGF of the base mechanism at lambda = x, i.e. order x + 1.
                return x * func(x+1)

            def subsample_func_int(x):
                # This function evaluates the CGF at alpha = x, i.e., lamb =  x- 1

                if np.isinf(func(x)):
                    return np.inf

                mm = int(x)
                # The fast bound is computed up front but only returned by the
                # final branch below.
                fastbound = fast_poission_subsampled_cgf_upperbound(func, mm, prob)

                if x <= self.alphas[-1]: # compute the bound exactly.
                    # Binomial coefficients come from the precomputed table.
                    moments = [cgf(j-1) +j*np.log(prob) + (mm-j) * np.log(1-prob)
                               + self.logBinomC[mm, j] for j in range(2,mm+1,1)]

                    return utils.stable_logsumexp([(mm-1)*np.log(1-prob)+np.log(1+(mm-1)*prob)]+moments)
                elif mm <= self.m_lin_max:
                    # Same sum, with binomial coefficients computed on the fly.
                    moments = [cgf(j-1) +j*np.log(prob) + (mm-j) * np.log(1-prob)
                               + utils.logcomb(mm,j) for j in range(2,mm+1,1)]
                    return utils.stable_logsumexp([(mm-1)*np.log(1-prob)+np.log(1+(mm-1)*prob)] + moments)
                else:
                    return fastbound

            def subsample_func(x): # linear interpolation upper bound
                # This function implements the RDP at alpha = x

                if np.isinf(func(x)):
                    return np.inf
                if prob == 1.0:
                    # No subsampling: the base mechanism's RDP is returned.
                    return func(x)

                # Every value returned below is capped at epsinf.
                epsinf, tmp = subsample_epsdelta(func(np.inf),0,prob)

                if np.isinf(x):
                    return epsinf
                if (x >= 1.0) and (x <= 2.0):
                    # Orders in [1, 2] all use the value at order 2.
                    return np.minimum(epsinf, subsample_func_int(2.0) / (2.0-1))
                if np.equal(np.mod(x, 1), 0):
                    # Integer order: no interpolation needed.
                    return np.minimum(epsinf, subsample_func_int(x) / (x-1) )
                # Fractional order: interpolate the CGF linearly between the
                # two neighbouring integer orders.
                xc = math.ceil(x)
                xf = math.floor(x)
                return np.minimum(
                    epsinf,
                    ((x-xf)*subsample_func_int(xc) + (1-(x-xf))*subsample_func_int(xf)) / (x-1)
                )

            # book keeping
            self.idxhash[(func, prob)] = self.n # save the index
            self.n += 1 # increment the number of unique mechanisms
            self.coeffs.append(coeff) # Update the coefficient
            self.RDPs.append(subsample_func) # update the analytical functions

            # also update the integer results, with a vectorized computation.
            # TODO: pre-computing subsampled RDP for integers is error-prone (implement the same thing twice)
            # TODO: and its benefits are not clear. We should consider removing it and simply call the lambda function.
            #
            if (func,prob) in self.cache:
                results = self.cache[(func,prob)]
            else:
                results = np.zeros_like(self.RDPs_int, float)
                mm = np.max(self.alphas)  # evaluate the RDP up to order mm
                jvec = np.arange(2, mm + 1)
                logterm3plus = np.zeros_like(results)  # This saves everything from j=2 to j = m+1
                for j in jvec:
                    # Entry j-2 holds the term for index j.
                    logterm3plus[j-2] = cgf(j-1) + j * np.log(prob)  #- np.log(1-prob))

                for alpha in range(2, mm+1):
                    # NOTE(review): entries are stored at index j-2 above, so
                    # index alpha-1 is the entry for j = alpha+1 -- confirm
                    # this is the intended entry to test for infinity.
                    if np.isinf(logterm3plus[alpha-1]):
                        results[alpha-1] = np.inf
                    else:
                        tmp = utils.stable_logsumexp(logterm3plus[0:alpha-1] + self.logBinomC[alpha , 2:(alpha + 1)]
                                               + (alpha+1-jvec[0:alpha-1])*np.log(1-prob))
                        results[alpha-1] = utils.stable_logsumexp_two((alpha-1)*np.log(1-prob)
                                                                    + np.log(1+(alpha-1)*prob), tmp) / (1.0*alpha-1)

                results[0] = results[1]  # Provide the trivial upper bound of RDP at alpha = 1 --- the KL privacy.
                self.cache[(func,prob)] = results # save in cache
            self.RDPs_int += results * coeff
        # update the pure DP tracker
        eps, delta = subsample_epsdelta(func(np.inf), 0, prob)
        self.RDP_inf += eps * coeff
Exemplo n.º 24
0
def fast_subsampled_cgf_upperbound(func, mm, prob, deltas_local):
    # Evaluate the fast CGF upper bound for the subsampled mechanism.
    #   func         -- evaluates the RDP of the base mechanism; the code below
    #                   calls it at 2, mm - 1, mm and np.inf.
    #   mm           -- the order alpha.  NOT lambda.
    #   prob         -- only ever used through np.log(prob); presumably the
    #                   subsampling probability -- TODO confirm against callers.
    #   deltas_local -- indexable sequence; only position 1 is read below.
    #
    # NOTE(review): the unconditional `return np.inf` right below short-circuits
    # the function.  As written it ALWAYS returns np.inf and every statement
    # after it is unreachable.  This looks like a deliberate way of disabling
    # the bound (np.inf is a trivially valid upper bound) -- confirm before
    # removing the early return or deleting the dead code.
    return np.inf

    # ---- unreachable from here on (kept for reference) ----

    # An infinite base RDP at order mm makes the bound vacuous.
    if np.isinf(func(mm)):
        return np.inf
    # Order 1 is answered with 0 directly.
    if mm == 1:
        return 0
    # Log of the second-order term: the smallest of three candidate bounds, plus
    # 2*log(prob) + log(mm) + log(mm - 1) - log(2), i.e. log(prob^2 * C(mm, 2)).
    # Both deltas_local indices evaluate to 1 (2*floor(1) - 1 and 2*ceil(1) - 1).
    secondterm = np.minimum(np.minimum((2) * np.log(np.exp(func(np.inf)) - 1)
                                       + np.minimum(func(2), np.log(4)),
                                       np.log(2) + func(2)),
                            np.log(4) + 0.5 * deltas_local[int(2 * np.floor(2 / 2.0)) - 1]
                            + 0.5 * deltas_local[int(2 * np.ceil(2 / 2.0)) - 1]
                            ) + 2 * np.log(prob) + np.log(mm) + np.log(mm - 1) - np.log(2)

    # For mm == 2 only the constant term (log 1 = 0) and the second term are combined.
    if mm == 2:
        return utils.stable_logsumexp([0, secondterm])

    # approximate the remaining terms using a geometric series
    logratio1 = np.log(prob) + np.log(mm) + func(mm)
    logratio2 = logratio1 + np.log(np.exp(func(np.inf)) - 1)
    logratio = np.minimum(logratio1, logratio2)
    # The multiplicative constant depends on which of the two log-ratios is the smaller one.
    if logratio1 > logratio2:
        coeff = 1
    else:
        coeff = 2


    # For mm == 3 there is a single remaining term: coeff * ratio^3.
    if mm == 3:
        return utils.stable_logsumexp([0, secondterm, np.log(coeff) + 3 * logratio])

    # Calculate the sum of the geometric series starting from the third term. This is a total of mm-2 terms.
    # Three cases depending on whether the common ratio exp(logratio) is below, above or equal to 1.
    if logratio < 0:
        geometric_series_bound = np.log(coeff) + 3 * logratio - np.log(1 - np.exp(logratio)) \
                                 + np.log(1 - np.exp((mm - 2) * logratio))
    elif logratio > 0:
        geometric_series_bound = np.log(coeff) + 3 * logratio + (mm-2) * logratio - np.log(np.exp(logratio) - 1)
    else:
        geometric_series_bound = np.log(coeff) + np.log(mm - 2)

    # we will approximate using (1+h)^mm
    logh1 = np.log(prob) + func(mm - 1)

    logh2 = logh1 + np.log(np.exp(func(np.inf)) - 1)

    # log of 2*(1+h1)^mm and of (1+h2)^mm respectively.
    binomial_series_bound1 = np.log(2) + mm * utils.stable_logsumexp_two(0, logh1)
    binomial_series_bound2 = mm * utils.stable_logsumexp_two(0, logh2)

    # Subtract the first three terms of each binomial expansion
    # (1 + mm*h + C(mm, 2)*h^2) through the signed log-space helper;
    # the returned sign is stored in tmpsign and not used afterwards.
    tmpsign, binomial_series_bound1 \
        = utils.stable_sum_signed(True, binomial_series_bound1, False, np.log(2)
                                  + utils.stable_logsumexp([0, logh1 + np.log(mm), 2 * logh1 + np.log(mm)
                                                            + np.log(mm - 1) - np.log(2)]))
    tmpsign, binomial_series_bound2 \
        = utils.stable_sum_signed(True, binomial_series_bound2, False,
                                  utils.stable_logsumexp([0, logh2 + np.log(mm), 2 * logh2 + np.log(mm)
                                                          + np.log(mm - 1) - np.log(2)]))

    # Keep the tightest of the three bounds on the remaining terms.
    remainder = np.min([geometric_series_bound, binomial_series_bound1, binomial_series_bound2])

    return utils.stable_logsumexp([0, secondterm, remainder])
Exemplo n.º 25
0
def RDP_depend_pate_gaussian(params, alpha):
    """
    Return the data-dependent RDP of GNMAX (proposed in PATE2)
    Bounds RDP from above of GNMax given an upper bound on q (Theorem 6).

    Args:
      params: Dict with two entries:
        'logq': Natural logarithm of the probability of a non-argmax outcome.
        'sigma': Standard deviation of Gaussian noise.
      alpha: Renyi order, either a scalar or a NumPy array of orders (all >= 1).

    Returns:
      Upper bound on RDP for all orders. A scalar if alpha is a scalar,
      otherwise a 1-d float array with one entry per order.

    Raises:
      ValueError: If logq > 0, sigma < 0, or any order is smaller than 1.
    """
    logq = params['logq']
    sigma = params['sigma']

    # Order exactly 1 has its own closed form. The ndim guard keeps this
    # comparison from raising "truth value is ambiguous" for array orders.
    if np.ndim(alpha) == 0 and alpha == 1:
        p = np.exp(logq)
        w = (2 * p - 1) * (logq - _log1mexp(logq))
        return w
    if logq > 0 or sigma < 0 or np.any(alpha < 1):  # not defined for alpha=1
        raise ValueError("Inputs are malformed.")

    if np.isneginf(logq):  # If the mechanism's output is fixed, it has 0-DP.
        print('isneginf', logq)
        if np.isscalar(alpha):
            return 0.
        else:
            # The builtin float replaces the np.float alias removed in NumPy 1.24.
            return np.full_like(alpha, 0., dtype=float)

    variance = sigma**2

    # Use two different higher orders: mu_hi1 and mu_hi2 computed according to
    # Proposition 10.
    mu_hi2 = math.sqrt(variance * -logq)
    mu_hi1 = mu_hi2 + 1

    orders_vec = np.atleast_1d(alpha)

    ret = orders_vec / variance  # baseline: data-independent bound

    # Filter out entries where data-dependent bound does not apply.
    mask = np.logical_and(mu_hi1 > orders_vec, mu_hi2 > 1)

    rdp_hi1 = mu_hi1 / variance
    rdp_hi2 = mu_hi2 / variance

    log_a2 = (mu_hi2 - 1) * rdp_hi2

    # Make sure q is in the increasing wrt q range and A is positive.
    # np.any(mask) is tested first: it implies mu_hi2 > 1, which keeps the
    # divisions by (mu_hi1 - 1) and (mu_hi2 - 1) below well defined.
    if (np.any(mask) and logq <= log_a2 - mu_hi2 *
        (math.log(1 + 1 / (mu_hi1 - 1)) + math.log(1 + 1 / (mu_hi2 - 1)))
            and -logq > rdp_hi2):
        # Use log1p(x) = log(1 + x) to avoid catastrophic cancellations when x ~ 0.
        log1q = _log1mexp(logq)  # log1q = log(1-q)
        log_a = (alpha - 1) * (log1q - _log1mexp(
            (logq + rdp_hi2) * (1 - 1 / mu_hi2)))
        log_b = (alpha - 1) * (rdp_hi1 - logq / (mu_hi1 - 1))

        # Use logaddexp(x, y) = log(e^x + e^y) to avoid overflow for large x, y.
        log_s = np.logaddexp(log1q + log_a, logq + log_b)
        ret[mask] = np.minimum(ret, log_s / (alpha - 1))[mask]

        # Only this branch can push an entry below zero (the baseline is
        # non-negative), so the sanity check lives here, where every name used
        # by the diagnostics is guaranteed to exist. np.any() makes the test
        # well defined for empty and multi-element selections alike.
        negative = np.logical_and(mask, ret < 0)
        if np.any(negative):
            # Second evaluation of the same log-sum, used for diagnostics only.
            log_s1 = utils.stable_logsumexp_two(log1q + log_a, logq + log_b)
            print('negative ret', ret)
            print('log_s1 ={} log_s = {}'.format(log_s1, log_s))
            print('alpha = {} mu_hi1 ={}'.format(alpha, mu_hi1))
            print('log1q = {} log_a = {} log_b={} log_s = {}'.format(
                log1q, log_a, log_b, log_s))
            # Fall back to the data-independent bound for the offending orders.
            ret[negative] = 1. / variance * orders_vec[negative]
    assert np.all(ret >= 0)

    if np.isscalar(alpha):
        # ndarray.item() replaces np.asscalar, which was removed in NumPy 1.23.
        return ret.item()
    else:
        return ret