Example #1
def clopper_pearson_binomial(passed, total, sigma=1, CL=None):
    """
    Estimate the exact binomial from the clopper-pearson method
    - paramters -
    passed: int or array
        counts of passed elements
    total: int or array
        total elements
    sigma: float [default: 1]
        to estimate the CL automatically from the normal distribution at <sigma> sigmas
    CL: None or float [default: None]
        to specify a confidence level for the clopper-pearson. If None, it will be automatically estimated by <sigma>

    - return -
    eff: float or array:
        efficiency (<passed>/<total>). If <passed> and <total> are arrays, <eff> is an array
    uncertainties: 1d or 2d array
        array of the uncentainties: <uncertainties>[0] is the lower boundary, <uncertainties>[1] the upper one.
        If <passed> and <total> are arrays, <uncertainties>[0] and <uncertainties>[1] are arrays
    """
    eff, notpassed = np.asarray(passed, dtype=float)/total, total-passed
    if CL is None:
        ybeta_low, ybeta_up = 1-norm.cdf(sigma,0,1), norm.cdf(sigma,0,1)
    else:
        ybeta_low, ybeta_up = (1-CL)/2, (1+CL)/2
    el, eu = eff-betaincinv(passed,notpassed+1,ybeta_low),betaincinv(passed+1,notpassed,ybeta_up)-eff
    if isiterable(el):
        el[np.isnan(el)] = eu[np.isnan(eu)] = 0.
    else:
        if np.isnan(el):
            el = 0.
        if np.isnan(eu):
            eu = 0.
    return eff, np.atleast_2d([el,eu])
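The function above assumes numpy (np), scipy.stats.norm, scipy.special.betaincinv, and an isiterable helper are already imported. A minimal standalone cross-check of the same Clopper-Pearson bounds (an illustration, not part of the original snippet):

from scipy.special import betaincinv

passed, total, CL = 8, 10, 0.95
notpassed = total - passed
lower = betaincinv(passed, notpassed + 1, (1 - CL) / 2)   # lower Clopper-Pearson bound
upper = betaincinv(passed + 1, notpassed, (1 + CL) / 2)   # upper Clopper-Pearson bound
print(lower, upper)   # roughly 0.44 and 0.97 for 8 passes out of 10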
Example #2
 def logistic_fidelity(self):
     #group data and assign state labels
     gnd_features = np.hstack([np.real(self.ground_data.T),
                             np.imag(self.ground_data.T)])
     ex_features = np.hstack([np.real(self.excited_data.T),
                             np.imag(self.excited_data.T)])
     #liblinear wants arrays in C order
     features = np.ascontiguousarray(np.vstack([gnd_features, ex_features]))
     state = np.ascontiguousarray(np.hstack([np.zeros(self.ground_data.shape[1]),
                                             np.ones(self.excited_data.shape[1])]))
     #Set up logistic regression with cross-validation using liblinear.
     #Cs sets the inverse of the regularization strength, which will be optimized
     #through cross-validation. Uses the default Stratified K-Folds
     #CV generator, with 3 folds.
     #This is set up to be as consistent with the MATLAB implementation
     #as I can make it. --GJR
     Cs = np.logspace(-1,2,5)
     logreg = LogisticRegressionCV(Cs, cv=3, solver='liblinear')
     logreg.fit(features, state) #fit the model
     predictions = logreg.predict(features) #in-place classification
     score = logreg.score(features,state) #mean accuracy of classification
     N = len(predictions)
     S = np.sum(predictions == state) #how many we got right
     #now calculate confidence intervals
     c = 0.95
     flo = betaincinv(S+1, N-S+1, (1-c)/2.)
     fhi = betaincinv(S+1, N-S+1, (1+c)/2.)
     logger.info(("In-place logistic regression fidelity: " +
             "{:.2f}% ({:.2f}, {:.2f})".format(100*score, 100*flo, 100*fhi)))
Example #3
    def _get_confidence_int(self, Y):
        beta = 5.0

        Y = np.array(Y)
        Z = (1 - Y)
        W = Y * beta

        alpha = W / Z

        L = sc.betaincinv(alpha, beta, .075)
        U = sc.betaincinv(alpha, beta, .925)

        index = Y > .9
        L[index] = .95 * Y[index]
        U[index] = 1.05 * Y[index]

        L[Y < 0.00009] = 0.00009

        index = U < Y
        U[index] = 1.05 * Y[index]

        index = Y < L
        L[index] = 0.95 * Y[index]

        return L, U
Example #4
File: SSRO.py  Project: ahelsing/PyQLab
def credible_interval(outcomes, c=0.95):
	"""
	Calculate the credible interval for a fidelity estimate.
	"""
	from scipy.special import betaincinv
	N = outcomes.size
	S = np.count_nonzero(outcomes)
	xlo = betaincinv(S+1,N-S+1,(1-c)/2.)
	xup = betaincinv(S+1,N-S+1,(1+c)/2.)

	return xlo, xup
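A hypothetical call of the function above, assuming outcomes is a 0/1 NumPy array of single-shot readout results:

import numpy as np

outcomes = np.concatenate([np.ones(940), np.zeros(60)])   # hypothetical: 940 of 1000 shots correct
lo, hi = credible_interval(outcomes, c=0.95)
print(lo, hi)   # roughly 0.92 and 0.95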
Example #5
def credible_interval(outcomes, c=0.95):
    """
    Calculate the credible interval for a fidelity estimate.
    """
    from scipy.special import betaincinv
    N = outcomes.size
    S = np.count_nonzero(outcomes)
    xlo = betaincinv(S + 1, N - S + 1, (1 - c) / 2.)
    xup = betaincinv(S + 1, N - S + 1, (1 + c) / 2.)

    return xlo, xup
Example #6
def betacred(k, n, j, p):
    """
    Returns the upper and lower bounds of the credible interval based on a beta
    distribution.
    """
    r = (1.0 - p / 100) / 2  # p is a percentage; r is the tail mass on each side
    a = 1                    # flat Beta(1, 1) prior by default
    if j:
        a = 0.5              # Jeffreys Beta(1/2, 1/2) prior
    l = betaincinv(k + a, n - k + a, r)
    u = betaincinv(k + a, n - k + a, 1.0 - r)
    return (l, u)
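Usage sketch for betacred (the body above assumes betaincinv has been imported from scipy.special); j switches between a flat and a Jeffreys prior:

from scipy.special import betaincinv

print(betacred(45, 60, False, 95))   # 95% interval, flat Beta(1, 1) prior
print(betacred(45, 60, True, 95))    # 95% interval, Jeffreys Beta(1/2, 1/2) prior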
Example #7
def _rank_confidence_band(nranks):
    alpha = 0.01
    n = nranks

    k0 = np.arange(1, n + 1)
    k1 = np.flipud(k0).copy()

    top = betaincinv(k0, k1, 1 - alpha)
    mean = k0 / (n + 1)
    bottom = betaincinv(k0, k1, alpha)

    return (bottom, mean, top)
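Here k0 runs 1..n and k1 = n..1, so each pair (k0[i], k1[i]) is (i+1, n-i); the (i+1)-th smallest of n independent Uniform(0, 1) p-values follows Beta(i+1, n-i), which is why betaincinv of those parameters gives the band. A hypothetical use, assuming numpy and scipy.special.betaincinv are imported as in the snippet:

bottom, mean, top = _rank_confidence_band(100)
# bottom[i] and top[i] bracket the (i+1)-th smallest p-value at the 1% level,
# e.g. for the null band of a QQ plot of p-values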
Example #8
File: math.py  Project: dooglus/p2pool
 def binomial_conf_interval(x, n, conf=0.95):
     if n == 0:
         left = random.random()*(1 - conf)
         return left, left + conf
     b = special.beta(x+1, n-x+1)
     def f(left_a):
         left, right = max(1e-8, special.betaincinv(x+1, n-x+1, left_a)), min(1-1e-8, special.betaincinv(x+1, n-x+1, left_a + conf))
         top = right**(x+1) * (1-right)**(n-x+1) * left*(1-left) - left**(x+1) * (1-left)**(n-x+1) * right * (1-right)
         bottom = (x - n*right)*left*(1-left) - (x - n*left)*right*(1-right)
         return top/bottom/b
     left_a = find_root(f, (1-conf)/2, bounds=(0, 1-conf))
     return special.betaincinv(x+1, n-x+1, left_a), special.betaincinv(x+1, n-x+1, left_a + conf)
Example #9
File: math.py  Project: boinggg/p2pool
 def binomial_conf_interval(x, n, conf=0.95):
     assert 0 <= x <= n and 0 <= conf < 1
     if n == 0:
         left = random.random()*(1 - conf)
         return left, left + conf
     bl = float(special.betaln(x+1, n-x+1))
     def f(left_a):
         left, right = max(1e-8, float(special.betaincinv(x+1, n-x+1, left_a))), min(1-1e-8, float(special.betaincinv(x+1, n-x+1, left_a + conf)))
         top = math.exp(math.log(right)*(x+1) + math.log(1-right)*(n-x+1) + math.log(left) + math.log(1-left) - bl) - math.exp(math.log(left)*(x+1) + math.log(1-left)*(n-x+1) + math.log(right) + math.log(1-right) - bl)
         bottom = (x - n*right)*left*(1-left) - (x - n*left)*right*(1-right)
         return top/bottom
     left_a = find_root(f, (1-conf)/2, bounds=(0, 1-conf))
     return float(special.betaincinv(x+1, n-x+1, left_a)), float(special.betaincinv(x+1, n-x+1, left_a + conf))
Example #10
def mPERT_sample(mu, a=0.0, b=1.0, gamma=4.0, var=None):
    """Provide a vectorized Modified PERT distribution.

    Parameters
    ----------
    mu : float or ndarray
        Mean value for the PERT distribution.
    a : float or ndarray
        Lower bound for the distribution.
    b : float or ndarray
        Upper bound for the distribution.
    gamma : float or ndarray
        Shape parameter.
    var : float, ndarray or None
        Variance of the distribution. If var is not None,
        gamma will be calculated to meet the desired variance.

    Returns
    -------
    out : float or ndarray
        Samples drawn from the specified mPERT distribution.
        Shape is the broadcast shape of the input parameters.

    """
    mu, a, b = np.atleast_1d(mu, a, b)
    if var is not None:
        gamma = (mu - a) * (b - mu) / var - 3.0
    alp1 = 1.0 + gamma * ((mu - a) / (b - a))
    alp2 = 1.0 + gamma * ((b - mu) / (b - a))
    u = np.random.random_sample(mu.shape)
    alp3 = sc.betaincinv(alp1, alp2, u)
    return (b - a) * alp3 + a
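A hypothetical usage sketch (the snippet above assumes numpy as np and scipy.special as sc at module level):

import numpy as np
import scipy.special as sc

mu = np.full(5, 0.3)                  # five modes on the default [0, 1] support
samples = mPERT_sample(mu)            # five draws with the default gamma = 4
narrow = mPERT_sample(mu, var=0.005)  # gamma is recomputed to hit the requested variance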
Example #11
    def generate_thresholds_SMV(self):
        # this function generates the thresholds for the SMV scenario
        nsamples=self.nsamples;
        nfeatures=self.nfeatures;
        kmax=self.kmax;
        alpha_list=self.alpha_list;
        threshold_dict={}; # place to save the set of thresholds used for residual ratio thresholding
        for alpha in alpha_list:
            # we compare the residual ratios with a sequence of thresholds defined using the given value of alpha.
            # however, if that thresholding scheme fails, which happens only at low signal to noise ratio,
            # we gradually increase the value of alpha until we get a successful thresholding. alphas_to_use is this
            # gradually increasing set of thresholds.

            alphas_to_use = 10 ** (np.linspace(np.log10(alpha), np.log10(nfeatures* kmax), 100));
            threshold_alpha = {};
            threshold_alpha['when_rrt_fails'] = []
            for alpha_t in alphas_to_use:
                thres = np.zeros(kmax);
                for k in np.arange(kmax):
                    j = k + 1;
                    a = (nsamples - j)/ 2;
                    b = 1/ 2;
                    npossibilities = nfeatures- j + 1
                    val = alpha_t / (npossibilities * kmax)
                    thres[k] = np.sqrt(special.betaincinv(a, b, val))
                if alpha_t == alpha:
                    threshold_alpha['direct'] = thres;
                else:
                    threshold_alpha['when_rrt_fails'].append(thres)
            threshold_alpha['alphas_to_use'] = alphas_to_use
            threshold_dict[alpha] = threshold_alpha
        self.threshold_dict = threshold_dict
        return None
Example #12
def inverse_binom_cdf_prob(k, N, F):
    """Calculate the trial probability that gives the CDF.

    This gets the trial probability that gives an overall cumulative
    probability for Pr(X <= k; N, p) = F

    Parameters
    ----------
    k : int
        Maximum number of successes.
    N : int
        Total number of trials.
    F : float
        The cumulative probability for (k, N).

    Returns
    -------
    p : float
        The trial probability.
    """
    # This uses the result that we can write the cumulative probability of a
    # binomial in terms of an incomplete beta function

    import scipy.special as sp

    return sp.betaincinv(k + 1, N - k, 1 - F)
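A quick sanity check (illustrative, not from the original source): the returned trial probability should reproduce the requested CDF value.

from scipy.stats import binom

p = inverse_binom_cdf_prob(3, 10, 0.9)
print(binom.cdf(3, 10, p))   # prints a value very close to 0.9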
Example #13
    def qf(self, p, alpha, beta):
        r"""

        Quantile function for the Beta Distribution:

        Parameters
        ----------

        p : numpy array or scalar
            The percentiles at which the quantile will be calculated
        alpha : numpy array or scalar
            One shape parameter for the Beta distribution
        beta : numpy array or scalar
            The other shape parameter for the Beta distribution

        Returns
        -------

        q : scalar or numpy array
            The quantiles for the Beta distribution at each value p.

        Examples
        --------
        >>> import numpy as np
        >>> from surpyval import Beta
        >>> p = np.array([.1, .2, .3, .4, .5])
        >>> Beta.qf(p, 3, 4)
        array([0.20090888, 0.26864915, 0.32332388, 0.37307973, 0.42140719])
        """
        return betaincinv(alpha, beta, p)
Example #14
    def generate_thresholds_robust_regression(self):
        # this function generates the thresholds for robust regression
        nsamples=self.nsamples;
        nfeatures=self.nfeatures;
        kmax=self.kmax;
        alpha_list=self.alpha_list;
        if nfeatures>nsamples:
            raise Exception('Must satisfy nfeatures<nsamples. This technique is for low dimensional dense regression with sparse outliers. High dimensional regression with sparse outliers can be posed as a compressive sensing problem')

        threshold_dict={}; # place to save the set of thresholds used for residual ratio thresholding
        for alpha in alpha_list:
            # we compare the residual ratios with a sequence of thresholds defined using the given value of alpha.
            # however, if that thresholding scheme fails, which happens only at low signal to noise ratio,
            # we gradually increase the value of alpha until we get a successful thresholding. alphas_to_use is this
            # gradually increasing set of thresholds.
            alphas_to_use=10**(np.linspace(np.log10(alpha),np.log10(nsamples*kmax),100));
            threshold_alpha={}; threshold_alpha['when_rrt_fails']=[]
            for alpha_t in alphas_to_use:
                thres=np.zeros(kmax);
                for k in np.arange(kmax):
                    # definition of RRT thresholds. 
                    j=k+1+nfeatures;a=(nsamples-j)/2;b=1/2
                    npossibilities=(nsamples-j+1)
                    val=alpha_t/(npossibilities*kmax)
                    thres[k]=np.sqrt(special.betaincinv(a,b,val))
                if alpha_t==alpha:
                    threshold_alpha['direct']=thres; # save the threshold related to the given alpha separately.
                else:
                    threshold_alpha['when_rrt_fails'].append(thres) # save the thresholds to be used when RRT fails 
            threshold_alpha['alphas_to_use']=alphas_to_use
            threshold_dict[alpha]=threshold_alpha
        self.threshold_dict=threshold_dict
        return None
Example #15
    def Percentile(self, ps):
        """Returns the given percentiles from this distribution.

        ps: scalar, array, or list of [0-100]
        """
        ps = np.asarray(ps) / 100
        xs = special.betaincinv(self.alpha, self.beta, ps)
        return xs
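The equivalent standalone call, assuming the object's alpha and beta attributes hold the Beta parameters; for example, the 5th, 50th and 95th percentiles of Beta(3, 7):

import numpy as np
from scipy import special

print(special.betaincinv(3, 7, np.asarray([5, 50, 95]) / 100))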
Example #16
def mPERT_sample(mu, a=0.0, b=1.0, gamma=4.0, var=None):
    mu, a, b = np.atleast_1d(mu, a, b)
    if var is not None:
        gamma = (mu - a) * (b - mu) / var - 3.0
    alp1 = 1.0 + gamma * ((mu - a) / (b - a))
    alp2 = 1.0 + gamma * ((b - mu) / (b - a))
    u = np.random.random_sample(mu.shape)
    alp3 = sc.betaincinv(alp1, alp2, u)
    return (b - a) * alp3 + a
Example #17
def get_ortho_haar_theta(
    units: int, num_layers: int, hadamard: bool
) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[tf.Variable, tf.Variable],
           tf.Variable]:
    alpha_checkerboard = get_alpha_checkerboard_general(units, num_layers)
    theta_0_root = alpha_checkerboard.T[::2, ::2] - 1
    theta_1_root = alpha_checkerboard.T[1::2, 1::2] - 1
    theta_0_init = 2 * np.arcsin(
        betaincinv(0.5 * theta_0_root, 0.5,
                   np.random.rand(*theta_0_root.shape)))
    theta_1_init = 2 * np.arcsin(
        betaincinv(0.5 * theta_1_root, 0.5,
                   np.random.rand(*theta_1_root.shape)))
    if not hadamard:
        theta_0_init = np.pi - theta_0_init
        theta_1_init = np.pi - theta_1_init
    return theta_0_init.astype(dtype=NP_FLOAT), theta_1_init.astype(
        dtype=NP_FLOAT)
Example #18
def _rank_confidence_band(nranks, significance_level, ok):
    from numpy import arange, flipud, ascontiguousarray
    from scipy.special import betaincinv

    alpha = significance_level

    k0 = arange(1, nranks + 1)
    k1 = flipud(k0).copy()

    k0 = ascontiguousarray(k0[ok])
    k1 = ascontiguousarray(k1[ok])

    my_ok = k1 / k0 / (k1[0] / k0[0]) > 1e-4
    k0 = ascontiguousarray(k0[my_ok])
    k1 = ascontiguousarray(k1[my_ok])

    top = betaincinv(k0, k1, 1 - alpha)
    bottom = betaincinv(k0, k1, alpha)

    return (my_ok, bottom, top)
Example #19
def main():
    VRP_cost = 0
    VRP_Route = []
    for k in range(nber_of_vehicles):
        n = len(Repartition[1][k])
        res = str(Repartition[1][k])[1:-1]
        Intres = [int(u) for u in res if u.isdigit()]
        Intres.insert(0, depot)
        ResCity = [cityList[int(u)] for u in res if u.isdigit()]
        ResCity.insert(0, cityList[depot])
        for k in range(3):
            bestRoute = geneticAlgorithmPlot(population=ResCity,
                                             popSize=100,
                                             eliteSize=20,
                                             mutationRate=0.01,
                                             generations=500)
            bestRouteList = []
            sX = []
            sY = []
            IndexRoute = []
            for j in range(len(bestRoute)):
                bestRouteList.append((bestRoute[j].x, bestRoute[j].y))
                sX.append(bestRoute[j].x)
                sY.append(bestRoute[j].y)
                IndexRoute.append(key_list[val_list.index(bestRouteList[j])])
            sX.append(bestRoute[0].x)
            sY.append(bestRoute[0].y)
            #plotPath(sX, sY)
            #print(IndexRoute)
            crowd.append(IndexRoute)
            #print(agg_matrix(crowd))
            agg = agg_matrix(crowd)
            Inv_Agg = np.zeros((n, n))

            for k in range(n):
                for j in range(n):
                    Inv_Agg[k, j] = 1 - sc.betaincinv(2.8, 3.2, agg[k, j] / n)

            r = range(n)
            dist = {(i, j): Inv_Agg[i, j] for i in r for j in r}
            aggRoute = tsp.tsp(r, dist)[1]
            sortedaggRoute = [Intres[i] for i in aggRoute]
            #import pdb; pdb.set_trace()
            cost = 0
            for u in range(n - 1):
                cost += City.distance(ResCity[u], ResCity[u + 1])
            cost += City.distance(ResCity[n - 1], ResCity[0])
            #import pdb; pdb.set_trace()
        VRP_cost += cost
        VRP_Route.append(sortedaggRoute)
        print(VRP_Route, VRP_cost)
Example #20
def predictRecallMedian(prior, tnow, percentile=0.5):
    """Median (or percentile) of the immediate recall probability.

  Same arguments as `ebisu.predictRecall`, see that docstring for details.

  An extra keyword argument, `percentile`, is a float between 0 and 1, and
  specifies the percentile rather than 50% (median).
  """
    # [1] `Integrate[p**((a-t)/t) * (1-p**(1/t))**(b-1) / t / Beta[a,b], p]`
    # and see "Alternate form assuming a, b, p, and t are positive".
    from scipy.special import betaincinv
    alpha, beta, t = prior
    dt = tnow / t
    return betaincinv(alpha, beta, percentile)**dt
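A hypothetical usage sketch with a made-up ebisu prior (alpha, beta, t); with alpha = beta the median at tnow = t is exactly 0.5, and raising it to dt gives the decayed median:

prior = (3.0, 3.0, 1.0)                 # hypothetical (alpha, beta, t)
print(predictRecallMedian(prior, 1.0))  # 0.5
print(predictRecallMedian(prior, 2.0))  # 0.25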
Example #21
def binom_conf_interval(k, n, conf=0.68269):
    """Binomial proportion confidence interval given k successes,
    n trials, adopting Bayesian approach with Jeffreys prior."""

    if conf < 0.0 or conf > 1.0:
        raise ValueError("conf must be between 0. and 1.")
    alpha = 1.0 - conf

    k = np.asarray(k).astype(int)
    n = np.asarray(n).astype(int)

    if (n <= 0).any():
        log.warning("%(funcName)s: n must be positive")
        return 0, 0
    if (k < 0).any() or (k > n).any():
        log.warning("%(funcName)s: k must be in {0, 1, .., n}")
        return 0, 0

    lowerbound = betaincinv(k + 0.5, n - k + 0.5, 0.5 * alpha)
    upperbound = betaincinv(k + 0.5, n - k + 0.5, 1.0 - 0.5 * alpha)

    # Set lower or upper bound to k/n when k/n = 0 or 1
    # We have to treat the special case of k/n being scalars,
    # which is an ugly kludge
    if lowerbound.ndim == 0:
        if k == 0:
            lowerbound = 0.0
        elif k == n:
            upperbound = 1.0
    else:
        lowerbound[k == 0] = 0
        upperbound[k == n] = 1

    conf_interval = np.array([lowerbound, upperbound])

    return conf_interval
Example #22
 def vangel_approx(self, n=None, i=None, j=None, p=None, g=None):
     if n is None:
         n = self.n
     if i is None:
         i = 1
     if j is None:
         j = self.j + 1
     if p is None:
         p = self.p
     if g is None:
         g = self.g
     betatmp = betainc(j, n - j + 1, p)
     a = g - betatmp
     b = 1.0 - betatmp
     q = betaincinv(i, j - i, a / b)
     return np.log(((p) * (n + 1)) / j) / np.log(q)
Example #23
File: dists.py  Project: thingimon/nengo
    def ppf(self, y):
        """Percent point function (inverse cumulative distribution).

        Requires Scipy.

        Parameters
        ----------
        y : ndarray
            Cumulative probabilities in [0, 1].

        Returns
        -------
        ndarray
            Evaluation points `x` in [0, 1] such that `P(X <= x) = y`.
        """
        from scipy.special import betaincinv
        sq_x = betaincinv(self.m / 2.0, self.n / 2.0, y)
        return np.sqrt(sq_x)
Example #24
File: dists.py  Project: sir-sim/sirsim
    def ppf(self, y):
        """Percent point function (inverse cumulative distribution).

        .. note:: Requires SciPy.

        Parameters
        ----------
        y : array_like
            Cumulative probabilities in [0, 1].

        Returns
        -------
        ppf : array_like
            Evaluation points ``x`` in [0, 1] such that ``P(X <= x) = y``.
        """
        from scipy.special import betaincinv
        sq_x = betaincinv(self.m / 2.0, self.n / 2.0, y)
        return np.sqrt(sq_x)
Example #25
def main():
    for k in range(2):
        bestRoute = geneticAlgorithmPlot(population=cityList,
                                         popSize=100,
                                         eliteSize=20,
                                         mutationRate=0.01,
                                         generations=500)
        bestRouteList = []
        sX = []
        sY = []
        IndexRoute = []
        for j in range(len(bestRoute)):
            bestRouteList.append((bestRoute[j].x, bestRoute[j].y))
            sX.append(bestRoute[j].x)
            sY.append(bestRoute[j].y)
            #import pdb; pdb.set_trace()
            IndexRoute.append(key_list[val_list.index(bestRouteList[j])])
        sX.append(bestRoute[0].x)
        sY.append(bestRoute[0].y)
        #plotPath(sX, sY)
        #print(IndexRoute)
        crowd.append(IndexRoute)
    #print(agg_matrix(crowd))
    #import pdb; pdb.set_trace()
    agg = agg_matrix(crowd)
    Inv_Agg = np.zeros((n, n))
    for k in range(n):
        for j in range(n):
            Inv_Agg[k, j] = 1 - sc.betaincinv(2.8, 3.2, agg[k, j] / n)

    r = range(n)
    dist = {(i, j): Inv_Agg[i, j] for i in r for j in r}
    aggRoute = tsp.tsp(r, dist)[1]
    cost = 0
    for u in range(n - 1):
        cost += R_D[aggRoute[u], aggRoute[u + 1]]
    cost += R_D[n - 1, 0]
    print(aggRoute, cost)
Example #26
File: math.py  Project: dooglus/p2pool
 def f(left_a):
     left, right = max(1e-8, special.betaincinv(x+1, n-x+1, left_a)), min(1-1e-8, special.betaincinv(x+1, n-x+1, left_a + conf))
     top = right**(x+1) * (1-right)**(n-x+1) * left*(1-left) - left**(x+1) * (1-left)**(n-x+1) * right * (1-right)
     bottom = (x - n*right)*left*(1-left) - (x - n*left)*right*(1-right)
     return top/bottom/b
Example #27
 def ppf(self, y):
     from scipy.special import betaincinv
     y_reflect = np.where(y < 0.5, y, 1 - y)
     z_sq = betaincinv(self.m / 2.0, 0.5, 2 * y_reflect)
     x = np.arcsin(np.sqrt(z_sq)) / np.pi
     return np.where(y < 0.5, x, 1 - x)
Example #28
 def get_invBeta(self) -> None:
     self.invBeta = sc.betaincinv(0.5 * self.nu, 0.5,
                                  1 - self.confidence_level)
Example #29
def binom_conf_interval(k, n, conf=0.68269, interval='wilson'):
    r"""Binomial proportion confidence interval given k successes,
    n trials.

    Parameters
    ----------
    k : int or numpy.ndarray
        Number of successes (0 <= ``k`` <= ``n``).
    n : int or numpy.ndarray
        Number of trials (``n`` > 0).  If both ``k`` and ``n`` are arrays,
        they must have the same shape.
    conf : float in [0, 1], optional
        Desired probability content of interval. Default is 0.68269,
        corresponding to 1 sigma in a 1-dimensional Gaussian distribution.
    interval : {'wilson', 'jeffreys', 'flat', 'wald'}, optional
        Formula used for confidence interval. See notes for details.  The
        ``'wilson'`` and ``'jeffreys'`` intervals generally give similar
        results, while 'flat' is somewhat different, especially for small
        values of ``n``.  ``'wilson'`` should be somewhat faster than
        ``'flat'`` or ``'jeffreys'``.  The 'wald' interval is generally not
        recommended.  It is provided for comparison purposes.  Default is
        ``'wilson'``.

    Returns
    -------
    conf_interval : numpy.ndarray
        ``conf_interval[0]`` and ``conf_interval[1]`` correspond to the lower
        and upper limits, respectively, for each element in ``k``, ``n``.

    Notes
    -----
    In situations where a probability of success is not known, it can
    be estimated from a number of trials (N) and number of
    observed successes (k). For example, this is done in Monte
    Carlo experiments designed to estimate a detection efficiency. It
    is simple to take the sample proportion of successes (k/N)
    as a reasonable best estimate of the true probability
    :math:`\epsilon`. However, deriving an accurate confidence
    interval on :math:`\epsilon` is non-trivial. There are several
    formulas for this interval (see [1]_). Four intervals are implemented
    here:

    **1. The Wilson Interval.** This interval, attributed to Wilson [2]_,
    is given by

    .. math::

        CI_{\rm Wilson} = \frac{k + \kappa^2/2}{N + \kappa^2}
        \pm \frac{\kappa N^{1/2}}{N + \kappa^2}
        \left(\hat{\epsilon}(1 - \hat{\epsilon}) + \frac{\kappa^2}{4N}\right)^{1/2}

    where :math:`\hat{\epsilon} = k / N` and :math:`\kappa` is the
    number of standard deviations corresponding to the desired
    confidence interval for a *normal* distribution (for example,
    1.0 for a confidence interval of 68.269%). For a
    confidence interval of 100(1 - :math:`\alpha`)%,

    .. math::

        \kappa = \Phi^{-1}(1-\alpha/2) = \sqrt{2}{\rm erf}^{-1}(1-\alpha).

    **2. The Jeffreys Interval.** This interval is derived by applying
    Bayes' theorem to the binomial distribution with the
    noninformative Jeffreys prior [3]_, [4]_. The noninformative Jeffreys
    prior is the Beta distribution, Beta(1/2, 1/2), which has the density
    function

    .. math::

        f(\epsilon) = \pi^{-1} \epsilon^{-1/2}(1-\epsilon)^{-1/2}.

    The justification for this prior is that it is invariant under
    reparameterizations of the binomial proportion.
    The posterior density function is also a Beta distribution: Beta(k
    + 1/2, N - k + 1/2). The interval is then chosen so that it is
    *equal-tailed*: Each tail (outside the interval) contains
    :math:`\alpha`/2 of the posterior probability, and the interval
    itself contains 1 - :math:`\alpha`. This interval must be
    calculated numerically. Additionally, when k = 0 the lower limit
    is set to 0 and when k = N the upper limit is set to 1, so that in
    these cases, there is only one tail containing :math:`\alpha`/2
    and the interval itself contains 1 - :math:`\alpha`/2 rather than
    the nominal 1 - :math:`\alpha`.

    **3. A Flat prior.** This is similar to the Jeffreys interval,
    but uses a flat (uniform) prior on the binomial proportion
    over the range 0 to 1 rather than the reparametrization-invariant
    Jeffreys prior.  The posterior density function is a Beta distribution:
    Beta(k + 1, N - k + 1).  The same comments about the nature of the
    interval (equal-tailed, etc.) also apply to this option.

    **4. The Wald Interval.** This interval is given by

    .. math::

       CI_{\rm Wald} = \hat{\epsilon} \pm
       \kappa \sqrt{\frac{\hat{\epsilon}(1-\hat{\epsilon})}{N}}

    The Wald interval gives acceptable results in some limiting
    cases. Particularly, when N is very large, and the true proportion
    :math:`\epsilon` is not "too close" to 0 or 1. However, as the
    later is not verifiable when trying to estimate :math:`\epsilon`,
    this is not very helpful. Its use is not recommended, but it is
    provided here for comparison purposes due to its prevalence in
    everyday practical statistics.

    References
    ----------
    .. [1] Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001).
       "Interval Estimation for a Binomial Proportion". Statistical
       Science 16 (2): 101-133. doi:10.1214/ss/1009213286

    .. [2] Wilson, E. B. (1927). "Probable inference, the law of
       succession, and statistical inference". Journal of the American
       Statistical Association 22: 209-212.

    .. [3] Jeffreys, Harold (1946). "An Invariant Form for the Prior
       Probability in Estimation Problems". Proc. R. Soc. Lond. A 186
       (1007): 453-461. doi:10.1098/rspa.1946.0056

    .. [4] Jeffreys, Harold (1998). Theory of Probability. Oxford
       University Press, 3rd edition. ISBN 978-0198503682

    Examples
    --------
    Integer inputs return an array with shape (2,):

    >>> binom_conf_interval(4, 5, interval='wilson')
    array([ 0.57921724,  0.92078259])

    Arrays of arbitrary dimension are supported. The Wilson and Jeffreys
    intervals give similar results, even for small k, N:

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wilson')
    array([[ 0.        ,  0.07921741,  0.21597328,  0.83333304],
           [ 0.16666696,  0.42078276,  0.61736012,  1.        ]])

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='jeffreys')
    array([[ 0.        ,  0.0842525 ,  0.21789949,  0.82788246],
           [ 0.17211754,  0.42218001,  0.61753691,  1.        ]])

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='flat')
    array([[ 0.        ,  0.12139799,  0.24309021,  0.73577037],
           [ 0.26422963,  0.45401727,  0.61535699,  1.        ]])

    In contrast, the Wald interval gives poor results for small k, N.
    For k = 0 or k = N, the interval always has zero length.

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wald')
    array([[ 0.        ,  0.02111437,  0.18091075,  1.        ],
           [ 0.        ,  0.37888563,  0.61908925,  1.        ]])

    For confidence intervals approaching 1, the Wald interval for
    0 < k < N can give intervals that extend outside [0, 1]:

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wald', conf=0.99)
    array([[ 0.        , -0.26077835, -0.16433593,  1.        ],
           [ 0.        ,  0.66077835,  0.96433593,  1.        ]])

    """

    if conf < 0. or conf > 1.:
        raise ValueError('conf must be between 0. and 1.')
    alpha = 1. - conf

    k = np.asarray(k).astype(int)
    n = np.asarray(n).astype(int)

    if (n <= 0).any():
        raise ValueError('n must be positive')
    if (k < 0).any() or (k > n).any():
        raise ValueError('k must be in {0, 1, .., n}')

    if interval == 'wilson' or interval == 'wald':
        from scipy.special import erfinv
        kappa = np.sqrt(2.) * min(erfinv(conf), 1.e10)  # Avoid overflows.
        k = k.astype(float)
        n = n.astype(float)
        p = k / n

        if interval == 'wilson':
            midpoint = (k + kappa**2 / 2.) / (n + kappa**2)
            halflength = (kappa * np.sqrt(n)) / (n + kappa ** 2) * \
                np.sqrt(p * (1 - p) + kappa ** 2 / (4 * n))
            conf_interval = np.array(
                [midpoint - halflength, midpoint + halflength])

            # Correct intervals out of range due to floating point errors.
            conf_interval[conf_interval < 0.] = 0.
            conf_interval[conf_interval > 1.] = 1.
        else:
            midpoint = p
            halflength = kappa * np.sqrt(p * (1. - p) / n)
            conf_interval = np.array(
                [midpoint - halflength, midpoint + halflength])

    elif interval == 'jeffreys' or interval == 'flat':
        from scipy.special import betaincinv

        if interval == 'jeffreys':
            lowerbound = betaincinv(k + 0.5, n - k + 0.5, 0.5 * alpha)
            upperbound = betaincinv(k + 0.5, n - k + 0.5, 1. - 0.5 * alpha)
        else:
            lowerbound = betaincinv(k + 1, n - k + 1, 0.5 * alpha)
            upperbound = betaincinv(k + 1, n - k + 1, 1. - 0.5 * alpha)

        # Set lower or upper bound to k/n when k/n = 0 or 1
        #  We have to treat the special case of k/n being scalars,
        #  which is an ugly kludge
        if lowerbound.ndim == 0:
            if k == 0:
                lowerbound = 0.
            elif k == n:
                upperbound = 1.
        else:
            lowerbound[k == 0] = 0
            upperbound[k == n] = 1

        conf_interval = np.array([lowerbound, upperbound])
    else:
        raise ValueError('Unrecognized interval: {0:s}'.format(interval))

    return conf_interval
Example #30
 def ppf(self, y):
     """Evaluates the inverse CDF along the values ``x``."""
     y_reflect = np.where(y < 0.5, y, 1 - y)
     z_sq = betaincinv(self.m / 2.0, 0.5, 2 * y_reflect)
     x = np.arcsin(np.sqrt(z_sq)) / np.pi
     return np.where(y < 0.5, x, 1 - x)
Example #31
 def get_quantile(self, q, X=None):
     a, b = self._get_alphabeta(X)
     return special.betaincinv(a, b, q)
Example #32
def bernoulli_trial_probability(m, n):
    c = 0.95
    x1 = betaincinv(m + 1, n - m + 1, (1 - c) / 2)
    x2 = betaincinv(m + 1, n - m + 1, (1 + c) / 2)
    return x1, x2
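Usage sketch (the body above assumes betaincinv is imported from scipy.special); this is the equal-tailed 95% interval of a flat-prior Beta posterior:

from scipy.special import betaincinv

lo, hi = bernoulli_trial_probability(47, 50)   # 47 successes in 50 trials
print(lo, hi)                                  # bounds bracketing the success probability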
Example #33
 def ppf(self, y):
     y_reflect = np.where(y < 0.5, y, 1 - y)
     z_sq = betaincinv(self.m / 2.0, 0.5, 2 * y_reflect)
     x = np.arcsin(np.sqrt(z_sq)) / np.pi
     return np.where(y < 0.5, x, 1 - x)
Example #34
 def median(aa, bb):
     if (aa <= 0 or bb <= 0):
         raise ValueError("aa and bb must be bigger than 0")
     return sp.betaincinv(aa, bb, 1 / 2)
Example #35
 def get_quantile(self, q, X=None):
     a, b = self._get_alphabeta(X)
     return special.betaincinv(a, b, q)
Example #36
                np.sqrt(p * (1 - p) + kappa ** 2 / (4 * n))
            conf_interval = np.array([midpoint - halflength,
                                      midpoint + halflength])

            conf_interval[conf_interval < 0.] = 0.
            conf_interval[conf_interval > 1.] = 1.
            return conf_interval

        else:
            midpoint = p
            halflength = kappa * np.sqrt(p * (1. - p) / n)
            return np.array([midpoint - halflength, midpoint + halflength])

    elif interval == 'jeffreys':
        from scipy.special import betaincinv

        lowerbound = betaincinv(k + 0.5, n - k + 0.5, alpha / 2.)
        upperbound = betaincinv(k + 0.5, n - k + 0.5, 1. - alpha / 2.)

        lowerbound[k == 0] = 0.
        upperbound[k == n] = 1.

        return np.array([lowerbound, upperbound])

    else:
        raise ValueError('Unrecognized interval: {0:s}'.format(interval))


def binned_binom_proportion(x, success, bins=10, range=None, conf=0.68269,
                            interval='wilson'):
    """Binomial proportion and confidence interval in bins of a continuous
    variable `x`.

    Given a set of datapoint pairs where the `x` values are
    continuously distributed and the `success` values are binomial
Example #37
subs1 = np.array([0, 1, 4, 5, 6, 9, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21])

nans1 = np.isnan(responses[subs1, :]).sum(axis=0)
p1 = np.nan_to_num(responses[subs1, :]).sum(axis=0)

#edhmm group
subs2 = np.array([2, 3, 7, 8, 10, 17])

nans2 = np.isnan(responses[subs2, :]).sum(axis=0)
p2 = np.nan_to_num(responses[subs2, :]).sum(axis=0)

trials = np.arange(126, 161, 1)

#compute 90% Jeffreys interval
n1 = len(subs1)
u1 = betaincinv(p1 + 1 / 2, n1 - nans1 - p1 + 1 / 2, .95)
l1 = betaincinv(p1 + 1 / 2, n1 - nans1 - p1 + 1 / 2, .05)
n2 = len(subs2)
u2 = betaincinv(p2 + 1 / 2, n2 - nans2 - p2 + 1 / 2, .95)
l2 = betaincinv(p2 + 1 / 2, n2 - nans2 - p2 + 1 / 2, .05)

#compute the mean response
m1 = p1 / (n1 - nans1 + 1)
m2 = p2 / (n2 - nans2 + 1)

#make the figure
fig, ax = plt.subplots(1, 2, figsize=(12, 5), sharex=True)

ax[0].plot(trials, m1, label='DU-RW group', color='r')
ax[0].fill_between(trials, u1, l1, color='r', alpha=.2)
ax[0].plot(trials, m2, color='b', label='ED-HMM group')
Example #38
def binom_conf_interval(k, n, conf=0.68269, interval='wilson'):
    r"""Binomial proportion confidence interval given k successes,
    n trials.

    Parameters
    ----------
    k : int or numpy.ndarray
        Number of successes (0 <= ``k`` <= ``n``).
    n : int or numpy.ndarray
        Number of trials (``n`` > 0).  If both ``k`` and ``n`` are arrays,
        they must have the same shape.
    conf : float in [0, 1], optional
        Desired probability content of interval. Default is 0.68269,
        corresponding to 1 sigma in a 1-dimensional Gaussian distribution.
    interval : {'wilson', 'jeffreys', 'flat', 'wald'}, optional
        Formula used for confidence interval. See notes for details.  The
        ``'wilson'`` and ``'jeffreys'`` intervals generally give similar
        results, while 'flat' is somewhat different, especially for small
        values of ``n``.  ``'wilson'`` should be somewhat faster than
        ``'flat'`` or ``'jeffreys'``.  The 'wald' interval is generally not
        recommended.  It is provided for comparison purposes.  Default is
        ``'wilson'``.

    Returns
    -------
    conf_interval : numpy.ndarray
        ``conf_interval[0]`` and ``conf_interval[1]`` correspond to the lower
        and upper limits, respectively, for each element in ``k``, ``n``.

    Notes
    -----
    In situations where a probability of success is not known, it can
    be estimated from a number of trials (N) and number of
    observed successes (k). For example, this is done in Monte
    Carlo experiments designed to estimate a detection efficiency. It
    is simple to take the sample proportion of successes (k/N)
    as a reasonable best estimate of the true probability
    :math:`\epsilon`. However, deriving an accurate confidence
    interval on :math:`\epsilon` is non-trivial. There are several
    formulas for this interval (see [1]_). Four intervals are implemented
    here:

    **1. The Wilson Interval.** This interval, attributed to Wilson [2]_,
    is given by

    .. math::

        CI_{\rm Wilson} = \frac{k + \kappa^2/2}{N + \kappa^2}
        \pm \frac{\kappa N^{1/2}}{N + \kappa^2}
        \left(\hat{\epsilon}(1 - \hat{\epsilon}) + \frac{\kappa^2}{4N}\right)^{1/2}

    where :math:`\hat{\epsilon} = k / N` and :math:`\kappa` is the
    number of standard deviations corresponding to the desired
    confidence interval for a *normal* distribution (for example,
    1.0 for a confidence interval of 68.269%). For a
    confidence interval of 100(1 - :math:`\alpha`)%,

    .. math::

        \kappa = \Phi^{-1}(1-\alpha/2) = \sqrt{2}{\rm erf}^{-1}(1-\alpha).

    **2. The Jeffreys Interval.** This interval is derived by applying
    Bayes' theorem to the binomial distribution with the
    noninformative Jeffreys prior [3]_, [4]_. The noninformative Jeffreys
    prior is the Beta distribution, Beta(1/2, 1/2), which has the density
    function

    .. math::

        f(\epsilon) = \pi^{-1} \epsilon^{-1/2}(1-\epsilon)^{-1/2}.

    The justification for this prior is that it is invariant under
    reparameterizations of the binomial proportion.
    The posterior density function is also a Beta distribution: Beta(k
    + 1/2, N - k + 1/2). The interval is then chosen so that it is
    *equal-tailed*: Each tail (outside the interval) contains
    :math:`\alpha`/2 of the posterior probability, and the interval
    itself contains 1 - :math:`\alpha`. This interval must be
    calculated numerically. Additionally, when k = 0 the lower limit
    is set to 0 and when k = N the upper limit is set to 1, so that in
    these cases, there is only one tail containing :math:`\alpha`/2
    and the interval itself contains 1 - :math:`\alpha`/2 rather than
    the nominal 1 - :math:`\alpha`.

    **3. A Flat prior.** This is similar to the Jeffreys interval,
    but uses a flat (uniform) prior on the binomial proportion
    over the range 0 to 1 rather than the reparametrization-invariant
    Jeffreys prior.  The posterior density function is a Beta distribution:
    Beta(k + 1, N - k + 1).  The same comments about the nature of the
    interval (equal-tailed, etc.) also apply to this option.

    **4. The Wald Interval.** This interval is given by

    .. math::

       CI_{\rm Wald} = \hat{\epsilon} \pm
       \kappa \sqrt{\frac{\hat{\epsilon}(1-\hat{\epsilon})}{N}}

    The Wald interval gives acceptable results in some limiting
    cases. Particularly, when N is very large, and the true proportion
    :math:`\epsilon` is not "too close" to 0 or 1. However, as the
    latter is not verifiable when trying to estimate :math:`\epsilon`,
    this is not very helpful. Its use is not recommended, but it is
    provided here for comparison purposes due to its prevalence in
    everyday practical statistics.

    References
    ----------
    .. [1] Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001).
       "Interval Estimation for a Binomial Proportion". Statistical
       Science 16 (2): 101-133. doi:10.1214/ss/1009213286

    .. [2] Wilson, E. B. (1927). "Probable inference, the law of
       succession, and statistical inference". Journal of the American
       Statistical Association 22: 209-212.

    .. [3] Jeffreys, Harold (1946). "An Invariant Form for the Prior
       Probability in Estimation Problems". Proc. R. Soc. Lond. A 186
       (1007): 453-461. doi:10.1098/rspa.1946.0056

    .. [4] Jeffreys, Harold (1998). Theory of Probability. Oxford
       University Press, 3rd edition. ISBN 978-0198503682

    Examples
    --------
    Integer inputs return an array with shape (2,):

    >>> binom_conf_interval(4, 5, interval='wilson')
    array([ 0.57921724,  0.92078259])

    Arrays of arbitrary dimension are supported. The Wilson and Jeffreys
    intervals give similar results, even for small k, N:

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wilson')
    array([[ 0.        ,  0.07921741,  0.21597328,  0.83333304],
           [ 0.16666696,  0.42078276,  0.61736012,  1.        ]])

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='jeffreys')
    array([[ 0.        ,  0.0842525 ,  0.21789949,  0.82788246],
           [ 0.17211754,  0.42218001,  0.61753691,  1.        ]])

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='flat')
    array([[ 0.        ,  0.12139799,  0.24309021,  0.73577037],
           [ 0.26422963,  0.45401727,  0.61535699,  1.        ]])

    In contrast, the Wald interval gives poor results for small k, N.
    For k = 0 or k = N, the interval always has zero length.

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wald')
    array([[ 0.        ,  0.02111437,  0.18091075,  1.        ],
           [ 0.        ,  0.37888563,  0.61908925,  1.        ]])

    For confidence intervals approaching 1, the Wald interval for
    0 < k < N can give intervals that extend outside [0, 1]:

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wald', conf=0.99)
    array([[ 0.        , -0.26077835, -0.16433593,  1.        ],
           [ 0.        ,  0.66077835,  0.96433593,  1.        ]])

    """

    if conf < 0. or conf > 1.:
        raise ValueError('conf must be between 0. and 1.')
    alpha = 1. - conf

    k = np.asarray(k).astype(int)
    n = np.asarray(n).astype(int)

    if (n <= 0).any():
        raise ValueError('n must be positive')
    if (k < 0).any() or (k > n).any():
        raise ValueError('k must be in {0, 1, .., n}')

    if interval == 'wilson' or interval == 'wald':
        from scipy.special import erfinv
        kappa = np.sqrt(2.) * min(erfinv(conf), 1.e10)  # Avoid overflows.
        k = k.astype(float)
        n = n.astype(float)
        p = k / n

        if interval == 'wilson':
            midpoint = (k + kappa ** 2 / 2.) / (n + kappa ** 2)
            halflength = (kappa * np.sqrt(n)) / (n + kappa ** 2) * \
                np.sqrt(p * (1 - p) + kappa ** 2 / (4 * n))
            conf_interval = np.array([midpoint - halflength,
                                      midpoint + halflength])

            # Correct intervals out of range due to floating point errors.
            conf_interval[conf_interval < 0.] = 0.
            conf_interval[conf_interval > 1.] = 1.
        else:
            midpoint = p
            halflength = kappa * np.sqrt(p * (1. - p) / n)
            conf_interval = np.array([midpoint - halflength,
                                      midpoint + halflength])

    elif interval == 'jeffreys' or interval == 'flat':
        from scipy.special import betaincinv

        if interval == 'jeffreys':
            lowerbound = betaincinv(k + 0.5, n - k + 0.5, 0.5 * alpha)
            upperbound = betaincinv(k + 0.5, n - k + 0.5, 1. - 0.5 * alpha)
        else:
            lowerbound = betaincinv(k + 1, n - k + 1, 0.5 * alpha)
            upperbound = betaincinv(k + 1, n - k + 1, 1. - 0.5 * alpha)

        # Set lower or upper bound to k/n when k/n = 0 or 1
        #  We have to treat the special case of k/n being scalars,
        #  which is an ugly kludge
        if lowerbound.ndim == 0:
            if k == 0:
                lowerbound = 0.
            elif k == n:
                upperbound = 1.
        else:
            lowerbound[k == 0] = 0
            upperbound[k == n] = 1

        conf_interval = np.array([lowerbound, upperbound])
    else:
        raise ValueError('Unrecognized interval: {0:s}'.format(interval))

    return conf_interval
Example #39
File: math.py  Project: boinggg/p2pool
 def f(left_a):
     left, right = max(1e-8, float(special.betaincinv(x+1, n-x+1, left_a))), min(1-1e-8, float(special.betaincinv(x+1, n-x+1, left_a + conf)))
     top = math.exp(math.log(right)*(x+1) + math.log(1-right)*(n-x+1) + math.log(left) + math.log(1-left) - bl) - math.exp(math.log(left)*(x+1) + math.log(1-left)*(n-x+1) + math.log(right) + math.log(1-right) - bl)
     bottom = (x - n*right)*left*(1-left) - (x - n*left)*right*(1-right)
     return top/bottom
Example #40
valArray1, valArray2 = np.meshgrid(valVect, valVect)
valArray = np.array([valArray1.flatten(), valArray2.flatten()])

print(valArray.shape)
tempss = valArray[0, 1]
print(tempss)
# initial drawing scheme
numChoices = np.size(valArray, 1)
probArray = np.ones((numChoices, 1)) * (1 / numChoices)

# correct vs incorrect counts
correctAndIncorrectCounts = np.ones((numChoices, 2))

for k in np.arange(50):

    drawInd = np.random.choice(numChoices, 1, p=probArray.flatten())

    # update count array depending on response
    correctFlag = 1
    if correctFlag:
        correctAndIncorrectCounts[drawInd, 0] += 1
    else:
        correctAndIncorrectCounts[drawInd, 1] += 1

    # update probabilities based on counts
    probArray = 1 - betaincinv(correctAndIncorrectCounts[:, 0],
                               correctAndIncorrectCounts[:, 1], 1 / (1 + 2))
    probArray = probArray / sum(probArray)

print(probArray)