Example #1
def posterior(x, n, p1, p2):
    """
    Calculates the posterior probability that the probability of
    developing severe side effects falls within a specific range
    given the data
    """
    if type(n) is not int or n < 1:
        raise ValueError('n must be a positive integer')

    if type(x) is not int or x < 0:
        raise ValueError('x must be an integer that is ' +
                         'greater than or equal to 0')

    if x > n:
        raise ValueError('x cannot be greater than n')

    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError('p1 must be a float in the range [0, 1]')

    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError('p2 must be a float in the range [0, 1]')

    if p2 <= p1:
        raise ValueError('p2 must be greater than p1')

    cdf_beta1 = beta.cdf(p1, x + 1, n - x + 1)
    cdf_beta2 = beta.cdf(p2, x + 1, n - x + 1)
    my_posterior = cdf_beta2 - cdf_beta1
    return my_posterior
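A quick usage sketch (the numbers are hypothetical; the snippet assumes `from scipy.stats import beta`):

from scipy.stats import beta

# Hypothetical trial: 26 of 130 participants developed severe side effects.
# Posterior (uniform prior) probability that the true rate lies in [0.17, 0.23]:
print(posterior(26, 130, 0.17, 0.23))  # mass of Beta(27, 105) between p1 and p2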
Example #2
    def warp_input(self, X, alpha=None, beta=None):
        bounds = np.array(self.bounds)
        if alpha is None:
            alpha = self._alpha
        if beta is None:
            beta = self._beta
        if X is None:
            return None

        X = np.array(X)
        X_warped = np.empty(X.shape)
        for n in range(self.ndim):
            # Workaround for inputs that arrive as 1-D arrays; this should be fixed.
            try:
                X_warped[:, n:n + 1] = ((X[:, n:n + 1] - bounds[n, 0]) /
                                        (bounds[n, 1] - bounds[n, 0]))
                # Use beta CDF warping.
                X_warped[:, n:n + 1] = beta_dist.cdf(X_warped[:, n:n + 1],
                                                     alpha[n], beta[n])
                X_warped = (bounds[n, 1] - bounds[n, 0]) * X_warped + bounds[n, 0]
            except IndexError:  # a 1-D input cannot be column-indexed
                X_warped = (X - bounds[n, 0]) / (bounds[n, 1] - bounds[n, 0])
                # Use beta CDF warping.
                X_warped[:] = beta_dist.cdf(X_warped[:], alpha[n], beta[n])
                X_warped = (bounds[n, 1] - bounds[n, 0]) * X_warped + bounds[n, 0]
        return X_warped
Example #3
def create_gauss_scalingfactors(cdfs):
    """ Create the scaling factor distributions needed to sample
        uncertainty in the Gaussian non-CO2 radiative forcings"""
    #Scale based on combined Gaussian components
    rf_2011_mid = np.array([1.82, 2.83, 0.35, 0.07, -0.15, 0.04, -0.9])
    rf_2011_up = [2.18, 3.4, 0.559, 0.121, -0.047, 0.09, -0.1]
    rf_2011_low = [1.46, 2.26, 0.141, 0.019, -0.253, 0.019, -1.9]
    #Estimate sd from the 5-95% interval (normal quantile z = 1.645)
    erf_sigs = (np.array(rf_2011_up) - np.array(rf_2011_low)) / (2 * 1.645)
    sig_wmghg = np.copy(erf_sigs[1])
    #Find the non-CO2 GHG forcing uncertainty
    sig_owmghg = np.sqrt(erf_sigs[1]**2 - erf_sigs[0]**2)
    erf_sigs[1] = sig_owmghg
    sig_tot = np.sqrt(np.sum(erf_sigs[1:-2]**2))
    rf_2011_mid_a = np.copy(rf_2011_mid)
    rf_2011_mid_a[1] = rf_2011_mid[1] - rf_2011_mid[0]
    #Calculate the scaling factors
    #Derive the scaling factors to span the 5-95% AR5 Gaussian forcing uncertainty
    #assuming +/- 20% uncertainty in WMGHG forcings
    #Map the TCR cdf to the forcing scaling cdf using a beta function cdf
    beta_cdf_sf = root(lambda var: 0.05 - beta.cdf(0.5 - 1.0 / 3.0, var, var),
                       x0=2.0).x[0]
    cdfs_gauss = 1.0 - beta.cdf(cdfs, beta_cdf_sf, beta_cdf_sf)
    sf_gauss = (np.sum(rf_2011_mid_a[1:-2]) +
                np.sqrt(2.0) * erfinv(2 * cdfs_gauss - 1) * sig_tot) / np.sum(
                    rf_2011_mid_a[1:-2])

    return sf_gauss
Example #4
def Beta(length, popularity, be=10):
    x = [i / length for i in range(length + 1)]
    cdfs = [
        beta.cdf(x[i + 1], popularity, be) - beta.cdf(x[i], popularity, be)
        for i in range(length)
    ]
    return cdfs
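Since the entries are consecutive CDF differences over a partition of [0, 1], they sum to beta.cdf(1, ., .) = 1; a minimal check (parameter values illustrative):

from scipy.stats import beta

cdfs = Beta(10, popularity=2.5)  # ten equal-width bins on [0, 1]
print(sum(cdfs))                 # ~1.0: the bin masses partition the support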
Example #5
 def Jeffrey(self, df, x, y, z):
     #df is a dataframe object that contains columns x, y and z
     #x aggregation variable (rating grade for PD test)
     #y modelled variable
     #z observed variable (expected to be binary)
     #alpha = D + 1/2
     #beta = Nc - D + 1/2
     aggregation = df.groupby(x).agg({
         x: 'count',
         y: ['sum', 'count', 'mean'],
         z: ['sum', 'count', 'mean']
     })
     aggregation['Observed'] = aggregation[(z, 'mean')]
     aggregation['alpha'] = aggregation[(z, 'sum')] + 1 / 2
     aggregation['beta'] = aggregation[(z,
                                        'count')] - aggregation['alpha'] + 1
     aggregation['H0'] = aggregation[(y, 'mean')]
     aggregation['p_val'] = beta.cdf(aggregation['H0'],
                                     aggregation['alpha'],
                                     aggregation['beta'])
     aggregation.loc['Portfolio'] = aggregation.sum()
     aggregation['Observed'].loc['Portfolio'] = df.agg({z: 'mean'}).values
     aggregation['alpha'].loc['Portfolio'] = df.agg({
         z: 'sum'
     }).values + 1 / 2
     aggregation['beta'].loc['Portfolio'] = df.agg({
         z: 'count'
     }).values - aggregation['alpha'].loc['Portfolio'] + 1
     aggregation['H0'].loc['Portfolio'] = df.agg({y: 'mean'}).values
     aggregation['p_val'].loc['Portfolio'] = beta.cdf(
         aggregation['H0'].loc['Portfolio'],
         aggregation['alpha'].loc['Portfolio'],
         aggregation['beta'].loc['Portfolio'])
     return aggregation
Example #6
    def allocated_rollouts(self):
        fraction_of_budget_used = (self.currently_used_budget +
                                   1) / self.max_budget

        yvals = beta.cdf([fraction_of_budget_used], self.alpha_param,
                         self.beta_param)

        fractional_improvement_we_should_be_at = yvals[0]

        # Based on the total overall improvement, how much effort should we have spent?
        number_of_rollouts_we_shouldve_used = fractional_improvement_we_should_be_at * self.max_rollout

        if not self.waste_unused_rollouts:
            # Subtract the rollouts already spent from the rollouts we should have used.
            number_of_free_rollouts = number_of_rollouts_we_shouldve_used - self.currently_used_rollout
        else:
            # Not wasting rollouts, so number_of_free_rollouts =
            # {cdf(budget + 1) - cdf(budget)} * max_rollouts
            # for a given cdf, i.e. fixed alpha and beta.
            fractional_improvement_we_are_at = beta.cdf(
                [self.currently_used_budget / self.max_budget],
                self.alpha_param, self.beta_param)[0]
            number_of_rollouts_we_have_used = fractional_improvement_we_are_at * self.max_rollout
            number_of_free_rollouts = number_of_rollouts_we_shouldve_used - number_of_rollouts_we_have_used

        logging.getLogger(
            self.logger_name).debug(f"Rollouts {number_of_free_rollouts}")
        if number_of_free_rollouts < self.K + 2:
            return self.K + 2
        return int(number_of_free_rollouts)
Example #7
def PyNy(y_hat, PD_LEN):
    PD_y_hat = y_hat[:PD_LEN]
    PD_y_hat_mean = Mean_1d(PD_y_hat)
    PD_y_hat_var = Variance_1d(PD_y_hat, PD_y_hat_mean)
    PD_a, PD_b = AandB(PD_y_hat_mean, PD_y_hat_var)

    nonPD_y_hat = y_hat[PD_LEN:]
    nonPD_y_hat_mean = Mean_1d(nonPD_y_hat)
    nonPD_y_hat_var = Variance_1d(nonPD_y_hat, nonPD_y_hat_mean)
    N_a, N_b = AandB(nonPD_y_hat_mean, nonPD_y_hat_var)

    cdf_x = np.linspace(0, 1, 1001)

    P_y = beta.cdf(cdf_x, PD_a, PD_b)
    N_y = beta.cdf(cdf_x, N_a, N_b)

    # Comment these out with # if you don't want the plots.
    for i in range(len(N_y)):
        N_y[i] = 1 - N_y[i]

    # Plot the fitted CDF values.
    plt.plot(cdf_x, P_y, 'red')
    # main emotion
    plt.plot(cdf_x, N_y, 'blue')
    # non-emotion

    plt.ylim(0, 2)
    plt.show()

    # Plot a histogram of the y_hat values.
    if PD_a > 0:
        plt.hist([y_hat[:PD_LEN], y_hat[PD_LEN:]])
        plt.show()

    return P_y, N_y
Example #8
 def integrand(x):
     pdf1 = beta.pdf(x,a1,b1)
     pdf2 = beta.pdf(x,a2,b2)
     cdf1 = beta.cdf(x,a1,b1)
     cdf2 = beta.cdf(x,a2,b2)
     rho = pdf1 * cdf2 + pdf2 * cdf1
     return -rho * np.log(rho)
Example #9
def f_dp(x, alpha_aa, alpha_c, a_aa0, b_aa0, a_c0, b_c0, a_aa1, b_aa1, a_c1,
         b_c1):
    f_c = (lambda x_c: alpha_c * beta.cdf(x_c, a_c1, b_c1, 0, 1) +
           (1 - alpha_c) * beta.cdf(x_c, a_c0, b_c0, 0, 1))
    inv_f_c = inversefunc(f_c, domain=[0, 1], open_domain=[False, False])
    return float(
        inv_f_c(alpha_aa * beta.cdf(x, a_aa1, b_aa1) +
                (1 - alpha_aa) * beta.cdf(x, a_aa0, b_aa0)))
Example #10
def diff_between(k1, n1, k2, n2, p=0.96, a=0.05, b=1, Nsamp=10000):
    """ Bayesian maximum a posteriori estimate of the difference in prevalence 
    when the same test is applied to two groups
    
    k1 : number of participants significant in group 1 out of 
    n1 : total number of participants in group 1
    k2 : number of participants significant in group 2 out of 
    n2 : total number of participants in group 2
    p  : coverage for the highest-posterior density interval (in [0, 1])
    a  : alpha value of within-participant test (default=0.05)
    b  : sensitivity/beta of within-participant test (default=1)
    Nsamp : number of samples from the posterior

    Outputs:
    map    : maximum a posteriori estimate of the difference in prevalence:
             gamma_1 - gamma_2
    post_x : x-axis for kernel density fit of posterior distribution of the
             above
    post   : posterior distribution from kernel density fit
    hpdi   : highest-posterior density interval with coverage p 
    probGT : estimated posterior probability that the prevalence is higher in group 1
    logoddsGT : estimated log odds in favour of the hypothesis that the prevalence is higher in group 1
    samples : posterior samples
    
    """

    # gamma priors = Beta(r,s)
    r1 = 1; s1 = 1
    r2 = 1; s2 = 1

    # Parameters for Beta posteriors
    m11 = k1 + r1
    m12 = n1 - k1 + s1
    m21 = k2 + r2
    m22 = n2 - k2 + s2

    # Generate truncated beta samples
    th1 = beta.ppf(np.random.uniform(beta.cdf(a,m11,m12),beta.cdf(b,m11,m12), Nsamp), m11, m12)
    th2 = beta.ppf(np.random.uniform(beta.cdf(a,m21,m22),beta.cdf(b,m21,m22), Nsamp), m21, m22)

    # vector of estimates of prevalence differences
    samples = (th1 - th2) / (b-a)

    # kernel density estimate of posterior
    post_x = np.linspace(-1,1,200)
    kde = sp.stats.gaussian_kde(samples)
    post = kde(post_x)
    map = post_x[np.argmax(post)]

    # Estimate the posterior probability, and logodds, that the prevalence is higher for group 1.
    # Laplace's rule of succession used to avoid estimates of 0 or 1
    probGT = (np.sum(samples>0)+1)/(Nsamp+2)
    logoddsGT = np.log(probGT / (1-probGT))
    hpdi = _hpdi(samples, p)

    res = {"map": map, "post_x": post_x, "post": post, "hpdi": hpdi, "probGT": probGT, 
           "logoddsGT": logoddsGT, "samples": samples}
    return res
Example #11
    def _Cough_Chen(self):
        """
            Distribution fitted from Chen.

        :return:
        """

        # Small droplets.  < 10micron
        nparticles_small = 230
        k = 3.75
        D_small = numpy.arange(0, 20, 0.1)
        Fcdf_small = gamma.cdf(D_small, k)

        Dsmall_avg = (D_small[:-1] + D_small[1:]) / 2.
        F_small = numpy.diff(Fcdf_small)

        Volume_small = (4.0 / 3.0) * numpy.pi * (
            Dsmall_avg * 1e-6)**3 * F_small * nparticles_small
        vol_small_ml = (Volume_small.sum() * m**3).asUnit(ml)

        # Medium droplets: 10 micron < x < 225 micron.
        # Up to 100 micron they evaporate in air.
        nparticles_medium = 210
        params = dict(a=0.2, b=1, loc=53, scale=200)
        D_medium = numpy.arange(10, 100, 1)
        Fcdf_medium = beta.cdf(D_medium, **params)
        Dmedium_avg = (D_medium[:-1] + D_medium[1:]) / 2.
        F_medium = numpy.diff(Fcdf_medium)

        Volume_medium = (4.0 / 3.0) * numpy.pi * (
            Dmedium_avg * 1e-6)**3 * F_medium * nparticles_medium
        vol_medium_ml = (Volume_medium.sum() * m**3).asUnit(ml)

        evaporatingDropletsVolume = vol_small_ml + vol_medium_ml

        # ==== Non-evaporating droplets ====
        D_medium = numpy.arange(100, 225, 1)
        Fcdf_medium = beta.cdf(D_medium, **params)
        Dmedium_avg = (D_medium[:-1] + D_medium[1:]) / 2.
        F_medium = numpy.diff(Fcdf_medium)

        Volume_medium = (4.0 / 3.0) * numpy.pi * (
            Dmedium_avg * 1e-6)**3 * F_medium * nparticles_medium
        vol_medium_ml = (Volume_medium.sum() * m**3).asUnit(ml)

        nparticles_large = 20
        D_large = numpy.arange(225, 800, 1)
        Fcdf_large = uniform.cdf(D_large, loc=225, scale=800 - 225)
        Dlarge_avg = (D_large[:-1] + D_large[1:]) / 2.
        F_large = numpy.diff(Fcdf_large)

        Volume_large = (4.0 / 3.0) * numpy.pi * (
            Dlarge_avg * 1e-6)**3 * F_large * nparticles_large
        vol_large_ml = (Volume_large.sum() * m**3).asUnit(ml)

        nonEvaporatingDropletsVolume = vol_large_ml + vol_medium_ml

        return evaporatingDropletsVolume, nonEvaporatingDropletsVolume
Example #12
def rhomax_integrand(x,a1,a2,b1,b2):
    pdf1 = beta.pdf(x,a1,b1)
    pdf2 = beta.pdf(x,a2,b2)
    cdf1 = beta.cdf(x,a1,b1)
    cdf2 = beta.cdf(x,a2,b2)
    
    rho = pdf1 * cdf2 + pdf2 * cdf1

    integrand = -rho * np.log( rho )
    return integrand
Example #13
def Rhomax(x, a1, a2, b1, b2):
    # Takes numbers and returns a prob. number

    pdf1 = beta.pdf(x, a1, b1)
    pdf2 = beta.pdf(x, a2, b2)
    cdf1 = beta.cdf(x, a1, b1)
    cdf2 = beta.cdf(x, a2, b2)

    rho = pdf1 * cdf2 + pdf2 * cdf1

    return rho
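Here rho is the density of max(X1, X2) for independent X1 ~ Beta(a1, b1) and X2 ~ Beta(a2, b2): the CDF of the maximum is F1*F2, and differentiating gives pdf1*cdf2 + pdf2*cdf1. Integrating the integrand from Example #12 over [0, 1] therefore yields the differential entropy of the maximum; a sketch with illustrative parameters:

from scipy.integrate import quad

# Differential entropy of max(Beta(2, 5), Beta(5, 2)),
# using rhomax_integrand(x, a1, a2, b1, b2) from Example #12.
entropy, abserr = quad(rhomax_integrand, 0, 1, args=(2, 5, 5, 2))
print(entropy)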
Example #14
def bibeta_roc(Y,p):
    def logL(ab):
        a0,b0,a1,b1 = ab
        LL = beta.logpdf(p[Y==0],a0,b0).sum() + beta.logpdf(p[Y==1],a1,b1).sum() 
        return -LL
    result = minimize(logL,[1,3,3,1],bounds=[(1e-7,None)]*4)
    a0,b0,a1,b1 = result.x
    threshold = np.linspace(0,1,1000)
    fpr = 1-beta.cdf(threshold,a0,b0)
    tpr = 1-beta.cdf(threshold,a1,b1)
    return threshold,fpr,tpr
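A usage sketch on synthetic scores (assumes the imports the snippet relies on: numpy as np, scipy.stats.beta and scipy.optimize.minimize; the data are illustrative):

import numpy as np

rng = np.random.default_rng(0)
p = np.concatenate([rng.beta(2, 6, 500), rng.beta(6, 2, 500)])  # predicted scores
Y = np.concatenate([np.zeros(500), np.ones(500)])               # true labels
threshold, fpr, tpr = bibeta_roc(Y, p)
print(-np.trapz(tpr, fpr))  # AUC; fpr runs from 1 down to 0, hence the sign flip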
Example #15
def demand_sampler(random_state, size):
    demand = numpy.zeros([size, T, len(I) * len(J)])
    for i_idx, i in enumerate(I):
        for j_idx, j in enumerate(J):
            gamma_k, gamma_theta, beta_a, beta_b = airFare.loc[i, j][:4]
            G = random_state.gamma(gamma_k, gamma_theta,
                                   size=size) * (1 - Rate[i])
            for t in range(1, T):
                B = (beta.cdf(1 - day[t] / day[0], beta_a, beta_b) -
                     beta.cdf(1 - day[t - 1] / day[0], beta_a, beta_b))
                demand[:, t, 6 * i_idx + j_idx] = random_state.poisson(G * B)
    return demand
Example #16
def rhomax_integrand(x, a1, a2, b1, b2):
    # Takes numbers and returns a prob. number

    pdf1 = beta.pdf(x, a1, b1)
    pdf2 = beta.pdf(x, a2, b2)
    cdf1 = beta.cdf(x, a1, b1)
    cdf2 = beta.cdf(x, a2, b2)

    rho = pdf1 * cdf2 + pdf2 * cdf1

    integrand = -rho * np.log(rho)
    return integrand
Example #17
    def target_func_AnotB(z_AnotB):
        if (z_AnotB < 0 or z_AnotB > 1 or (pA - z_AnotB * (1 - pB)) / pB < 0
                or (pA - z_AnotB * (1 - pB)) / pB > 1):
            return sys.float_info.max

        cdf_AnotB = beta.cdf(z_AnotB, a2, b2)
        #substitute z_AB
        cdf_AB = beta.cdf((pA - z_AnotB * (1 - pB)) / pB, a1, b1)

        if (cdf_AB == 0 or cdf_AnotB == 0):
            return sys.float_info.max
        return max(cdf_AnotB / cdf_AB, cdf_AB / cdf_AnotB)
Example #18
    def target_func_AB(z_AB):
        if (z_AB < 0 or z_AB > 1 or (pA - z_AB * pB) / (1 - pB) < 0
                or (pA - z_AB * pB) / (1 - pB) > 1):
            return sys.float_info.max

        cdf_AB = beta.cdf(z_AB, a1, b1)
        #substitute z_AnotB
        cdf_AnotB = beta.cdf((pA - z_AB * pB) / (1 - pB), a2, b2)

        if (cdf_AB == 0 or cdf_AnotB == 0):
            return sys.float_info.max
        return max(cdf_AnotB / cdf_AB, cdf_AB / cdf_AnotB)
Example #19
def eva_policy(theta_aa, theta_c, a_aa0, b_aa0, a_aa1, b_aa1, a_c0, b_c0, a_c1,
               b_c1):
    tpr = []
    fpr = []

    tpr.append(1 - beta.cdf(theta_aa, a_aa1, b_aa1))
    tpr.append(1 - beta.cdf(theta_c, a_c1, b_c1))

    fpr.append(1 - beta.cdf(theta_aa, a_aa0, b_aa0))
    fpr.append(1 - beta.cdf(theta_c, a_c0, b_c0))

    return tpr, fpr
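For instance (threshold and Beta parameters hypothetical; assumes scipy.stats.beta is imported as in the snippet):

tpr, fpr = eva_policy(0.5, 0.5,
                      a_aa0=2, b_aa0=5, a_aa1=5, b_aa1=2,
                      a_c0=2, b_c0=5, a_c1=5, b_c1=2)
print(tpr, fpr)  # survival functions of the score distributions at each threshold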
Example #20
	def compute_covariance(self, x, y, diag=False):
		x, y = format_data(x), format_data(y)
		x, y = np.copy(x), np.copy(y)
		assert(x.shape[1] == self.N and y.shape[1] == self.N)

		for i in range(x.shape[1]):
			a, b = self.parameters[-2*self.N + 2*i].value, self.parameters[-2*self.N + 2*i + 1].value
			x[:,i] = beta.cdf(x[:,i], a, b)
			y[:,i] = beta.cdf(y[:,i], a, b)

		val =  self.kernel.compute_covariance(x, y, diag=diag)

		return val
Example #21
def hopkins(X: np.array, alpha=0.05):
    '''
    The Hopkins statistic measures the spatial randomness of a dataset:
        - Close to 1 is evidence of substructure (clusters)
        - Close to 0.5 suggests uniformly random data (no structure)
        - Close to 0 indicates regularity (evenly spaced points)
    Input:
        X: An (n,) or (n,m) list or numpy array

    Output:
        H: float, Hopkins statistic
        D: string, decision on whether we have regularity, no structure, or clusters
        p: float, p-value from the CDF of the Beta(m,m) distribution
    '''
    X = np.array(X)
    if len(X.shape) == 1:
        X = X.reshape(-1,1)
    d = X.shape[1]
    n = len(X)
    m = int(0.1 * n)
    nbrs = NearestNeighbors(n_neighbors=1).fit(X)
    rand_X = sample(range(0, n, 1), m)
    ujd = []
    wjd = []
    for j in range(0, m):
        # Distance from a random point to its nearest sample; the random
        # point is not in X, so its nearest neighbour is at index 0.
        random_uniform = np.random.uniform(np.min(X, axis=0), np.max(X, axis=0), d).reshape(1, -1)
        u_dist, _ = nbrs.kneighbors(random_uniform, 1, return_distance=True)
        ujd.append(u_dist[0][0]**d)
        # Distance from a sample to its nearest other sample
        # (index 1 skips the point itself).
        random_sample = X[rand_X[j]].reshape(1, -1)
        w_dist, _ = nbrs.kneighbors(random_sample, 2, return_distance=True)
        wjd.append(w_dist[0][1]**d)
    denom = np.sum(ujd) + np.sum(wjd)
    if denom == 0:
        raise RuntimeWarning('The Hopkins denominator was 0, cannot proceed')
    else:
        H = np.sum(ujd) / denom
        if H > 0.5:
            p = 1 - beta.cdf(H, m, m)
            if p < alpha:
                D = 'There is evidence of clusters'
            else:
                D = 'There is no evidence of structure'
        else:
            p = beta.cdf(H, m, m)
            if p < alpha:
                D = 'There is evidence of regularity'
            else:
                D = 'There is no evidence of structure'
    return H, D, p
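A usage sketch (the function assumes numpy, `from random import sample`, `from sklearn.neighbors import NearestNeighbors` and `from scipy.stats import beta`; the data below are illustrative):

import numpy as np

rng = np.random.RandomState(0)
clustered = np.vstack([rng.normal(0, 0.05, (100, 2)), rng.normal(1, 0.05, (100, 2))])
uniform = rng.uniform(0, 1, (200, 2))
print(hopkins(clustered))  # H close to 1: evidence of clusters
print(hopkins(uniform))    # H close to 0.5: no structure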
Example #22
    def generate_conditional_ps(self):
        # p(TiN|Somatic) and p(TiN|Germline)
        n_af_w = np.zeros([self.number_of_sites, len(self.af)])
        t_af_w = np.zeros([self.number_of_sites, len(self.af)])
        t_het_direction = np.ones([self.number_of_sites, len(self.af)])
        t_het_direction[:, 0:int(np.round(np.true_divide(len(self.af), 2)))] = -1
        for i, f in enumerate(self.af):
            n_af_w[:, i] = self.rv_normal_af.cdf(f) - self.rv_normal_af.cdf(f - 0.005)
            t_af_w[:, i] = self.rv_tumor_af.cdf(f) - self.rv_tumor_af.cdf(f - 0.005)
            # ac given somatic
            t_af = np.multiply(f, self.n_depth)
            n_ac_given_tin = np.multiply(t_af[:, np.newaxis], self.CN_ratio)

            # ac given heterozygous
            f_t_af = 0.5 - np.abs((.5 - f))
            psi_t_af = 0.5 - f_t_af
            psi_t_af = np.multiply(psi_t_af, t_het_direction[:, i])
            exp_f = 0.5 + np.multiply(psi_t_af[:, np.newaxis], self.CN_ratio)
            n_het_ac_given_tin = np.multiply(exp_f, self.n_depth[:, np.newaxis])

            for TiN_idx, TiN in enumerate(self.TiN_range):
                self.p_TiN_given_S[:, TiN_idx] += np.multiply(
                    beta.cdf(
                        self.normal_f[:] + .01, n_ac_given_tin[:, TiN_idx] + 1,
                        self.n_depth[:] - n_ac_given_tin[:, TiN_idx] + 1) -
                    beta.cdf(self.normal_f[:], n_ac_given_tin[:, TiN_idx] + 1,
                             self.n_depth[:] - n_ac_given_tin[:, TiN_idx] + 1),
                    t_af_w[:, i])
                self.p_TiN_given_het[:, TiN_idx] += np.multiply(
                    beta.cdf(
                        self.normal_f[:] + .01,
                        n_het_ac_given_tin[:, TiN_idx] + 1,
                        self.n_depth[:] - n_het_ac_given_tin[:, TiN_idx] + 1) -
                    beta.cdf(
                        self.normal_f[:], n_het_ac_given_tin[:, TiN_idx] + 1,
                        self.n_depth[:] - n_het_ac_given_tin[:, TiN_idx] + 1),
                    t_af_w[:, i])

        self.p_artifact = (self.rv_tumor_af.cdf(self.normal_f + .01) -
                           self.rv_tumor_af.cdf(self.normal_f))
        self.p_TiN_given_G = np.multiply(1 - self.p_artifact[:, np.newaxis],
                                         self.p_TiN_given_het) + np.multiply(
                                             self.p_artifact[:, np.newaxis],
                                             1 - self.p_TiN_given_het)
Example #23
def calculate_aep_curve(elt, grid_size=2**14, max_loss_factor=5):
    """ This function calculates the OEP of a given ELT
    ----------
    elt : pandas dataframe containing PLT
    grid_size: grid size used for ep calculations
    max_loss_factor: factor used to extimate max loss

    Returns
    -------
    EPCurve :
         exceedance probability curve

    """
    elt_lambda = ELT(elt).get_lambda()
    max_loss = _max_loss(elt, max_loss_factor)
    dx = max_loss / grid_size
    xx = np.arange(1, grid_size + 1) * dx

    severity_distribution, severity_density_function = calculate_severity_distribution(
        elt, grid_size, max_loss_factor)

    elt['dx'] = dx / elt['ExpValue']
    elt['xx'] = ''
    elt['FX'] = ''
    elt['FX2'] = ''
    fx2 = [0] * grid_size
    for index, row in elt.iterrows():
        row['xx'] = np.true_divide(xx, row['ExpValue'])
        row['FX'] = beta.cdf(row['xx'], row['alpha'], row['beta'])
        row['FX2'] = beta.cdf(row['xx'][0] - row['dx'] / 2, row['alpha'],
                              row['beta'])
        cdf2 = beta.cdf(row['xx'][:-1] + row['dx'] / 2, row['alpha'],
                        row['beta'])
        cdf1 = beta.cdf(row['xx'][:-1] - row['dx'] / 2, row['alpha'],
                        row['beta'])
        row['FX2'] = np.insert(cdf2 - cdf1, 0, row['FX2'])
        fx2 = fx2 + (row['Rate'] / elt_lambda) * row['FX2']
        elt.at[index] = row

    fx_hat = fft(fx2)
    fs_hat = numpy.exp(-elt_lambda * (1 - fx_hat))
    fs = numpy.real(ifft(fs_hat, norm="forward") / grid_size)
    fs_cum = numpy.cumsum(fs)
    severity_density_function['AEP'] = 1 - fs_cum
    aep = severity_density_function.rename(columns={
        'AEP': 'Probability',
        'threshold': 'Loss'
    })

    return ep_curve.EPCurve(aep, ep_type=ep_curve.EPType.AEP)
Example #24
def hpdi(p, k, n, a=0.05, b=1):
    """ Bayesian highest posterior density interval of population prevalence gamma
    under a uniform prior

    p : HPDI to return (e.g. 0.95 for 95%)
    k : number of participants significant out of 
    n : total number of participants
    a : alpha value of within-participant test (default=0.05)
    b : sensitivity/beta of within-participant test (default=1)

    """

    # uniform prior on gamma: Beta(r, s) with r = s = 1 (the docstring's uniform prior)
    r, s = 1, 1
    b1 = k+r
    b2 = n-k+s

    # truncated beta pdf/cdf/icdf
    tbpdf = lambda x: beta.pdf(x,b1,b2) / (beta.cdf(b,b1,b2)-beta.cdf(a,b1,b2))
    tbcdf = lambda x: (beta.cdf(x,b1,b2)-beta.cdf(a,b1,b2)) / (beta.cdf(b,b1,b2)-beta.cdf(a,b1,b2))
    tbicdf = lambda x: beta.ppf( (1-x)*beta.cdf(a,b1,b2) + x*beta.cdf(b,b1,b2),b1,b2 )

    if k == 0:  # edge case (the original 'k==a' compared a count to alpha)
        x = np.array([a, tbicdf(p)])
    elif k==n:
        x = np.array([tbicdf(1-p), b])
    else:
        f = lambda x:  np.array([tbcdf(x[1])-tbcdf(x[0])-p, tbpdf(x[1])-tbpdf(x[0])]);
        x, info, ier, mesg = fsolve(f, np.array([tbicdf((1-p)/2), tbicdf((1+p)/2)]),full_output=True )

    # limit to valid theta values
    if (x[0]<a) or (x[1]<x[0]):
        x = np.array([a, tbicdf(p)])
    if x[1]>b: 
        x = np.array([tbicdf(1-p), b])
    hpdi = (x-a)/(b-a)
    return hpdi
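With the prior constants made explicit (r = s = 1 above), a usage sketch (assumes numpy, scipy.stats.beta and scipy.optimize.fsolve as in the snippet; counts illustrative):

# 95% HPDI for the population prevalence given 12 of 20 significant participants
print(hpdi(0.95, k=12, n=20))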
Example #25
def Beta_Entropy(w0, l0, w1, l1):
    global MC_samples
    seq.reset()
    x = seq.get(MC_samples)
    x = np.reshape(x, MC_samples)

    pdf0 = beta.pdf(x, w0+1, l0+1)
    pdf1 = beta.pdf(x, w1+1, l1+1)
    cdf0 = beta.cdf(x, w0+1, l0+1)
    cdf1 = beta.cdf(x, w1+1, l1+1)
    
    rho = pdf0 * cdf1 + pdf1 * cdf0 + 1E-4
    integral = np.mean( -rho * np.log( rho ) )

    return integral
Example #26
def func_2b():
    sample_x = np.random.beta(8, 5, 1000)
    sample_y = np.random.beta(4, 7, 1000)
    observed_result = np.zeros((5, 5))
    estimated_result = np.zeros((5, 5))
    difference_result = np.zeros((5, 5))
    estimated_x = [0] * 5
    estimated_y = [0] * 5
    chi_squared_result = 0
    #take (sample_x[i],sample_y[i]) as a sample of (X,Y)
    #print sample_x,"\n",sample_y
    #divide the 2-way table into 5*5 areas
    for i in range(0, 5):  #row
        for j in range(0, 5):  #column
            row_down = 0.2 * i
            row_up = 0.2 * i + 0.2
            column_down = 0.2 * j
            column_up = 0.2 * j + 0.2
            for sample_index in range(0, 1000):
                if (row_down <= sample_y[sample_index] < row_up
                        and column_down <= sample_x[sample_index] < column_up):
                    observed_result[i][j] += 1
            #calculation_result[i][j]/=1000
    for i in range(0, 5):
        estimated_x[i] = beta.cdf(0.2 * i + 0.2, 8, 5) - beta.cdf(
            0.2 * i, 8, 5)
        estimated_y[i] = beta.cdf(0.2 * i + 0.2, 4, 7) - beta.cdf(
            0.2 * i, 4, 7)
    for i in range(0, 5):
        for j in range(0, 5):
            estimated_result[i][j] = (estimated_y[i] * estimated_x[j] * 1000)
    difference_result = estimated_result - observed_result
    for i in range(0, 5):
        for j in range(0, 5):
            chi_squared_result += (difference_result[i][j]**2 /
                                   estimated_result[i][j])
    #print estimated_result
    print("The observed result of RV X&Y is:\n", observed_result)
    #print difference_result
    print("The result of the chi-squared test =\n", chi_squared_result)
    #The chi-squared threshold can be read from tables; pick 95.0%
    #the degrees of freedom are 4*4=16, threshold=26.3
    if chi_squared_result <= 26.3:
        print("X & Y can be considered independent")
    else:
        print("X & Y cannot be considered independent")
Example #27
def probf_baharev(df1, df2, noncen, fcrit):
    x = 1 - special.btdtri(df1, df2, fcrit)
    eps = 1.0e-7
    itr_cnt = 0
    f = None

    while itr_cnt <= 10:
        mu = noncen / 2.0
        ql = poisson.ppf(eps, mu)
        qu = poisson.ppf(1 - eps, mu)
        k = qu
        c = beta.cdf(x, df1 + k, df2)
        d = x * (1.0 - x) / (df1 + k - 1.0) * beta.pdf(x, df1 + k - 1, df2, 0)
        p = poisson.pmf(k, mu)
        f = p * c
        p = k / mu * p

        k = qu - 1
        while k >= ql:
            c = c + d
            d = (df1 + k) / (x * (df1 + k + df2 - 1)) * d
            f = f + p * c
            p = k / mu * p
            k = k - 1
        itr_cnt = itr_cnt + 1

    if (itr_cnt == 11):
        print("newton iteration failed")

    return f
Example #28
 def checkBlockedUser(a, b, th=0.95):
     # return beta.cdf(0.5, a, b) > th
     s = beta.cdf(0.5, a, b)
     blocked = False
     if s > th:
         blocked = True
     return blocked
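For example, a Beta(3, 12) posterior puts well over 95% of its mass below 0.5, while Beta(12, 3) does not (assumes scipy.stats.beta is imported):

print(checkBlockedUser(3, 12))   # True: beta.cdf(0.5, 3, 12) > 0.95
print(checkBlockedUser(12, 3))   # False: most of the mass lies above 0.5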
Example #29
def t_inv_beta(p0):
    """ A function to compute the inverse template for the beta family
  
  Parameters
  -----------------
  p0: a numpy.ndarray of shape (B,m) ,
      where m is the number of null hypotheses and B is typically the number 
      of permutations/bootstraps that contains the values on which to apply (column wise)
      the inverse beta reference family

  Returns
  -----------------
  a numpy.ndarray of shape (B,m) containing the transformed p-values

  Examples
  -----------------
  data = np.random.uniform(0,1,(10,10))
  pr.t_inv_beta(data)
  """
    # Obtain the number of null hypotheses
    m = p0.shape[1]

    # Initialize the matrix of transformed p-values
    transformed_pvalues = np.zeros(p0.shape)

    # Transform each column via the beta CDF:
    # (t_k^B)^{-1}(lambda) = F(lambda), where F (beta.cdf) is the CDF of the
    # beta(k, m+1-k) distribution.
    for k in np.arange(m):
        transformed_pvalues[:, k] = beta.cdf(p0[:, k], k + 1, m + 1 - (k + 1))

    return transformed_pvalues
Example #30
def mid_p_interval(total, passed, conf=0.682689492137, is_upper=True):
    alpha = 1. - conf
    alpha_min = alpha / 2
    vmin = alpha_min if is_upper else (1. - alpha_min)
    tol = 1e-9  # tolerance
    pmin = 0
    pmax = 1
    p = 0

    # treat special case for 0<passed<1
    # do a linear interpolation of the upper limit values
    if passed > 0 and passed < 1:
        p0 = mid_p_interval(total, 0.0, conf, is_upper)
        p1 = mid_p_interval(total, 1.0, conf, is_upper)
        p = (p1 - p0) * passed + p0
        return p

    while abs(pmax - pmin) > tol:
        p = (pmin + pmax) / 2
        # make it work for non integer using the binomial - beta relationship
        v = 0.5 * beta.pdf(p, passed + 1., total - passed + 1) / (total + 1)
        # compute the binomial cdf at passed -1
        if passed >= 1:
            v += 1 - beta.cdf(p, passed, total - passed + 1)

        if v > vmin:
            pmin = p
        else:
            pmax = p

    return p
Example #31
def _fap_cvm(freq, psd, psd_best_period):
    """
    Computes false alarm probability for the cvm-distance-minimised beta distribution
    """
    from scipy import optimize
    from scipy.stats import beta
    clip = 0.00001

    temp = np.mean(psd) * (-np.mean(psd) + np.mean(psd) ** 2 + np.var(psd))
    theta_1 = - temp / np.var(psd)
    if theta_1 < 0:
        theta_1 = clip

    theta_2 = (theta_1 - theta_1 * np.mean(psd)) / np.mean(psd)
    if theta_2 < 0:
        theta_2 = clip

    cvm_minimize = optimize.minimize(
        _cvm,
        [theta_1, theta_2],
        args=(psd,),
        bounds=((clip, None), (clip, None)),
    )
    fap = 1 - beta.cdf(psd_best_period, cvm_minimize.x[0], cvm_minimize.x[1]) ** len(freq)

    return fap
Example #32
    def mapy2beta(self, y): 
        """
        mapping y to beta
        
        parameters
        ----------
        self: class object
            hypothesis criteria

        y: array_like
            Input array

        returns
        -------
        beta CDF value: float
            Returns a float.
        """
        a = beta.cdf(y, self._betaA, self._betaB)
        i = 0
        if y < self._ymin: 
            return 0
        elif y > self._ymax: 
            return 1
        else: 
            for i, z in enumerate(self._Y): 
                if y < z: 
                    i = self._Y.index(z)
                    break
        return a
Example #33
 def pInitial(self):
     """
     Returns a distribution over initial states (z)
     The distribution is discretized into the number of bins specified by self.bins
     """
     cdf = Beta.cdf(self.quantiles,self.i[0],self.i[1])
     return np.diff(cdf)
Example #34
def plot_beta(a, b, fill=False):
    xs = np.linspace(0, 1, num=150)
    y = [beta.pdf(x, a, b) for x in xs]
    fills = [x <= 0.5 for x in xs]

    sio = io.BytesIO()
    fig = plt.figure()
    ax1 = fig.add_subplot(111)

    if fill:
        ax1.fill_between(xs, y, 0, where=fills, color='0.8')
    ax1.plot(xs, y)
    fig.savefig(sio, format='png')

    enc = base64.b64encode(sio.getvalue())
    plt.close("all")
    encoded = enc.decode('utf-8')
    prob = beta.cdf(0.5, a, b)
    return encoded, prob
Example #35
    def getbcdf_pval_swc(self, swc): 
        """
        get p-value of the beta CDF with a candidate sigma weight
        parameters
        ----------

        returns
        -------
        pval: 
        d: 
        Y: 
        ba: 
        bb: 
        bcdf: beta.cdf
        """
        Y = [self.kernelizeisw(x, swc) for x in self._data]

        Y.sort()
        Y = featureScaling(Y)
        y_m = np.mean(Y)
        y_v = np.var(Y, ddof = 1)
        if math.isnan(y_v) or y_v == 0: 
            return 0

        ba = y_m ** 2 * ((1 - y_m) / y_v - 1 / y_m)
        bb = ba * (1 - y_m) / y_m
        bcdf = beta.cdf(Y, ba, bb)
                    
        # Y = featureScaling(Y)
        # kstest evaluates the cdf callable on the sorted sample; Y is already
        # sorted, so returning the precomputed bcdf array matches it here.
        d, pval = scistats.kstest(Y, lambda cdf: bcdf)
        
        params = p3c(pval, d, Y, [x for x in swc], ba, bb, 0, bcdf)
        return params
Example #36
		def g(x):
			prob_xx=beta.cdf(x,alpha_param,beta_param)+z
			if prob_xx>1: #Numerical precision paranoia
				prob_xx=1
			xx=beta.ppf(prob_xx,alpha_param,beta_param)
			prob_diff=beta.pdf(x,alpha_param,beta_param)-beta.pdf(xx,alpha_param,beta_param)
			#print '   x=%f, prob_xx=%f, xx=%f, prob_diff=%f'%(x,prob_xx,xx,prob_diff)
			return(prob_diff)
Example #37
		def wat(x, s_n, ar):
			f = 1
			for i in range(len(ar)):
				if i == s_n:  # '==' compares values; the original 'is' compared identity
					f = f*beta.pdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)	#f(Sa|Dt)	#Dt is information available before reward
				else:
					f = f*beta.cdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)	#F(S < Sa|Dt)	#Dt is information available before reward
			return f
Example #38
 def pTransition(self,z,x):
     """
     Returns a distribution over transitions to z given the current state (z) and observation (x)
     The distribution is discretized into the number of bins specified by self.bins
     """
     alpha = self.w[0]*z + sum([x[i]*w for i, w in enumerate(self.w[1:])])
     cdf = Beta.cdf(self.quantiles, alpha[0], alpha[1])
     return np.diff(cdf)
Example #39
    def kstest(self, x):
        s = Sample(x).set(self.centroid['mean'], self.centroid['std'])
        fss = featurescaling(s, x_min=self._stats_['min'], x_max=self._stats_['max'])
        p = scibeta.cdf(fss, self.alpha, self.beta)
        if Sample.x == Sample.m or Sample.x == Sample.c:
            p = 1. - p

        return 0 if math.isnan(p) else p
Example #40
def betadist(betaparams, B, pcF):
    # defining beta distribution parameters
    a = float(betaparams['a'].value)
    b = float(betaparams['b'].value)
    loc = float(betaparams['loc'].value)
    scale = float(betaparams['scale'].value)
    # creating fitted data
    model_pcF = beta.cdf(B, a, b, loc=loc, scale=scale)
    # returning the residual
    return (model_pcF - pcF)
Example #41
def _fap_pre(time, freq, psd_best_period):
    """
    Computes false alarm probability for the pre-defined beta distribution
    """
    from scipy.stats import beta
    a = (3 - 1) / 2
    b = (len(time) - 3) / 2
    fap = 1 - beta.cdf(psd_best_period, a, b) ** len(freq)

    return fap
Example #42
def beta_rain(a, b, Pt, P):
    try:
        # The bare comparisons in the original were no-op statements;
        # assert the intended parameter check instead.
        assert a > 0 and b > 0
        P['c_rain'] = beta.cdf(np.linspace(0, 1, len(P)), a, b) * Pt
        P['i_rain'] = P['c_rain'].diff(periods=1)
        P.fillna(value=0, inplace=True)
        return P
    except Exception:
        print("parameter error/s")
Example #43
def credible_interval(c, n, CI=0.95, alpha=1, beta=1):
    lower_tail = (1.0 - CI) / 2.0
    upper_tail = 1.0 - lower_tail
    x = np.linspace(0, 1, 1000)
    a = c + alpha
    b = n - c + beta
    mean = a / (a + b)
    cdf = beta_distribution.cdf(x, a, b)
    lower_bound = x[cdf > lower_tail][0]
    upper_bound = x[cdf < upper_tail][-1]
    return mean, lower_bound, upper_bound
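A usage sketch (assumes numpy as np and `from scipy.stats import beta as beta_distribution`; counts illustrative):

# 95% equal-tailed credible interval for a rate with 7 successes in 20 trials,
# under the default Beta(1, 1) prior
mean, lower, upper = credible_interval(7, 20)
print(mean, lower, upper)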
Example #44
def beta_slope_profile(numprofiles, max_a, min_a, max_b, min_b, elev, base):
    #generating random parameters for beta distribution
    a_list=np.random.rand(numprofiles)*(max_a-min_a)+min_a
    b_list=np.random.rand(numprofiles)*(max_b-min_b)+min_b
    b_list=a_list/b_list
    x=np.linspace(0,base,numpts)
    y_list=[]
    for j in range(len(a_list)):
        #3. Generating topo profiles
        y=beta.cdf(x,a_list[j],b_list[j],loc=0,scale=base)
        y_list.append(y[::-1])
    return x,y_list
Example #45
    def __check__(self):
        self.all_distances = [d / max(self.all_distances) for d in self.all_distances]
        mean = numpy.mean(self.all_distances)
        var = numpy.var(self.all_distances, ddof=1)

        ii = len(self.all_distances)
        while var >= (mean * (1 - mean)):
            ii -= 1
            mean = numpy.mean(self.all_distances[:ii])
            var = numpy.var(self.all_distances[:ii], ddof=1)

        alpha1 = mean * (mean * (1 - mean) / var - 1)
        beta1 = alpha1 * (1 - mean) / mean
        for d in sorted(self.all_distances):
            print(d, beta.cdf(d, alpha1, beta1))
Example #46
def betadist_leastsquare_fitting(B, pcF, ai, bi, loc, scale):
    #defining power law variables
    betaparams = Parameters()
    betaparams.add('a', value=ai, vary=True, min=0.001, max=None)
    betaparams.add('b', value=bi, vary=True, min=0.001, max=None)
    betaparams.add('loc', value=loc, vary=True, min=0, max=loc)
    betaparams.add('scale', value=scale, vary=True, min=scale, max=90)
    #minimizing residuals
    result = minimize(betadist, betaparams, args=(B,pcF))
    Bfit = np.linspace(betaparams['loc'].value,betaparams['scale']+betaparams['loc'].value,90*10+1)
    pcFfit=beta.cdf(Bfit,
                    betaparams['a'].value,
                    betaparams['b'].value,
                    loc=betaparams['loc'].value,
                    scale=betaparams['scale'].value)
    
    return (Bfit, pcFfit, np.sqrt(np.mean((result.residual)**2)),
            betaparams['a'].value, betaparams['b'].value,
            betaparams['loc'].value, betaparams['scale'].value)
Example #47
def getProb(post):
	m = post.shape[0]
	prob = np.zeros((m, 1))
	for i in range(m):
		b1, b2, b3 = post[i][:]
		#p = dblquad(integrand, 0.0, 0.5, lambda p2 : p2, lambda p2 : 1-p2, args=(b1, b2, b3))[0]
		p = 1-beta.cdf(0.5, b1, b2)
		#print(p)
		'''
		if b3 == 0:
			p *= gamma(b1+b2)/(gamma(b1)*gamma(b2))
		else:
			p *= gamma(b1+b2+b3)/(gamma(b1)*gamma(b2)*gamma(b3))
		p /= 5000000
		'''
		prob[i] = p
	return prob
Example #48
def _cvm(param, data):
    """
    Cramer-von-Mises distance for beta distribution
    """
    from scipy.stats import beta
    a, b = param
    ordered_data = np.sort(data, axis=None)
    sumbeta = 0
    for n in range(len(data)):
        cdf = beta.cdf(ordered_data[n], a, b)
        # empirical-CDF midpoint of the (n+1)-th order statistic is (n + 0.5)/N
        sumbeta += (cdf - (n + 0.5) / len(data)) ** 2.0

    cvm_dist = (1. / len(data)) * sumbeta + 1. / (12 * (len(data) ** 2.))
    # cvm_dist is a scalar; the original boolean-mask indexing fails on scalars
    cvm = cvm_dist if np.isfinite(cvm_dist) else np.inf

    return cvm
Example #49
def _fap_nll(time, freq, psd, psd_best_period):
    """
    Computes false alarm probability for the negative logarithmic likelihood-minimised beta distribution
    """
    from scipy import optimize
    from scipy.stats import beta
    a = (3 - 1) / 2
    b = (len(time) - 3) / 2
    clip = 0.00001
    nll_minimize = optimize.minimize(
        _nll,
        [a, b],
        args=(psd,),
        bounds=((clip, None), (clip, None)),
    )
    fap = 1 - beta.cdf(psd_best_period, nll_minimize.x[0], nll_minimize.x[1]) ** len(freq)

    return fap
Example #50
    def __check__(self):
        """
        purely for debugging and dev - trying to understand the distribution of points in a cluster
        :return:
        """
        self.all_distances = [d / max(self.all_distances) for d in self.all_distances]
        mean = numpy.mean(self.all_distances)
        var = numpy.var(self.all_distances, ddof=1)

        ii = len(self.all_distances)
        while var >= (mean * (1 - mean)):
            ii -= 1
            mean = numpy.mean(self.all_distances[:ii])
            var = numpy.var(self.all_distances[:ii], ddof=1)

        alpha1 = mean * (mean * (1 - mean) / var - 1)
        beta1 = alpha1 * (1 - mean) / mean
        for d in sorted(self.all_distances):
            print(d, beta.cdf(d, alpha1, beta1))
Example #51
def inverse_beta_ml_band(z,i,N):
	alpha_param=i+1
	beta_param=N-i+1

	small=.5
	if i<small:
		x=0
		xx=beta.ppf(z,alpha_param,beta_param)
	elif i>N-small:
		x=beta.ppf(1.0-z,alpha_param,beta_param)
		xx=1
		if np.isnan(x):
			print("NaN failure in beta edge case")
			IPython.embed()
	else:
		x_min=0
		x_max=beta.ppf(1.0-z,alpha_param,beta_param)


		def g(x):
			prob_xx=beta.cdf(x,alpha_param,beta_param)+z
			if prob_xx>1: #Numerical precision paranoia
				prob_xx=1
			xx=beta.ppf(prob_xx,alpha_param,beta_param)
			prob_diff=beta.pdf(x,alpha_param,beta_param)-beta.pdf(xx,alpha_param,beta_param)
			#print '   x=%f, prob_xx=%f, xx=%f, prob_diff=%f'%(x,prob_xx,xx,prob_diff)
			return(prob_diff)
		try:
			#print '*** (x_min,x_max)=%f,%f'%(x_min,x_max)
			x=brentq(g,x_min,x_max)
			#print '    (x)=%f\n'%(x)
		except Exception:
			print("Failure at 'brentq'.")
			IPython.embed()
		prob_xx=beta.cdf(x,alpha_param,beta_param)+z
		if prob_xx>1: #Numerical precision paranoia
			prob_xx=1
		xx=beta.ppf(prob_xx,alpha_param,beta_param)
	larger_x=np.max((x,xx))
	smaller_x=np.min((x,xx))	
	return((smaller_x,larger_x))
Example #52
def guess_beta(num_weights,results,CI=.9,alpha=.3,color='green'):
	num_coins=fc.total_num_coins(results)
	x=np.linspace(0,1,num_weights)
	y=np.zeros(num_weights)
	lower=np.zeros(num_weights)
	upper=np.zeros(num_weights)
	for i,xx in enumerate(x):
		for num_flips in results:
			for heads in results[num_flips]:
				#print 'num_flips=%d,heads=%d'%(num_flips,heads)
				sum_weight=float(results[num_flips][heads])/float(num_coins)
				y[i]+=sum_weight * beta.cdf(xx,heads+1,num_flips-heads+1)
	if y[-1]>1.0:
		#print 'y[-1] = %f !  Fixing...'%(y[-1])
		y/=y[-1]
	for i,xx in enumerate(x):
		(lower[i],upper[i])=nest.inverse_beta_ml_band(CI,y[i]*float(num_coins),num_coins)

	plt.plot(x,y,color=color,label='Estimate with %.1f%% credible interval'%(100.0*CI))
	ax=plt.gca()
	ax.fill_between(x,lower,upper,alpha=alpha,color=color)
Example #53
from scipy.stats import beta
print(beta.cdf(2,3,4))
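Note that the Beta distribution's support is [0, 1], so any x >= 1 (such as the 2 above) gives a CDF of exactly 1.0; evaluating inside the support is more informative:

print(beta.cdf([0.25, 0.5, 0.75], 3, 4))  # CDF of Beta(3, 4) at interior points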
Example #54
def gridSetting(data,options,Seed):
    
    # Initialization
    d = np.size(options['borders'],0)
    X1D = []
    '''Equal steps in cumulative distribution'''
    
    if options['gridSetType'] == 'cumDist':
        Like1D = np.zeros([options['GridSetEval'], 1])
        for idx in range(d):
            if options['borders'][idx, 0] < options['borders'][idx,1]:
                X1D.append(np.zeros([1, options['stepN'][idx]]))
                local_N_eval = options['GridSetEval']
                while any(np.diff(X1D[idx]) == 0):
                    Xtest1D = np.linspace(options['borders'][idx,0], options['borders'][idx,1], local_N_eval)
                    alpha = Seed[0]
                    beta = Seed[1]
                    l = Seed[2]
                    gamma = Seed[3]
                    varscale = Seed[4]
                    
                    if idx == 1:
                        alpha = Xtest1D
                    elif idx == 2:
                        beta = Xtest1D
                    elif idx == 3:
                        l = Xtest1D
                    elif idx == 4:
                        gamma = Xtest1D
                    elif idx == 5:
                        varscale = Xtest1D
                    
                    Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale])
                    Like1D = Like1D + np.mean(Like1D)*options['UniformWeight']
                    Like1D = np.cumsum(Like1D)
                    Like1D = Like1D/max(Like1D)
                    wanted = np.linspace(0,1,options['stepN'][idx])
                    
                    for igrid in range(options['stepN'][idx]):
                        X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first'])) #TODO check
                        
                    local_N_eval = 10*local_N_eval
            else: 
                X1D.append(copy.deepcopy(options['borders'][idx,0]))
        
        ''' equal steps in cumulative  second derivative'''
    elif (options['gridSetType'] in ['2', '2ndDerivative']):
        Like1D = np.zeros([options['GridSetEval'], 1])
        
        for idx in range(d):
            if options['borders'][idx,0] < options['borders'][idx,1]:
                X1D.append(np.zeros([1,options['stepN'][idx]]))
                local_N_eval = options['GridSetEval']
                while any(np.diff(X1D[idx]) == 0):
                    
                    Xtest1D = np.linspace(options['borders'][idx,0], options['borders'][idx,1], local_N_eval)
                    alpha = Seed[0]
                    beta = Seed[1]
                    l = Seed[2]
                    gamma = Seed[3]
                    varscale = Seed[4]
                    
                    if idx == 1:
                        alpha = Xtest1D
                    elif idx == 2:
                        beta = Xtest1D
                    elif idx == 3:
                        l = Xtest1D
                    elif idx == 4:
                        gamma = Xtest1D
                    elif idx == 5:
                        varscale = Xtest1D
                        
                    # calc likelihood on the line                        
                    Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale])
                    Like1D = np.abs(np.convolve(np.squeeze(Like1D), np.array([1,-2,1]), mode='same'))
                    Like1D = Like1D + np.mean(Like1D)*options['UniformWeight']
                    Like1D = np.cumsum(Like1D)
                    Like1D = Like1D/max(Like1D)
                    wanted = np.linspace(0,1,options['stepN'][idx])
        
                    for igrid in range(options['stepN'][idx]):
                        X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first'])) #ToDo
                    local_N_eval = 10*local_N_eval
                    
                    if local_N_eval > 10**7:
                        X1D[idx] = np.unique(np.array(X1D)) # TODO check
                        break
            else: 
                X1D.append(options['borders'][idx,0])
    
        ''' different choices for the varscale '''
        ''' We use STD now directly as parametrisation'''
    elif options['gridSetType'] in ['priorlike', 'STD', 'exp', '4power']:
        for i in range(4):
            if options['borders'][i,0] < options['borders'][i,1]:
                X1D.append(np.linspace(options['borders'][i,0], options['borders'][i,1], options['stepN'][i]))
            else:
                X1D.append(copy.deepcopy(options['borders'][i,0]))
        if options['gridSetType'] == 'priorlike':
            maximum = b.cdf(options['borders'][4,1],1,options['betaPrior'])
            minimum = b.cdf(options['borders'][4,0],1,options['betaPrior'])
            X1D.append(b.ppf(np.linspace(minimum, maximum, options['stepN'][4]), 1, options['betaPrior']))
        elif options['gridSetType'] == 'STD':
            maximum = np.sqrt(options['borders'][4,1])
            minimum = np.sqrt(options['borders'][4,0])
            X1D.append((np.linspace(minimum, maximum, options['stepN'][4]))**2)
        elif options['gridSetType'] == 'exp':
            p = np.linspace(1,1,options['stepN'][4])
            X1D.append(np.log(p)/np.log(.1)*(options['borders'][4,1] - options['borders'][4,0]) + options['borders'][4,0])
        elif options['gridSetType'] == '4power':
            maximum = np.sqrt(options['borders'][4,1])
            minimum = np.sqrt(options['borders'][4,0])
            X1D.append((np.linspace(minimum, maximum, options['stepN'][4]))**4) 
        
        
    return X1D
Example #55
    # The snippet starts mid-assignment; a plausible preamble (theta grid assumed):
    x = np.linspace(0, 1, 200)
    params = [
        (0.5, 0.5),
        (1, 1),
        (4, 3),
        (2, 5),
        (6, 6)
    ]
    for p in params:
        y = beta.pdf(x, p[0], p[1])
        plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p)
    plt.xlabel("$\\theta$, Fairness")
    plt.ylabel("PDF")
    plt.legend(title="Parameters")
    plt.show()

    for p in params:
        y = beta.cdf(x, p[0], p[1])
        plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p)
    plt.xlabel("$\\theta$, Fairness")
    plt.ylabel("CDF")
    plt.legend(title="Parameters")
    plt.show()

    for p in params:
        y = beta.ppf(x, p[0], p[1])
        plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p)
    plt.xlabel("$\\theta$, Fairness")
    plt.ylabel("PPF")
    plt.legend(title="Parameters")
    plt.show()

Example #56
def betascores(r):
    x = np.asarray(r, dtype=float)  # np.float was removed in NumPy 1.24
    n = x.size
    x.sort()
    p = beta.cdf(x=x, a=np.arange(1, n + 1), b=np.arange(n, 0, -1))
    return p
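Each entry is the CDF of the k-th order statistic of n uniforms (which is Beta(k, n - k + 1)) evaluated at the k-th smallest input; a quick check (values illustrative):

print(betascores([0.9, 0.1, 0.4]))  # the input is sorted before scoring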
Example #57
def betacdf(j,alpha_a,beta_a,skala):
    return beta.cdf(j,alpha_a,beta_a,loc=0, scale=skala)
Example #58
        print(name, len(times[name]))
        print(correct, incorrect)
        print(np.mean(correct_times), np.mean(incorrect_times))
        #print sum([1 for c in correct_times if c >= min(incorrect_times)])/float(len(correct_times))
        #print
        max_time = max(max(correct_times),max(incorrect_times))
        min_time = min(min(correct_times),min(incorrect_times))
        data = correct_times
        data = [(t-min_time)/float(max_time-min_time) for t in data]
        #print data
        a,b,lower,scale = beta.fit(data)
        #print a,b,lower,scale
        #print
        #print beta.cdf(0.8,a,b)
        #----------------Fit using moments----------------
        mean=np.mean(data)
        var=np.var(data,ddof=1)
        alpha1=mean**2*(1-mean)/var-mean
        beta1=alpha1*(1-mean)/mean


        print(beta.cdf((incorrect_times[-1]-min_time)/(max_time-min_time), alpha1, beta1))
        print()
        #break

        #print correct_times
        #print incorrect_times


#print times.keys()
#print user_collection.find_one({"name":"kellinora"})
Example #59
def betacdf(j,alpha_a,beta_a,skala):
    return beta.cdf(j,alpha_a,beta_a,loc=0, scale=skala)

def betapdf(j,alpha_a,beta_a,skala):
    return beta.pdf(j,alpha_a,beta_a,loc=0, scale=skala)

while j < akhir:
    # simulation in seconds
    # for each video:

    if j >= multiple_of[0]*7200:

        # The original TASKS line was missing the closing parenthesis of each comprehension.
        TASKS = ([(betacdf, (j, catalog[k]['alpha'], catalog[k]['beta'], skala)) for k in range(numbervideo)]
                 + [(betapdf, (j, catalog[k]['alpha'], catalog[k]['beta'], skala)) for k in range(numbervideo)])
        
        for k in range(numbervideo):
            alpha_a=catalog[k]['alpha']
            beta_a=catalog[k]['beta']
            #compute the pdf and cdf
            cdf_a = beta.cdf(j,alpha_a,beta_a,loc=0,scale=skala)
            pdf_a = beta.pdf(j,alpha_a,beta_a,loc=0,scale=skala)
            catalog[k]['cdf'][j]=cdf_a
            catalog[k]['pdf'][j]=pdf_a
        multiple_of.pop(0)
        print(j)
    j += 1

with open('catalog.pickle', 'wb') as handle:
    pickle.dump(catalog, handle)