Example #1
  def __init__(self,sigLocal,sig0,N0):
    # Convert significance to p-value
    pLocal = norm.sf(sigLocal)
    p0 = norm.sf(sig0)
    
    # Get the test statistic value corresponding to the p-value
    u = chi2.isf(pLocal*2,1)
    u0 = chi2.isf(p0*2,1)
    
    # The main equations
    N = N0 * exp(-(u-u0)/2.)
    pGlobal = N + chi2.sf(u,1)/2.
    
    # Further info
    sigGlobal = norm.isf(pGlobal)
    trialFactor = pGlobal/pLocal

    self.sigGlobal = sigGlobal
    self.sigLocal = sigLocal
    self.sig0 = sig0
    self.pGlobal = pGlobal
    self.pLocal = pLocal
    self.p0 = p0
    self.N0 = N0
    self.N = N
    self.u0 = u0
    self.u = u
    self.trialFactor = trialFactor
Example #2
def calculate_bayes_error(model):
    """
    Returns the Bayes error of the given mixture model. This is calculated by integrating the false class density
    around the decision boundary of the model.

    :requires: the model is composed of two gaussian components!

    :param model: the mixture model of a given gene
    :type model: sklearn.mixture.GMM

    :returns: the Bayes error of the classifier as a float
    """
    #first we find the intersection point
    coeffs = model.weights_
    mus = [x[0] for x in model.means_]
    sigmas = [x[0] ** 0.5 for x in model.covars_]
    r1, r2 = findIntersection(mus[0], sigmas[0], mus[1], sigmas[1])

    root = 0
    if r1 < max(mus[0], mus[1]) and r1 > min(mus[0], mus[1]):
        root = r1
    else:
        root = r2

    #now that we have the intersection, we need the CDF/survival function of both plots
    err = 0
    if(root < mus[0]):
        err += norm.sf(root, loc=mus[1], scale=sigmas[1]) * coeffs[1]
        err += norm.cdf(root, loc=mus[0], scale=sigmas[0]) * coeffs[0]
    else:
        err += norm.sf(root, loc=mus[0], scale=sigmas[0]) * coeffs[0]
        err += norm.cdf(root, loc=mus[1], scale=sigmas[1]) * coeffs[1]

    return err #/ (norm.sf(-10000, loc=mus[0], scale=sigmas[0]) + norm.sf(-10000, loc=mus[1], scale=sigmas[1]) - err)
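
calculate_bayes_error relies on a findIntersection helper that is not shown here. Below is a minimal sketch of such a helper, assuming it returns the points where the two Gaussian densities are equal; the name and signature are taken from the call site above, not from the original source.

import numpy as np

def findIntersection(mu1, sigma1, mu2, sigma2):
    # Setting pdf(x; mu1, sigma1) == pdf(x; mu2, sigma2) and taking logs
    # gives a quadratic a*x**2 + b*x + c = 0.
    a = 1.0 / (2 * sigma2 ** 2) - 1.0 / (2 * sigma1 ** 2)
    b = mu1 / sigma1 ** 2 - mu2 / sigma2 ** 2
    c = (mu2 ** 2 / (2 * sigma2 ** 2) - mu1 ** 2 / (2 * sigma1 ** 2)
         + np.log(sigma2 / sigma1))
    if np.isclose(a, 0.0):
        # equal variances: the densities cross exactly once, at the midpoint
        root = -c / b
        return root, root
    return tuple(np.roots([a, b, c]))
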
Example #3
def normal(dist, point, twosided=True, tail='right'):
    '''hypothesis testing assuming normal distribution

    Parameters
    ----------
    dist : array-like
        empirical (null) parameter distribution
    point : array-like
        point estimate of parameter
    twosided : boolean
        if True, calculates two-sided p-values
    tail : str or array
        specify 'left' or 'right', or an array of
        such strings, for one-sided tests. 'right'
        implies a one-sided test that the point
        estimate is greater than the null

    Returns
    -------
    pvalue : array-like
        pvalues, of shape similar to point estimate
    '''
    
    if twosided:
        return 2*norm.sf(abs(point)/dist.std(axis=0))
    else:
        left_tail = norm.cdf((point)/dist.std(axis=0))
        right_tail = norm.sf((point)/dist.std(axis=0))

        pvalue = np.zeros(point.shape)
        pvalue[tail == 'left'] = left_tail[tail == 'left']
        pvalue[tail == 'right'] = right_tail[tail == 'right']
        return pvalue
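
A small usage sketch (not from the original source): dist is a bootstrap-style array of draws under the null, and the one-sided call passes an array of tail directions. It assumes numpy and scipy.stats.norm are imported where normal is defined.

import numpy as np

rng = np.random.default_rng(0)
dist = rng.normal(size=(1000, 3))            # empirical null distribution, 3 parameters
point = np.array([0.1, 2.5, -1.8])           # point estimates
print(normal(dist, point))                   # two-sided p-values
tails = np.array(['right', 'right', 'left'])
print(normal(dist, point, twosided=False, tail=tails))
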
Example #4
def calculate_rms(psfFlux, psfFluxErr):
    # signal-to-noise ratio of the observed flux
    xObs = psfFlux / psfFluxErr
    # mean of a unit-variance Gaussian centred at xObs, truncated below at zero
    xMean = (1/ (norm.sf(-xObs )*np.sqrt(2*np.pi))) * np.exp(-(xObs**2.0) / 2.0) + xObs
    delX = xObs - xMean
    # normalisation: probability mass of the Gaussian above zero
    I1 = norm.sf(-xObs)
    I0bysig2 = 0.5*erf(xObs/np.sqrt(2)) + (1.0/np.sqrt(2*np.pi))*np.exp(-(xObs**2.0) / 2.0)*(2*delX - xObs) + 0.5 + delX*delX*norm.sf(-xObs)
    xRMS = np.sqrt(I0bysig2 / I1)
    # scale back from signal-to-noise units to flux units
    return  xRMS * psfFluxErr
Example #5
def set_scale_surgauss(max_r, max_w, min_w):
    "Set the scale factor of the surgauss kernel."
    A = max_w/norm.sf(0)
    # BFGS ignores bounds; L-BFGS-B honours the non-negativity constraint on the scale
    scale = minimize(lambda x: (A*norm.sf(max_r, scale=x)-min_w)**2,
                     x0=np.array([max_r]), method='L-BFGS-B',
                     tol=1e-8, bounds=[(0, None)])
    scale = scale['x'][0]
    return scale
Example #6
def convertN(sig,sig0,N0):
  # Convert significance to p-value
  p = norm.sf(sig)
  p0 = norm.sf(sig0)
  # Get the test statistic value corresponding to the p-value
  u = chi2.isf(p*2,1)
  u0 = chi2.isf(p0*2,1)
  # The main equation
  N = N0 * exp(-(u-u0)/2.)
  return N
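
A usage sketch (illustrative numbers, not from the original source): given N0 up-crossings observed at significance sig0, convertN estimates the expected number of up-crossings at a higher local significance, from which a global p-value and trial factor follow as in Example #1. It assumes the imports the function itself relies on (exp, norm, chi2).

from math import exp
from scipy.stats import norm, chi2

N0, sig0 = 8.0, 1.0          # e.g. 8 up-crossings observed at the 1-sigma level
sigLocal = 4.0               # local significance of the largest excess
N = convertN(sigLocal, sig0, N0)
pLocal = norm.sf(sigLocal)
pGlobal = N + chi2.sf(chi2.isf(pLocal * 2, 1), 1) / 2.
print(N, pGlobal, pGlobal / pLocal)   # expected crossings, global p-value, trial factor
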
Example #7
def stouffer_liptak(pvals, sigma):
    qvals = norm.isf(pvals).reshape(len(pvals), 1)
    try:
        C = np.asmatrix(chol(sigma)).I
    except np.linalg.LinAlgError:
        # for non positive definite matrix default to z-score correction.
        z, L = np.mean(norm.isf(pvals)), len(pvals)
        sz = 1.0 / L * np.sqrt(L + 2 * np.tril(sigma, k=-1).sum())
        return norm.sf(z / sz)

    qvals = C * qvals
    Cp = qvals.sum() / np.sqrt(len(qvals))
    return norm.sf(Cp)
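
A usage sketch (not from the original source): combining three correlated p-values, assuming chol is scipy.linalg.cholesky and numpy/scipy.stats.norm are imported where stouffer_liptak is defined.

import numpy as np

pvals = [0.01, 0.04, 0.20]
sigma = np.array([[1.0, 0.5, 0.2],
                  [0.5, 1.0, 0.5],
                  [0.2, 0.5, 1.0]])
print(stouffer_liptak(pvals, sigma))    # single combined p-value
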
Example #8
    def get_score_df(self, correction_method=None):
        '''

        :param correction_method: str or None, correction method from statsmodels.stats.multitest.multipletests
         'fdr_bh' is recommended.
        :return: pd.DataFrame
        '''
        # From https://people.kth.se/~lang/Effect_size.pdf
        # Shinichi Nakagawa and Innes C. Cuthill. 2007. In Biological Reviews 82.
        X = self._get_X().astype(np.float64)
        X = X / X.sum(axis=1)
        cat_X, ncat_X = self._get_cat_and_ncat(X)
        n1, n2 = float(cat_X.shape[1]), float(ncat_X.shape[1])
        n = n1 + n2
        m1 = cat_X.mean(axis=0).A1
        m2 = ncat_X.mean(axis=0).A1
        v1 = cat_X.var(axis=0).A1
        v2 = ncat_X.var(axis=0).A1
        s_pooled = np.sqrt(((n2 - 1) * v2 + (n1 - 1) * v1) / (n - 2.))
        cohens_d = (m1 - m2) / s_pooled
        cohens_d_se = np.sqrt(((n - 1.) / (n - 3)) * (4. / n) * (1 + np.square(cohens_d)))
        cohens_d_z = cohens_d / cohens_d_se
        cohens_d_p = norm.sf(cohens_d_z)
        hedges_r = cohens_d * (1 - 3. / ((4. * (n - 2)) - 1))
        hedges_r_se = np.sqrt(n / (n1 * n2) + np.square(hedges_r) / (n - 2.))
        hedges_r_z = hedges_r / hedges_r_se
        hedges_r_p = norm.sf(hedges_r_z)

        score_df = pd.DataFrame({
            'cohens_d': cohens_d,
            'cohens_d_se': cohens_d_se,
            'cohens_d_z': cohens_d_z,
            'cohens_d_p': cohens_d_p,
            'hedges_r': hedges_r,
            'hedges_r_se': hedges_r_se,
            'hedges_r_z': hedges_r_z,
            'hedges_r_p': hedges_r_p,
            'm1': m1,
            'm2': m2,
        }, index=self.corpus_.get_terms()).fillna(0)
        if correction_method is not None:
            from statsmodels.stats.multitest import multipletests
            score_df['hedges_r_p_corr'] = 0.5
            for method in ['cohens_d', 'hedges_r']:
                score_df[method + '_p_corr'] = 0.5
                score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p_corr'] = (
                    multipletests(score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p'],
                                  method=correction_method)[1]
                )

        return score_df
Example #9
 def known_stdev(self, alpha, stdev1, stdev2):
     n1, n2, y1, y2 = self.n1, self.n2, self.y1, self.y2
     z0 = (y1 - y2) / (np.sqrt(stdev1 ** 2. / n1 + stdev2 ** 2. / n2))
     # hypothesis testing
     H1a = norm.ppf(1 - alpha / 2.) < np.abs(z0)
     H1b = norm.ppf(1 - alpha) < z0
     H1c = norm.ppf(alpha) > z0
     # p-value
     p1a = norm.sf(np.abs(z0)) * 2
     p1b = norm.sf(z0)
     p1c = norm.cdf(z0)
     c1 = y1 - y2 - norm.ppf(1 - alpha / 2.) * np.sqrt(stdev1 ** 2. / n1 + stdev2 ** 2. / n2)
     c2 = y1 - y2 + norm.ppf(1 - alpha / 2.) * np.sqrt(stdev1 ** 2. / n1 + stdev2 ** 2. / n2)
     return H1a, H1b, H1c, p1a, p1b, p1c, (c1, c2)
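
The same two-sided test can be written as a small standalone helper; this sketch (the helper name is hypothetical, not part of the original class) reproduces the statistic, two-sided p-value and confidence interval for known standard deviations.

import numpy as np
from scipy.stats import norm

def two_sample_ztest(y1, y2, n1, n2, stdev1, stdev2, alpha=0.05):
    se = np.sqrt(stdev1 ** 2 / n1 + stdev2 ** 2 / n2)
    z0 = (y1 - y2) / se                      # test statistic
    p_two_sided = 2 * norm.sf(abs(z0))       # H1: the means differ
    half_width = norm.ppf(1 - alpha / 2) * se
    return z0, p_two_sided, (y1 - y2 - half_width, y1 - y2 + half_width)

print(two_sample_ztest(y1=10.2, y2=9.7, n1=40, n2=35, stdev1=1.1, stdev2=1.3))
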
Example #10
def fdr_threshold(z_vals, alpha):
    """ return the Benjamini-Hochberg FDR threshold for the input z_vals
    
    Parameters
    ----------
    z_vals: array,
            a set of z-variates from which the FDR is computed
    alpha: float,
           desired FDR control
    
    Returns
    -------
    threshold: float,
               FDR-controlling threshold from the Benjamini-Hochberg procedure
    """
    if alpha < 0 or alpha > 1:
        raise ValueError('alpha should be between 0 and 1')
    z_vals_ = - np.sort(- z_vals)
    p_vals = norm.sf(z_vals_)
    n_samples = len(p_vals)
    pos = p_vals < alpha * np.linspace(
        .5 / n_samples, 1 - .5 / n_samples, n_samples)
    if pos.any():
        return (z_vals_[pos][-1] - 1.e-12)
    else:
        return np.inf
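
A usage sketch (simulated data, not from the original source): apply the threshold to z-values containing a handful of strong signals.

import numpy as np

rng = np.random.default_rng(1)
z_vals = rng.standard_normal(1000)
z_vals[:20] += 4.0                        # a few true signals
thr = fdr_threshold(z_vals, alpha=0.05)
print(thr, np.sum(z_vals > thr))          # BH threshold and number of detections
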
Example #11
def test_full_pvals(n=100, p=40, rho=0.3, snr=4):

    X, y, beta, active, sigma = instance(n=n, p=p, snr=snr, rho=rho)
    FS = forward_stepwise(X, y, covariance=sigma**2 * np.identity(n))

    from scipy.stats import norm as ndist
    pval = []
    completed_yet = False
    for i in range(min(n, p)):
        FS.next()
        var_select, pval_select = FS.model_pivots(i+1, alternative='twosided',
                                                  which_var=[FS.variables[-1]],
                                                  saturated=False,
                                                  burnin=2000,
                                                  ndraw=8000)[0]
        pval_saturated = FS.model_pivots(i+1, alternative='twosided',
                                         which_var=[FS.variables[-1]],
                                         saturated=True)[0][1]

        # now, nominal ones

        LSfunc = np.linalg.pinv(FS.X[:,FS.variables])
        Z = np.dot(LSfunc[-1], FS.Y) / (np.linalg.norm(LSfunc[-1]) * sigma)
        pval_nominal = 2 * ndist.sf(np.fabs(Z))
        pval.append((var_select, pval_select, pval_saturated, pval_nominal))
            
        if set(active).issubset(np.array(pval)[:,0]) and not completed_yet:
            completed_yet = True
            completion_index = i + 1

    return X, y, beta, active, sigma, np.array(pval), completion_index
Example #12
def moran_KP(w, u, sig2i):
    """
    Calculates Moran-flavoured tests 

    Parameters
    ----------

    w           : W
                  PySAL weights instance aligned with y
    u           : array
                  nx1 array of naive residuals
    sig2i       : array
                  nx1 array of individual variance               
    """
    try:
        w = w.sparse
    except AttributeError:  # w is already a sparse matrix
        pass
    moran_num = np.dot(u.T, (w * u))
    E = SP.lil_matrix(w.get_shape())
    E.setdiag(sig2i.flat)
    E = E.asformat('csr')
    WE = w * E
    moran_den = np.sqrt(np.sum((WE * WE + (w.T * E) * WE).diagonal()))
    moran = float(1.0 * moran_num / moran_den)
    moran = np.array([moran, norm.sf(abs(moran)) * 2.])
    return moran
Example #13
def combine_p_values(the_p_values, method='z', default_quantile=7.):
    """Combines p-values from repeat measurements into a single
    p-value.

    the_p_values: a list of p-values.

    method: String. 'z'|'fisher'.  'z' for using the weighted z-score.
    'fisher' for using fisher's combined probability test.

    default_quantile: Float.  Only used for z method.  The quantile to
    use when the software's normal inverse cdf(p-value) is infinite
    """
    if len(the_p_values) == 1 or sum(the_p_values) == 0:
        combined_p_value = sum(the_p_values)
        
    elif method.lower() == 'z':
        #combine p-values using the weighted z-score.  To avoid dealing with infinite
        #values, replace them with default_quantile
        the_quantiles = []
        for the_p in the_p_values:
            the_quantile = norm.ppf(1.-the_p)
            if isinf(the_quantile):
                the_quantile = default_quantile
            the_quantiles.append(the_quantile)
        combined_p_value = norm.sf(sum(the_quantiles) / len(the_quantiles)**0.5)
    elif method.lower() == 'fisher':
        combined_p_value = 1-chi2.cdf(-2*sum(map(log,
                                                    the_p_values)),
                                         2*len(the_p_values))


    return combined_p_value
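
A usage sketch (illustrative values, not from the original source); it assumes the imports the function itself relies on (norm and chi2 from scipy.stats, log and isinf from math) are in scope.

the_p_values = [0.03, 0.20, 0.07]
print(combine_p_values(the_p_values, method='z'))        # weighted z-score combination
print(combine_p_values(the_p_values, method='fisher'))   # Fisher's combined test
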
Example #14
    def finalizeSampling(self):
        #from scikits.talkbox.tools.correlations import  acorr
        from pyhrf.stats import acorr
        if 0 and self.smplHistory is not None:
            logger.info('Compute autocorrelation of %s samples, shape=%s',
                        self.name, str(self.smplHistory.shape))
            trajectory = self.smplHistory[self.samplerEngine.nbSweeps::2]
            self.autocorrelation = acorr(trajectory)
            sn = trajectory.shape[0] ** .5
            t95 = 1.959963984540054 / sn
            self.autocorrelation_test = np.zeros(self.autocorrelation.shape,
                                                 dtype=np.int32)
            self.autocorrelation_test[np.where(self.autocorrelation > t95)] = 1
            self.autocorrelation_test[
                np.where(self.autocorrelation < -t95)] = -1

            self.autocorrelation_thresh = t95

            from scipy.stats import norm
            self.autocorrelation_pvalue = np.zeros(self.autocorrelation.shape,
                                                   dtype=np.float32)
            m_pos = np.where(self.autocorrelation > 0)
            if len(m_pos[0]) > 0:
                ac_pos = self.autocorrelation[m_pos]
                self.autocorrelation_pvalue[m_pos] = norm.sf(ac_pos * sn)

            m_neg = np.where(self.autocorrelation < 0)
            if len(m_neg[0]) > 0:
                ac_neg = self.autocorrelation[m_neg]
                self.autocorrelation_pvalue[m_neg] = norm.cdf(ac_neg * sn)

            logger.info('Compute posterior median for %s ', self.name)
            self.median = np.median(self.smplHistory, axis=0)

        self.check_final_value()
Example #15
    def fdr(self, theta):
        """Given a threshold theta, find the estimated FDR

        Parameters
        ----------
        theta : float or array of shape (n_samples)
            values to test

        Returns
        -------
        efp : float or array of shape (n_samples)
            the estimated FDR evaluated at theta
        """
        from scipy.stats import norm
        self.fdrcurve()
        if np.isscalar(theta):
            if theta > self.sorted_x[ - 1]:
                return 0
            maj = np.where(self.sorted_x >= theta)[0][0]
            efp = (self.p0 * norm.sf(theta, self.mu, self.sigma) * self.n\
                  / np.sum(self.x >= theta))
            efp = np.maximum(self.sorted_fdr[maj], efp)
        else:
            efp = []
            for th in theta:
                if th > self.sorted_x[ - 1]:
                    efp.append(0)
                    continue
                maj = self.sorted_fdr[np.where(self.sorted_x >= th)[0][0]]
                efp.append(np.maximum(maj, self.p0 * norm.sf(th, self.mu,
                           self.sigma) * self.n / np.sum(self.x >= th)))
            efp = np.array(efp)
            #
        efp = np.minimum(efp, 1)
        return efp
Example #16
def s_to_p(s):
    """Convert significance to one-sided tail probability.

    Parameters
    ----------
    s : array_like
        Significance

    Returns
    -------
    p : ndarray
        One-sided tail probability

    See Also
    --------
    p_to_s, s_to_p_limit

    Examples
    --------
    >>> s_to_p(0)
    0.5
    >>> s_to_p(1)
    0.15865525393145707
    >>> s_to_p(3)
    0.0013498980316300933
    >>> s_to_p(5)
    2.8665157187919328e-07
    >>> s_to_p(10)
    7.6198530241604696e-24
    """
    from scipy.stats import norm
    return norm.sf(s)
Example #17
 def pvalues(self):
     """
     (array) The p-values associated with the z-statistics of the
     coefficients. Note that the coefficients are assumed to have a Normal
     distribution.
     """
     return norm.sf(np.abs(self.zvalues)) * 2
Example #18
def test_z_score():
    p = np.random.rand(10)
    assert_array_almost_equal(norm.sf(z_score(p)), p)
    # check the numerical precision
    for p in [1.e-250, 1 - 1.e-16]:
        assert_array_almost_equal(z_score(p), norm.isf(p))
    assert_array_almost_equal(z_score(np.float32(1.e-100)), norm.isf(1.e-300))
Example #19
def rescore(df, tol_ms1=10, tol_ms2=20):
    df = df[(df['Precursor m/z Error (ppm)'] < tol_ms1) & (df['Precursor m/z Error (ppm)'] > -tol_ms1)]
    ppmArray = []
    for i, x in df.iterrows():
        t = eval(x['Theoretical Products'])
        m = eval(x['Nearest Matches'])
        p = [ (e[0] - e[1]) * 1e6 / e[1] for e in zip(m, t)]
        ppmArray.append(p)

    df['Nearest Matches (ppm)'] = ppmArray

    target_fits = _calc_parameters(df, decoy=False, tol_ms2=tol_ms2)
    decoy_fits  = _calc_parameters(df, decoy=True, tol_ms2=tol_ms2)
    
    # print target_fits
    # print decoy_fits

    target_frac_mean, target_frac_std = target_fits[0]
    target_ms1_mean, target_ms1_std = target_fits[1]
    target_ms2_mean, target_ms2_std = target_fits[2]

    decoy_frac_mean, decoy_frac_std = decoy_fits[0]

    ms2_L_limit = target_ms2_mean - target_ms2_std * 2
    ms2_R_limit = target_ms2_mean + target_ms2_std * 2

    sys.stderr.write("MS1_SD:%.3f  MS2_SD:%.3f\n" % (target_ms1_std, target_ms2_std) )

    csvout.writerow(df.columns)
    for i, x in df.iterrows():
        # sub score S1: p-value of precursor mass error
        ms1Score = 2 * norm.sf(abs(x['Precursor m/z Error (ppm)'] - target_ms1_mean) / target_ms1_std)
        
        # sub score S2: counts of matched fragment peaks and complementary pairs.
        matching = [ms2_L_limit < p < ms2_R_limit for p in x['Nearest Matches (ppm)']]
        matchingScore = matching.count(True)
        # count complementary fragment pairs (consecutive even/odd indices both matched)
        for b in range(0, len(matching) - 1, 2):
            if matching[b] and matching[b + 1]:
                matchingScore += 1


        # sub score S3: probability ratio of ion intensity fraction
        frac = x['Fraction of Intensity Matching']
        fracScore = frac # use the value from original Morpheus result
        
        frac = numpy.log(frac)
        frac_pdf_decoy = norm.pdf((frac - decoy_frac_mean)/decoy_frac_std)
        frac_pdf_target = norm.pdf((frac - target_frac_mean)/target_frac_std)
        fracScore = (frac_pdf_target - frac_pdf_decoy) / (frac_pdf_target + frac_pdf_decoy)
        
        # print i, matchingScore, ms1Score, fracScore

        # final PSM score
        if ms1Score >= 0.0001:
            x['Morpheus Score'] = matchingScore + fracScore + ms1Score
        else:
            x['Morpheus Score'] = 0

        csvout.writerow(x.values)
Example #20
def test_sequentially_constrained():
    S = -np.identity(10)[:3]
    b = -6 * np.ones(3)
    C = constraints(S, b)
    W = sample(C, 5000, temps=np.linspace(0, 200, 1001))
    U = np.linspace(0, 1, 101)
    D = sm.distributions.ECDF((ndist.cdf(W[0]) - ndist.cdf(6)) / ndist.sf(6))
    plt.plot(U, D(U))
Example #21
 def slopes_z_stat(self):
     if 'slopes_z_stat' not in self._cache:
         zStat = self.slopes.reshape(len(self.slopes),)/self.slopes_std_err
         rs = {}
         for i in range(len(self.slopes)):
             rs[i] = (zStat[i],norm.sf(abs(zStat[i]))*2)
         self._cache['slopes_z_stat'] = rs.values()
     return self._cache['slopes_z_stat']    
Example #22
def _dpln_pdf(x, alpha, beta, nu, tau2):
    A1 = np.exp(alpha * nu + alpha**2 * tau2/2)
    A2 = np.exp(-beta*nu + beta**2 * tau2/2)
    term1 = A1 * x**(-alpha-1) * \
        norm.cdf((np.log(x)-nu-alpha * tau2)/np.sqrt(tau2))
    term2 = A2*x**(beta-1) * \
        norm.sf((np.log(x)-nu+beta*tau2)/np.sqrt(tau2))
    return alpha*beta/(alpha+beta)*(term2+term1)
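
A quick numerical sanity check (not from the original source): the double Pareto-lognormal density above should integrate to roughly one over (0, inf). Parameter values are illustrative; numpy and scipy.stats.norm are assumed to be imported where _dpln_pdf is defined.

import numpy as np
from scipy.integrate import quad

total, _ = quad(_dpln_pdf, 0, np.inf, args=(2.0, 1.5, 0.0, 0.5))
print(total)   # expect a value close to 1.0
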
Example #23
def z_score_combine(pvals, sigma):
    L = len(pvals)
    pvals = np.array(pvals, dtype=np.float64)
    pvals[pvals == 1] = 1.0 - 9e-16
    z = np.mean(norm.isf(pvals, loc=0, scale=1))
    sz = 1.0 /L * np.sqrt(L + 2 * np.tril(sigma, k=-1).sum())
    res = {'p': norm.sf(z/sz), 'OK': True}
    return res
Example #24
def zscore_cluster(formula, methylations, covs, coef, robust=False):
    r = _combine_cluster(formula, methylations, covs, coef)
    z, L = np.mean(norm.isf(r["p"])), len(r["p"])
    sz = 1.0 / L * np.sqrt(L + 2 * np.tril(r["corr"], k=-1).sum())
    r["p"] = norm.sf(z / sz)
    r["t"], r["coef"] = r["t"].mean(), r["coef"].mean()
    r.pop("corr")
    return r
Example #25
 def inteFun(p):
     '''Function for integration'''
     #print('inteFun: %.4f'%(p))
     #print(ZbOfProp(p))
     #print('density: %.4f'%(norm.pdf(p, loc=aprop, scale=sd)))
     power = 1 - norm.sf(ZbOfProp(p))
     #print('power: %.4f'%(power))
     return  power * norm.pdf(p, loc=aprop, scale=sd)
Example #26
def p_label(bhattacharya,dist_truth,dist_measuring):
    bcc = "BCC: {:.3g}".format(1-bhattacharya)
    mu,sigma = np.mean(dist_truth),np.std(dist_truth)
    z_scores = (dist_measuring-mu)/sigma
    p_values = norm.sf(abs(z_scores))*2 #twosided
    p_value = np.mean(p_values)
    p_value_label = r"$<p_{\mathrm{i.i.d.}}>$"
    to_ret = "{:s}\n".format(bcc) + p_value_label + ":{:.2g}".format(p_value)
    return to_ret
Example #27
 def z_stat(self):
     if 'z_stat' not in self._cache:
         variance = self.vm.diagonal()
         zStat = self.betas.reshape(len(self.betas),) / np.sqrt(variance)
         rs = {}
         for i in range(len(self.betas)):
             rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2)
         self._cache['z_stat'] = rs.values()
     return self._cache['z_stat']
Example #28
def get_zI(I, ei, vi):
    """
    Standardized I

    Returns two-sided p-values as provided in the GeoDa family
    """
    z = abs((I - ei) / np.sqrt(vi))
    pval = norm.sf(z) * 2.
    return (z, pval)
Example #29
 def testUnifSpaceParamsWithShiftAndStretch(self):
     pts, weights = gt.unif_spaced_param(15, -7.0, 7.0, 1, 2, False)
     for pt, weight in zip(pts, weights):
         if np.isclose(pt,-7.0):
             self.assertTrue(np.isclose(weight, norm.cdf(-6.5, loc=1, scale=2)))
         elif np.isclose(pt,7.0):
             self.assertTrue(np.isclose(weight, norm.sf(6.5, loc=1, scale=2)))
         else:
             self.assertTrue(np.isclose(weight, norm.cdf(pt+0.5, loc=1, scale=2) -norm.cdf(pt-0.5, loc=1, scale=2)))
Example #30
def ci_test_gauss(data_matrix, x, y, s, **kwargs):

    assert 'corr_matrix' in kwargs
    cm = kwargs['corr_matrix']
    n = data_matrix.shape[0]

    z = zstat(x, y, list(s), cm, n)
    p_val = 2.0 * norm.sf(np.absolute(z))
    return p_val
Example #31
def test_bimodality(x, bins=30, kde=True, plot=False):
    """Test for bimodal distribution."""
    from scipy.stats import gaussian_kde, norm

    lb, ub = np.min(x), np.percentile(x, 99.9)
    grid = np.linspace(lb, ub if ub <= lb else np.max(x), bins)
    kde_grid = (
        gaussian_kde(x)(grid) if kde else np.histogram(x, bins=grid, density=True)[0]
    )

    idx = int(bins / 2) - 2
    idx += np.argmin(kde_grid[idx : idx + 4])

    peak_0 = kde_grid[:idx].argmax()
    peak_1 = kde_grid[idx:].argmax()
    kde_peak = kde_grid[idx:][
        peak_1
    ]  # min(kde_grid[:idx][peak_0], kde_grid[idx:][peak_1])
    kde_mid = kde_grid[idx:].mean()  # kde_grid[idx]

    t_stat = (kde_peak - kde_mid) / np.clip(np.std(kde_grid) / np.sqrt(bins), 1, None)
    p_val = norm.sf(t_stat)

    grid_0 = grid[:idx]
    grid_1 = grid[idx:]
    means = [
        (grid_0[peak_0] + grid_0[min(peak_0 + 1, len(grid_0) - 1)]) / 2,
        (grid_1[peak_1] + grid_1[min(peak_1 + 1, len(grid_1) - 1)]) / 2,
    ]

    if plot:
        color = "grey"
        if kde:
            pl.plot(grid, kde_grid, color=color)
            pl.fill_between(grid, 0, kde_grid, alpha=0.4, color=color)
        else:
            pl.hist(x, bins=grid, alpha=0.4, density=True, color=color)
        pl.axvline(means[0], color=color)
        pl.axvline(means[1], color=color)
        pl.axhline(kde_mid, alpha=0.2, linestyle="--", color=color)
        pl.show()

    return t_stat, p_val, means  # ~ t_test (reject unimodality if t_stat > 3)
Example #32
def spiegelhalter(y_true, y_score):
    import numpy as np
    from scipy.stats import norm

    try:
        if type(y_true) is not np.ndarray:
            y_true = y_true.values.ravel()
        top = np.sum((y_true - y_score) * (1 - 2 * y_score))
        bot = np.sum((1 - 2 * y_score)**2 * y_score * (1 - y_score))
        sh = top / np.sqrt(bot)

        # https://en.wikipedia.org/wiki/Z-test
        # Two-tailed test
        # Re: p-value, higher the better Goodness-of-Fit
        p_value = norm.sf(np.abs(sh)) * 2

        return p_value
    except:
        return 0
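
A usage sketch (simulated data, not from the original source): well-calibrated probabilities should not give a systematically small p-value, while badly calibrated ones should.

import numpy as np

rng = np.random.default_rng(2)
y_score = rng.uniform(0.05, 0.95, size=5000)
y_true = (rng.uniform(size=5000) < y_score).astype(float)   # labels drawn from the scores
print(spiegelhalter(y_true, y_score))          # typically not small: no miscalibration
print(spiegelhalter(y_true, y_score * 0.5))    # typically very small: systematic miscalibration
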
Example #33
def age_stratification(sextable, sex_assign):
    by_age = sorted((z for z in sextable if z[8] is not None),
                    key=operator.itemgetter(8))
    mid_age = int(by_age[1 + len(by_age) // 2][8])
    young_m = 0
    young_f = 0
    old_m = 0
    old_f = 0
    for i, _a in enumerate(by_age):
        age = _a[8]
        if age is None:
            continue
        if age < mid_age:
            if sex_assign[i] == 0:
                young_m += 1
            elif sex_assign[i] == 1:
                young_f += 1
        else:
            if sex_assign[i] == 0:
                old_m += 1
            elif sex_assign[i] == 1:
                old_f += 1

    # two proportion z-test, normal approx., one sided
    n1 = young_m + young_f
    n2 = old_m + old_f
    p1 = float(young_m) / n1
    p2 = float(old_m) / n2
    phat = (p1 * n2 + p2 * n1) / (n1 + n2)
    z = (p2 - p1) / math.sqrt(phat * (1.0 - phat) * (1.0 / n1 + 1.0 / n2))
    if z < 0:
        pval = 1.0
    else:
        pval = norm.sf(z)
    #print(n1,n2,p1,p2,phat,z,pval)

    print(
        "{} ({} m, {} f) are younger than {} yBP\n{} ({} m, {} f) are the same age or older than {} yBP"
        .format(young_m + young_f, young_m, young_f, mid_age, old_m + old_f,
                old_m, old_f, mid_age))
    print(
        "Do older samples have a greater male bias? z={:.2f}, p={:.2f}".format(
            z, pval))
Example #34
def scatter_plot(args, ps_tc_results, mpbs_name_list, conditions):
    tf_activity_score1 = np.zeros(len(mpbs_name_list))
    tf_activity_score2 = np.zeros(len(mpbs_name_list))

    for i, mpbs_name in enumerate(mpbs_name_list):
        tf_activity_score1[i] = float(ps_tc_results[i][0][0]) + float(
            ps_tc_results[i][1][0])
        tf_activity_score2[i] = float(ps_tc_results[i][0][1]) + float(
            ps_tc_results[i][1][1])

    tf_activity_score = np.subtract(tf_activity_score2, tf_activity_score1)
    z_score = zscore(tf_activity_score)
    p_values = norm.sf(abs(z_score)) * 2

    # add TF activity score, z score and p values to the result dictionary
    for i, mpbs_name in enumerate(mpbs_name_list):
        ps_tc_results[i].append(
            [tf_activity_score[i], z_score[i], p_values[i]])

    # plot TF activity score
    x_axis = np.random.uniform(low=-0.1, high=0.1, size=len(p_values))

    fig, ax = plt.subplots(figsize=(10, 12))
    for i, mpbs_name in enumerate(mpbs_name_list):
        if p_values[i] < args.fdr:
            ax.scatter(x_axis[i], tf_activity_score[i], c="red")
            ax.annotate(mpbs_name, (x_axis[i], tf_activity_score[i]),
                        alpha=0.6)
        else:
            ax.scatter(x_axis[i], tf_activity_score[i], c="black", alpha=0.6)
    ax.margins(0.05)
    ax.set_xticks([])

    ax.set_ylabel("Activity Score \n {} $\longleftrightarrow$ {}".format(
        conditions[0], conditions[1]),
                  rotation=90,
                  fontsize=20)

    figure_name = os.path.join(args.output_location,
                               "{}_statistics.pdf".format(args.output_prefix))
    fig.savefig(figure_name, format="pdf", dpi=300)

    return ps_tc_results
Example #35
    def generate_intervals(self):

        X2, Y2 = self.X2[:, self.active_set], self.Y2
        if len(self.active_set) > 0 and len(self.active_set) < X2.shape[0]:
            s = len(self.active_set)
            X2i = np.linalg.inv(X2.T.dot(X2))
            beta2 = X2i.dot(X2.T.dot(Y2))
            resid2 = Y2 - X2.dot(beta2)
            n2 = X2.shape[0]
            sigma2 = np.sqrt((resid2**2).sum() / (n2 - s))
            alpha = 1 - self.confidence
            Z_quant = ndist.ppf(1 - alpha / 2)
            upper = beta2 + Z_quant * np.sqrt(sigma2**2 * np.diag(X2i))
            lower = beta2 - Z_quant * np.sqrt(sigma2**2 * np.diag(X2i))
            Zval = np.fabs(beta2) / np.sqrt(sigma2**2 * np.diag(X2i))
            pval = 2 * ndist.sf(Zval)
            return self.active_set, lower, upper, pval
        else:
            return [], [], [], []
Example #36
def calc_delong(preds1, preds2, stat, auc1=None, auc2=None):
    """Calculates the one-sided version of DeLong's test statistic.

    Args:
        preds1, preds2 (np.array)
            Vectors of continuous predicted labels. This function tests to
            what extent we can reject the hypothesis that `preds1` does not
            better predict the ground truth labels than `preds2`.

        stat (np.array): The ground truth binary class labels.
        auc1, auc2 (float, optional)
            Pre-computed AUCs can be given if possible which will save time.

    Returns:
        delong_val (float)

    """
    strc1 = np.greater.outer(preds1[stat], preds1[~stat]).astype(float)
    strc1 += 0.5 * np.equal.outer(preds1[stat], preds1[~stat]).astype(float)
    strc2 = np.greater.outer(preds2[stat], preds2[~stat]).astype(float)
    strc2 += 0.5 * np.equal.outer(preds2[stat], preds2[~stat]).astype(float)

    if auc1 is None:
        auc1 = strc1.mean()
    if auc2 is None:
        auc2 = strc2.mean()

    mut_n, wt_n = strc1.shape
    vvecs1 = strc1.mean(axis=1), strc1.mean(axis=0)
    vvecs2 = strc2.mean(axis=1), strc2.mean(axis=0)

    smat1 = [[((vv_i[0] - auc_i) * (vv_j[0] - auc_j)).sum() / (mut_n - 1)
              for vv_j, auc_j in zip([vvecs1, vvecs2], [auc1, auc2])]
             for vv_i, auc_i in zip([vvecs1, vvecs2], [auc1, auc2])]
    smat2 = [[((vv_i[1] - auc_i) * (vv_j[1] - auc_j)).sum() / (wt_n - 1)
              for vv_j, auc_j in zip([vvecs1, vvecs2], [auc1, auc2])]
             for vv_i, auc_i in zip([vvecs1, vvecs2], [auc1, auc2])]

    smat = np.array(smat1) / strc1.shape[0] + np.array(smat2) / strc1.shape[1]
    z_scr = (auc1 - auc2) / np.sqrt(smat[0, 0] + smat[1, 1] - 2 * smat[1, 0])

    return norm.sf(z_scr)
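
A usage sketch (simulated data, not from the original source): stat must be a boolean array of ground-truth labels; a small returned value favours preds1 over preds2.

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(3)
stat = rng.uniform(size=500) < 0.4                     # ground-truth labels (boolean)
signal = stat.astype(float)
preds1 = signal + rng.normal(scale=1.0, size=500)      # more informative predictor
preds2 = signal + rng.normal(scale=2.0, size=500)      # noisier predictor
print(calc_delong(preds1, preds2, stat))
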
Example #37
def dailyfc_visual(files):

    for onefile in files:
        lfpdata, chnAreas, fs = lfp_extract([onefile])

        if lfpdata.shape[2] < 80:
            continue

        print(onefile)
        ciCOHs = calc_ciCOHs_rest(lfpdata)

        # permutation test: use the lfp data whose ciCOHs are the largest to get the null distribution
        [i, j] = np.unravel_index(np.argmax(ciCOHs), shape=ciCOHs.shape)
        lfp1, lfp2 = lfpdata[i, :, :], lfpdata[j, :, :]
        _, mu, std = pval_permciCOH_rest(lfp1,
                                         lfp2,
                                         ciCOHs[i, j],
                                         shuffleN=1000)
        pvals = norm.sf(abs(ciCOHs), loc=mu, scale=std) * 2

        # multiple comparison correction, get weights
        reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
        [rows, cols] = np.where(reject == True)
        weight = np.zeros(ciCOHs.shape)
        if len(rows) > 0:
            weight[rows, cols] = ciCOHs[rows, cols]

        # visual and save
        filename = os.path.basename(onefile)
        datestr = re.search('[0-9]{8}', filename).group()
        cond = re.search('_[a-z]*_[0-9]{8}', filename).group()[1:-9]

        save_prefix = 'all'
        saveFCGraph = os.path.join(
            savefolder, cond + '_' + save_prefix + '_' + datestr + '.png')
        weight_visual_save(weight,
                           chnInf=assign_coord2chnArea(
                               area_coord_file=area_coord_file,
                               chnAreas=chnAreas),
                           savefile=saveFCGraph,
                           texts=None,
                           threds_edge=None)
Example #38
def fdr( zscores, q=.1, cV=1, invert_zscores=False, mask=None ):
    """
    Adapted from https://brainder.org/2011/09/05/fdr-corrected-fdr-adjusted-p-values/
    using a default value of cV
    """
    
    if mask is None:
        mask = np.ones(zscores.shape, dtype=bool)

    inv = -1 if invert_zscores else 1
    zscores = inv * zscores
    
    mask *= (zscores != 0)
    zscores = zscores[mask]
    pvals = norm.sf(zscores)
    
    oidx = np.argsort( pvals )
    pvals = pvals[oidx]
    
    V = pvals.size
    idx = np.arange(1, V+1)
    thrline = idx * q / ( V * cV )
    
    select = pvals <= thrline
    if len(pvals[select]):
        thr = np.max(pvals[select])
        zthr = zscores[oidx][select][-1] * inv
    else:
        thr = None
        zthr = None
        
    pcor = pvals * V * cV / idx
    oidx_r = np.argsort(oidx)
        
    padj = np.zeros(len(pvals))
    prev = 1
    
    for i in idx[::-1]:
        padj[i-1] = np.min( [prev, pvals[i-1] * V * cV / i] )
        prev = padj[i-1]
        
    return thr, zthr, pvals, thrline, pcor, padj
Example #39
    def __soft_shuffle(self, aln, shuffle):
        """
    Soft shuffles a given alignment aln and calculates the z-score based p-value
    for the energy of the consensus sequence. The soft shuffle applies at least 0.1*len(aln) and
    at most 0.4*len(aln) changes to the alignment.

    Keyword arguments:
    aln -- query alignment that has to be shuffled
    shuffle -- determines how many different sequences are generated
    (the mfe and covariance score of the consensus secondary structure are
    computed internally via __rna_alifold)
    """
        aln = [str(x.seq) for x in aln]
        mfe, covar, structure = self.__rna_alifold(aln)
        aln = list(map(list, aln))

        wi = len(aln[0])
        min_shuffle = int(wi * 0.1)

        mfes = [mfe - covar]

        for i in range(0, shuffle):
            # print("z: {}".format(i))
            k = random.randint(min_shuffle, min_shuffle + int(
                (wi - min_shuffle) * 0.45))
            ary = np.array(aln).T

            for j in range(0, k):
                p = random.sample(range(wi), 2)
                tmp = np.copy(ary[p[0]])
                ary[p[0]] = ary[p[1]]
                ary[p[1]] = tmp

            new_ary = list(map("".join, ary.T))
            mfe, covar, structure = self.__rna_alifold(new_ary)
            mfes.append(mfe - covar)

        a = np.array(mfes)
        z = zscore(a)[0]
        p_values = norm.sf(abs(z)) * 2

        return z, p_values
Example #40
    def score(self, X, nbhds, nn_matrix=None):
        k = len(nbhds[0])

        super().score(X, nbhds)  # handle multi-test factor determination
        # Wilcoxon rank sum test
        # overall_exprs = X.todense().transpose().tolist()

        n_genes = X.shape[1]

        if nn_matrix is None:
            nn_matrix = to_sparse_adjacency(nbhds, n_cells=X.shape[0])

        wts = rankdata(X.todense(), axis=0)  # gene rankings
        wts = nn_matrix @ wts  # nbhd_ranksums; only want to store one big matrix

        n1 = k
        n2 = X.shape[0] - k
        sd = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0)
        meanrank = n1 * n2 / 2.0

        # #sign is pos if mean rank is higher than average, negative otherwise.
        # signs = 2*(wts >= meanrank).astype('int') - 1

        wts = wts - ((n1 * (n1 + 1)) / 2.0)  # calc U for x, u1

        is_neg = (wts < meanrank)  # remember where it was negative

        wts = np.maximum(wts, n1 * n2 - wts)  # bigu

        wts = ((wts - meanrank) / sd)  # z values

        wts = 2 * norm.sf(np.abs(wts))  # p values

        if self.corrector is not None:
            wts = self.corrector.correct(wts)

        wts = -1 * np.log(wts)  # convert to info scores

        # sign them
        wts[is_neg] *= -1

        return (csr_matrix(wts))
Example #41
def test_multi_cluster_stats():
    shape = (9, 10, 11)
    data = np.random.randn(*shape)
    threshold = norm.sf(data.max() + 1)
    data[2:4, 5:7, 6:8] = np.maximum(10, data.max() + 2)
    data[6:7, 8:9, 9:10] = np.maximum(11, data.max() + 1)
    stat_img = nib.Nifti1Image(data, np.eye(4))
    mask_img = nib.Nifti1Image(np.ones(shape), np.eye(4))

    # test 1
    clusters, _ = cluster_stats(stat_img,
                                mask_img,
                                threshold,
                                height_control='fpr',
                                cluster_th=0)
    assert_true(len(clusters) == 2)
    cluster = clusters[1]
    assert_true(cluster['size'] == 1)
    assert_array_almost_equal(cluster['z_score'], 11)
    assert_array_almost_equal(cluster['maxima'], np.array([[6, 8, 9]]))
Example #42
def percentile_from_sigma(sigma, lower):
    """
    Converts a limit in standard deviation into the corresponding
    percentile. This function assumes a two sided interval,
    e.g., sigma == 2 and lower == False will return 0.977.

    Arguments:
        sigma {float} -- Number of standard deviations
        lower {bool} -- If lower == True returns the lower percentile,
        if False the upper percentile will be returned

    Returns:
        float -- The percentile as a value between 0 and 1
    """
    percentile = -1
    if lower:
        percentile = norm.sf(sigma)
    else:
        percentile = norm.cdf(sigma)
    return percentile
Example #43
def gen_correlated(sigma, n, observed=None):
    """
    generate autocorrelated data according to the matrix
    sigma. if observed is None, then data will be sampled from
    the uniform distribution. Otherwise, it will be sampled
    from observed, which should contain *all* observed
    p-values.
    """
    C = np.matrix(chol(sigma))
    if observed is None:
        X = np.random.uniform(0, 1, size=(n, sigma.shape[0]))
    else:
        assert n * sigma.shape[0] < observed.shape[0]
        idxs = np.random.randint(0, len(observed),
                                 size=sigma.shape[0] * n)
        X = observed[idxs].reshape((n, sigma.shape[0]))

    Q = np.matrix(qnorm(X))
    for row in  np.array(1 - norm.sf((Q * C).T)).T:
        yield row
Example #44
def pval_perm_dynciCOH_SKT(dynciCOH, lfptrials):
    """
        pvalues using permutation test for dynamic ciCOHs

        Arg:
            dynciCOH: dynamic ciCOHs [nchns * nchns * ntemp]

            lfptrials: the lfp trial data used for calculating the dynciCOH

        Return:
            pvals: p-value for each value in dynciCOH, shape = dynciCOH.shape
    """

    #
    [i, j, _] = np.unravel_index(np.argmax(dynciCOH), shape=dynciCOH.shape)
    lfp1, lfp2 = lfptrials[i, :, :], lfptrials[j, :, :]
    mu, std = permdist_dynciCOH_SKT(lfp1, lfp2, shuffleN=100)
    pvals = norm.sf(abs(dynciCOH), loc=mu, scale=std) * 2

    return pvals
Example #45
def plot_1b(eb_n0_dB: array):
    """
    Plots capacity vs Eb/N0 for 1 bit hard quantization
    """
    eb_n0 = 10**(eb_n0_dB / 10)

    p = norm.sf(sqrt(2 * eb_n0))
    h2 = -p * log2(p) - (1 - p) * log2(1 - p)
    c = 1 - h2

    ax = plt.subplot(111)

    ax.plot(eb_n0_dB, c, label="1 bit")
    plt.ticklabel_format(style='plain', axis='x', scilimits=(0, 0))
    ax.set_xlim(min(eb_n0_dB), max(eb_n0_dB))
    #    ax.set_ylim(0,1.05)
    ax.set_xlabel("Eb/N0 [dB]")
    ax.set_ylabel("Channel capacity [bits per channel use]")
    ax.grid()
    return ax
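
Here p = norm.sf(sqrt(2*eb_n0)) is the hard-decision (BPSK) crossover probability, so the channel behaves as a binary symmetric channel with capacity 1 - H2(p). A usage sketch (not from the original source), assuming the numpy/scipy/matplotlib names the function uses are in scope:

import numpy as np
import matplotlib.pyplot as plt

ax = plot_1b(np.linspace(-2.0, 10.0, 200))
ax.legend()
plt.show()
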
Example #46
def bh_graph(n=5000, alpha=0.1):
	rhos = np.arange(0, 1.05, 0.05)
	means = np.zeros(len(rhos))
	stds = np.zeros(len(rhos))

	for j, rho in enumerate(rhos):
		print(rho)
		q = np.zeros(n)
		for i in range(n):
			x = generate_data(rho=rho)
			p_vals = norm.sf(x)
			rejected = fdr_bh(p_vals, alpha)
			q[i] = len(rejected[rejected < m0]) / max(len(rejected), 1)
		means[j] = np.mean(q)
		stds[j] = np.std(q)

	plt.errorbar(rhos, means, stds, linestyle='None', marker='^')
	plt.xticks(rhos)
	plt.title("mean and standard deviation as a function of rho")
	plt.show()
Example #47
def test_BH_procedure():

    def BH_cutoff():
        Z = np.random.standard_normal(100)

        BH = stepup.BH(Z,
                       np.identity(100),
                       1.)

        cutoff = BH.stepup_Z / np.sqrt(2)
        return cutoff
    
    BH_cutoffs = BH_cutoff()

    for _ in range(50):
        Z = np.random.standard_normal(100)
        Z[:20] += 3

        np.testing.assert_allclose(sorted(BHfilter(2 * ndist.sf(np.fabs(Z)), q=0.2)),
                                   sorted(stepup_selection(Z, BH_cutoffs)[1]))
Example #48
def cpt_ppm_a_norm(mean, variance, alpha=0.):
    """ Compute a Posterior Probability Map (fixed alpha) by assuming a Gaussian
    distribution.

    Parameters
    ----------
    mean : array_like
        mean value(s) of the Gaussian distribution(s)
    variance : array_like
        variance(s) of the Gaussian distribution(s)
    alpha : array_like, optional
        quantile value(s) (default=0)

    Returns
    -------
    ppm : array_like
        Posterior Probability Map evaluated at alpha
    """

    return norm.sf(alpha, mean, variance**.5)
Example #49
def test_independent_estimator(n=100, n1=50, q=0.2, signal=3, p=100):

    Z = np.random.standard_normal((n, p))
    Z[:, :10] += signal / np.sqrt(n)
    Z1 = Z[:n1]

    Zbar = np.mean(Z, 0)
    Zbar1 = np.mean(Z1, 0)
    perturb = Zbar1 - Zbar

    frac = n1 * 1. / n
    BH_select = stepup.BH(Zbar,
                          np.identity(p) / n,
                          np.sqrt((1 - frac) / (n * frac)),
                          q=q)
    selected = BH_select.fit(perturb=perturb)

    observed_target = Zbar[selected]
    cov_target = np.identity(selected.sum()) / n
    cross_cov = -np.identity(p)[selected] / n

    (observed_target1, cov_target1, cross_cov1,
     _) = BH_select.marginal_targets(selected)

    assert (np.linalg.norm(observed_target - observed_target1) /
            np.linalg.norm(observed_target) < 1.e-7)
    assert (np.linalg.norm(cov_target - cov_target1) /
            np.linalg.norm(cov_target) < 1.e-7)
    assert (np.linalg.norm(cross_cov - cross_cov1) / np.linalg.norm(cross_cov)
            < 1.e-7)

    result = BH_select.selective_MLE(observed_target, cov_target, cross_cov)[0]
    Z = result['Zvalue']
    ind_unbiased_estimator = result['unbiased']
    Zbar2 = Z[n1:].mean(0)[selected]

    assert (np.linalg.norm(ind_unbiased_estimator - Zbar2) /
            np.linalg.norm(Zbar2) < 1.e-6)
    np.testing.assert_allclose(
        sorted(np.nonzero(selected)[0]),
        sorted(BHfilter(2 * ndist.sf(np.fabs(np.sqrt(n1) * Zbar1)))))
Example #50
def computePvalueProportion(att_name,
                            att_value,
                            current_file,
                            top_K,
                            round_default=2):
    """
    Compute p-value using Proportion oracle, i.e., z-test method of 4.1.3 in "A survey on measuring indirect discrimination in machine learning".

    Attributes:
        att_name: sensitive attribute name
        att_value: value of protected group of above attribute
        current_file: file name that stores the data (without the ".csv" suffix)
        top_K: threshold to decide the positive outcome. Ranked inside top_K is positive outcome. Otherwise is negative outcome.
        round_default: threshold of round function for the returned p-value
    Return:  rounded p-value
    """
    # using z-test method of 4.1.3 in "A survey on measuring indirect discrimination in machine learning"
    # for binary attribute only
    data = pd.read_csv(current_file + "_weightsum.csv")
    total_N = len(data)
    top_data = data[0:top_K]
    # for attribute value, compute the current pairs and estimated fair pairs
    position_lists_val = data[data[att_name] == att_value].index + 1
    size_vi = len(position_lists_val)
    size_other = total_N - size_vi

    size_vi_top = len(top_data[top_data[att_name] == att_value].index + 1)
    size_other_top = top_K - size_vi_top

    p_vi_top = size_vi_top / size_vi
    p_other_top = size_other_top / size_other

    p_vi_rest = 1 - p_vi_top
    p_other_rest = 1 - p_other_top

    pooledSE = sqrt((p_vi_top * p_vi_rest / size_vi) +
                    (p_other_top * p_other_rest / size_other))
    z_test = (p_other_top - p_vi_top) / pooledSE
    p_value = norm.sf(z_test)

    return round(p_value, round_default)
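
The core of this oracle is a one-sided two-proportion z-test; the sketch below (the helper name is hypothetical) mirrors that computation without the file handling.

from math import sqrt
from scipy.stats import norm

def proportion_ztest_one_sided(k_prot, n_prot, k_other, n_other):
    # one-sided test that the protected group is under-represented among positive outcomes
    p_prot = k_prot / n_prot
    p_other = k_other / n_other
    se = sqrt(p_prot * (1 - p_prot) / n_prot + p_other * (1 - p_other) / n_other)
    z = (p_other - p_prot) / se
    return norm.sf(z)

print(proportion_ztest_one_sided(k_prot=20, n_prot=100, k_other=35, n_other=100))
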
Example #51
    def fitted_endog(self):
        """
        E(y|x, cond)
            cond for left-truncated: y > left-truncated value
            cond for right-truncated: y < right-truncated value
            cond for left- & right-truncated: left-truncated value < y < right-truncated value
        Non-linear fitted endog variables (conditional expectations)
        """
        s = self.params[-1]
        sigma = np.exp(s)
        Xb = self.fittedvalues
        _l = self.model.left
        _r = self.model.right
        check_left = self.model.cens.unique().min()
        check_right = self.model.cens.unique().max()

        if (check_left == -1) & (check_right == 0):
            first_term = (Xb - _l) * norm.cdf(Xb, loc=_l, scale=sigma)
            second_term = sigma * norm.pdf(Xb, loc=_l, scale=sigma)
            return _l + first_term + second_term

        elif (check_left == 0) & (check_right == 1):
            first_term = (Xb - _r) * norm.sf(Xb, loc=_r, scale=sigma)
            second_term = sigma * norm.pdf(Xb, loc=_r, scale=sigma)
            return _r + first_term - second_term

        elif (check_left == -1) & (check_right == 1):
            first_term = (Xb - _l) * norm.cdf(Xb, loc=_l, scale=sigma)
            second_term = (Xb - _r) * norm.cdf(Xb, loc=_r, scale=sigma)
            third_term = sigma * (norm.pdf(Xb, loc=_l, scale=sigma) -
                                  norm.pdf(Xb, loc=_r, scale=sigma))
            return _l + first_term - second_term + third_term

        else:
            warnings.warn(
                '\n\n**********************************************************************\n\n'
                +
                'Equivalent to fitted_endog of uncensored Maximum Likelihood Estimation\n\n'
                +
                '**********************************************************************\n'
            )
Example #52
def pval_permciCOH_rest(lfp1, lfp2, actciCOH, shuffleN=1000):
    """
        

        Arg:
            lfp1, lfp2:  ntemp * nsegs(ntrials)

            shuffleN: the total shuffle times

            actciCOH: an actual ciCOH value (positive or negative)

        Return:
            pval: the p-value base on permutation test 
            mu, std: the mu and std of the fitted normal distribution
    """

    permlfp1, permlfp2 = lfp1.copy(), lfp2.copy()
    permciCOHs = np.zeros(shape=(shuffleN, ))
    for i in range(shuffleN):

        # shuffle permlfp2
        permlfp2 = np.transpose(permlfp2, axes=(1, 0))
        np.random.shuffle(permlfp2)
        permlfp2 = np.transpose(permlfp2, axes=(1, 0))

        permlfp = np.concatenate((np.expand_dims(
            permlfp1, axis=0), np.expand_dims(permlfp2, axis=0)),
                                 axis=0)

        ciCOHM = calc_ciCOHs_rest(permlfp)

        permciCOHs[i] = ciCOHM[0, 1]

        del ciCOHM, permlfp

    # Fit a normal distribution to the data:
    mu, std = norm.fit(permciCOHs)

    pval = norm.sf(abs(actciCOH), loc=mu, scale=std)

    return pval, mu, std
Example #53
	def __estimate_m(self, workload_pred, workload_std=0.0):
		for m in range(1, self.mst_model.m_max+1):
			mst_pred = self.mst_model.predict(m)
			delta = mst_pred - workload_pred

			variance = 0.0
			if self.conf['mst_uncertainty_aware']:
				variance += self.mst_model.std**2
			if self.conf['forecast_uncertainty_aware']:
				variance += workload_std**2

			if 0 < variance:
				std = np.sqrt(variance)
				prob = norm.sf(x=0, loc=delta, scale=std)   # survival function: 1-cdf
				if self.conf['rho'] <= prob:
					break
			elif 0 <= delta:
				# if uncertainty is not considered, 0 <= delta
				break;
                        
		return m
Example #54
def sample_path_u(prob, steps):
    t = norm.isf(prob / 2, loc=0, scale=np.sqrt(steps))
    path = [0]
    prob_path = [prob]

    for k in range(steps):
        if path[-1] < t:
            path.append(path[-1] + float(norm.rvs(loc=0, scale=1, size=1)))
            if k < steps - 1:
                prob_path.append(
                    float(
                        np.minimum(
                            2 * norm.sf(
                                t, loc=path[-1], scale=np.sqrt(steps - k - 1)),
                            1)))
            else:
                prob_path.append(0 if path[-1] < t else 1)
        else:
            prob_path.append(1)

    return prob_path
Example #55
    def compute_z(self, gene2zscore, fdr=0.05):
        res_z = []
        res_p = []

        for k, pca_genes, pca_weight, sig_1k in self.pca_models:
            z = compute_gwas_z(pca_genes, pca_weight, sig_1k, gene2zscore,
                               self.gene2var)
            res_z.append(z)
            res_p.append(norm.sf(abs(z)) * 2)

        res_z = np.array(res_z)
        res_p = np.array(res_p)
        _, res_adjp, _, _ = multi.multipletests(res_p)

        self.res_z = res_z
        self.res_p = res_p
        self.res_adjp = res_adjp
        self.adj_asso_components = np.where(res_adjp < fdr)[0]
        self.is_computed = True

        return res_z, res_p, res_adjp
Example #56
def HSIC_U_statistic_test(x, y, blocksize=50, nblocks=10):
    Btest = np.zeros(nblocks)
    n = len(x)
    for i in range(nblocks):
        indx1 = i * blocksize
        indx2 = indx1 + blocksize
        kx = kernelGausiano(x[indx1:indx2])
        ky = kernelGausiano(y[indx1:indx2])
        Btest[i] = HSIC_U_statistic(kx, ky)
    Btest_Statistic = sum(Btest) / float(nblocks)
    kx = kernelGausiano(x)
    ky = kernelGausiano(y)
    Btest_nullVar = blocksize**2 * np.var(null_samplesHsic(kx, ky, nblocks))
    z_score = np.sqrt(n * nblocks) * Btest_Statistic / np.sqrt(Btest_nullVar)
    print("perm-pv", normaldist.sf(z_score))

    ft = HSIC_U_statistic(kx, ky)
    st = HSIC_U_statistic(kx, kx) * HSIC_U_statistic(ky, ky)
    r = ft / (np.sqrt(st))
    #test normaldist.sf(ustatistic) < alpha?
    return r
Example #57
    def fitted_endog(self):
        """
        E(y|x, cond)
            cond for left-truncated: y > left-truncated value
            cond for right-truncated: y < right-truncated value
            cond for left- & right-truncated: left-truncated value < y < right-truncated value
        Non-linear fitted endog variables (conditional expectations)
        But this attribute may not be that useful
        """
        s = self.params[-1]
        sigma = np.exp(s)
        Xb = self.fittedvalues
        _l = self.model.left
        _r = self.model.right

        if ~np.isneginf(_l) & np.isposinf(_r):
            first_term = Xb * norm.cdf(Xb, loc=_l, scale=sigma)
            second_term = sigma * norm.pdf(Xb, loc=_l, scale=sigma)
            return first_term + second_term

        elif np.isneginf(_l) & ~np.isposinf(_r):
            first_term = Xb * norm.sf(Xb, loc=_r, scale=sigma)
            second_term = sigma * norm.pdf(Xb, loc=_r, scale=sigma)
            return first_term - second_term

        elif ~np.isneginf(_l) & ~np.isposinf(_r):
            first_term = Xb * norm.cdf(Xb, loc=_l, scale=sigma)
            second_term = Xb * norm.cdf(Xb, loc=_r, scale=sigma)
            third_term = sigma * (norm.pdf(Xb, loc=_l, scale=sigma) -
                                  norm.pdf(Xb, loc=_r, scale=sigma))
            return first_term - second_term + third_term

        else:
            warnings.warn(
                '\n\n**********************************************************************\n\n'
                +
                'Equivalent to untruncated Maximum Likelihood Estimation\n\n' +
                '**********************************************************************\n'
            )
Example #58
def test_bimodality(x, bins=30, kde=True, plot=False):
    from scipy.stats import gaussian_kde, norm

    grid = np.linspace(np.min(x), np.percentile(x, 99), bins)
    kde_grid = gaussian_kde(x)(grid) if kde else np.histogram(
        x, bins=grid, density=True)[0]

    idx = int(bins / 2) - 2
    idx += np.argmin(kde_grid[idx:idx + 4])

    peak_0 = kde_grid[:idx].argmax()
    peak_1 = kde_grid[idx:].argmax()
    kde_peak = kde_grid[idx:][
        peak_1]  # min(kde_grid[:idx][peak_0], kde_grid[idx:][peak_1])
    kde_mid = kde_grid[idx:].mean()  # kde_grid[idx]

    t_stat = (kde_peak - kde_mid) / (np.std(kde_grid) / np.sqrt(bins))
    p_val = norm.sf(t_stat)

    grid_0 = grid[:idx]
    grid_1 = grid[idx:]
    means = [(grid_0[peak_0] + grid_0[min(peak_0 + 1,
                                          len(grid_0) - 1)]) / 2,
             (grid_1[peak_1] + grid_1[min(peak_1 + 1,
                                          len(grid_1) - 1)]) / 2]

    if plot:
        color = 'grey'
        if kde:
            pl.plot(grid, kde_grid, color=color)
            pl.fill_between(grid, 0, kde_grid, alpha=.4, color=color)
        else:
            pl.hist(x, bins=grid, alpha=.4, density=True, color=color)
        pl.axvline(means[0], color=color)
        pl.axvline(means[1], color=color)
        pl.axhline(kde_mid, alpha=.2, linestyle='--', color=color)
        pl.show()

    return t_stat, p_val, means  # ~ t_test (reject unimodality if t_stat > 3)
Example #59
def usThemHelp(trait,parms):
    local=parms['local']
    name=parms['name']
    wald=parms['wald']
    
    snpChr=[x for x in parms['snpChr'] if x!=trait]
    
    snpData=DBLocalRead(name+'process/snpData',parms)
    snpData=snpData[snpData['chr']!=trait]

    traitData=DBLocalRead(name+'process/traitData',parms)
    traitData=traitData[traitData['chr']==trait]

    ail_paper=pd.read_csv(local+'data/ail_paper-Trans.csv',header=0)
    ail_paper=ail_paper[(ail_paper['eqtl_tissue']=='hip')&(ail_paper['target_gene_chrom']==int(trait[3:]))]
    ail_paper=ail_paper[['eqtl_pos_bp','eqtl_chrom','eqtl_pvalue','target_gene']].reset_index(drop=True)
    
    ail_paper=ail_paper.merge(pd.DataFrame({'target_gene':traitData['trait'],'loc':np.arange(len(traitData))}),on='target_gene')
    traitList=ail_paper['loc'].values.flatten()
    #pdb.set_trace()
    pval={}
    for snp in snpChr:
        snpChrom=int(snp[3:])
        print('loading pvals from snp '+snp+' trait '+trait)
        pval[snpChrom]=DBRead(name+'score/p-'+snp+'-'+trait,parms)[:,traitList]
        if wald:
            pval[snpChrom]=2*norm.sf(np.abs(pval[snpChrom]))
    
    ans=[]
    for ind,eqtl in ail_paper.iterrows():
        if not ('chr'+str(int(eqtl['eqtl_chrom'])) in snpChr):
            continue
        t_snpData=snpData[snpData['chr']=='chr'+str(int(eqtl['eqtl_chrom']))]
        ans+=[[eqtl['eqtl_pvalue'],np.min(pval[int(eqtl['eqtl_chrom'])][(t_snpData['Mbp']<eqtl['eqtl_pos_bp']+1e6)&
            (t_snpData['Mbp']>eqtl['eqtl_pos_bp']-1e6),ind].flatten())]]
    
    ans=np.array(ans)
    DBWrite(ans,name+'usThem/'+trait,parms)
    
Example #60
 def play(myScore, theirScore, isLast):
     remainingScore = 100 - myScore
     print('remaining: ', remainingScore) if verbose == True else None
     maxSafe = 0
     searching = True
     while searching:
         check = maxSafe + 1
         thisMean = dieMean * check
         thisVariance = dieVariance * check
         thisDeviation = numpy.sqrt(thisVariance)
         zScore = (remainingScore - thisMean) / thisDeviation
         if zScore > 1.687:
             overshootOdds = 0.0000001
         else:
             overshootOdds = norm.sf(zScore)
         print('checked: ', check, ' overshoot prob: ', overshootOdds,
               zScore) if verbose == True else None
         if overshootOdds < risk:
             maxSafe = check
         else:
             searching = False
     return maxSafe