Example #1
def reg(x, y):
    """Conduct OLS regression on y = beta * x
        Return betas and p_values from t-test of betas
    """
    try:
        dim_x = x.shape
        constant = numpy.ones(dim_x[0])
        x = numpy.append(constant.T, x.T)
        x = x.reshape(dim_x[1] + 1, dim_x[0]).T
        beta = numpy.dot(numpy.dot(numpy.linalg.inv(numpy.dot(x.T, x)),x.T),y)
        epsilon = y - numpy.dot(x, beta.T)
        var_cov = numpy.dot(numpy.dot(epsilon.T, epsilon),numpy.linalg.inv(numpy.dot(x.T, x))) / \
                  (dim_x[0] - dim_x[1])
        std_err = numpy.diagonal(var_cov) ** 0.5
        t_stat = beta / std_err
        p_values = list()
        for i in t_stat:
            if t.cdf(i, dim_x[0] - dim_x[1] - 1) > 0.5:
                p_values.append(2 * (1 - t.cdf(i, dim_x[0] - dim_x[1] - 1)))
            else:
                p_values.append(2 * t.cdf(i, dim_x[0] - dim_x[1] - 1))
    except Exception:
        dim_x = x.shape
        beta = numpy.zeros(dim_x[1] + 1)
        p_values = numpy.zeros(dim_x[1] + 1)
        print("Error: " + str(sys.exc_info()[1]) + " occurs when conducting OLS regression")
    return beta, p_values
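A minimal usage sketch for reg (not part of the original snippet; it assumes the numpy, sys and scipy.stats.t imports the function relies on, and the data are made up):

import sys
import numpy
from scipy.stats import t

# Hypothetical data: 100 observations, 2 regressors.
x = numpy.random.randn(100, 2)
y = x.dot(numpy.array([1.5, -0.5])) + numpy.random.randn(100)
beta, p_values = reg(x, y)
print(beta)      # [intercept, slope_1, slope_2]
print(p_values)  # two-sided p-values from the t-tests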
Example #2
def t_to_z(mr, dof):
  
  data = mr.get_data()

  # Select just the nonzero voxels
  nonzero = data[data!=0]

  # We will store our results here
  Z = np.zeros(len(nonzero))

  # Select values less than or == 0, and greater than zero
  c  = np.zeros(len(nonzero))
  k1 = (nonzero <= c)
  k2 = (nonzero > c)

  # Subset the data into two sets
  t1 = nonzero[k1]
  t2 = nonzero[k2]

  # Calculate p values for <=0
  p_values_t1 = t.cdf(t1, df = dof)
  z_values_t1 = norm.ppf(p_values_t1)

  # Calculate p values for > 0
  p_values_t2 = t.cdf(-t2, df = dof)
  z_values_t2 = -norm.ppf(p_values_t2)
  Z[k1] = z_values_t1
  Z[k2] = z_values_t2

  # Create new nifti
  empty_nii = np.zeros(mr.shape)
  empty_nii[mr.get_data()!=0] = Z
  Z_nii_fixed = nib.nifti1.Nifti1Image(empty_nii,affine=mr.get_affine(),header=mr.get_header())
  return Z_nii_fixed
Example #3
def TtoZ(t_stat_map,output_nii,dof):
    '''TtoZ:
    for details see
    https://github.com/vsoch/TtoZ
    Also provided as a command-line tool.

    t_stat_map: 
        file path to t stat image

    output_nii: 
        output nifti file
    
    dof: 
        degrees of freedom (typically number subjects - 2)

    '''
    print("Converting map %s to Z-Scores..." %(t_stat_map))
  
    mr = nibabel.load(t_stat_map)
    data = mr.get_data()

    # Select just the nonzero voxels
    nonzero = data[data!=0]

    # We will store our results here
    Z = np.zeros(len(nonzero))

    # Select values less than or == 0, and greater than zero
    c  = np.zeros(len(nonzero))
    k1 = (nonzero <= c)
    k2 = (nonzero > c)

    # Subset the data into two sets
    t1 = nonzero[k1]
    t2 = nonzero[k2]

    # Calculate p values for <=0
    p_values_t1 = t.cdf(t1, df = dof)
    z_values_t1 = norm.ppf(p_values_t1)

    # Calculate p values for > 0
    p_values_t2 = t.cdf(-t2, df = dof)
    z_values_t2 = -norm.ppf(p_values_t2)
    Z[k1] = z_values_t1
    Z[k2] = z_values_t2

    # Write new image to file
    empty_nii = np.zeros(mr.shape)
    empty_nii[mr.get_data()!=0] = Z
    Z_nii_fixed = nibabel.nifti1.Nifti1Image(empty_nii,
                                             affine=mr.get_affine(),
                                             header=mr.get_header())
    nibabel.save(Z_nii_fixed,output_nii)
Example #4
def t_uneqvar(list_1, list_2, **kwargs):
    """ Performs a t-test without the equal variance
    assumption of Student's t.  For example, see: 
    Ruxton, G. D. (2006). 
    The unequal variance t-test is an alternative to 
    Student's t-test and the Mann-Whitney U test
    Behavioral Ecology, 17(4), 688–690.

    Arguments:
     list_1, list_2: list of values from the first and final condition, respectively

    Returns: a dict containing keys:
     'p': the p-value resulting from a two-tailed test for change
          note two-tailed p-values are preferred to avoid numerical issues
          associated with highly significant p-values
     'dir': the direction
     't': the t statistic
     'df': the calculated degrees of freedom

    """
    from scipy.stats import t 
    from numpy import std, mean
    from math import sqrt

    two_tailed = test_kwarg('two_tailed', kwargs, [True, False])
    the_return_dict = {}

    var_1 = (std(list_1, ddof = 1))**2
    var_2 = (std(list_2, ddof = 1))**2
    the_u = var_2 / var_1
    n_1 = len(list_1) * 1.
    n_2 = len(list_2) * 1.
    df = (1./n_1 + the_u/n_2)**2/(1/(n_1**2*(n_1-1)) + the_u**2/(n_2**2*(n_2-1)))
    # Use 1 - 2 here before calculating p so more positive changes correspond to smaller p
    t_val = (mean(list_1) - mean(list_2)) / sqrt((var_1 / n_1) + (var_2 / n_2))
    # One-sided p
    the_p = t.cdf(t_val, df)
    t_val = -1. * t_val
    the_dir = '+' if t_val > 0 else '-'
    if two_tailed:
        if t_val > 0:
            the_p = 2. * the_p
        else:
            # It is numerically preferable to avoid
            # y = 2 * (1 - x) in case x is close to zero
            the_p = 2. * t.cdf(t_val, df)
    return_dict = {'t': t_val, 'p': the_p, 'df': df, 'dir': the_dir}
    return return_dict
Example #5
def show_bootstrap_statistics(clf, X, y, features):
    num_features = len(features)

    coefs = []
    for i in range(num_features):
        coefs.append([])

    for _ in range(BOOTSTRAP_ITERATIONS):
        X_sample, y_sample = resample(X, y)
        clf.fit(X_sample, y_sample)
        for i, c in enumerate(get_normalized_coefs(clf)):
            coefs[i].append(c)

    poi_index = features.index('POI')
    building_index = features.index('Building')
    coefs[building_index] = coefs[poi_index]

    intervals = []

    print()
    print('***** Bootstrap statistics *****')
    print('{:<20}{:<20}{:<10}{:<10}'.format('Feature', '95% interval', 't-value', 'Pr(>|t|)'))
    print()
    for i, cs in enumerate(coefs):
        values = np.array(cs)
        lo = np.percentile(values, 2.5)
        hi = np.percentile(values, 97.5)
        interval = '({:.3f}, {:.3f})'.format(lo, hi)
        tv = np.mean(values) / np.std(values)
        pr = (1.0 - t.cdf(x=abs(tv), df=len(values))) * 2.0  # two-sided Pr(>|t|)

        stv = '{:.3f}'.format(tv)
        spr = '{:.3f}'.format(pr)
        print('{:<20}{:<20}{:<10}{:<10}'.format(features[i], interval, stv, spr))
Example #6
def neuropowertable(request):

    # Get the template/step status
    sid = get_session_id(request)
    template = "neuropower/neuropowertable.html"
    steps = get_neuropower_steps(template,sid)
    context = {"steps":steps}

    if not ParameterModel.objects.filter(SID=sid):
        # Should not be able to reach this condition
        context["text"] = "No data found. Go to 'Input' and fill out the form."
        return render(request,template,context)

    else:
        sid = request.session.session_key #why are we getting session id again?
        parsdata = ParameterModel.objects.filter(SID=sid)[::-1][0]
        SPM = nib.load(parsdata.location).get_data()
        if parsdata.ZorT == 'T':
            SPM = -norm.ppf(t.cdf(-SPM,df=float(parsdata.DoF)))
        cluster.cluster(SPM,parsdata.ExcZ,parsdata.peaktable)
        peaks = pd.read_csv(parsdata.peaktable,sep="\t")
        if len(peaks) < 30:
            context["text"] = "There are too few peaks for a good estimation.  Either the ROI is too small or the screening threshold is too high."
        else:
            pvalues = np.exp(-float(parsdata.ExcZ)*(np.array(peaks.peak)-float(parsdata.ExcZ)))
            pvalues = [max(10**(-6),p) for p in pvalues]
            peaks['pval'] = pvalues
            peakform = PeakTableForm()
            form = peakform.save(commit=False)
            form.SID = sid
            form.data = peaks
            form.save()            
            context["peaks"] = peaks.to_html(classes=["table table-striped"])
    
    return render(request,template,context)
Example #7
def GeneratePDF(Data, method = 'Robust_Student_t', lower_threshold = 0.15, upper_threshold = 0.85):
    
    '''Generate the pdf estimate of the data
    Input: /Data/   data to estimate pdf on
           /method/ Method of estimation.
                    Available methods: 'Robust_Student_t'; 'KDE'; 'Normal'
           /lower_threshold/ in percentage
           /upper_threshold/ in percentage
    Output: /x/     grid on which pdf and cdf are evaluated
            /pdf/   fitted pdf
            /cdf/   fitted cdf
            /lower/ value at the lower threshold
            /upper/ value at the upper threshold
    '''
    x = np.linspace(min(Data), max(Data), 100)
    if method == 'Robust_Student_t':
        nu, mu, sigma = uvtfit(Data)
        pdf = t.pdf(x, nu, mu, sigma)
        cdf = t.cdf(x, nu, mu, sigma)
        lower = t.ppf(lower_threshold, nu, mu, sigma)
        upper = t.ppf(upper_threshold, nu, mu, sigma)
        
    elif method == 'Normal':
        mu, sigma = norm.fit(Data)
        pdf = norm.pdf(x, mu, sigma)
        cdf = norm.cdf(x, mu, sigma)
        lower = norm.ppf(lower_threshold, mu, sigma)
        upper = norm.ppf(upper_threshold, mu, sigma)
        
    elif method == 'KDE':
        kernal = gaussian_kde(Data)
        pdf = kernal.evaluate(x)
        cdf = np.array([kernal.integrate_box(x[0], x[i+1]) for i in range(len(x)-1)])
        lower = np.percentile(cdf, lower_threshold*100)
        upper = np.percentile(cdf, upper_threshold*100)
        
    return x, pdf, cdf, lower, upper
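A usage sketch (assumed, not from the original source). Since uvtfit is an external helper not shown here, the sketch exercises the 'Normal' branch only:

import numpy as np
from scipy.stats import norm

data = np.random.normal(loc=1.0, scale=2.0, size=500)  # made-up sample
x, pdf, cdf, lower, upper = GeneratePDF(data, method='Normal')
print(lower, upper)  # the 15th and 85th percentiles of the fitted normal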
Example #8
    def compute_zscore(self):
        #get background and peri rates
        bg_rates = np.array([t.bg_rate for t in self.trials])
        peri_rates = np.array([t.peri_rate for t in self.trials])
        bg_counts = np.array([t.bg_count for t in self.trials])
        peri_counts = np.array([t.peri_count for t in self.trials])
        rate_diff = peri_rates - bg_rates
        count_diff = peri_counts - bg_counts

        if peri_counts.sum() + bg_counts.sum() < len(self.trials):
            pval = 0.5 #kludge
            z = 0.0
            tstat = 0.0
        else:
            rate_diff_std = rate_diff.std(ddof=1)
            count_diff_std = count_diff.std(ddof=1)
            if rate_diff_std == 0.0:
                print('Very strange that this happened, rate_diff_std=%0.3f, count_diff_std=%0.3f, stim_num=%d' % (rate_diff_std, count_diff_std, self.stim_number))
                rate_diff_std = 1.0
            z = rate_diff.mean() / rate_diff_std
            tstat = z*np.sqrt(len(bg_rates))
            pval = (1.0 - tdist.cdf(np.abs(tstat), len(bg_rates)-1))*2 #two-tailed t-test pvalue

        self.zscore = z
        self.tstat = tstat
        self.pval = pval
Example #9
def t_stat(X, c, beta, MRSS, df):
    """
    Parameters
    ----------
    X: 2D array (n_trs * number of regressors)
        design matrix.
    c: a contrast vector.
    beta: 2D array (number of regressors x n_vols)
        estimated betas for the linear model.
    MRSS: 1D array of length n_vols
        Mean residual sum of squares.
    df: int
        n - rank of X.

    Returns
    -------
    t: a vector of length n_vols
        t statistics for each voxel.
    p: a vector of length n_vols
        p values for each voxel.
    """
    X = np.asarray(X)
    c = np.atleast_2d(c).T
    # calculate bottom half of t statistic
    SE = np.sqrt(MRSS * c.T.dot(npl.pinv(X.T.dot(X)).dot(c)))
    t = c.T.dot(beta) / SE
    # Get p value for t value using the cumulative distribution
    # function (CDF) of the t distribution
    ltp = t_dist.cdf(t, df)  # lower tail p
    p = 1 - ltp  # upper tail p

    return t, p
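A self-contained sketch of how t_stat might be called (assumed; the design matrix and data are synthetic):

import numpy as np
import numpy.linalg as npl
from scipy.stats import t as t_dist

# Hypothetical design: 50 volumes, an intercept plus one regressor.
X = np.column_stack([np.ones(50), np.random.randn(50)])
y = np.random.randn(50, 1)                    # a single "voxel" time course
beta = npl.pinv(X).dot(y)                     # least-squares estimates
df = X.shape[0] - npl.matrix_rank(X)
MRSS = ((y - X.dot(beta)) ** 2).sum(axis=0) / df
t_val, p = t_stat(X, [0, 1], beta, MRSS, df)  # contrast picks out the regressor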
Example #10
def pcor(X,Y,Z):
    """
    computes the correlation amtrix of X and Y conditioning on Z
    """
    if X.ndim==1: X = X[:,SP.newaxis]
    if Y.ndim==1: Y = Y[:,SP.newaxis]
    
    if Z is None: return STATS.pearsonr(X,Y)

    if Z.ndim==1: Z = Z[:,SP.newaxis]
    nSamples = X.shape[0]
    betaX, _, _, _ = LA.lstsq(Z,X)
    betaY, _, _, _ = LA.lstsq(Z,Y)
    Xres = X - SP.dot(Z,betaX)
    Yres = Y - SP.dot(Z,betaY)
    corr_cond = SP.corrcoef(Xres[:,0],Yres[:,0])[0,1]
    dz = Z.shape[1]  # dimension of conditioning variable
    df = max(nSamples - dz - 2,0)  # degrees of freedom

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        tstat = corr_cond / SP.sqrt(1.0 - corr_cond ** 2)  # calculate t statistic
        
    tstat = math.sqrt(df) * tstat
    pv_cond = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)  # calculate p value
    return corr_cond,pv_cond
Example #11
def t_stat(data, X_matrix):
    """
    Return the estimated betas, t-values, degrees of freedom, and p-values for the glm_multi regression
    
    Parameters
    ----------
    data: numpy array of 4 dimensions
             The image data of one subject, one run
    X_matrix: numpy array
       The design matrix for glm_multi
    Note that the fourth dimension of `data` (time or the number
    of volumes) must be the same as the number of rows that X_matrix has.
    
    Returns
    -------
    beta: estimated beta values
    
    t: t-values of the betas
    
    df: degrees of freedom
    
    p: p-values corresponding to the t-values and degrees of freedom
    """

    beta = glm_beta(data, X_matrix)

    # Calculate the parameters - b hat
    beta = np.reshape(beta, (-1, beta.shape[-1])).T

    fitted = X_matrix.dot(beta)
    # Residual error
    y = np.reshape(data, (-1, data.shape[-1]))
    errors = y.T - fitted
    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)
 
    df = X_matrix.shape[0] - npl.matrix_rank(X_matrix)
    # Mean residual sum of squares
    MRSS = RSS / df
    # calculate bottom half of t statistic
    Cov_beta=npl.pinv(X_matrix.T.dot(X_matrix))

    SE =np.zeros(beta.shape)
    for i in range(X_matrix.shape[-1]):
        c = np.zeros(X_matrix.shape[-1])
        c[i]=1
        c = np.atleast_2d(c).T
        SE[i,:]= np.sqrt(MRSS* c.T.dot(npl.pinv(X_matrix.T.dot(X_matrix)).dot(c)))


    zeros = np.where(SE == 0)
    SE[zeros] = 1
    t = beta / SE
    t[zeros] = 0
    # Get p value for t value using CDF of t distribution
    ltp = t_dist.cdf(abs(t), df)
    p = 1 - ltp  # upper tail
    
    return beta.T, t, df, p
Example #12
def corrParallel(X,Y=None,df=None):
    """
    computes the mxk correlation matrix between the mxn matrix X and the kxn matrix Z
    """    
    if Y is None:
        return corrParallelSym(X,df=df)

    assert X.shape[1]==Y.shape[1], 'ouch, samples do not match'
    nSamples = X.shape[1]
    
    Xstd = X.T
    Xstd-= Xstd.mean(0)
    Xstd/= Xstd.std(0)

    Ystd = Y.T
    Ystd-= Ystd.mean(0)
    Ystd/= Ystd.std(0)

    corr =  SP.dot(Xstd.T,Ystd)/nSamples
    if df is None:
        df = max(nSamples  - 2,0)  # degrees of freedom

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        tstat = corr / SP.sqrt(1.0 - corr ** 2)  # calculate t statistic

    tstat = math.sqrt(df) * tstat
    pv = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)  # calculate p value
    return corr,pv
Example #13
def t_fun():
    # accumulate from -infinity to 3.0777
    res = t.cdf(3.0777, df=1)
    print(res)
    # probability of middle
    a, b = t.interval(0.95, 1)
    print(a, b)
Example #14
def _t(M, Rho, nu):
    N = Rho.shape[0]
    mu = np.zeros(N)        # zero mean
    x = mvt.multivariate_t_rvs(mu,Rho,nu,M) # generate T RV's
    U = t.cdf(x, nu)
    
    return U
Example #15
def sample_procedure(dist1, dist2):
    non_bon = 0
    bon = 0
    for i in range(trials):
        flag = True
        for _ in range(repeats):
            # First sample
            sample1 = np.random.choice(dist1, (sample_size, ))
            mean1 = np.mean(sample1)
            std1 = np.std(sample1)

            # Second sample
            sample2 = np.random.choice(dist2, (sample_size, ))
            mean2 = np.mean(sample2)
            std2 = np.std(sample2)
                       
            # T test
            result = t_test(mean1, mean2, std1, std2, sample_size, sample_size)
            p_value = (1 - t.cdf(result, sample_size - 1)) * 2 
                                
            if p_value < alpha and flag:
                non_bon += 1
                flag = False
            if p_value < bonferroni:
                bon += 1
                break

    print "Time of reject in alpha: ", non_bon / trials
    print "Time of reject in bonferroni: ", bon / trials
Example #16
def t_test2(data1,data2):
    """
    Compute the t test for two samples with significantly different variances (use f_test_var to check) to test whether they have the same mean.
    H0: the samples have the same mean (p-value close to one).

    Parameters
    ----------
    data1: n,1 - dim array with data
    data2: n,1 - dim array with data

    Returns
    -------
    p-value of t test, the t value itself and the degrees of freedom

    Notes
    -----
    See 3rd Edition of Numerical recipes chapter 14.2.1, p.728
    """
    N1, N2 = len(data1), len(data2)
    mean1, mean2 = np.mean(data1), np.mean(data2)
    var1, var2= np.var(data1,ddof = 1), np.var(data2,ddof = 1)

    T = (mean1 - mean2) / np.sqrt(var1/N1 + var2/N2)	# Eq. 14.2.3
    df = (var1/N1 + var2/N2)**2. / ( (var1/N1)**2./(N1 - 1) + (var2/N2)**2./(N2 - 1))
    return t.cdf(T, df), T, df
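A usage sketch with synthetic samples (assumed):

import numpy as np
from scipy.stats import t

a = np.random.normal(0.0, 1.0, 40)
b = np.random.normal(0.5, 2.0, 60)
p, T, df = t_test2(a, b)
print(p, T, df)  # CDF value at T, the statistic itself, and the Welch degrees of freedom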
Example #17
def t_test1(data1,data2):
    """
    Compute the t test for two samples with the same variance to test whether they have the same mean.
    H0: the samples have the same mean (p-value close to one).

    Parameters
    ----------
    data1: n,1 - dim array with data
    data2: n,1 - dim array with data

    Returns
    -------
    p-value of t test, the t value itself and the degrees of freedom

    Notes
    -----
    See 3rd Edition of Numerical recipes chapter 14.2.1, p.727
    """
    if not isinstance(data1, np.ndarray):
        data1 = np.array(data1)
    if not isinstance(data2, np.ndarray):
        data2 = np.array(data2)

    N1, N2 = len(data1), len(data2)
    mean1, mean2 = np.mean(data1), np.mean(data2)
    # Eq. 14.2.1
    sD = np.sqrt( (np.sum( (data1 - np.ones(N1) * mean1) ** 2.) + np.sum( (data2 - np.ones(N2) * mean2) ** 2.)) / (N1 + N2 - 2.) * (1./N1 + 1./N2))
    T = (mean1 - mean2) / sD
    return t.cdf(T, N1 + N2 - 2),T,N1 + N2 - 2
Example #18
def calc_cdf(mu_null, n, mean, stddev):
	one_sample_t = ( mean - mu_null)/ (stddev/math.sqrt(n))
	if n > 50:
		cdf_val = normcdf(one_sample_t,0. , 1.)
	else:
		cdf_val = t.cdf(one_sample_t, n - 1)
	
	return cdf_val	
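A usage sketch (values assumed). With n <= 50 the Student-t branch is taken, so the external normcdf helper is not needed:

import math
from scipy.stats import t

print(calc_cdf(mu_null=0.0, n=25, mean=0.4, stddev=1.2))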
Example #19
def tdist(var, year, x, eu=None):
    s,f = seznam_vzorec(var, year, eu=eu)
    n = len(s)
    x = abs(x)

    # print "=TDIST(%s, %s, 2)" % (x, n-2)
    result = ( 1-t.cdf(x, n-2) ) * 2
    return result
Example #20
def solve_t(t_value=None, f=None, p=None):
    max_1_none(t_value, f, p)
    if t_value is None:
        return t(f, p)
    elif f is None:
        raise NotImplementedError("Not implemented yet - sorry")
    elif p is None:
        return sympify(sci_t.cdf(float(t_value), float(f)))
Example #21
def corrsig(N, c=None, p=.95):
    # if c exists, this returns the cutoff
    import numpy as np
    from scipy.stats import t
    if c is not None:
        return t.cdf(c / np.sqrt((1 - c**2) / (N - 2)), N - 2)
    else:
        print("functionality not implemented yet, please query a correlation")
        return
Example #22
def log_principal_anomaly(x, N, Q, S):
    assert N > 3, "N must be more than 3, is %r" % N
    mean = float(S) / N
    val = mean - abs(mean - x)
    scale = sqrt(max(0, (float(N) * Q - pow(S, 2))) / ((N + 1) * (N - 3)))
    if scale == 0:
        raise ("Scale is 0!", N, Q, S)
    t_cdf = t.cdf(val, N - 1, loc=mean, scale=scale)
    return -log(2 * t_cdf)
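A sketch with made-up running totals (assumed: N observations whose sum is S and sum of squares is Q):

from math import sqrt, log
from scipy.stats import t

print(log_principal_anomaly(x=2.5, N=10, Q=120.0, S=30.0))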
Example #23
def simulateCopula(simulations=10, type=str('g'), rho=float, lamda=tuple, tDof=4, basketSize=5, useGPU=False):
    result = []
    """

    $\tau = F^{-1}(u) = -\frac{\log(1-u)}{\lambda}$

    """
    print('simulating t distribution' if type == 't' else 'simulating gaussian dist')

    for z in range(0, simulations):
        # for the t distribution we use the same method but
        # sample from the chisquared distribution
        # if GPU is enabled, hand over to GPU to provide random number sample
        if useGPU and type == 'g':
            z1, z2, z3, z4, z5 = rng.getPseudoRandomNumbers_Standard_cuda(basketSize)
        else:
            z1, z2, z3, z4, z5 = random.chisquare(tDof, size=basketSize) if type == 't' else random.normal(size=5)
        # z1, z2, z3, z4, z5 = chi2.rvs(1, size=5) if type == 't' else random.normal(size=5)

        x1 = z1

        # using factorised copula procedure
        # $A_i = w_i Z + \sqrt{1 - w_i^2}\,\epsilon_i$
        x2, x3, x4, x5 = [z1 * rho + sqrt(1 - square(rho)) * zn for zn in [z2, z3, z4, z5]]

        # converting to normal variables from t or normal distribution successfully
        # via cdf of relevant distribution
        if type == 't':
            u1, u2, u3, u4, u5 = [t.cdf(x, 1) for x in [x1, x2, x3, x4, x5]]
        else:
            u1, u2, u3, u4, u5 = [norm.cdf(x) for x in [x1, x2, x3, x4, x5]]
        u = [u1, u2, u3, u4, u5]
        # $\tau_i = -\frac{\log(1 - u_i)}{\lambda_i}$
        tau1, tau2, tau3, tau4, tau5 = [-log(1 - u) / lamda[index] for index, u in enumerate(u)]
        result.append({'z1': z1,
                       'z2': z2,
                       'z3': z3,
                       'z4': z4,
                       'z5': z5,
                       'x1': x1,
                       'x2': x2,
                       'x3': x3,
                       'x4': x4,
                       'x5': x5,
                       'u1': u1,
                       'u2': u2,
                       'u3': u3,
                       'u4': u4,
                       'u5': u5,
                       'tau1': tau1,
                       'tau2': tau2,
                       'tau3': tau3,
                       'tau4': tau4,
                       'tau5': tau5,
                       })

    return DataFrame(result)
Example #24
def main():
  parser = argparse.ArgumentParser(
  description="Convert a whole brain T score map to a Z score map without loss of precision for strongly positive and negative values.")
  parser.add_argument("t_stat_map", help="T-score statistical map in the form of a 3D NIFTI file (.nii or .nii.gz).", type=nifti_file)
  parser.add_argument("dof", help="Degrees of freedom (eg. for a two-sample T-test: number of subjects in group - 2)",type=int)
  parser.add_argument("--output_nii", help="The name for the output Z-Score Map.",type=str,default="z_stat_map.nii")
  args = parser.parse_args()

  print "Converting map %s to Z-Scores..." %(args.t_stat_map)
  
  mr = nib.load(args.t_stat_map)
  data = mr.get_data()

  # Select just the nonzero voxels
  nonzero = data[data!=0]

  # We will store our results here
  Z = np.zeros(len(nonzero))

  # Select values less than or == 0, and greater than zero
  c  = np.zeros(len(nonzero))
  k1 = (nonzero <= c)
  k2 = (nonzero > c)

  # Subset the data into two sets
  t1 = nonzero[k1]
  t2 = nonzero[k2]

  # Calculate p values for <=0
  p_values_t1 = t.cdf(t1, df = args.dof)
  z_values_t1 = norm.ppf(p_values_t1)

  # Calculate p values for > 0
  p_values_t2 = t.cdf(-t2, df = args.dof)
  z_values_t2 = -norm.ppf(p_values_t2)
  Z[k1] = z_values_t1
  Z[k2] = z_values_t2

  # Write new image to file
  empty_nii = np.zeros(mr.shape)
  empty_nii[mr.get_data()!=0] = Z
  Z_nii_fixed = nib.nifti1.Nifti1Image(empty_nii,affine=mr.get_affine(),header=mr.get_header())
  nib.save(Z_nii_fixed,args.output_nii)
Example #25
def pVal(mu1,mu2,s1,s2,n1,n2):
	se = np.sqrt(s1*s1/n1+s2*s2/n2)
	df = (s1**2/n1 + s2**2/n2)**2 / ( ((s1**2 / n1)**2 / (n1 - 1)) + ((s2**2 / n2)**2 / (n2 - 1)))
	tVal = (mu1 - mu2)/se
	return (1 - t.cdf(tVal,df))
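A usage sketch with hypothetical summary statistics for two groups:

import numpy as np
from scipy.stats import t

print(pVal(mu1=5.2, mu2=4.8, s1=1.1, s2=0.9, n1=30, n2=35))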




# [email protected]:ryu577/base.git
Example #26
    def SimulateUniforms(self):
        """docstring for Simulate"""
        mean = [0.0] * self.size
        cov = self.copula_covariance

        s = chi2.rvs(self.dof)
        Z = multivariate_normal(mean, cov)
        X = [math.sqrt(self.dof)/math.sqrt(s) * z for z in Z]
        Y = [t.cdf(x, self.dof) for x in X]

        return Y
Example #27
def comparison_test(arr1, arr2, name1, name2, tail=1, bonf=1):
	stats = _stats(arr1, arr2)
	p_value = 1 - t.cdf(t_test(*stats), min(len(arr1), len(arr2)) - 1)
	if tail == 2: #In case for two tail test
		p_value *= 2
	alpha = ALPHA / bonf
	print "The p-value between {0} and {1} is {2:.5f} and it {3} the null hypothesis".format(
			name1, name2, p_value,
			"reject" if p_value < alpha else "does not reject")
	print "Actual Alpha is", format(alpha, '.4f')
	if p_value < alpha:
		print "The effect size is {0:.5f}".format(cohen_d(*stats))
Example #28
def sig_test(r, n, twotailed = True):
	import numpy as np
	from scipy.stats import t as tdist
	df = n - 2

	# Create t-statistic
	# Use absolute value to be able to deal with negative scores
	t = np.abs(r * np.sqrt(df/(1-r**2)))
	p = (1 - tdist.cdf(t,df))
	if twotailed:
		p = p * 2
	return p
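For example (values assumed), the one-tailed significance of a correlation of 0.42 estimated from 30 samples:

print(sig_test(0.42, 30, twotailed=False))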
Example #29
    def cdf(self, arg):
        """Cumulative density function (CDF).

        Parameters
        ----------
        arg : array
            Grid of points at which to evaluate the CDF

        Returns
        -------
        array
            CDF values. Same shape as the input.

        """
        a = self.__const_a()
        b = self.__const_b()

        y = (b*arg+a)/(1+np.sign(arg+a/b)*self.lam) * (1-2/self.eta)**(-.5)
        cond = arg < -a/b

        return cond * (1-self.lam) * t.cdf(y, self.eta) \
            + ~cond * (-self.lam + (1+self.lam) * t.cdf(y, self.eta))
Example #30
def naiveTopt(icm,cutoff=.05): #like topt but doesn't correct for tail direction
    """ Returns cluster by fitting t-test and returning residues above cutoff """
    
    param = t.fit(icm,loc=np.median(icm))
    x = np.linspace(-1,1,200)
    cdf = t.cdf(x,param[0],loc=param[1], scale=param[2])

    minx = np.max(x[np.nonzero(cdf<cutoff)])

    # deal with direction of tail:
    cursect = np.array([i for i in range(icm.size) if icm[i]<minx])
        
    return cursect
Example #31
    def __init__(self, x, y):
        self.x = x
        self.y = y
        (self.n, self.r) = x.shape
        xx = np.dot(x.T, x)
        xy = np.dot(x.T, y)
        self.xxi = np.linalg.inv(xx)
        self.b = np.linalg.solve(xx, xy).reshape(-1, 1)
        e = y - np.dot(x, self.b)
        self.resid = e
        self.vb = self.genvariance(e)
        self.se = np.sqrt(np.diagonal(self.vb)).reshape(-1, 1)
        self.tstat = np.divide(self.b, self.se)
        self.pval = 2 * t.cdf(-np.abs(self.tstat), df=self.n - self.r)
        self.rsq = 1 - e.var() / y.var()
        self.adjrsq = 1 - (1 - self.rsq) * (self.n - 1) / (self.n - self.r)
        self.logl = -self.n / 2 * (np.log(2 * np.pi * e.var()) + 1)
        self.aic = 2 * self.r - 2 * self.logl
        self.bic = np.log(self.n) * self.r - 2 * self.logl
        nulllike = -self.n / 2 * (np.log(2 * np.pi * y.var()) + 1)
        self.deviance = 2 * (self.logl - nulllike)
Example #32
def two_sample_Welch_t_test(data1, data2, scale_estimator=lambda x: np.std(x)):
    """
    --Independent two-sample test--
    Assuming Gaussian distributions with unequal variances and unequal sample sizes.
    Hypothesis H0: mu_1 == mu_2
    scale_estimator is a function that estimates the square root of the variance (i.e. the st. dev.)
    """
    sample_mean1 = np.mean(data1)
    sample_mean2 = np.mean(data2)
    n1 = len(data1)
    n2 = len(data2)
    s1 = scale_estimator(data1)
    s2 = scale_estimator(data2)
    s_delta = np.sqrt((s1**2) / n1 + (s2**2) / n2)
    t_statistic = (sample_mean1 - sample_mean2) / s_delta
    degrees_of_freedom = s_delta**4 / (
        (s1**2 / n1)**2 / (n1 - 1) + (s2**2 / n2)**2 /
        (n2 - 1))  # Welch–Satterthwaite equation
    p_value = (1 - t.cdf(abs(t_statistic), degrees_of_freedom)
               ) * 2  # Look up from Student's t-distribution
    return p_value, t_statistic, degrees_of_freedom
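A usage sketch with synthetic samples (assumed; the function expects numpy and scipy.stats.t at module level):

import numpy as np
from scipy.stats import t

a = np.random.normal(0.0, 1.0, 35)
b = np.random.normal(0.7, 2.0, 50)
p_value, t_statistic, dof = two_sample_Welch_t_test(a, b)
print(p_value, t_statistic, dof)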
Example #33
def student_test(data1, data2):
    print("Student test of Ex = Ey")
    n, m = len(data1), len(data2)

    criteria = t.ppf(1 - alfa / 2, n + m - 2)

    # test = ttest_ind(data1, data2)

    test = ((np.mean(data1) - np.mean(data2)) * math.sqrt(n * m *(n + m - 2))) \
           / (math.sqrt((n + m) * (n * np.var(data1) + m * np.var(data2))))

    if test > criteria:
        print(f"H0 is rejected since the value > {criteria}")
        print("Ex != Ey")
    else:
        print(f"H0 is accepted since the value < {criteria}")
        print("Ex = Ey")

    print()

    return criteria, test
Example #34
def independent_ttest(data1, data2, alpha):
    # calculate means
    mean1, mean2 = mean(data1), mean(data2)
    print(mean1)
    print(mean2)
    mean1_glob = mean1
    mean2_glob = mean2
    # calculate standard errors
    se1, se2 = sem(data1), sem(data2)
    # standard error on the difference between the samples
    sed = sqrt(se1**2.0 + se2**2.0)
    # calculate the t statistic
    t_stat = (mean1 - mean2) / sed
    # degrees of freedom
    df = len(data1) + len(data2) - 2
    # calculate the critical value
    cv = t.ppf(1.0 - alpha, df)
    # calculate the p-value
    p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
    # return everything
    return t_stat, df, cv, p
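A usage sketch with made-up measurements (assumed; the function relies on mean, sem, sqrt and t being imported):

from math import sqrt
from numpy import mean
from scipy.stats import sem, t

data1 = [20.1, 22.3, 19.8, 21.0, 20.5, 23.1]
data2 = [18.2, 19.9, 17.5, 20.0, 18.8, 19.1]
t_stat, df, cv, p = independent_ttest(data1, data2, 0.05)
print('t=%.3f, df=%d, cv=%.3f, p=%.3f' % (t_stat, df, cv, p))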
Example #35
def simple_linear_reg(y, x):
    """一元线性回归"""
    assert len(x) == len(y)

    n = len(x)
    assert n > 1

    mean_x = mean(x)
    mean_y = mean(y)

    beta1 = covariance(x, y) / variance(x)
    beta0 = mean_y - beta1 * mean_x

    y_hat = [beta0 + beta1 * e for e in x]
    ss_residual = sum((e1 - e2)**2 for e1, e2 in zip(y, y_hat))
    se_model = sqrt(ss_residual / (n - 2))

    t_value = beta1 / (se_model / sqrt((n - 1) * variance(x)))
    p = 2 * (1 - t.cdf(abs(t_value), n - 2))

    return beta0, beta1, t_value, n - 2, p
Example #36
def bharath_Ttest1s(sMean,sd,pMean,n,alpha=0.05):
    tstatistics=(sMean-pMean)/(sd/np.sqrt(n))
    if tstatistics>0:
        tstatistics=-tstatistics
    print("\nT Statistics:",tstatistics)
    
    tcritical=[t.ppf(q=alpha/2,df=n-1),-t.ppf(q=alpha/2,df=n-1)]
    print("T critical Values are :",tcritical)
    if tstatistics<tcritical[0] or tstatistics>tcritical[1]:
        print("Reject the Null Hypothesis")
    else:
        print("Fail to reject the Null Hypothesis")

    pvalue=2*t.cdf(tstatistics,df=n-1)
    print("\nPvalue value is:",pvalue)
    if pvalue < alpha:
        print("Reject the Null Hypothesis")
    else:
        print("Fail to reject the Null Hypothesis")

    print("\n")
Example #37
def data_process_3():
    data = [3, -3, 3, 12, 15, -16, 17, 19, 23, -24, 32]

    # Problem 2 Question 1
    # confidence interval
    c1 = 0.95

    samp_size = len(data)
    avg = np.mean(data)
    sd = np.std(data, ddof=1)

    stand_err = sd / np.sqrt(samp_size)
    t_c = t.ppf(1 - (1 - c1) / 2, df=samp_size - 1)

    intervals = (avg - (t_c * sd) / np.sqrt(samp_size), avg + (t_c * sd) / np.sqrt(samp_size))

    # Problem 2 Question 2
    c2 = 0.9
    t_c_2 = t.ppf(1 - (1 - c2) / 2, df=samp_size - 1)

    intervals2 = (avg - (t_c_2 * sd) / np.sqrt(samp_size), avg + (t_c_2 * sd) / np.sqrt(samp_size))

    # Problem 2 Question 3
    new_sd = 16.836
    new_std_err = new_sd / np.sqrt(samp_size)
    z_c = norm.ppf(1 - (1 - c1) / 2)

    intervals3 = (avg - (z_c * new_sd) / np.sqrt(samp_size), avg + (z_c * new_sd) / np.sqrt(samp_size))

    # Problem 2 Question 4
    # solve for t_c when lower interval endpoint is zero (mu = 0)
    t_c_new = avg / (sd / np.sqrt(samp_size))

    # find p value
    p_val = 2 * t.cdf(-abs(t_c_new), df=samp_size - 1)
    new_c = 1 - p_val
    intervals4 = (avg - (t_c_new * sd) / np.sqrt(samp_size), avg + (t_c_new * sd) / np.sqrt(samp_size))

    return ((samp_size, avg, sd, stand_err, t_c, intervals),
            (t_c_2, intervals2),
            (avg, new_std_err, z_c, intervals3),
            (t_c_new, p_val, new_c, intervals4))
Example #38
def student_t(t_input: Tuple[str, float],
              radius: float,
              size: float,
              ignore: bool) -> float:
    """
    Function to calculate the false positive fraction for a given sigma level (Mawet et al. 2014).

    Parameters
    ----------
    t_input : tuple(str, float)
        Tuple with the input type ('sigma' or 'fpf') and the input value.
    radius : float
        Aperture radius (pix).
    size : float
        Separation of the aperture center (pix).
    ignore : bool
        Ignore neighboring apertures of the point source to exclude the self-subtraction lobes.

    Returns
    -------
    float
        False positive fraction (FPF).
    """

    num_ap = int(math.pi*radius/size)

    if ignore:
        num_ap -= 2

    # Note that the number of degrees of freedom is given by nu = n-1 with n the number of samples.
    # The number of samples is equal to the number of apertures minus 1 (i.e. the planet aperture).
    # See Section 3 of Mawet et al. (2014) for more details on the Student's t distribution.

    if t_input[0] == 'sigma':
        t_result = 1. - t.cdf(t_input[1], num_ap-2, loc=0., scale=1.)

    elif t_input[0] == 'fpf':
        t_result = t.ppf(1. - t_input[1], num_ap-2, loc=0., scale=1.)

    return t_result
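A usage sketch (parameter values assumed): the false positive fraction at 5 sigma for an aperture radius of 12 px with aperture centers separated by 2 px:

import math
from scipy.stats import t

print(student_t(('sigma', 5.0), radius=12.0, size=2.0, ignore=False))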
Example #39
    def test_two_sample_welch_test(self):

        sal_a = self.data.loc[self.data['discipline'] == 'A']['salary']
        sal_b = self.data.loc[self.data['discipline'] == 'B']['salary']

        ttest = tTest(y1=sal_a, y2=sal_b)

        test_summary = ttest.test_summary

        assert_almost_equal(test_summary['Sample 1 Mean'], np.mean(sal_a))
        assert_almost_equal(test_summary['Sample 2 Mean'], np.mean(sal_b))
        assert_almost_equal(test_summary['t-statistic'], -3.1386989278486013)
        assert_almost_equal(test_summary['degrees of freedom'],
                            377.89897288941387)
        assert_almost_equal(
            test_summary['p-value'],
            t.cdf(test_summary['t-statistic'],
                  test_summary['degrees of freedom']) * 2)

        assert test_summary['alternative'] == 'two-sided'
        assert test_summary['test description'] == "Two-Sample Welch's t-test"

        ttest_group = tTest(group=self.data['discipline'],
                            y1=self.data['salary'])
        test_group_summary = ttest_group.test_summary

        assert_almost_equal(test_summary['Sample 1 Mean'],
                            test_group_summary['Sample 1 Mean'])
        assert_almost_equal(test_summary['Sample 2 Mean'],
                            test_group_summary['Sample 2 Mean'])
        assert_almost_equal(test_summary['p-value'],
                            test_group_summary['p-value'])
        assert_almost_equal(test_summary['degrees of freedom'],
                            test_group_summary['degrees of freedom'], 5)
        assert_almost_equal(test_summary['t-statistic'],
                            test_group_summary['t-statistic'])

        assert test_group_summary['alternative'] == 'two-sided'
        assert test_group_summary[
            'test description'] == "Two-Sample Welch's t-test"
Example #40
    def dependent_corr(self, xy, xz, yz, n, twotailed=False, method='steiger'):
        """
        Calculates the statistical significance between two dependent correlation coefficients
        @param xy: correlation coefficient between x and y
        @param xz: correlation coefficient between x and z
        @param yz: correlation coefficient between y and z
        @param n: number of elements in x, y and z
        @param twotailed: whether to calculate a one- or two-tailed test, only works for 'steiger' method
        @param method: defines the method used, 'steiger' or 'zou'
        @return: t and p-val
        """
        if method == 'steiger':
            d = xy - xz
            determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
            av = (xy + xz) / 2
            cube = (1 - yz) * (1 - yz) * (1 - yz)

            t2 = d * np.sqrt(
                (n - 1) * (1 + yz) / (((2 * (n - 1) /
                                        (n - 3)) * determin + av * av * cube)))
            p = 1 - t.cdf(abs(t2), n - 3)

            if twotailed:
                p *= 2

            return t2, p
        elif method == 'zou':
            L1 = self.rz_ci(xy, n)[0]
            U1 = self.rz_ci(xy, n)[1]
            L2 = self.rz_ci(xz, n)[0]
            U2 = self.rz_ci(xz, n)[1]
            rho_r12_r13 = self.rho_rxy_rxz(xy, xz, yz)
            lower = xy - xz - pow((pow((xy - L1), 2) + pow(
                (U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
            upper = xy - xz + pow((pow((U1 - xy), 2) + pow(
                (xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
            return lower, upper
        else:
            raise Exception('Wrong method!')
Example #41
def dependent_corr(xy, xz, yz, n, twotailed=False, conf_level=None, method='steiger'):
    """
    Calculates the statistic significance between two dependent correlation coefficients
    @param xy: correlation coefficient between x and y
    @param xz: correlation coefficient between x and z
    @param yz: correlation coefficient between y and z
    @param n: number of elements in x, y and z
    @param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method
    @param conf_level: confidence level, only works for 'zou' method
    @param method: defines the method uses, 'steiger' or 'zou'
    @return: t and p-val
    """
    if method == 'steiger':
        d = xy - xz
        determin = 1 - xy ** 2 - xz ** 2 - yz ** 2 + 2 * xy * xz * yz
        av = (xy + xz)/2
        cube = (1 - yz) * (1 - yz) * (1 - yz)
        e = (n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + (av ** 2) * cube))
        if e < 0:
            return np.nan, np.nan
        t2 = d * np.sqrt(e)
        p = 1 - t.cdf(abs(t2), n - 3)  # Steiger's test uses n - 3 degrees of freedom

        if twotailed:
            p *= 2
        # p is the probability under the null hypothesis
        return t2, p
    elif method == 'zou':
        if conf_level is None:
            conf_level = 0.95
        L1 = rz_ci(xy, n, conf_level=conf_level)[0]
        U1 = rz_ci(xy, n, conf_level=conf_level)[1]
        L2 = rz_ci(xz, n, conf_level=conf_level)[0]
        U2 = rz_ci(xz, n, conf_level=conf_level)[1]
        rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
        lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
        upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
        return lower, upper
    else:
        raise Exception('Wrong method!')
Example #42
def DiscretizeNormalizeParam(tau, k_, model, par):
    # This function discretizes the one-step normalized pdf when the
    # distribution is parametrically specified
    # INPUTS
    #  tau    :[scalar] projection horizon
    #  k_     :[scalar] coarseness level
    #  model  :[string] specifies the distribution: shiftedLN, Student t, Uniform
    #  par    :[struct] model parameters
    # OUTPUTS
    #  xi     :[1 x k_] centers of the bins
    #  f      :[1 x k_] discretized pdf of invariant

    ## Code

    # grid
    a = -norm.ppf(10**(-15),0,sqrt(tau))
    h = 2*a/k_
    xi = arange(-a+h,a+h,h)

    # discretized initial pdf (standardized)
    if model=='shiftedLN':
        m, s,_ = ShiftedLNMoments(par)
        csi = par.c
        mu = par.mu
        sig = sqrt(par.sig2)
        if sign(par.skew)==1:
            M = (m-csi)/s
            f = 1/h*(lognorm.cdf(xi+h/2+M,sig,scale=exp(mu-log(s)))-lognorm.cdf(xi-h/2+M,sig,scale=exp(mu-log(s))))
            f[k_-1] = 1/h*(lognorm.cdf(-a+h/2+M,sig,scale=exp(mu-log(s)))-lognorm.cdf(-a+M,sig,scale=exp(mu-log(s))) +\
            lognorm.cdf(a+M,sig,scale=exp(mu-log(s)))-lognorm.cdf(a-h/2+M,sig,scale=exp(mu-log(s))))
        elif sign(par.skew)==-1:
            M = (m+csi)/s
            f = 1/h*(lognorm.cdf(-(xi-h/2+M),sig,scale=exp(mu-log(s)))-lognorm.cdf(-(xi+h/2+M),sig,scale=exp(mu-log(s))))
            f[k_-1] = 1/h*(lognorm.cdf(-(-a+M),sig,scale=exp(mu-log(s)))-lognorm.cdf(-(-a+h/2+M),sig,scale=exp(mu-log(s))) +\
            lognorm.cdf(-(a-h/2+M),sig,scale=exp(mu-log(s)))-lognorm.cdf(-(a+M),sig,scale=exp(mu-log(s))))

    elif model=='Student t':
        nu = par
        f = 1/h*(t.cdf(xi+h/2,nu)-t.cdf(xi-h/2,nu))
        f[k_-1] = 1/h*(t.cdf(-a+h/2,nu)-t.cdf(-a,nu) + t.cdf(a,nu)-t.cdf(a-h/2,nu))

    elif model=='Uniform':
        mu = par.mu
        sigma = par.sigma
        f = zeros(k_)
        f[(xi>=-mu/sigma)&(xi<=(1-mu)/sigma)] = sigma
    return xi, f
Example #43
    def independent_t_test(self, data1, data2, alpha):
        '''Reference: 
        Brownlee, Jason, 2019, 'How to Code the Student's t-Test from Scratch in Python', Machine Learning Mastery, retrieved from: https://machinelearningmastery.com/how-to-code-the-students-t-test-from-scratch-in-python/
        '''
        rejected = False
        tReject = False
        pReject = False
        # calculate means
        mean1, mean2 = np.mean(data1), np.mean(data2)
        # calculate standard errors
        se1, se2 = sem(data1), sem(data2)
        # standard error on the difference between the samples
        sed = np.sqrt(se1**2.0 + se2**2.0)
        # calculate the t statistic
        t_stat = (mean1 - mean2) / sed
        # degrees of freedom
        df = len(data1) + len(data2) - 2
        # calculate the critical value
        cv = t.ppf(1.0 - alpha, df)
        # calculate the p-value
        p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
        # return everything
        print('t=%.3f, df=%d, cv=%.3f, p=%.3f' % (t_stat, df, cv, p))
        # interpret via critical value
        if abs(t_stat) <= cv:
            print('Accept null hypothesis that the means are equal.')
        else:
            print('Reject the null hypothesis that the means are equal.')
            tReject = True
        # interpret via p-value
        if p > alpha:
            print('Accept null hypothesis that the means are equal.')
        else:
            print('Reject the null hypothesis that the means are equal.')
            pReject = True

        rejected = tReject and pReject
        #return t_stat, df, cv, p
        return rejected
Example #44
def independent_ttest(data1,data2,alpha):
    from scipy.stats import sem
    from scipy.stats import t
    from numpy import mean
    from math import sqrt
    
    # calculate means
    mean1,mean2 = mean(data1),mean(data2)
    # calculate standard errors
    se1, se2 = sem(data1),sem(data2)
    # standard error on the difference between the samples
    sed = sqrt(se1**2.0 + se2**2.0)
    # calculate the t statistic
    t_stat = (mean1 - mean2) / sed
    # degrees of freedom
    df = len(data1)+len(data2) - 2
    # calculate the critical value
    cv = t.ppf(1.0 - alpha, df)
    # calculate the p-value
    p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
    # return results
    return t_stat, df, cv, p
Example #45
def paired_t_test(y1_score, y2_score, alpha):
    """
    成对t检验
    :param y1_score: y1
    :param y2_score: y2
    :param alpha: α
    :return: t_stat, df, cv, pv
    """
    k = len(y1_score)
    d = [y1_score[i] - y2_score[i] for i in range(k)]
    d = np.array(d)
    # d_ is the mean of the differences
    d_ = np.mean(d)
    # s is the standard deviation of the differences
    s = np.std(d)
    # calculate the t statistic
    t_stat = abs(k**0.5 * d_ / s)
    # degrees of freedom
    df = k - 1
    cv = t.ppf(1.0 - alpha, df)
    pv = (1.0 - t.cdf(t_stat, df)) * 2.0
    return t_stat, df, cv, pv
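A usage sketch with made-up paired scores (assumed; np and t must be imported for the function to run):

import numpy as np
from scipy.stats import t

y1 = [0.82, 0.79, 0.85, 0.81, 0.78]
y2 = [0.80, 0.77, 0.86, 0.79, 0.75]
t_stat, df, cv, pv = paired_t_test(y1, y2, 0.05)
print(t_stat, df, cv, pv)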
Example #46
def compute_conf_interval(alpha,
                          x,
                          samp_mean,
                          samp_mean_dev,
                          var=None,
                          df=None,
                          central=True):
    if central:
        low_val = (1 - alpha) / 2
        upp_val = (1 + alpha) / 2
    else:
        low_val = 1 - alpha
        upp_val = alpha
    if var is not None:
        cdf_arr = norm.cdf(x, samp_mean, samp_mean_dev)
    else:
        x = (samp_mean - x) / (samp_mean_dev)
        x = x[::-1]
        cdf_arr = t.cdf(x, df)
    low_int = x[cdf_arr < low_val][-1]
    upp_int = x[cdf_arr > upp_val][0]
    if central:
        if var is not None:
            print("Alpha: ", alpha, "; Central CI (Gauss): ",
                  np.round((low_int, upp_int), 2))
        else:
            print(
                "Alpha: ", alpha, "; Central CI (tStudent): ",
                np.round((samp_mean + low_int * samp_mean_dev,
                          samp_mean + upp_int * samp_mean_dev), 2))
    else:
        if var is not None:
            print("Alpha: ", alpha, "; Lower CI (Gauss): ", round(low_int, 2))
            print("Alpha: ", alpha, "; Upper CI (Gauss): ", round(upp_int, 2))
        else:
            print("Alpha: ", alpha, "; Lower CI: ",
                  round(samp_mean + low_int * samp_mean_dev, 2))
            print("Alpha: ", alpha, "; Upper CI: ",
                  round(samp_mean + upp_int * samp_mean_dev, 2))
Example #47
    def fit_model(y, x, covars=None):
        """
        y is n X 1 - phenotype
        x is n X 1 - site under test
        covars (optional) is n X p

        Returns three arrays of shape (1+p+1) X 1 - coefficients, t-statistics and p-values:
                the first is the coefficients array, where coefficients[0] is the coef of the intercept,
                                                           coefficients[-1] is the coef of the site under test (the input x),
                                                           coefficients[1], ..., coefficients[p] are the coefs of the covariates
                the second array holds the t-statistics - again index 0 is for the intercept, index -1 for the site under test, and 1 to p for covars
                the third array holds the p-values - with the same indexing
        To sum up: to get the coeffs, t-statistics and p-values of the site under test (input x), extract coefficients[-1], t-statistic[-1] and p-values[-1].
        """
        if x.ndim == 1:
            x = x.reshape(-1, 1)  # make sure dim is (n,1) and not(n,)
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        X = x
        if covars is not None:
            X = column_stack((covars, X))

        regr = linear_model.LinearRegression(False)
        n = X.shape[0]  # number of sites
        X = np.concatenate((np.ones((n, 1)), X), axis=1)

        mdl = regr.fit(X, y)
        sse = np.sum(
            (mdl.predict(X) - y)**2, axis=0) / float(X.shape[0] - X.shape[1])
        se = np.array([
            np.sqrt(np.diagonal(sse[i] * np.linalg.inv(np.dot(X.T, X))))
            for i in range(sse.shape[0])
        ])

        Ts = mdl.coef_ / se
        p = 2 * (1 - t.cdf(np.abs(Ts), y.shape[0] - X.shape[1]))
        return mdl.coef_.reshape(-1), Ts.reshape(-1), p.reshape(
            -1)  #coefficients, t-statistic and p-values
Example #48
def independent_ttest(data1, data2, alpha):
    # calculate means
    mean1, mean2 = data1.mean(), data2.mean()
    #   Standard deviation
    std1, std2 = data1.std(), data2.std()
    # Standard errors
    n1, n2 = len(data1), len(data2)
    print('n1: ' + str(n1))
    print('n2: ' + str(n2))
    se1, se2 = std1 / math.sqrt(n1), std2 / math.sqrt(n2)
    # standard error on the difference between the samples
    sed = math.sqrt(se1**2.0 + se2**2.0)
    # calculate the t statistic
    t_stat = (mean1 - mean2) / sed
    # degrees of freedom
    df = len(data1) + len(data2) - 2
    # calculate the critical value
    cv = t.ppf(1.0 - alpha, df)
    # calculate the p-value
    p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
    # return everything
    return t_stat, df, cv, p
Example #49
    def tExpectedImprovement(self, tau, mean, std, nu=3.0):
        """
        Expected Improvement acquisition function. Only to be used with `tStudentProcess` surrogate.

        Parameters
        ----------
        tau: float
            Best observed function evaluation.
        mean: float
            Point mean of the posterior process.
        std: float
            Point std of the posterior process.
        nu: float
            Degrees of freedom of the Student-t posterior.

        Returns
        -------
        float
            Expected improvement.
        """
        gamma = (mean - tau - self.eps) / (std + self.eps)
        return gamma * std * t.cdf(gamma, df=nu) + std * (1 + (gamma**2 - 1) /
                                                          (nu - 1)) * t.pdf(
                                                              gamma, df=nu)
Example #50
def h_ratio_fit(response, contrast) -> Tuple[int, OptimizeResult]:
    if isinstance(response[0], np.ndarray):
        y, sd = response
    else:
        y = np.nanmean(response, axis=0)
        sd = np.nanstd(response, axis=0)
    initial_values = np.array([2.0 * y[-1], 0.25, 4.0])
    bounds = ((0, None), (0, 1), (1.0, 10.0))
    fit = minimize(h_ratio_ml,
                   initial_values,
                   args=(contrast, y, sd),
                   method="SLSQP",
                   bounds=bounds)
    fit.evaluate = lambda a: h_ratio(fit.x, a)
    fit.df = len(y) - len(initial_values)
    jac = fit.jac.reshape(1, -1)
    fit.var = np.diag(pinv(jac.T.dot(jac)))
    fit.parameter_names = ['r_max', 'c_50', 'n']
    fit.compare = lambda x: (1 - t.cdf(
        np.abs(fit.x - x.x) / np.sqrt(fit.var / fit.df + x.var / x.df), fit.df
        + x.df)) * 2
    return fit
Example #51
def neuropowertable(request):

    # Get the template/step status
    sid = get_session_id(request)
    template = "neuropower/neuropowertable.html"
    context = {}

    # Initiate peak table
    peakform = PeakTableForm()
    form = peakform.save(commit=False)
    form.SID = sid

    # Load model data
    parsdata = ParameterModel.objects.filter(SID=sid)[::-1][0]

    # Compute peaks
    SPM = nib.load(parsdata.location).get_data()
    MASK = nib.load(parsdata.masklocation).get_data()
    if parsdata.ZorT == 'T':
        SPM = -norm.ppf(t.cdf(-SPM, df=float(parsdata.DoF)))
    peaks = cluster.cluster(SPM, float(parsdata.ExcZ), MASK)

    if len(peaks) < 30:
        context[
            "text"] = "There are too few peaks for a good estimation.  Either the ROI is too small or the screening threshold is too high."
        form.err = context["text"]
    else:
        pvalues = np.exp(-float(parsdata.ExcZ) *
                         (np.array(peaks.peak) - float(parsdata.ExcZ)))
        pvalues = [max(10**(-6), p) for p in pvalues]
        peaks['pval'] = pvalues
        form.data = peaks
        context["peaks"] = peaks.to_html(classes=["table table-striped"])
    form.save()

    # Get step status
    context["steps"] = get_neuropower_steps(template, sid)

    return render(request, template, context)
Example #52
def calculateFairness(communities, predictions):
    comm_count = {0: 0, 1: 0}
    predicted_count = {0: 0, 1: 0}

    for comm in predictions:
        comm_code = int(comm)
        if (communities[comm_code]['ethnicity'] == 0) or (communities[comm_code]['ethnicity'] == 1):
            comm_count[1] += 1
            predicted_count[1] += predictions[comm]
        else:
            comm_count[0] += 1
            predicted_count[0] += predictions[comm]

    df = comm_count[0]+comm_count[1]-2

    if (predicted_count[0] == 0) and (predicted_count[1] == 0):
        return 1

    means = {0: predicted_count[0]/comm_count[0], 1: predicted_count[1]/comm_count[1]}

    variances = {0: 0, 1: 0}

    for comm in predictions:
        comm_code = int(comm)
        if (communities[comm_code]['ethnicity'] == 0) or (communities[comm_code]['ethnicity'] == 1):
            variances[1] += (predictions[comm]-means[1])**2
        else:
            variances[0] += (predictions[comm]-means[0])**2

    variances = {0: variances[0]/(comm_count[0]-1), 1: variances[1]/(comm_count[1]-1)}

    sigma = ((((comm_count[0]-1)*variances[0])+((comm_count[1]-1)*variances[1]))/(comm_count[0]+comm_count[1]-2))**0.5  # pooled standard deviation

    t_stat = (means[0]-means[1])/(sigma*(((1/comm_count[0])+(1/comm_count[1]))**0.5))

    fairness = (1 - t.cdf(abs(t_stat), df)) * 2
    fairness = fairness*100

    return fairness
Example #53
def corr_dep_ttest(data1, data2, len_train_set, len_test_set, alpha):
    # Implementation of the corrected resampled t-test statistic
    # based on https://gist.github.com/jensdebruijn/13e8eeda85eb8644ac2a4ac4c3b8e732
    # confidence level 1 - alpha
    # alpha = 0.05
    n = len(data1)
    differences = [(data1[i] - data2[i]) for i in range(n)]
    if np.sum(differences) == 0:
        return np.nan, np.nan, np.nan, np.nan
    sd = stdev(differences)
    divisor = 1 / n * sum(differences)
    test_training_ratio = len_test_set / len_train_set
    denominator = sqrt(1 / n + test_training_ratio) * sd
    t_stat = divisor / denominator
    # degrees of freedom
    df = n - 1
    # calculate the critical value
    cv = t.ppf(1.0 - alpha, df)
    # calculate the p-value
    p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
    # return everything
    return t_stat, df, cv, p
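A usage sketch with hypothetical per-fold scores of two models on the same resamples (the train/test sizes are made up):

from math import sqrt
from statistics import stdev
import numpy as np
from scipy.stats import t

scores_a = [0.81, 0.79, 0.84, 0.80, 0.83]
scores_b = [0.78, 0.80, 0.79, 0.77, 0.82]
print(corr_dep_ttest(scores_a, scores_b, len_train_set=800, len_test_set=200, alpha=0.05))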
Example #54
    def test_tstudent(self):
        from scipy.stats import t
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1)

        df = 2.74
        mean, var, skew, kurt = t.stats(df, moments='mvsk')

        x = np.linspace(t.ppf(0.01, df), t.ppf(0.99, df), 100)
        ax.plot(x, t.pdf(x, df), 'r-', lw=5, alpha=0.6, label='t pdf')

        rv = t(df)
        ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

        vals = t.ppf([0.001, 0.5, 0.999], df)
        np.allclose([0.001, 0.5, 0.999], t.cdf(vals, df))

        r = t.rvs(df, size=1000)

        ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
        ax.legend(loc='best', frameon=False)
        self.assertEqual(str(ax), "AxesSubplot(0.125,0.11;0.775x0.77)")
Example #55
def one_sample_t(X, SigLevel):
    from scipy.stats import t 
    import numpy as np

    nume = np.mean(X, axis=2)    
    Xvar = np.var(X, axis=2)
    denume = np.sqrt(Xvar / X.shape[2])
    XT = np.divide(nume, denume) 
    PvalST = np.zeros(XT.shape)
    for ii in range(XT.shape[0]): 
        for jj in range(XT.shape[1]):
            temp = 1-t.cdf(XT[ii,jj], X.shape[2] - 1)
            if temp > 0.5 : 
                PvalST[ii, jj] = 1 - temp
            else: 
                PvalST[ii, jj] = temp
                
    SigST = 1.0 * (PvalST < SigLevel/2)
    nSigS  = np.sum(SigST, axis=0)     
    nSigT  = np.sum(SigST, axis=1) 
    statOut = {'T':XT,'Pval':PvalST,'Sig':SigST,'nSigS':nSigS,'nSigT':nSigT} 
    return  statOut #[XT, PvalST, SigST, nSig, nSigT]
Example #56
def compute_diff_mean(base_case, new_case):
    """
    This function computes the mean difference and applies t-test to compute
    the p value.
    @Input:
        base_case: base case data set
        new_case: new case data set
    @Output:
        diff: mean difference
        p_value: p value of the t-test
    """
    
    #compute the number of observations for both cases
    n_base = len(base_case)
    n_new = len(new_case)
    
    #compute the average
    average_base = np.mean(base_case.iloc[:,0])
    average_new = np.mean(new_case.iloc[:,0])
    
    #compute the standard deviation
    var_base = np.var(base_case.iloc[:,0])
    var_new = np.var(new_case.iloc[:,0])
    
    #compute the difference of deaths
    diff = average_new - average_base
    
    #compute t-score
    t_score =  np.absolute(diff)/np.sqrt(var_base/n_base+var_new/n_new)
    
    #compute degrees of freedom
    #df = ((var_base/n_base + var_new/n_new)**2)/(((var_base/n_base)**2)/(n_base-1) + ((var_new/n_new)**2)/(n_new-1))
    
    #compute the p-value
    p_value = t.cdf(t_score, min(n_base-1, n_new-1))
    
    #return result
    return diff, 2*(1-p_value)
Example #57
def show_bootstrap_statistics(clf, X, y, features):
    num_features = len(features)

    coefs = []
    for i in range(num_features):
        coefs.append([])

    for _ in range(BOOTSTRAP_ITERATIONS):
        X_sample, y_sample = resample(X, y)
        clf.fit(X_sample, y_sample)
        for i, c in enumerate(get_normalized_coefs(clf)):
            coefs[i].append(c)

    subpoi_index = features.index('SUBPOI')
    poi_index = features.index('COMPLEX_POI')
    building_index = features.index('Building')
    coefs[building_index] = coefs[subpoi_index]
    coefs[poi_index] = coefs[subpoi_index]

    intervals = []

    print()
    print('***** Bootstrap statistics *****')
    print('{:<20}{:<20}{:<10}{:<10}'.format('Feature', '95% interval',
                                            't-value', 'Pr(>|t|)'))
    print()
    for i, cs in enumerate(coefs):
        values = np.array(cs)
        lo = np.percentile(values, 2.5)
        hi = np.percentile(values, 97.5)
        interval = '({:.3f}, {:.3f})'.format(lo, hi)
        tv = np.mean(values) / np.std(values)
        pr = (1.0 - t.cdf(x=abs(tv), df=len(values))) * 2.0  # two-sided Pr(>|t|)

        stv = '{:.3f}'.format(tv)
        spr = '{:.3f}'.format(pr)
        print('{:<20}{:<20}{:<10}{:<10}'.format(features[i], interval, stv,
                                                spr))
Example #58
def compute_waitlist_death_diff(base_case, new_case):
    """
    This function computes the difference of deaths between the base case and
    another case. It applies a t-test to compute the p-value.
    @Input:
        @base_case: base case death data set
        @new_case: new case death data set
    @Output:
        @diff: death difference
        @p_value: p value of the test
    """
    
    #count the number of observations in each case
    n_base = len(base_case)
    n_new = len(new_case)
    
    #compute the average number of deaths
    average_base = np.mean(base_case)
    average_new = np.mean(new_case)
    
    #compute the variance of deaths
    var_base = np.var(base_case)
    var_new = np.var(new_case)
    
    #compute the difference of deaths
    diff = average_new - average_base
    
    #compute the t score
    t_score =  np.absolute(diff)/np.sqrt(var_base/n_base+var_new/n_new)
    
    #compute degrees of freedom
    #df = ((var_base/n_base + var_new/n_new)**2)/(((var_base/n_base)**2)/(n_base-1) + ((var_new/n_new)**2)/(n_new-1))
    
    #compute p_value
    p_value = t.cdf(t_score, min(n_base-1, n_new-1))
    
    #return results
    return diff, 2*(1-p_value)
Example #59
def whelchs_t(a_mu, a_var, b_mu, b_var, a_n, b_n):
    """

    :param np.ndarray a_mu:
    :param np.ndarray a_var:
    :param np.ndarray b_mu:
    :param np.ndarray b_var:
    :param int a_n:
    :param int b_n:
    :return: median |t| statistic, two-sided p-value, and 95% CI of |t|
    """
    df = whelch_satterthwaite_df(a_var, b_var, a_n, b_n)
    numerator = a_mu - b_mu  # (samples, genes)
    denominator = np.sqrt(a_var + b_var)  # (samples, genes)
    statistic = numerator / denominator  # (samples, genes)

    # statistic has NaNs where there are no observations of a or b (division by zero)
    statistic[np.isnan(statistic)] = 0
    median_statistic = np.median(np.abs(statistic), axis=0)
    p = (1 - t.cdf(median_statistic, df)) * 2  # p-value
    ci_95 = np.percentile(np.abs(statistic), [2.5, 97.5], axis=0).T

    return median_statistic, p, ci_95
Example #60
def dependent_ttest(data1, data2, alpha):
	# calculate means
	mean1, mean2 = mean(data1), mean(data2)
	# number of paired samples
	n = len(data1)
	# sum squared difference between observations
	d1 = sum([(data1[i]-data2[i])**2 for i in range(n)])
	# sum difference between observations
	d2 = sum([data1[i]-data2[i] for i in range(n)])
	# standard deviation of the difference between means
	sd = sqrt((d1 - (d2**2 / n)) / (n - 1))
	# standard error of the difference between the means
	sed = sd / sqrt(n)
	# calculate the t statistic
	t_stat = (mean1 - mean2) / sed
	# degrees of freedom
	df = n - 1
	# calculate the critical value
	cv = t.ppf(1.0 - alpha, df)
	# calculate the p-value
	p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
	# return everything
	return t_stat, df, cv, p
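A usage sketch with made-up paired observations (assumed; mean, sqrt and t must be imported for the function to run):

from math import sqrt
from numpy import mean
from scipy.stats import t

before = [88, 92, 85, 91, 87, 90]
after = [86, 93, 83, 90, 85, 88]
t_stat, df, cv, p = dependent_ttest(before, after, 0.05)
print(t_stat, df, cv, p)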