def reg(x, y):
    """Fit ``y = b0 + x . b`` by ordinary least squares.

    A column of ones is prepended to ``x``, so the first coefficient is the
    intercept.

    Parameters
    ----------
    x : 2-D array, shape (n_obs, n_regressors)
        Regressors, without the constant column.
    y : 1-D array, length n_obs
        Response.

    Returns
    -------
    beta : array of length n_regressors + 1 (intercept first)
    p_values : two-sided p-values of the t-tests ``beta_i == 0``, one per
        coefficient (a list on success).

    If the fit fails (for example a singular ``x'x``), the error is printed
    and arrays of zeros are returned for both outputs.
    """
    try:
        n_obs, n_reg = x.shape
        design = numpy.column_stack((numpy.ones(n_obs), x))
        xtx_inv = numpy.linalg.inv(numpy.dot(design.T, design))
        beta = numpy.dot(numpy.dot(xtx_inv, design.T), y)
        resid = y - numpy.dot(design, beta.T)
        # The intercept is an estimated parameter too, so the residual
        # degrees of freedom are n - k - 1; the same value must be used for
        # the error variance and for the t distribution.
        dof = n_obs - n_reg - 1
        var_cov = numpy.dot(resid.T, resid) * xtx_inv / dof
        std_err = numpy.diagonal(var_cov) ** 0.5
        t_stat = beta / std_err
        # sf(|t|) avoids the cancellation of 1 - cdf(t) for large |t|.
        p_values = [2 * t.sf(abs(stat), dof) for stat in t_stat]
    except Exception as err:
        n_coef = x.shape[1] + 1
        beta = numpy.zeros(n_coef)
        p_values = numpy.zeros(n_coef)
        print("Error: " + str(err) + " occurs when conducting OLS regression")
    return beta, p_values
def t_to_z(mr, dof):
    """Convert the non-zero voxels of a T-statistic image to Z-scores.

    ``mr`` is an image object providing ``get_data()``, ``shape``,
    ``get_affine()`` and ``get_header()``; ``dof`` is the degrees of freedom
    of the t distribution. Returns a new Nifti1 image with the same affine
    and header. Voxels that are exactly zero stay zero.
    """
    voxels = mr.get_data()
    mask = voxels != 0
    t_vals = voxels[mask]

    below = t_vals <= 0
    above = t_vals > 0

    z_vals = np.zeros(len(t_vals))
    # Always evaluate the lower tail so that strongly positive values do not
    # saturate at cdf == 1; positive values are mirrored and the sign restored.
    z_vals[below] = norm.ppf(t.cdf(t_vals[below], df = dof))
    z_vals[above] = -norm.ppf(t.cdf(-t_vals[above], df = dof))

    z_volume = np.zeros(mr.shape)
    z_volume[mask] = z_vals
    return nib.nifti1.Nifti1Image(z_volume,
                                  affine=mr.get_affine(),
                                  header=mr.get_header())
def TtoZ(t_stat_map,output_nii,dof):
    '''Convert a T-statistic image file to a Z-score image file.

    For details see https://github.com/vsoch/TtoZ (also provided for the
    command line).

    t_stat_map: file path to t stat image
    output_nii: output nifti file
    dof: degrees of freedom (typically number subjects - 2)
    '''
    print("Converting map %s to Z-Scores..." %(t_stat_map))

    mr = nibabel.load(t_stat_map)
    volume = mr.get_data()

    # Only non-zero voxels are converted; zeros are left untouched.
    inside = volume != 0
    t_values = volume[inside]
    is_neg = t_values <= 0
    is_pos = t_values > 0

    # Both halves go through the lower tail of the t distribution so that
    # precision is kept for large |t|.
    z_neg = norm.ppf(t.cdf(t_values[is_neg], df = dof))
    z_pos = -norm.ppf(t.cdf(-t_values[is_pos], df = dof))

    Z = np.zeros(len(t_values))
    Z[is_neg] = z_neg
    Z[is_pos] = z_pos

    # Write new image to file
    z_volume = np.zeros(mr.shape)
    z_volume[inside] = Z
    z_image = nibabel.nifti1.Nifti1Image(z_volume,
                                         affine=mr.get_affine(),
                                         header=mr.get_header())
    nibabel.save(z_image,output_nii)
def t_uneqvar(list_1, list_2, **kwargs):
    """
    Performs a t-test without the equal variance assumption of Student's t.
    For example, see:
    Ruxton, G. D. (2006). The unequal variance t-test is an alternative to
    Student's t-test and the Mann-Whitney U test.
    Behavioral Ecology, 17(4), 688-690.

    Arguments:
     list_1, list_2: list of values from the first and final condition,
      respectively
     two_tailed (keyword): passed through ``test_kwarg`` with the allowed
      values [True, False]; presumably defaults to True - confirm against
      ``test_kwarg``.

    Returns:
     a dict containing keys:
      'p': the p-value; two-tailed when ``two_tailed`` is true, otherwise the
       one-sided value cdf((mean_1 - mean_2) / se).
       Two-tailed p-values are preferred to avoid numerical issues
       associated with highly significant p-values.
      'dir': the direction, '+' when mean(list_2) > mean(list_1), else '-'
      't': the t statistic, signed as (final - first)
      'df': the calculated (Welch-Satterthwaite) degrees of freedom
    """
    from scipy.stats import t
    from numpy import std, mean
    from math import sqrt

    two_tailed = test_kwarg('two_tailed', kwargs, [True, False])

    var_1 = (std(list_1, ddof = 1))**2
    var_2 = (std(list_2, ddof = 1))**2
    the_u = var_2 / var_1
    n_1 = len(list_1) * 1.
    n_2 = len(list_2) * 1.
    df = (1./n_1 + the_u/n_2)**2/(1/(n_1**2*(n_1-1)) + the_u**2/(n_2**2*(n_2-1)))

    # Use 1 - 2 here before calculating p so more positive changes
    # correspond to a smaller p.
    t_val = (mean(list_1) - mean(list_2)) / sqrt((var_1 / n_1) + (var_2 / n_2))
    # One-sided p
    the_p = t.cdf(t_val, df)
    # From here on t is reported as (final - first).
    t_val = -1. * t_val

    # The direction is needed for both one- and two-tailed results; it used
    # to be assigned only inside the two-tailed branch, so a one-tailed call
    # failed with a NameError.
    the_dir = '+' if t_val > 0 else '-'
    if two_tailed:
        if t_val > 0:
            the_p = 2. * the_p
        else:
            # It is numerically preferable to avoid
            # y = 2 * (1 - x) in case x is close to zero
            the_p = 2. * t.cdf(t_val, df)

    return {'t': t_val, 'p': the_p, 'df': df, 'dir': the_dir}
def show_bootstrap_statistics(clf, X, y, features):
    """Print bootstrap statistics for the normalized coefficients of ``clf``.

    The classifier is refitted ``BOOTSTRAP_ITERATIONS`` times on resampled
    data. For every feature a 95% percentile interval, a t-value
    (mean / std of the bootstrap coefficients) and the two-sided
    ``Pr(>|t|)`` are printed.

    ``features`` must contain the entries 'POI' and 'Building'.
    """
    coefs = [[] for _ in features]
    for _ in range(BOOTSTRAP_ITERATIONS):
        X_sample, y_sample = resample(X, y)
        clf.fit(X_sample, y_sample)
        for i, c in enumerate(get_normalized_coefs(clf)):
            coefs[i].append(c)

    poi_index = features.index('POI')
    building_index = features.index('Building')
    # NOTE(review): the 'Building' coefficients are replaced by the 'POI'
    # ones before reporting - kept as in the original, confirm this is
    # intended.
    coefs[building_index] = coefs[poi_index]

    row = '{:<20}{:<20}{:<10}{:<10}'
    print()
    print('***** Bootstrap statistics *****')
    print(row.format('Feature', '95% interval', 't-value', 'Pr(>|t|)'))
    print()
    for i, cs in enumerate(coefs):
        values = np.array(cs)
        lo = np.percentile(values, 2.5)
        hi = np.percentile(values, 97.5)
        interval = '({:.3f}, {:.3f})'.format(lo, hi)
        tv = np.mean(values) / np.std(values)
        # Pr(>|t|) is the two-sided tail probability: twice the upper tail
        # beyond |t| (the previous code halved the tail instead).
        pr = 2.0 * t.sf(abs(tv), df=len(values))
        stv = '{:.3f}'.format(tv)
        spr = '{:.3f}'.format(pr)
        print(row.format(features[i], interval, stv, spr))
def neuropowertable(request):
    """Render the peak table page for the current session.

    Looks up the stored parameters for the session, converts the statistic
    map to Z-scores when it is a T-map, runs the peak extraction and, when
    at least 30 peaks are found, attaches p-values, stores the table and adds
    it (as HTML) to the template context.

    NOTE(review): the source was recovered from a whitespace-collapsed line;
    the nesting below is the most plausible reconstruction - confirm against
    the original file.
    """
    # Get the template/step status
    sid = get_session_id(request)
    template = "neuropower/neuropowertable.html"
    steps = get_neuropower_steps(template,sid)
    context = {"steps":steps}
    if not ParameterModel.objects.filter(SID=sid):
        # Should not be able to reach this condition
        context["text"] = "No data found. Go to 'Input' and fill out the form."
        return render(request,template,context)
    else:
        sid = request.session.session_key #why are we getting session id again?
        # Reverse the query result and take the first element, i.e. the last
        # row returned for this session.
        parsdata = ParameterModel.objects.filter(SID=sid)[::-1][0]
        SPM = nib.load(parsdata.location).get_data()
        if parsdata.ZorT == 'T':
            # T -> Z through the lower tail of the mirrored statistic.
            SPM = -norm.ppf(t.cdf(-SPM,df=float(parsdata.DoF)))
        # Presumably writes the peak table to parsdata.peaktable, which is
        # read back on the next line - verify against cluster.cluster.
        cluster.cluster(SPM,parsdata.ExcZ,parsdata.peaktable)
        peaks = pd.read_csv(parsdata.peaktable,sep="\t")
        if len(peaks) < 30:
            context["text"] = "There are too few peaks for a good estimation. Either the ROI is too small or the screening threshold is too high."
        else:
            # p = exp(-u * (peak - u)) with u the excursion threshold,
            # floored at 1e-6.
            pvalues = np.exp(-float(parsdata.ExcZ)*(np.array(peaks.peak)-float(parsdata.ExcZ)))
            pvalues = [max(10**(-6),p) for p in pvalues]
            peaks['pval'] = pvalues
            peakform = PeakTableForm()
            form = peakform.save(commit=False)
            form.SID = sid
            form.data = peaks
            form.save()
            context["peaks"] = peaks.to_html(classes=["table table-striped"])
    return render(request,template,context)
def GeneratePDF(Data, method = 'Robust_Student_t', lower_threshold = 0.15, upper_threshold = 0.85):
    '''Generate the pdf estimate of the data.

    Input:
        /Data/ data to estimate the pdf on
        /method/ Method of estimation.
            Available methods: 'Robust_Student_t'; 'KDE'; 'Normal'
        /lower_threshold/ lower tail probability, as a fraction in [0, 1]
        /upper_threshold/ upper tail probability, as a fraction in [0, 1]
    Output:
        /x/ 100-point grid spanning [min(Data), max(Data)]
        /pdf/ fitted pdf evaluated on x
        /cdf/ fitted cdf evaluated on x
        /lower/ quantile of the fitted distribution at lower_threshold
        /upper/ quantile of the fitted distribution at upper_threshold
    Raises:
        ValueError for an unknown method.
    '''
    x = np.linspace(min(Data), max(Data), 100)
    if method == 'Robust_Student_t':
        nu, mu, sigma = uvtfit(Data)
        pdf = t.pdf(x, nu, mu, sigma)
        cdf = t.cdf(x, nu, mu, sigma)
        lower = t.ppf(lower_threshold, nu, mu, sigma)
        upper = t.ppf(upper_threshold, nu, mu, sigma)
    elif method == 'Normal':
        mu, sigma = norm.fit(Data)
        pdf = norm.pdf(x, mu, sigma)
        cdf = norm.cdf(x, mu, sigma)
        lower = norm.ppf(lower_threshold, mu, sigma)
        upper = norm.ppf(upper_threshold, mu, sigma)
    elif method == 'KDE':
        kernal = gaussian_kde(Data)
        pdf = kernal.evaluate(x)
        # True cdf on the same grid as x and pdf (the previous version
        # integrated from x[0] and returned one point fewer than x).
        cdf = np.array([kernal.integrate_box_1d(-np.inf, xi) for xi in x])
        # Invert the cdf numerically to obtain quantiles, as the other
        # methods do with ppf. The grid is padded by a few kernel widths
        # because the KDE has mass outside the data range.
        pad = 4.0 * np.sqrt(kernal.covariance[0, 0])
        grid = np.linspace(x[0] - pad, x[-1] + pad, 512)
        grid_cdf = np.array([kernal.integrate_box_1d(-np.inf, g) for g in grid])
        lower = np.interp(lower_threshold, grid_cdf, grid)
        upper = np.interp(upper_threshold, grid_cdf, grid)
    else:
        raise ValueError("Unknown method %r; expected 'Robust_Student_t', "
                         "'KDE' or 'Normal'" % (method,))
    return x, pdf, cdf, lower, upper
def compute_zscore(self):
    """Compute a paired z-score, t statistic and two-tailed p-value.

    Uses the per-trial difference between the peri-stimulus and background
    rates of ``self.trials`` and stores the results in ``self.zscore``,
    ``self.tstat`` and ``self.pval``.
    """
    # get background and peri rates (the loop variable is not called `t`
    # so that it cannot shadow a module-level name on Python 2)
    bg_rates = np.array([trial.bg_rate for trial in self.trials])
    peri_rates = np.array([trial.peri_rate for trial in self.trials])

    bg_counts = np.array([trial.bg_count for trial in self.trials])
    peri_counts = np.array([trial.peri_count for trial in self.trials])

    rate_diff = peri_rates - bg_rates
    count_diff = peri_counts - bg_counts

    if peri_counts.sum() + bg_counts.sum() < len(self.trials):
        # Fewer events than trials: report "no effect".
        pval = 0.5  # kludge
        z = 0.0
        tstat = 0.0
    else:
        rate_diff_std = rate_diff.std(ddof=1)
        count_diff_std = count_diff.std(ddof=1)
        if rate_diff_std == 0.0:
            # Single-argument print call works on Python 2 and Python 3.
            print('Very strange that this happenend, rate_diff_std=%0.3f, count_diff_std=%0.3f, stim_num=%d' % (rate_diff_std, count_diff_std, self.stim_number))
            rate_diff_std = 1.0
        z = rate_diff.mean() / rate_diff_std
        tstat = z*np.sqrt(len(bg_rates))
        # two-tailed t-test p-value; sf avoids the cancellation of 1 - cdf
        pval = 2 * tdist.sf(np.abs(tstat), len(bg_rates)-1)

    self.zscore = z
    self.tstat = tstat
    self.pval = pval
def t_stat(X, c, beta, MRSS, df):
    """Compute t statistics and upper-tail p-values for a contrast.

    Parameters
    ----------
    X : 2D array (n_trs x number of regressors)
        Design matrix.
    c : 1D array-like
        Contrast vector.
    beta : 2D array (number of regressors x n_vols)
        Estimated betas of the linear model.
    MRSS : 1D array of length n_vols
        Mean residual sum of squares.
    df : int
        n - rank of X.

    Returns
    -------
    t : array
        t statistics for each voxel (one row).
    p : array
        Upper-tail p-value for each voxel.
    """
    design = np.asarray(X)
    contrast = np.atleast_2d(c).T

    # Denominator of the t statistic: standard error of the contrast.
    design_var = contrast.T.dot(npl.pinv(design.T.dot(design)).dot(contrast))
    std_err = np.sqrt(MRSS * design_var)
    t_values = contrast.T.dot(beta) / std_err

    # Upper tail of the t distribution, from its cumulative distribution
    # function.
    lower_tail = t_dist.cdf(t_values, df)
    return t_values, 1 - lower_tail
def pcor(X,Y,Z):
    """
    Compute the correlation of X and Y conditional on Z (partial
    correlation) and its two-sided p-value.

    X, Y and Z are regressed on by least squares (Z -> X and Z -> Y) and the
    first columns of the two residuals are correlated. With ``Z is None``
    the plain Pearson correlation of X and Y is returned.
    """
    if X.ndim==1:
        X = X[:,SP.newaxis]
    if Y.ndim==1:
        Y = Y[:,SP.newaxis]
    if Z is None:
        return STATS.pearsonr(X,Y)
    if Z.ndim==1:
        Z = Z[:,SP.newaxis]

    n_obs = X.shape[0]

    # Remove the part of X and Y that is linearly explained by Z.
    coef_x = LA.lstsq(Z,X)[0]
    coef_y = LA.lstsq(Z,Y)[0]
    resid_x = X - SP.dot(Z,coef_x)
    resid_y = Y - SP.dot(Z,coef_y)
    corr_cond = SP.corrcoef(resid_x[:,0],resid_y[:,0])[0,1]

    # degrees of freedom: samples minus conditioning dimension minus 2
    df = max(n_obs - Z.shape[1] - 2,0)

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        # t statistic of a correlation: sqrt(df) * r / sqrt(1 - r^2)
        ratio = corr_cond / SP.sqrt(1.0 - corr_cond ** 2)
        tstat = math.sqrt(df) * ratio
        pv_cond = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)

    return corr_cond,pv_cond
def t_stat(data, X_matrix):
    """
    Return the estimated betas, t-values, degrees of freedom, and p-values
    for the glm_multi regression.

    Parameters
    ----------
    data: numpy array of 4 dimensions
        The image data of one subject, one run
    X_matrix: numpy array
        The design matrix for glm_multi

    Note that the last dimension of `data` (time or the number of volumes)
    must be the same as the number of rows that X has.

    Returns
    -------
    beta: estimated beta values
    t: t-values of the betas (0 where the standard error is 0)
    df: degrees of freedom
    p: upper-tail p-values of |t| for the given degrees of freedom
    """
    beta = glm_beta(data, X_matrix)

    # Calculate the parameters - b hat
    beta = np.reshape(beta, (-1, beta.shape[-1])).T
    fitted = X_matrix.dot(beta)
    # Residual error
    y = np.reshape(data, (-1, data.shape[-1]))
    errors = y.T - fitted
    # Residual sum of squares
    RSS = (errors**2).sum(axis=0)
    df = X_matrix.shape[0] - npl.matrix_rank(X_matrix)
    # Mean residual sum of squares
    MRSS = RSS / df

    # Standard error of beta_i in each voxel: sqrt(MRSS * (X'X)^+[i, i]).
    # The pseudo-inverse is computed once instead of once per regressor.
    Cov_beta = npl.pinv(X_matrix.T.dot(X_matrix))
    SE = np.sqrt(np.outer(np.diag(Cov_beta), MRSS))

    # Where the standard error is zero the t-value is defined as 0. A
    # boolean mask is used; the earlier `t[:, zeros] = 0` indexed columns
    # with the index tuple from np.where and zeroed the wrong elements.
    zero_se = SE == 0
    SE[zero_se] = 1
    t_values = beta / SE
    t_values[zero_se] = 0

    # Upper tail of the t distribution beyond |t|.
    p = t_dist.sf(abs(t_values), df)
    return beta.T, t_values, df, p
def corrParallel(X,Y=None,df=None):
    """
    Compute the m x k correlation matrix between the rows of the m x n
    matrix X and the rows of the k x n matrix Y, with two-sided p-values.

    X, Y : arrays with one variable per row and one sample per column.
           With ``Y is None`` the call is forwarded to ``corrParallelSym``.
    df   : degrees of freedom of the t-test; defaults to max(n - 2, 0).

    Returns (corr, pv). The inputs are not modified.
    """
    if Y is None:
        return corrParallelSym(X,df=df)
    assert X.shape[1]==Y.shape[1], 'ouch, samples do not match'
    nSamples = X.shape[1]

    # Work on float copies: X.T / Y.T are views, so standardising them in
    # place used to overwrite the caller's arrays (and failed for integers).
    Xstd = SP.array(X.T, dtype=float)
    Xstd -= Xstd.mean(0)
    Xstd /= Xstd.std(0)
    Ystd = SP.array(Y.T, dtype=float)
    Ystd -= Ystd.mean(0)
    Ystd /= Ystd.std(0)

    corr = SP.dot(Xstd.T,Ystd)/nSamples
    if df is None:
        df = max(nSamples - 2,0)  # degrees of freedom
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        # t statistic of a correlation: sqrt(df) * r / sqrt(1 - r^2)
        tstat = corr / SP.sqrt(1.0 - corr ** 2)
        tstat = math.sqrt(df) * tstat
        pv = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)  # two-sided p value
    return corr,pv
def t_fun():
    """Print two example values of the t distribution with 1 degree of freedom.

    First the cumulative probability up to 3.0777, then the end points of
    the central interval returned by ``t.interval(0.95, 1)``.
    """
    # accumulate from -infinity to 3.077
    upper_point = 3.0777
    print(t.cdf(upper_point, df=1))

    # probability of middle
    low_end, high_end = t.interval(0.95, 1)
    print(low_end, high_end)
def _t(M, Rho, nu):
    """Map multivariate t draws through the univariate t cdf.

    Calls ``mvt.multivariate_t_rvs`` with a zero mean vector, ``Rho``,
    ``nu`` and ``M`` (presumably the number of draws - confirm against
    ``mvt``), and returns ``t.cdf`` of the draws with ``nu`` degrees of
    freedom.
    """
    dim = Rho.shape[0]
    zero_mean = np.zeros(dim)
    draws = mvt.multivariate_t_rvs(zero_mean, Rho, nu, M)
    return t.cdf(draws, nu)
def sample_procedure(dist1, dist2):
    """Estimate how often repeated t-tests reject at ``alpha`` and at the
    Bonferroni-corrected level.

    For each of ``trials`` runs, up to ``repeats`` pairs of samples of size
    ``sample_size`` are drawn (with replacement) from ``dist1`` and
    ``dist2`` and compared with a two-sided t-test. A run counts once for
    ``alpha`` if any test has p < alpha, and once for ``bonferroni`` if any
    test has p < bonferroni (the run stops there). The two rejection rates
    are printed.

    ``trials``, ``repeats``, ``sample_size``, ``alpha``, ``bonferroni`` and
    ``t_test`` are taken from the enclosing module.
    """
    non_bon = 0
    bon = 0
    for _trial in range(trials):
        alpha_pending = True
        for _ in range(repeats):
            # First sample
            sample1 = np.random.choice(dist1, (sample_size, ))
            mean1 = np.mean(sample1)
            std1 = np.std(sample1)

            # Second sample
            sample2 = np.random.choice(dist2, (sample_size, ))
            mean2 = np.mean(sample2)
            std2 = np.std(sample2)

            # T test
            result = t_test(mean1, mean2, std1, std2, sample_size, sample_size)
            # Two-sided p-value: the tail beyond |t|. Without abs() a
            # negative statistic produced "p-values" above 1.
            p_value = 2 * t.sf(abs(result), sample_size - 1)

            if p_value < alpha and alpha_pending:
                non_bon += 1
                alpha_pending = False
            if p_value < bonferroni:
                bon += 1
                break

    # Float division so that the rates are not truncated to 0 on Python 2;
    # single-argument print calls behave the same on Python 2 and 3.
    print("Time of reject in alpha:  %s" % (non_bon / float(trials)))
    print("Time of reject in bonferroni:  %s" % (bon / float(trials)))
def t_test2(data1,data2):
    """
    Compute the t test for two samples with significantly different
    variances (use f_test_var) to test if they have the same mean
    (Welch's unequal-variance t test).

    H0: samples have same means (p-value close to one).

    Parameters
    ----------
    data1: n,1 - dim array with data
    data2: n,1 - dim array with data

    Returns
    -------
    Two-sided p-value of the t test, the t value itself and the
    (Welch-Satterthwaite) degrees of freedom.

    Notes
    -----
    See 3rd Edition of Numerical recipes chapter 14.2.1, p.728
    """
    N1, N2 = len(data1), len(data2)
    mean1, mean2 = np.mean(data1), np.mean(data2)
    var1, var2 = np.var(data1, ddof=1), np.var(data2, ddof=1)

    # Eq. 14.2.3
    T = (mean1 - mean2) / np.sqrt(var1/N1 + var2/N2)
    df = (var1/N1 + var2/N2)**2. / ((var1/N1)**2./(N1 - 1) + (var2/N2)**2./(N2 - 1))

    # The documented result is a p-value that is close to one under H0, i.e.
    # the two-sided tail probability - not the cdf at T, which is 0.5 under
    # H0 and depends on the order of the samples.
    return 2 * t.sf(abs(T), df), T, df
def t_test1(data1,data2):
    """
    Compute the t test for two samples with the same variance to test if
    they have the same mean (Student's pooled-variance t test).

    H0: samples have same means (p-value close to one).

    Parameters
    ----------
    data1: n,1 - dim array with data
    data2: n,1 - dim array with data

    Returns
    -------
    Two-sided p-value of the t test, the t value itself and the degrees of
    freedom.

    Notes
    -----
    See 3rd Edition of Numerical recipes chapter 14.2.1, p.727
    """
    if not isinstance(data1,np.ndarray):
        data1 = np.array(data1)
    if not isinstance(data2,np.ndarray):
        data2 = np.array(data2)

    N1, N2 = len(data1), len(data2)
    mean1, mean2 = np.mean(data1), np.mean(data2)
    dof = N1 + N2 - 2

    # Eq. 14.2.1: standard error of the difference of means from the pooled
    # variance.
    sum_sq = np.sum((data1 - mean1) ** 2.) + np.sum((data2 - mean2) ** 2.)
    sD = np.sqrt(sum_sq / (N1 + N2 - 2.) * (1./N1 + 1./N2))
    T = (mean1 - mean2) / sD

    # Two-sided tail probability (close to one under H0), instead of the cdf
    # at T that was returned before.
    return 2 * t.sf(abs(T), dof), T, dof
def calc_cdf(mu_null, n, mean, stddev):
    """Return the cdf value of the one-sample t statistic.

    The statistic is (mean - mu_null) / (stddev / sqrt(n)). For n > 50 the
    standard normal cdf (``normcdf``) is used, otherwise the t distribution
    with n - 1 degrees of freedom.
    """
    std_err = stddev / math.sqrt(n)
    one_sample_t = (mean - mu_null) / std_err
    if n > 50:
        return normcdf(one_sample_t, 0., 1.)
    return t.cdf(one_sample_t, n - 1)
def tdist(var, year, x, eu=None):
    """Return the two-tailed t-distribution probability of ``x``.

    The degrees of freedom are n - 2, where n is the length of the first
    sequence returned by ``seznam_vzorec(var, year, eu=eu)``. The result
    corresponds to the spreadsheet formula ``=TDIST(|x|, n-2, 2)``.
    """
    s, f = seznam_vzorec(var, year, eu=eu)
    dof = len(s) - 2
    upper_tail = 1 - t.cdf(abs(x), dof)
    return upper_tail * 2
def solve_t(t_value=None, f=None, p=None):
    """Solve for the one missing quantity of a t-distribution problem.

    Exactly one of ``t_value`` (the t value), ``f`` (degrees of freedom) and
    ``p`` (cumulative probability) is expected to be ``None``; the arguments
    are first checked by ``max_1_none``.

    - ``t_value`` missing: returns ``t(f, p)``.
    - ``f`` missing: raises NotImplementedError.
    - ``p`` missing: returns the cdf at ``t_value`` with ``f`` degrees of
      freedom, wrapped with ``sympify``.
    - nothing missing: returns None.
    """
    max_1_none(t_value, f, p)
    if t_value is None:
        return t(f, p)
    if f is None:
        # `NotImplemented` is a comparison sentinel and is not callable;
        # the exception class is NotImplementedError.
        raise NotImplementedError("Not implemented yet - sorry")
    if p is None:
        return sympify(sci_t.cdf(float(t_value), float(f)))
    return None
def corrsig(N, c=None, p=.95):
    """Return the t-distribution cdf value of a correlation coefficient.

    N : number of samples (the test has N - 2 degrees of freedom)
    c : correlation coefficient; when given, the statistic
        c / sqrt((1 - c**2) / (N - 2)) is evaluated with ``t.cdf``.
    p : currently unused (the cutoff query is not implemented).

    Returns the cdf value, or None (after printing a notice) when ``c`` is
    not given.
    """
    import numpy as np
    from scipy.stats import t
    if c is not None:
        return t.cdf(c/np.sqrt((1-c**2)/(N-2)), N-2)
    # Single-argument print call: valid on both Python 2 and Python 3.
    print("functionality not implemented yet, please query a correlation")
    return None
def log_principal_anomaly(x, N, Q, S):
    """Return -log of the two-sided t tail probability of ``x``.

    x : observed value
    N : number of samples (must be more than 3)
    Q : sum of squares of the samples
    S : sum of the samples

    The value is folded onto the lower side of the sample mean and compared
    against a t distribution with N - 1 degrees of freedom, located at the
    mean and scaled by sqrt(max(0, N*Q - S**2) / ((N + 1) * (N - 3))).

    Raises ValueError when the scale is zero (no spread in the samples).
    """
    assert N > 3, "N must be more than 3, is %r" % N

    mean = float(S) / N
    val = mean - abs(mean - x)
    scale = sqrt(max(0, (float(N) * Q - pow(S, 2))) / ((N + 1) * (N - 3)))
    if scale == 0:
        # Raising a tuple is itself a TypeError; raise a real exception
        # that carries the same information.
        raise ValueError("Scale is 0! N=%r, Q=%r, S=%r" % (N, Q, S))

    t_cdf = t.cdf(val, N - 1, loc=mean, scale=scale)
    return -log(2 * t_cdf)
def simulateCopula(simulations=10, type=str('g'), rho=float, lamda=tuple, tDof=4, basketSize=5, useGPU=False):
    """Simulate default times for a basket of five names with a one-factor
    copula.

    simulations : number of simulated scenarios (rows of the result)
    type        : 'g' for Gaussian, 't' for the t variant
    rho         : factor loading used for names 2..5
    lamda       : sequence of five hazard rates, one per name
    tDof        : degrees of freedom of the chi-square draws ('t' only)
    basketSize  : number of draws requested; the code unpacks exactly five
                  values, so this must be 5
    useGPU      : draw the Gaussian numbers with ``rng`` instead of numpy

    Returns a DataFrame with the columns z1..z5 (raw draws), x1..x5
    (correlated variables), u1..u5 (cdf values) and tau1..tau5 (default
    times, tau = -log(1 - u) / lambda).

    NOTE(review): ``rho`` and ``lamda`` default to the *types* ``float`` and
    ``tuple`` and therefore have to be passed explicitly. For type 't' the
    draws are chi-square variables mapped through ``t.cdf(x, 1)`` (tDof is
    not used there) - kept as in the original, confirm the intended
    construction.
    """
    result = []
    # tau = F^{-1}(u) = -log(1 - u) / lambda
    print('simulating t distribution' if type == 't' else 'simulating gaussian dist')
    for _ in range(simulations):
        # for the t distribution we use the same method but
        # sample from the chisquared distribution
        # if GPU is enabled, hand over to GPU to provide random number sample
        if useGPU and type == 'g':
            z1, z2, z3, z4, z5 = rng.getPseudoRandomNumbers_Standard_cuda(basketSize)
        else:
            z1, z2, z3, z4, z5 = random.chisquare(tDof, size=basketSize) if type == 't' else random.normal(size=5)

        x1 = z1
        # using factorised copula procedure
        # A_i = w_i * Z + sqrt(1 - w_i^2) * eps_i
        x2, x3, x4, x5 = [z1 * rho + sqrt(1 - square(rho)) * zn for zn in [z2, z3, z4, z5]]

        # converting to uniform variables via the cdf of the relevant
        # distribution
        xs = [x1, x2, x3, x4, x5]
        if type == 't':
            us = [t.cdf(x, 1) for x in xs]
        else:
            us = [norm.cdf(x) for x in xs]
        u1, u2, u3, u4, u5 = us

        # tau_i = -log(1 - u_i) / lambda_i
        tau1, tau2, tau3, tau4, tau5 = [-log(1 - ui) / lamda[index] for index, ui in enumerate(us)]

        result.append({'z1': z1, 'z2': z2, 'z3': z3, 'z4': z4, 'z5': z5,
                       'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4, 'x5': x5,
                       'u1': u1, 'u2': u2, 'u3': u3, 'u4': u4, 'u5': u5,
                       'tau1': tau1, 'tau2': tau2, 'tau3': tau3, 'tau4': tau4, 'tau5': tau5,
                       })
    return DataFrame(result)
def main():
    """Command-line entry point: convert a T-score map to a Z-score map.

    Reads the T map given on the command line, converts every non-zero voxel
    to a Z-score using the given degrees of freedom and saves the result to
    ``--output_nii`` (default ``z_stat_map.nii``).
    """
    parser = argparse.ArgumentParser(
        description="Convert a whole brain T score map to a Z score map without loss of precision for strongly positive and negative values.")
    parser.add_argument("t_stat_map", help="T-score statistical map in the form of a 3D NIFTI file (.nii or .nii.gz).", type=nifti_file)
    parser.add_argument("dof", help="Degrees of freedom (eg. for a two-sample T-test: number of subjects in group - 2)",type=int)
    parser.add_argument("--output_nii", help="The name for the output Z-Score Map.",type=str,default="z_stat_map.nii")
    args = parser.parse_args()

    # Single-argument print call: valid on both Python 2 and Python 3.
    print("Converting map %s to Z-Scores..." %(args.t_stat_map))

    mr = nib.load(args.t_stat_map)
    data = mr.get_data()

    # Select just the nonzero voxels
    mask = data != 0
    nonzero = data[mask]

    # Split into values <= 0 and values > 0
    k1 = nonzero <= 0
    k2 = nonzero > 0

    # Both halves use the lower tail of the t distribution (positive values
    # are mirrored), which keeps precision for large |t|.
    Z = np.zeros(len(nonzero))
    Z[k1] = norm.ppf(t.cdf(nonzero[k1], df = args.dof))
    Z[k2] = -norm.ppf(t.cdf(-nonzero[k2], df = args.dof))

    # Write new image to file
    empty_nii = np.zeros(mr.shape)
    empty_nii[mask] = Z
    Z_nii_fixed = nib.nifti1.Nifti1Image(empty_nii,affine=mr.get_affine(),header=mr.get_header())
    nib.save(Z_nii_fixed,args.output_nii)
def pVal(mu1,mu2,s1,s2,n1,n2):
    """Return the one-sided (upper-tail) p-value of Welch's t-test.

    mu1, mu2 : sample means
    s1, s2   : sample standard deviations
    n1, n2   : sample sizes

    The statistic is (mu1 - mu2) / se with Welch-Satterthwaite degrees of
    freedom; the result is P(T > t).
    """
    # Origin: ryu577/base.git
    std_err = np.sqrt(s1*s1/n1+s2*s2/n2)
    numer = (s1**2/n1 + s2**2/n2)**2
    denom = ((s1**2 / n1)**2 / (n1 - 1)) + ((s2**2 / n2)**2 / (n2 - 1))
    dof = numer / denom
    t_value = (mu1 - mu2)/std_err
    return (1 - t.cdf(t_value,dof))
def SimulateUniforms(self):
    """Draw one vector of uniforms from a t copula.

    A multivariate normal vector with zero mean and covariance
    ``self.copula_covariance`` is scaled by sqrt(dof / s), with ``s`` a
    chi-square draw with ``self.dof`` degrees of freedom, and mapped through
    the t cdf. Returns a list of ``self.size`` values.
    """
    zero_mean = [0.0] * self.size
    covariance = self.copula_covariance
    chi_draw = chi2.rvs(self.dof)
    normals = multivariate_normal(zero_mean, covariance)
    factor = math.sqrt(self.dof)/math.sqrt(chi_draw)
    return [t.cdf(factor * z, self.dof) for z in normals]
def comparison_test(arr1, arr2, name1, name2, tail=1, bonf=1):
    """Run a t-test between two arrays and print the outcome.

    arr1, arr2   : samples to compare
    name1, name2 : labels used in the printed report
    tail         : 1 for a one-tailed (upper) test, 2 for a two-tailed test
    bonf         : Bonferroni divisor applied to the module-level ``ALPHA``

    The statistic comes from ``t_test(*_stats(arr1, arr2))`` and is compared
    with a t distribution with min(len(arr1), len(arr2)) - 1 degrees of
    freedom. The effect size (``cohen_d``) is printed when the null
    hypothesis is rejected.
    """
    stats = _stats(arr1, arr2)
    t_value = t_test(*stats)
    dof = min(len(arr1), len(arr2)) - 1
    if tail == 2:
        # Two tails: the probability beyond |t| on both sides. Doubling the
        # upper tail of a negative statistic gave values above 1.
        p_value = 2 * t.sf(abs(t_value), dof)
    else:
        p_value = t.sf(t_value, dof)

    alpha = ALPHA / bonf
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("The p-value between {0} and {1} is {2:.5f} and it {3} the null hypothesis".format(
        name1, name2, p_value, "reject" if p_value < alpha else "does not reject"))
    print("Actual Alpha is %s" % format(alpha, '.4f'))
    if p_value < alpha:
        print("The effect size is {0:.5f}".format(cohen_d(*stats)))
def sig_test(r, n, twotailed = True):
    """Return the p-value of a correlation coefficient.

    r         : correlation coefficient
    n         : number of samples (the test has n - 2 degrees of freedom)
    twotailed : double the tail probability when true

    The t statistic |r * sqrt(df / (1 - r**2))| is taken in absolute value
    so that negative correlations are handled as well.
    """
    import numpy as np
    from scipy.stats import t as tdist
    df = n - 2
    t_abs = np.abs(r * np.sqrt(df/(1-r**2)))
    tail = (1 - tdist.cdf(t_abs,df))
    return tail * 2 if twotailed else tail
def cdf(self, arg):
    """Cumulative distribution function (CDF).

    Parameters
    ----------
    arg : array
        Grid of points to evaluate the CDF at

    Returns
    -------
    array
        CDF values. Same shape as the input.

    """
    a = self.__const_a()
    b = self.__const_b()

    # Standardised argument; the sign term switches between the two
    # branches, which meet at arg == -a/b.
    rescale = (1-2/self.eta)**(-.5)
    y = (b*arg+a)/(1+np.sign(arg+a/b)*self.lam) * rescale
    student_cdf = t.cdf(y, self.eta)

    left = arg < -a/b
    left_part = (1-self.lam) * student_cdf
    right_part = -self.lam + (1+self.lam) * student_cdf
    return left * left_part + ~left * right_part
def naiveTopt(icm,cutoff=.05):
    """Return the indices of the entries of ``icm`` in the lower tail of a
    fitted t distribution.

    A t distribution is fitted to ``icm`` (starting the location at the
    median). The threshold is the largest point of a 200-point grid on
    [-1, 1] whose fitted cdf is below ``cutoff``; the indices of all entries
    below that threshold are returned.

    Like topt, but does not correct for the tail direction.
    """
    shape, loc, scale = t.fit(icm,loc=np.median(icm))[:3]
    grid = np.linspace(-1,1,200)
    fitted_cdf = t.cdf(grid,shape,loc=loc, scale=scale)
    in_tail = np.nonzero(fitted_cdf<cutoff)
    threshold = np.max(grid[in_tail])
    selected = [idx for idx, value in enumerate(icm) if value<threshold]
    return np.array(selected)
def __init__(self,x,y):
    """Fit ``y = x b`` by least squares and store the usual statistics.

    x : 2-D design matrix (n observations x r regressors)
    y : response with one row per observation

    Stored attributes: coefficients ``b``, residuals ``resid``, coefficient
    covariance ``vb`` (from ``self.genvariance``), standard errors ``se``,
    t statistics ``tstat``, two-sided p-values ``pval``, ``rsq``,
    ``adjrsq``, log-likelihood ``logl``, ``aic``, ``bic`` and ``deviance``
    (twice the log-likelihood gain over a constant-variance null).
    """
    self.x = x
    self.y = y
    self.n, self.r = x.shape

    gram = np.dot(x.T,x)
    moment = np.dot(x.T,y)
    self.xxi = np.linalg.inv(gram)
    self.b = np.linalg.solve(gram,moment).reshape(-1,1)

    residual = y - np.dot(x,self.b)
    self.resid = residual
    self.vb = self.genvariance(residual)
    self.se = np.sqrt(np.diagonal(self.vb)).reshape(-1,1)
    self.tstat = np.divide(self.b,self.se)
    self.pval = 2*t.cdf(-np.abs(self.tstat),df=self.n-self.r)

    resid_var = residual.var()
    total_var = y.var()
    self.rsq = 1-resid_var/total_var
    self.adjrsq = 1-(1-self.rsq)*(self.n-1)/(self.n-self.r)

    # Gaussian log-likelihood at the maximum-likelihood variance.
    self.logl = -self.n/2*(np.log(2*np.pi*resid_var)+1)
    self.aic = 2*self.r-2*self.logl
    self.bic = np.log(self.n)*self.r-2*self.logl
    null_logl = -self.n/2*(np.log(2*np.pi*total_var)+1)
    self.deviance = 2*(self.logl-null_logl)
def two_sample_Welch_t_test(data1, data2, scale_estimator=lambda x: np.std(x)):
    """
    --Independent two-sample test--

    Assumes Gaussian distributions with UNequal variances and unequal
    sample sizes.
    Hypothesis H0: mu_1 == mu_2

    scale_estimator is a function that estimates the square root of the
    variance (~st.dev.). Note that the default, np.std, is the population
    estimate (ddof=0); pass ``lambda x: np.std(x, ddof=1)`` for the textbook
    Welch statistic.

    Returns (two-sided p-value, t statistic, degrees of freedom).
    """
    n1, n2 = len(data1), len(data2)
    mean_diff = np.mean(data1) - np.mean(data2)

    s1 = scale_estimator(data1)
    s2 = scale_estimator(data2)
    s_delta = np.sqrt((s1**2) / n1 + (s2**2) / n2)
    t_statistic = mean_diff / s_delta

    # Welch-Satterthwaite equation
    spread = (s1**2 / n1)**2 / (n1 - 1) + (s2**2 / n2)**2 / (n2 - 1)
    degrees_of_freedom = s_delta**4 / spread

    # Look up from Student's t-distribution
    upper_tail = 1 - t.cdf(abs(t_statistic), degrees_of_freedom)
    p_value = upper_tail * 2
    return p_value, t_statistic, degrees_of_freedom
def student_test(data1, data2):
    """Two-sample Student t-test of equal means (Ex = Ey).

    Prints the decision and returns ``(criteria, test)``: the two-sided
    critical value of the t distribution with n + m - 2 degrees of freedom
    and the pooled-variance t statistic.

    Uses the module-level ``alfa``; it is assumed to be the significance
    level (e.g. 0.05) - TODO confirm.
    """
    print("Student test of Ex = Ey")
    n, m = len(data1), len(data2)
    # The critical value is a quantile (ppf) of the t distribution; the
    # previous code evaluated the cdf, which yields a probability instead.
    criteria = t.ppf(1 - alfa / 2, n + m - 2)
    # test = ttest_ind(data1, data2)
    test = ((np.mean(data1) - np.mean(data2)) * math.sqrt(n * m *(n + m - 2))) \
           / (math.sqrt((n + m) * (n * np.var(data1) + m * np.var(data2))))
    # Two-sided test: compare the magnitude of the statistic.
    if abs(test) > criteria:
        print(f"Ho отвергается т.к значение > {criteria}")
        print("Ex != Ey")
    else:
        print(f"Ho подтверждается т.к значение < {criteria}")
        print("Ex = Ey")
    print()
    return criteria, test
def independent_ttest(data1, data2, alpha):
    """Two-sided t-test for the means of two independent samples.

    data1, data2 : samples
    alpha        : significance level

    Returns (t_stat, df, cv, p): the t statistic, the degrees of freedom
    (n1 + n2 - 2), the two-sided critical value at ``alpha`` and the
    two-sided p-value. The two sample means are printed.

    The standard error of the difference is sqrt(se1^2 + se2^2), which
    equals the pooled-variance value when both samples have the same size.
    """
    # calculate means
    mean1, mean2 = mean(data1), mean(data2)
    print(mean1)
    print(mean2)
    # calculate standard errors
    se1, se2 = sem(data1), sem(data2)
    # standard error on the difference between the samples
    sed = sqrt(se1**2.0 + se2**2.0)
    # calculate the t statistic
    t_stat = (mean1 - mean2) / sed
    # degrees of freedom
    df = len(data1) + len(data2) - 2
    # Critical value of the two-sided test: alpha is split over both tails,
    # so that |t| > cv is equivalent to p < alpha.
    cv = t.ppf(1.0 - alpha / 2.0, df)
    # Two-sided p-value; sf avoids the cancellation of 1 - cdf.
    p = 2.0 * t.sf(abs(t_stat), df)
    # return everything
    return t_stat, df, cv, p
def simple_linear_reg(y, x): """一元线性回归""" assert len(x) == len(y) n = len(x) assert n > 1 mean_x = mean(x) mean_y = mean(y) beta1 = covariance(x, y) / variance(x) beta0 = mean_y - beta1 * mean_x y_hat = [beta0 + beta1 * e for e in x] ss_residual = sum((e1 - e2)**2 for e1, e2 in zip(y, y_hat)) se_model = sqrt(ss_residual / (n - 2)) t_value = beta1 / (se_model / sqrt((n - 1) * variance(x))) p = 2 * (1 - t.cdf(abs(t_value), n - 2)) return beta0, beta1, t_value, n - 2, p
def bharath_Ttest1s(sMean,sd,pMean,n,alpha=0.05):
    """Two-sided one-sample t-test from summary statistics; prints the result.

    sMean : sample mean
    sd    : sample standard deviation
    pMean : hypothesised population mean
    n     : sample size
    alpha : significance level, used for both the critical-value decision
            and the p-value decision

    The statistic is folded onto the lower tail (printed as a non-positive
    value) before it is compared with the critical values.
    """
    tstatistics=(sMean-pMean)/(sd/np.sqrt(n))
    if tstatistics>0:
        tstatistics=-tstatistics
    print("\nT Statistics:",tstatistics)

    lower_cut=t.ppf(q=alpha/2,df=n-1)
    tcritical=[lower_cut,-lower_cut]
    print("T critical Values are :",tcritical)
    if tstatistics<tcritical[0] or tstatistics>tcritical[1]:
        print("Reject the Null Hypothesis")
    else:
        print("Fail to reject the Null Hypothesis")

    pvalue=2*t.cdf(tstatistics,df=n-1)
    print("\nPvalue value is:",pvalue)
    # Compare with the requested significance level; this used to be
    # hard-coded to 0.05, so the two decisions could disagree.
    if pvalue < alpha:
        print("Reject the Null Hypothesis")
    else:
        print("Fails to reject the Null Hypothesis")
    print("\n")
def data_process_3():
    """Work through the confidence-interval questions of Problem 2 on a
    fixed data set.

    Returns four tuples:
      (samp_size, avg, sd, stand_err, t_c, intervals)  - 95% t interval
      (t_c_2, intervals2)                              - 90% t interval
      (avg, new_std_err, z_c, intervals3)              - 95% z interval with
                                                         a known sd of 16.836
      (t_c_new, p_val, new_c, intervals4)              - the interval whose
                                                         lower end point is 0
    """
    data = [3, -3, 3, 12, 15, -16, 17, 19, 23, -24, 32]
    samp_size = len(data)
    avg = np.mean(data)
    sd = np.std(data, ddof=1)
    root_n = np.sqrt(samp_size)
    dof = samp_size - 1

    def centred_interval(critical, spread):
        # avg -/+ critical * spread / sqrt(n)
        half_width = (critical * spread) / root_n
        return (avg - half_width, avg + half_width)

    # Problem 2 Question 1: confidence interval
    c1 = 0.95
    stand_err = sd / root_n
    t_c = t.ppf(1 - (1 - c1) / 2, df=dof)
    intervals = centred_interval(t_c, sd)

    # Problem 2 Question 2
    c2 = 0.9
    t_c_2 = t.ppf(1 - (1 - c2) / 2, df=dof)
    intervals2 = centred_interval(t_c_2, sd)

    # Problem 2 Question 3
    new_sd = 16.836
    new_std_err = new_sd / root_n
    z_c = norm.ppf(1 - (1 - c1) / 2)
    intervals3 = centred_interval(z_c, new_sd)

    # Problem 2 Question 4
    # solve for t_c when lower interval endpoint is zero (mu = 0)
    t_c_new = avg / (sd / root_n)
    # find p value
    p_val = 2 * t.cdf(-abs(t_c_new), df=dof)
    new_c = 1 - p_val
    intervals4 = centred_interval(t_c_new, sd)

    first = (samp_size, avg, sd, stand_err, t_c, intervals)
    second = (t_c_2, intervals2)
    third = (avg, new_std_err, z_c, intervals3)
    fourth = (t_c_new, p_val, new_c, intervals4)
    return first, second, third, fourth
def student_t(t_input: Tuple[str, float], radius: float, size: float, ignore: bool) -> float: """ Function to calculate the false positive fraction for a given sigma level (Mawet et al. 2014). Parameters ---------- t_input : tuple(str, float) Tuple with the input type ('sigma' or 'fpf') and the input value. radius : float Aperture radius (pix). size : float Separation of the aperture center (pix). ignore : bool Ignore neighboring apertures of the point source to exclude the self-subtraction lobes. Returns ------- float False positive fraction (FPF). """ num_ap = int(math.pi*radius/size) if ignore: num_ap -= 2 # Note that the number of degrees of freedom is given by nu = n-1 with n the number of samples. # The number of samples is equal to the number of apertures minus 1 (i.e. the planet aperture). # See Section 3 of Mawet et al. (2014) for more details on the Student's t distribution. if t_input[0] == 'sigma': t_result = 1. - t.cdf(t_input[1], num_ap-2, loc=0., scale=1.) elif t_input[0] == 'fpf': t_result = t.ppf(1. - t_input[1], num_ap-2, loc=0., scale=1.) return t_result
def test_two_sample_welch_test(self):
    """Welch's two-sample t-test on salaries by discipline.

    Checks the summary of ``tTest`` called with two samples against known
    values, and that calling it with a grouping column gives the same
    summary.
    """
    salaries = self.data
    sal_a = salaries.loc[salaries['discipline'] == 'A']['salary']
    sal_b = salaries.loc[salaries['discipline'] == 'B']['salary']

    test_summary = tTest(y1=sal_a, y2=sal_b).test_summary

    assert_almost_equal(test_summary['Sample 1 Mean'], np.mean(sal_a))
    assert_almost_equal(test_summary['Sample 2 Mean'], np.mean(sal_b))
    assert_almost_equal(test_summary['t-statistic'], -3.1386989278486013)
    assert_almost_equal(test_summary['degrees of freedom'], 377.89897288941387)
    # two-sided p-value from the lower tail of the (negative) statistic
    expected_p = t.cdf(test_summary['t-statistic'],
                       test_summary['degrees of freedom']) * 2
    assert_almost_equal(test_summary['p-value'], expected_p)

    assert test_summary['alternative'] == 'two-sided'
    assert test_summary['test description'] == "Two-Sample Welch's t-test"

    # Same test, specified through a grouping column.
    test_group_summary = tTest(group=self.data['discipline'],
                               y1=self.data['salary']).test_summary

    assert_almost_equal(test_summary['Sample 1 Mean'], test_group_summary['Sample 1 Mean'])
    assert_almost_equal(test_summary['Sample 2 Mean'], test_group_summary['Sample 2 Mean'])
    assert_almost_equal(test_summary['p-value'], test_group_summary['p-value'])
    assert_almost_equal(test_summary['degrees of freedom'],
                        test_group_summary['degrees of freedom'], 5)
    assert_almost_equal(test_summary['t-statistic'], test_group_summary['t-statistic'])

    assert test_group_summary['alternative'] == 'two-sided'
    assert test_group_summary['test description'] == "Two-Sample Welch's t-test"
def dependent_corr(self, xy, xz, yz, n, twotailed=False, method='steiger'):
    """
    Calculates the statistical significance of the difference between two
    dependent correlation coefficients.

    @param xy: correlation coefficient between x and y
    @param xz: correlation coefficient between x and z
    @param yz: correlation coefficient between y and z
    @param n: number of elements in x, y and z
    @param twotailed: whether to calculate a one or two tailed test, only
        works for 'steiger' method
    @param method: defines the method used, 'steiger' or 'zou'
    @return: t and p-val for 'steiger'; lower and upper bound of the
        confidence interval for 'zou'
    """
    if method == 'steiger':
        diff = xy - xz
        determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
        av = (xy + xz) / 2
        cube = (1 - yz) * (1 - yz) * (1 - yz)
        denom = (2 * (n - 1) / (n - 3)) * determin + av * av * cube
        t2 = diff * np.sqrt((n - 1) * (1 + yz) / (denom))
        p = 1 - t.cdf(abs(t2), n - 3)
        if twotailed:
            p *= 2
        return t2, p

    if method == 'zou':
        ci_xy = self.rz_ci(xy, n)
        ci_xz = self.rz_ci(xz, n)
        L1, U1 = ci_xy[0], ci_xy[1]
        L2, U2 = ci_xz[0], ci_xz[1]
        rho_r12_r13 = self.rho_rxy_rxz(xy, xz, yz)
        low_term = pow((xy - L1), 2) + pow((U2 - xz), 2) \
            - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)
        up_term = pow((U1 - xy), 2) + pow((xz - L2), 2) \
            - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)
        lower = xy - xz - pow(low_term, 0.5)
        upper = xy - xz + pow(up_term, 0.5)
        return lower, upper

    raise Exception('Wrong method!')
def dependent_corr(xy, xz, yz, n, twotailed=False, conf_level=None, method='steiger'):
    """
    Calculates the statistical significance of the difference between two
    dependent correlation coefficients.

    @param xy: correlation coefficient between x and y
    @param xz: correlation coefficient between x and z
    @param yz: correlation coefficient between y and z
    @param n: number of elements in x, y and z
    @param twotailed: whether to calculate a one or two tailed test, only
        works for 'steiger' method
    @param conf_level: confidence level, only works for 'zou' method
        (defaults to 0.95)
    @param method: defines the method used, 'steiger' or 'zou'
    @return: t and p-val for 'steiger' ((nan, nan) when the statistic is not
        defined); lower and upper bound of the confidence interval for 'zou'
    """
    if method == 'steiger':
        d = xy - xz
        determin = 1 - xy ** 2 - xz ** 2 - yz ** 2 + 2 * xy * xz * yz
        av = (xy + xz)/2
        cube = (1 - yz) * (1 - yz) * (1 - yz)

        # 2.0 keeps the ratio a true division for integer n on Python 2.
        e = (n - 1) * (1 + yz)/(((2.0 * (n - 1)/(n - 3)) * determin + (av ** 2) * cube))
        if e < 0:
            # Inconsistent correlations: the statistic is not defined.
            return np.nan, np.nan

        t2 = d * np.sqrt(e)
        # The statistic follows a t distribution with n - 3 degrees of
        # freedom (three correlations are estimated), not n - 2.
        p = t.sf(abs(t2), n - 3)
        if twotailed:
            p *= 2
        # p is the tail probability of t2 under the null hypothesis of equal
        # correlations.
        return t2, p
    elif method == 'zou':
        if conf_level is None:
            conf_level = 0.95
        L1, U1 = rz_ci(xy, n, conf_level=conf_level)[:2]
        L2, U2 = rz_ci(xz, n, conf_level=conf_level)[:2]
        rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
        lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
        upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
        return lower, upper
    else:
        raise Exception('Wrong method!')
def DiscretizeNormalizeParam(tau, k_, model, par):
    # This function discretizes the one-step normalized pdf when the
    # distribution is parametrically specified
    #  INPUTS
    #   tau    :[scalar] projection horizon
    #   k_     :[scalar] coarseness level
    #   model  :[string] specifies the distribution: shiftedLN, Student t, Uniform
    #   par    :[struct] model parameters (for 'Student t' the degrees of
    #           freedom themselves)
    #  OUTPUTS
    #   xi     :[1 x k_] centers of the bins
    #   f      :[1 x k_] discretized pdf of invariant
    #  RAISES
    #   ValueError for an unknown model, or a shifted lognormal with zero skew

    ## Code

    # grid: exactly k_ centres -a+h, ..., a (built from an integer range so
    # that rounding can never add or drop a point)
    a = -norm.ppf(10**(-15),0,sqrt(tau))
    h = 2*a/k_
    xi = -a + h*arange(1, k_+1)

    # discretized initial pdf (standardized); in every model the last bin
    # (index k_-1) collects the two half bins at the ends of the grid
    if model=='shiftedLN':
        m, s,_ = ShiftedLNMoments(par)
        csi = par.c
        mu = par.mu
        sig = sqrt(par.sig2)
        scale = exp(mu-log(s))

        def F(v):
            # cdf of the standardized lognormal
            return lognorm.cdf(v,sig,scale=scale)

        if sign(par.skew)==1:
            M = (m-csi)/s
            f = 1/h*(F(xi+h/2+M)-F(xi-h/2+M))
            # was f[k_], a 1-based index that is out of bounds here
            f[k_-1] = 1/h*(F(-a+h/2+M)-F(-a+M) +
                           F(a+M)-F(a-h/2+M))
        elif sign(par.skew)==-1:
            M = (m+csi)/s
            f = 1/h*(F(-(xi-h/2+M))-F(-(xi+h/2+M)))
            f[k_-1] = 1/h*(F(-(-a+M))-F(-(-a+h/2+M)) +
                           F(-(a-h/2+M))-F(-(a+M)))
        else:
            raise ValueError("shiftedLN requires a non-zero skew")
    elif model=='Student t':
        nu = par
        f = 1/h*(t.cdf(xi+h/2,nu)-t.cdf(xi-h/2,nu))
        f[k_-1] = 1/h*(t.cdf(-a+h/2,nu)-t.cdf(-a,nu) + t.cdf(a,nu)-t.cdf(a-h/2,nu))
    elif model=='Uniform':
        mu = par.mu
        sigma = par.sigma
        f = zeros(k_)
        f[(xi>=-mu/sigma)&(xi<=(1-mu)/sigma)] = sigma
    else:
        raise ValueError("Unknown model %r" % (model,))

    return xi, f
def independent_t_test(self, data1, data2, alpha):
    '''Run an independent two-sample Student's t-test and report the verdict.

    Prints the statistics and one accept/reject line for each of the two
    decision rules, then returns True only when both rules reject the null
    hypothesis of equal means.

    Note: the critical value is the quantile at ``1 - alpha`` while the
    p-value is doubled (two-sided), so the two rules use different tails.

    Reference: Brownlee, Jason, 2019, 'How to Code the Student's t-Test from
    Scratch in Python', Machine Learning Mastery, retrieved from:
    https://machinelearningmastery.com/how-to-code-the-students-t-test-from-scratch-in-python/

    :param data1: first sample
    :param data2: second sample
    :param alpha: significance level
    :return: True if both the critical-value rule and the p-value rule reject
    '''
    # standard error of the difference between the two sample means
    se_diff = np.sqrt(sem(data1) ** 2.0 + sem(data2) ** 2.0)
    t_stat = (np.mean(data1) - np.mean(data2)) / se_diff
    dof = len(data1) + len(data2) - 2
    critical = t.ppf(1.0 - alpha, dof)
    p_value = (1.0 - t.cdf(abs(t_stat), dof)) * 2.0
    print('t=%.3f, df=%d, cv=%.3f, p=%.3f' % (t_stat, dof, critical, p_value))

    # Written as negations of the "accept" conditions so that a NaN statistic
    # ends up on the reject side.
    rejected_by_cv = not (abs(t_stat) <= critical)
    if rejected_by_cv:
        print('Reject the null hypothesis that the means are equal.')
    else:
        print('Accept null hypothesis that the means are equal.')

    rejected_by_p = not (p_value > alpha)
    if rejected_by_p:
        print('Reject the null hypothesis that the means are equal.')
    else:
        print('Accept null hypothesis that the means are equal.')

    return rejected_by_cv and rejected_by_p
def independent_ttest(data1,data2,alpha):
    """Independent two-sample Student's t-test.

    :param data1: first sample
    :param data2: second sample
    :param alpha: significance level used for the critical value
    :return: (t_stat, df, cv, p) -- the t statistic, the degrees of freedom
             ``len(data1) + len(data2) - 2``, the critical value
             ``t.ppf(1 - alpha, df)`` and the two-sided p-value
    """
    from math import sqrt
    from numpy import mean
    from scipy.stats import sem
    from scipy.stats import t

    # standard error of the difference between the two sample means
    se_diff = sqrt(sem(data1) ** 2.0 + sem(data2) ** 2.0)
    t_stat = (mean(data1) - mean(data2)) / se_diff
    dof = len(data1) + len(data2) - 2
    critical = t.ppf(1.0 - alpha, dof)
    p_value = (1.0 - t.cdf(abs(t_stat), dof)) * 2.0
    return t_stat, dof, critical, p_value
def paired_t_test(y1_score, y2_score, alpha):
    """
    Paired t-test.

    :param y1_score: first series of scores
    :param y2_score: second series of scores, paired element-wise with y1_score
    :param alpha: significance level used for the critical value
    :return: t_stat, df, cv, pv -- the absolute t statistic, the degrees of
             freedom ``k - 1``, the critical value ``t.ppf(1 - alpha, df)``
             and the two-sided p-value
    """
    k = len(y1_score)
    d = np.asarray(y1_score) - np.asarray(y2_score)
    # mean of the paired differences
    d_ = np.mean(d)
    # Sample standard deviation: the t statistic with k - 1 degrees of freedom
    # needs the unbiased estimate (ddof=1), not numpy's default ddof=0.
    s = np.std(d, ddof=1)
    # calculate the t statistic
    t_stat = abs(k**0.5 * d_ / s)
    # degrees of freedom
    df = k - 1
    cv = t.ppf(1.0 - alpha, df)
    # the survival function avoids the cancellation in 1 - cdf for tiny p-values
    pv = t.sf(t_stat, df) * 2.0
    return t_stat, df, cv, pv
def compute_conf_interval(alpha, x, samp_mean, samp_mean_dev, var=None, df=None, central=True):
    """Print a confidence interval read off a grid of candidate values.

    When ``var`` is given, the normal cdf with location ``samp_mean`` and scale
    ``samp_mean_dev`` is evaluated on ``x``. Otherwise ``x`` is standardised as
    ``(samp_mean - x) / samp_mean_dev``, reversed, and the Student t cdf with
    ``df`` degrees of freedom is evaluated on it. The bounds are the last grid
    point whose cdf is below the lower level and the first grid point whose
    cdf is above the upper level.

    Nothing is returned; the interval is printed.

    :param alpha: confidence level
    :param x: grid of candidate values; must support boolean-mask indexing and
              ``[::-1]`` (a numpy array does)
    :param samp_mean: sample mean
    :param samp_mean_dev: deviation of the sample mean
    :param var: only tested against None to choose the Gaussian branch
    :param df: degrees of freedom for the Student t branch
    :param central: True for a central interval, False to print lower and
                    upper bounds separately
    """
    gaussian = var is not None

    # cdf levels that delimit the interval
    if central:
        low_val, upp_val = (1 - alpha) / 2, (1 + alpha) / 2
    else:
        low_val, upp_val = 1 - alpha, alpha

    if gaussian:
        cdf_arr = norm.cdf(x, samp_mean, samp_mean_dev)
    else:
        x = ((samp_mean - x) / (samp_mean_dev))[::-1]
        cdf_arr = t.cdf(x, df)

    low_int = x[cdf_arr < low_val][-1]
    upp_int = x[cdf_arr > upp_val][0]

    # In the Student t branch the bounds are on the standardised scale and are
    # mapped back to the scale of the mean before printing.
    if gaussian:
        lower, upper = low_int, upp_int
    else:
        lower = samp_mean + low_int * samp_mean_dev
        upper = samp_mean + upp_int * samp_mean_dev

    if central:
        label = "; Central CI (Gauss): " if gaussian else "; Central CI (tStudent): "
        print("Alpha: ", alpha, label, np.round((lower, upper), 2))
        return

    lower_label = "; Lower CI (Gauss): " if gaussian else "; Lower CI: "
    upper_label = "; Upper CI (Gauss): " if gaussian else "; Upper CI: "
    print("Alpha: ", alpha, lower_label, round(lower, 2))
    print("Alpha: ", alpha, upper_label, round(upper, 2))
def fit_model(y, x, covars=None):
    """
    Fit an ordinary least squares model of y on an intercept, the optional
    covariates and the site under test, and t-test every coefficient.

    y is n X 1 - phenotype
    x is n X 1 - site under test
    covars (optional) is n X p

    Returns three flattened arrays - coefficients, t-statistics and p-values.
    In each of them
        index 0 belongs to the intercept
        the following p entries belong to the covariates (in input order)
        index -1 belongs to the site under test (input x)

    To get the coefficient, t-statistic and p-value of the site under test
    extract coefficients[-1], t-statistic[-1] and p-values[-1].
    """
    if x.ndim == 1:
        x = x.reshape(-1, 1)  # make sure dim is (n,1) and not (n,)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    X = x
    if covars is not None:
        X = column_stack((covars, X))

    n = X.shape[0]  # number of observations (rows)
    # The intercept is added as an explicit column of ones, so the estimator
    # itself must not fit one.
    X = np.concatenate((np.ones((n, 1)), X), axis=1)
    # fit_intercept is passed by keyword: current scikit-learn releases reject
    # positional constructor arguments.
    regr = linear_model.LinearRegression(fit_intercept=False)
    mdl = regr.fit(X, y)

    dof = n - X.shape[1]
    # residual variance per target column
    resid_var = np.sum((mdl.predict(X) - y)**2, axis=0) / float(dof)
    # (X'X)^-1 does not depend on the target, so it is inverted only once
    xtx_inv_diag = np.diagonal(np.linalg.inv(np.dot(X.T, X)))
    se = np.array([np.sqrt(rv * xtx_inv_diag) for rv in resid_var])

    Ts = mdl.coef_ / se
    # two-sided p-values; sf avoids the cancellation in 1 - cdf
    p = 2 * t.sf(np.abs(Ts), dof)
    return mdl.coef_.reshape(-1), Ts.reshape(-1), p.reshape(-1)  # coefficients, t-statistic and p-values
def independent_ttest(data1, data2, alpha):
    """Independent two-sample t-test on two array-like samples.

    Both inputs must provide ``.mean()`` and ``.std()`` methods and ``len()``.
    NOTE: ``.std()`` runs with the container's own default, which is the
    population estimate for numpy arrays and the sample estimate for pandas
    objects.

    :param data1: first sample
    :param data2: second sample
    :param alpha: significance level used for the critical value
    :return: (t_stat, df, cv, p) -- the t statistic, the degrees of freedom
             ``len(data1) + len(data2) - 2``, the critical value
             ``t.ppf(1 - alpha, df)`` and the two-sided p-value
    """
    # calculate means
    mean1, mean2 = data1.mean(), data2.mean()
    # standard deviation
    std1, std2 = data1.std(), data2.std()
    # Sample sizes come from the arguments themselves; the previous code read
    # them from the unrelated globals df_columns / df_columns2.
    n1, n2 = len(data1), len(data2)
    print('n1: ' + str(n1))
    print('n2: ' + str(n2))
    # standard errors
    se1, se2 = std1 / math.sqrt(n1), std2 / math.sqrt(n2)
    # standard error on the difference between the samples
    sed = math.sqrt(se1**2.0 + se2**2.0)
    # calculate the t statistic
    t_stat = (mean1 - mean2) / sed
    # degrees of freedom
    df = n1 + n2 - 2
    # calculate the critical value
    cv = t.ppf(1.0 - alpha, df)
    # calculate the p-value
    p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
    # return everything
    return t_stat, df, cv, p
def tExpectedImprovement(self, tau, mean, std, nu=3.0):
    """
    Expected Improvement acquisition function. Only to be used with `tStudentProcess` surrogate.

    Parameters
    ----------
    tau: float
        Best observed function evaluation.
    mean: float
        Point mean of the posterior process.
    std: float
        Point std of the posterior process.
    nu: float
        Degrees of freedom passed to the Student t cdf and pdf.

    Returns
    -------
    float
        Expected improvement.
    """
    # standardised improvement; self.eps is subtracted in the numerator and
    # added to the denominator
    z = (mean - tau - self.eps) / (std + self.eps)
    cdf_term = z * std * t.cdf(z, df=nu)
    pdf_term = std * (1 + (z**2 - 1) / (nu - 1)) * t.pdf(z, df=nu)
    return cdf_term + pdf_term
def h_ratio_fit(response, contrast) -> OptimizeResult:
    """Fit the three-parameter ``h_ratio`` model by minimising ``h_ratio_ml``.

    :param response: either a pair ``(y, sd)`` whose first element is a numpy
                     array, or raw data that is reduced along axis 0 with
                     ``np.nanmean`` / ``np.nanstd``
    :param contrast: passed through to ``h_ratio_ml`` together with y and sd
    :return: the ``OptimizeResult`` of the SLSQP minimisation, extended with
             ``evaluate(a)``, ``df``, ``var``, ``parameter_names`` and
             ``compare(other)``
    """
    if isinstance(response[0], np.ndarray):
        y, sd = response
    else:
        y = np.nanmean(response, axis=0)
        sd = np.nanstd(response, axis=0)

    # starting point and box constraints, in the order of parameter_names below
    initial_values = np.array([2.0 * y[-1], 0.25, 4.0])
    bounds = ((0, None), (0, 1), (1.0, 10.0))
    fit = minimize(h_ratio_ml,
                   initial_values,
                   args=(contrast, y, sd),
                   method="SLSQP",
                   bounds=bounds)

    def evaluate(a):
        # model prediction at a, using the fitted parameters
        return h_ratio(fit.x, a)

    def compare(x):
        # two-sided t-test p-value per parameter against another fit result
        return (1 - t.cdf(
            np.abs(fit.x - x.x) / np.sqrt(fit.var / fit.df + x.var / x.df),
            fit.df + x.df)) * 2

    fit.evaluate = evaluate
    fit.df = len(y) - len(initial_values)
    # per-parameter variance taken from the pseudo-inverse of J^T J
    jac = fit.jac.reshape(1, -1)
    fit.var = np.diag(pinv(jac.T.dot(jac)))
    fit.parameter_names = ['r_max', 'c_50', 'n']
    fit.compare = compare
    return fit
def neuropowertable(request):
    """Build the peak table for the current session and render its page.

    Loads the latest ``ParameterModel`` entry for the session, converts a T
    map to Z values when needed, extracts the peaks above the screening
    threshold and stores either the peak table or an error text on a new
    ``PeakTableForm`` instance before rendering the template.

    :param request: incoming request, used for the session id and rendering
    :return: the result of ``render(request, template, context)``
    """
    # Get the template/step status
    template = "neuropower/neuropowertable.html"
    sid = get_session_id(request)
    context = {}

    # Initiate peak table
    form = PeakTableForm().save(commit=False)
    form.SID = sid

    # Load model data: reversing the result list and taking element 0 selects
    # the last entry returned for this session
    parsdata = ParameterModel.objects.filter(SID=sid)[::-1][0]

    # Compute peaks
    SPM = nib.load(parsdata.location).get_data()
    MASK = nib.load(parsdata.masklocation).get_data()
    if parsdata.ZorT == 'T':
        # map T values to Z values through the lower tail probabilities
        SPM = -norm.ppf(t.cdf(-SPM, df=float(parsdata.DoF)))
    exc_z = float(parsdata.ExcZ)
    peaks = cluster.cluster(SPM, exc_z, MASK)

    if len(peaks) >= 30:
        # p-value of each peak height above the threshold, floored at 1e-6
        raw_pvalues = np.exp(-exc_z * (np.array(peaks.peak) - exc_z))
        peaks['pval'] = [max(10**(-6), p) for p in raw_pvalues]
        form.data = peaks
        context["peaks"] = peaks.to_html(classes=["table table-striped"])
    else:
        context["text"] = "There are too few peaks for a good estimation. Either the ROI is too small or the screening threshold is too high."
        form.err = context["text"]
    form.save()

    # Get step status
    context["steps"] = get_neuropower_steps(template, sid)
    return render(request, template, context)
def calculateFairness(communities, predictions):
    """Score how similar the predictions are between two groups of communities.

    Communities whose ``'ethnicity'`` is 0 or 1 form group 1, all others form
    group 0. The two groups' predictions are compared with a pooled-variance
    two-sample t-test and the two-sided p-value is returned as a percentage.

    :param communities: mapping/sequence indexed by integer community code,
                        each entry providing an ``'ethnicity'`` value
    :param predictions: dict mapping community code (anything ``int()``
                        accepts) to the predicted value
    :return: two-sided p-value multiplied by 100, or the constant 1 when the
             predictions of both groups sum to zero
    :raises ZeroDivisionError: when a group has fewer than two members or the
                               pooled standard deviation is zero
    """
    groups = {0: [], 1: []}
    for comm, predicted in predictions.items():
        in_group_one = communities[int(comm)]['ethnicity'] in (0, 1)
        groups[1 if in_group_one else 0].append(predicted)

    n0, n1 = len(groups[0]), len(groups[1])
    total0, total1 = sum(groups[0]), sum(groups[1])
    df = n0 + n1 - 2
    if total0 == 0 and total1 == 0:
        return 1

    mean0, mean1 = total0 / n0, total1 / n1
    # unbiased sample variances of the two groups
    var0 = sum((p - mean0) ** 2 for p in groups[0]) / (n0 - 1)
    var1 = sum((p - mean1) ** 2 for p in groups[1]) / (n1 - 1)
    # Pooled standard deviation. var0/var1 are already variances, so they are
    # weighted as they are (squaring them again was the previous bug).
    sigma = (((n0 - 1) * var0 + (n1 - 1) * var1) / df) ** 0.5
    t_stat = (mean0 - mean1) / (sigma * ((1 / n0 + 1 / n1) ** 0.5))
    # two-sided p-value, expressed as a percentage
    fairness = t.sf(abs(t_stat), df) * 2
    return fairness * 100
def corr_dep_ttest(data1, data2, len_train_set, len_test_set, alpha):
    """Corrected resampled t-test on paired scores.

    Based on https://gist.github.com/jensdebruijn/13e8eeda85eb8644ac2a4ac4c3b8e732

    The standard error of the mean difference is scaled by
    ``sqrt(1 / n + len_test_set / len_train_set)``.

    :param data1: scores of the first model, one per resampling run
    :param data2: scores of the second model, paired by index with data1
    :param len_train_set: size of the training set
    :param len_test_set: size of the test set
    :param alpha: significance level used for the critical value
    :return: (t_stat, df, cv, p); four NaNs when the differences sum to zero
    """
    n = len(data1)
    diffs = [(data1[idx] - data2[idx]) for idx in range(n)]
    if np.sum(diffs) == 0:
        return (np.nan,) * 4

    spread = stdev(diffs)
    mean_diff = 1 / n * sum(diffs)
    # variance correction term for the overlap between resampled training sets
    corrected_se = sqrt(1 / n + len_test_set / len_train_set) * spread
    t_stat = mean_diff / corrected_se

    dof = n - 1
    critical = t.ppf(1.0 - alpha, dof)
    p_value = (1.0 - t.cdf(abs(t_stat), dof)) * 2.0
    return t_stat, dof, critical, p_value
def test_tstudent(self):
    """Exercise the scipy Student t distribution API and plot its pdf.

    Checks that ``t.cdf`` inverts ``t.ppf`` at three probabilities and that
    the axes created for the plot have the expected string representation.
    """
    from scipy.stats import t
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(1, 1)
    try:
        df = 2.74
        # called for coverage only; the moments are not asserted
        t.stats(df, moments='mvsk')

        x = np.linspace(t.ppf(0.01, df), t.ppf(0.99, df), 100)
        ax.plot(x, t.pdf(x, df), 'r-', lw=5, alpha=0.6, label='t pdf')
        rv = t(df)
        ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

        # The round-trip result used to be computed and thrown away; assert it.
        vals = t.ppf([0.001, 0.5, 0.999], df)
        self.assertTrue(np.allclose([0.001, 0.5, 0.999], t.cdf(vals, df)))

        r = t.rvs(df, size=1000)
        ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
        ax.legend(loc='best', frameon=False)
        self.assertEqual(str(ax), "AxesSubplot(0.125,0.11;0.775x0.77)")
    finally:
        # release the figure even when an assertion fails
        plt.close(fig)
def one_sample_t(X, SigLevel):
    """One-sample t-test against zero along the third axis of X.

    :param X: array with at least three dimensions; axis 2 holds the
              observations
    :param SigLevel: two-sided significance level; an entry is flagged when
                     its smaller tail probability is below ``SigLevel / 2``
    :return: dict with
             'T'     - t statistics (mean over standard error),
             'Pval'  - smaller of the two one-sided tail probabilities,
             'Sig'   - 1.0 where significant, 0.0 elsewhere,
             'nSigS' - number of significant entries summed over axis 0,
             'nSigT' - number of significant entries summed over axis 1
    """
    from scipy.stats import t
    import numpy as np

    n_obs = X.shape[2]
    nume = np.mean(X, axis=2)
    # Unbiased variance (ddof=1): the statistic is referred to a t distribution
    # with n_obs - 1 degrees of freedom, which assumes the sample estimate.
    denume = np.sqrt(np.var(X, axis=2, ddof=1) / n_obs)
    XT = np.divide(nume, denume)

    # By symmetry the smaller tail is the upper tail of |T|; evaluating it for
    # the whole array at once replaces the per-element loop.
    PvalST = t.sf(np.abs(XT), n_obs - 1)

    SigST = 1.0 * (PvalST < SigLevel/2)
    nSigS = np.sum(SigST, axis=0)
    nSigT = np.sum(SigST, axis=1)
    statOut = {'T': XT, 'Pval': PvalST, 'Sig': SigST, 'nSigS': nSigS, 'nSigT': nSigT}
    return statOut
def compute_diff_mean(base_case, new_case):
    """
    This function computes the mean difference and applies an unequal-variance
    t-test to compute the p value. Only the first column of each data set is
    used.
    @Input:
        base_case: base case data set (must support ``.iloc[:, 0]``)
        new_case: new case data set (must support ``.iloc[:, 0]``)
    @Output:
        diff: mean difference (new minus base)
        p_value: two-sided p value of the t-test
    """
    base_values = base_case.iloc[:, 0]
    new_values = new_case.iloc[:, 0]

    # compute the number of observations for both cases
    n_base = len(base_case)
    n_new = len(new_case)

    # compute the difference of the averages
    diff = np.mean(new_values) - np.mean(base_values)

    # Sample variances (ddof=1): the standard error of a t-test is built from
    # the unbiased estimates, not numpy's default population variance.
    var_base = np.var(base_values, ddof=1)
    var_new = np.var(new_values, ddof=1)

    # compute t-score
    t_score = np.absolute(diff) / np.sqrt(var_base / n_base + var_new / n_new)

    # The smaller of the two per-sample degrees of freedom is used here rather
    # than the Welch-Satterthwaite approximation.
    dof = min(n_base - 1, n_new - 1)

    # two-sided p-value; sf avoids the cancellation in 1 - cdf
    return diff, 2 * t.sf(t_score, dof)
def show_bootstrap_statistics(clf, X, y, features):
    """Print bootstrap statistics for the normalized coefficients of clf.

    The classifier is refitted BOOTSTRAP_ITERATIONS times on resampled data.
    For every feature the 2.5/97.5 percentile interval of its coefficient,
    the ratio mean/std ("t-value") and the two-sided tail probability of that
    ratio are printed as one table row.

    :param clf: estimator with a ``fit`` method; refitted in place
    :param X: training inputs, passed to ``resample``
    :param y: training targets, passed to ``resample``
    :param features: list of feature names; must contain 'SUBPOI',
                     'COMPLEX_POI' and 'Building'
    """
    coefs = [[] for _ in range(len(features))]
    for _ in range(BOOTSTRAP_ITERATIONS):
        X_sample, y_sample = resample(X, y)
        clf.fit(X_sample, y_sample)
        for i, c in enumerate(get_normalized_coefs(clf)):
            coefs[i].append(c)

    subpoi_index = features.index('SUBPOI')
    poi_index = features.index('COMPLEX_POI')
    building_index = features.index('Building')
    # NOTE(review): the 'Building' and 'COMPLEX_POI' rows are replaced by the
    # 'SUBPOI' samples, so all three rows print the same numbers - confirm
    # this is intended.
    coefs[building_index] = coefs[subpoi_index]
    coefs[poi_index] = coefs[subpoi_index]

    row = '{:<20}{:<20}{:<10}{:<10}'
    print()
    print('***** Bootstrap statistics *****')
    print(row.format('Feature', '95% interval', 't-value', 'Pr(>|t|)'))
    print()
    for name, cs in zip(features, coefs):
        values = np.array(cs)
        lo = np.percentile(values, 2.5)
        hi = np.percentile(values, 97.5)
        interval = '({:.3f}, {:.3f})'.format(lo, hi)
        tv = np.mean(values) / np.std(values)
        # Pr(>|t|) is the two-sided probability: twice the upper tail of |t|
        # (the tail was previously halved instead of doubled).
        pr = t.sf(x=abs(tv), df=len(values)) * 2.0
        stv = '{:.3f}'.format(tv)
        spr = '{:.3f}'.format(pr)
        print(row.format(name, interval, stv, spr))
def compute_waitlist_death_diff(base_case, new_case):
    """
    This function computes the difference of deaths between the base case and
    another case. It applies an unequal-variance t-test to compute the p-value.
    @Input:
        @base_case: base case death data set
        @new_case: new case death data set
    @Output:
        @diff: death difference (new minus base)
        @p_value: two-sided p value of the test
    """
    # count the number of observations in each case
    n_base = len(base_case)
    n_new = len(new_case)

    # compute the difference of the average number of deaths
    diff = np.mean(new_case) - np.mean(base_case)

    # Sample variances (ddof=1): the standard error of a t-test is built from
    # the unbiased estimates, not numpy's default population variance.
    var_base = np.var(base_case, ddof=1)
    var_new = np.var(new_case, ddof=1)

    # compute the t score
    t_score = np.absolute(diff) / np.sqrt(var_base / n_base + var_new / n_new)

    # The smaller of the two per-sample degrees of freedom is used here rather
    # than the Welch-Satterthwaite approximation.
    dof = min(n_base - 1, n_new - 1)

    # two-sided p-value; sf avoids the cancellation in 1 - cdf
    return diff, 2 * t.sf(t_score, dof)
def whelchs_t(a_mu, a_var, b_mu, b_var, a_n, b_n):
    """
    Summarise element-wise t statistics between two groups a and b.

    The statistic is ``(a_mu - b_mu) / sqrt(a_var + b_var)``; NaN entries are
    set to 0 before the absolute values are reduced along axis 0.

    :param np.ndarray a_mu: means of group a, presumably (samples, genes)
    :param np.ndarray a_var: variances of group a, same shape as a_mu
    :param np.ndarray b_mu: means of group b
    :param np.ndarray b_var: variances of group b
    :param int a_n: passed to ``whelch_satterthwaite_df``
    :param int b_n: passed to ``whelch_satterthwaite_df``
    :return: median absolute statistic along axis 0, its two-sided p-value,
             and the transposed 2.5/97.5 percentiles of the absolute statistic
    """
    df = whelch_satterthwaite_df(a_var, b_var, a_n, b_n)

    statistic = (a_mu - b_mu) / np.sqrt(a_var + b_var)  # (samples, genes)
    # NaNs appear where a or b has no observations (division by zero)
    nan_mask = np.isnan(statistic)
    statistic[nan_mask] = 0

    magnitude = np.abs(statistic)
    median_statistic = np.median(magnitude, axis=0)
    p = (1 - t.cdf(median_statistic, df)) * 2  # p-value
    ci_95 = np.percentile(magnitude, [2.5, 97.5], axis=0).T
    return median_statistic, p, ci_95
def dependent_ttest(data1, data2, alpha):
    """Paired (dependent-samples) Student's t-test.

    :param data1: first sample
    :param data2: second sample, paired by index with data1
    :param alpha: significance level used for the critical value
    :return: (t_stat, df, cv, p) -- the t statistic, the degrees of freedom
             ``n - 1``, the critical value ``t.ppf(1 - alpha, df)`` and the
             two-sided p-value
    """
    # number of paired samples
    n = len(data1)
    diffs = [data1[i] - data2[i] for i in range(n)]
    # sum of squared differences and plain sum of differences
    sum_sq = sum([d ** 2 for d in diffs])
    sum_d = sum(diffs)
    # standard deviation of the differences (computational formula)
    sd = sqrt((sum_sq - (sum_d ** 2 / n)) / (n - 1))
    # standard error of the difference between the means
    sed = sd / sqrt(n)
    t_stat = (mean(data1) - mean(data2)) / sed
    dof = n - 1
    critical = t.ppf(1.0 - alpha, dof)
    p_value = (1.0 - t.cdf(abs(t_stat), dof)) * 2.0
    return t_stat, dof, critical, p_value