def TwoSamples_tTest(x, y, SignificanceLevel=0.05):
    """Two-sided, pooled-variance (Student) t-test for two independent samples.

    Parameters
    ----------
    x, y : sequence of numbers
        The two samples. Each is passed to ``len``, ``np.mean`` and
        ``np.std(..., ddof=1)``.
    SignificanceLevel : float, optional
        Significance level ``alpha`` used for the rejection range and for the
        ``1 - alpha`` confidence interval. Defaults to 0.05.

    Returns
    -------
    T : float
        Test statistic ``(mean(x) - mean(y)) / (S_pool * sqrt(1/n + 1/m))``.
    p : float
        Two-sided p-value of ``T`` under a t distribution with ``n + m - 2``
        degrees of freedom.
    RejectionRange : ndarray, shape (2, 2)
        ``[[-inf, -t_crit], [t_crit, inf]]``: the values of ``T`` lying
        outside the central ``1 - alpha`` interval of the t distribution.
    MeansInterval : ndarray, shape (2,)
        ``1 - alpha`` confidence interval for ``mean(x) - mean(y)``.
    """
    from scipy.stats.distributions import t

    # Analyze data
    n = len(x)
    m = len(y)
    s_x = np.std(x, ddof=1)
    s_y = np.std(y, ddof=1)
    x_bar = np.mean(x)
    y_bar = np.mean(y)

    # Test statistic. Float literals keep the reciprocals from truncating to
    # zero under Python 2 integer division.
    DOFs = n + m - 2
    S_pool = np.sqrt(1.0 / DOFs * ((n - 1) * s_x**2 + (m - 1) * s_y**2))
    StdErr = S_pool * np.sqrt(1.0 / n + 1.0 / m)
    T = (x_bar - y_bar) / StdErr

    # Two-sided p-value. The survival function is evaluated directly because
    # ``1 - cdf(T)`` rounds to exactly 0 for large positive T, which made the
    # result depend on the order in which the samples were passed.
    p = 2 * t.sf(np.abs(T), DOFs)

    # Critical values of the t distribution and the matching rejection range
    T_Interval = np.array(t.interval(1 - SignificanceLevel, DOFs))
    RejectionRange = np.array([[-np.inf, T_Interval[0]],
                               [T_Interval[1], np.inf]])

    # Confidence interval for the difference in means
    MeansInterval = (x_bar - y_bar) + T_Interval * StdErr

    return T, p, RejectionRange, MeansInterval
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Welch's (unequal-variance) t-test on the per-sample proportions of two groups.

    Each sample's proportion is ``seqGroup[i] / parentSeqGroup[i]``. A sample
    whose parent count is not positive contributes a proportion of 0.0 and
    sets a note instead of raising ZeroDivisionError.

    Parameters:
      seqGroup1, seqGroup2: per-sample counts for group 1 and group 2.
      parentSeqGroup1, parentSeqGroup2: per-sample parent counts, indexed in
        step with the matching seqGroup.
      confIntervMethod: not used by this method.
      coverage: confidence-interval coverage as a fraction (e.g. 0.95).

    Returns a tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference in mean proportions * 100, note)
    where ``note`` is '' unless a degenerate case was encountered.
    """
    note = ''

    n1 = len(seqGroup1)
    n2 = len(seqGroup2)

    if n1 >= 2 and n2 >= 2:
        # calculate proportions (range() works on both Python 2 and 3)
        propGroup1 = []
        for i in range(n1):
            if parentSeqGroup1[i] > 0:
                propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
            else:
                propGroup1.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        propGroup2 = []
        for i in range(n2):
            if parentSeqGroup2[i] > 0:
                propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
            else:
                propGroup2.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        # calculate p-value, effect size, and CI
        meanG1 = float(sum(propGroup1)) / n1
        meanG2 = float(sum(propGroup2)) / n2
        dp = meanG1 - meanG2

        # variance() is a helper defined elsewhere in this file.
        # NOTE(review): presumably the sample variance about the given mean - confirm.
        varG1 = variance(propGroup1, meanG1)
        varG2 = variance(propGroup2, meanG2)

        normVarG1 = varG1 / n1
        normVarG2 = varG2 / n2
        unpooledVar = normVarG1 + normVarG2
        sqrtUnpooledVar = math.sqrt(unpooledVar)

        if unpooledVar != 0:
            # p-value
            T_statistic = (meanG1 - meanG2) / sqrtUnpooledVar
            # Welch-Satterthwaite approximation of the degrees of freedom
            dof = (unpooledVar*unpooledVar) / ((normVarG1*normVarG1)/(n1-1) + (normVarG2*normVarG2)/(n2-1))
            pValue = t.cdf(T_statistic, dof)

            # CI: the 0.5 factor splits the uncovered mass between both tails
            tCritical = t.isf(0.5 * (1.0-coverage), dof)
            lowerCI = dp - tCritical*sqrtUnpooledVar
            upperCI = dp + tCritical*sqrtUnpooledVar
        else:
            if meanG1 != meanG2:
                # with no variance, any observed difference is taken as real
                pValue = 0.0
            else:
                pValue = 0.5

            lowerCI = dp
            upperCI = dp
            note = 'degenerate case: variance of both groups is zero'
    else:
        pValue = 0.5
        lowerCI = 0.0
        upperCI = 0.0
        dp = 0.0
        note = 'degenerate case: both groups must contain at least 2 samples'

    return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Equal-variance (pooled) two-sample t-test on per-sample proportions.

    Each sample's proportion is ``seqGroup[i] / parentSeqGroup[i]``.

    Parameters:
      seqGroup1, seqGroup2: per-sample counts for group 1 and group 2.
      parentSeqGroup1, parentSeqGroup2: per-sample parent counts, indexed in
        step with the matching seqGroup.
      confIntervMethod: not used by this method.
      coverage: confidence-interval coverage as a fraction (e.g. 0.95).

    Returns a tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference in mean proportions * 100, note)
    ``note`` is always a string: '' on success, otherwise a description of
    the degenerate case or the message of the error that stopped the test.
    When the test cannot be evaluated at all, the neutral result
    (0.5, 1.0, 0.0, 0.0, 0.0, note) is returned.
    """
    note = ''

    n1 = len(seqGroup1)
    n2 = len(seqGroup2)

    try:
        if n1 < 2 or n2 < 2:
            raise ValueError('degenerate case: both groups must contain at least 2 samples')

        # calculate proportions (range() works on both Python 2 and 3)
        propGroup1 = [float(seqGroup1[i]) / parentSeqGroup1[i] for i in range(n1)]
        propGroup2 = [float(seqGroup2[i]) / parentSeqGroup2[i] for i in range(n2)]

        # calculate statistics
        meanG1 = float(sum(propGroup1)) / n1
        meanG2 = float(sum(propGroup2)) / n2
        diff = meanG1 - meanG2

        # variance() is a helper defined elsewhere in this file.
        # NOTE(review): presumably the sample variance about the given mean - confirm.
        varG1 = variance(propGroup1, meanG1)
        varG2 = variance(propGroup2, meanG2)

        dof = n1 + n2 - 2
        pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / dof
        sqrtPooledVar = math.sqrt(pooledVar)
        denom = sqrtPooledVar * math.sqrt(1.0/n1 + 1.0/n2)

        if denom == 0:
            # Zero variance is tested for explicitly. It used to be signalled
            # by a ZeroDivisionError whose handler could never run, because
            # it was listed after the catch-all handler.
            if meanG1 != meanG2:
                # with no variance, any observed difference is taken as real
                pValue = 0.0
            else:
                pValue = 0.5

            lowerCI = diff
            upperCI = diff
            note = 'degenerate case: variance of both groups is zero'
        else:
            # p-value
            T_statistic = diff / denom
            pValue = t.cdf(T_statistic, dof)

            # CI: the 0.5 factor splits the uncovered mass between both tails
            tCritical = t.isf(0.5 * (1.0-coverage), dof)
            lowerCI = diff - tCritical*denom
            upperCI = diff + tCritical*denom

        dp = diff
    except Exception as err:
        # Best effort: report a neutral result and carry the reason in the
        # note. The exception is not bound to ``note`` directly because
        # Python 3 unbinds an ``except ... as`` target when the handler ends.
        note = str(err)
        pValue = 0.5
        lowerCI = 0.0
        upperCI = 0.0
        dp = 0.0

    return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
def build_aggregate_model(self):
    """Build one aggregate model from the entries of ``self.modelData``.

    Metadata is copied from the first entry, the STRFs (raw and
    Gaussian-smoothed) are averaged across entries, and the output
    nonlinearities are resampled onto a shared grid and averaged.
    The result is stored in ``self.aggregateModel``; nothing is returned.
    """
    agg = SingleModelData()
    folds = self.modelData

    agg.nfolds = len(folds)
    agg.aggregate = True
    agg.unit = self.unit

    # Metadata is taken verbatim from the first entry.
    template = folds[0]
    copied_fields = ('preprocFile', 'stimClass', 'freqs', 'numChans',
                     'timeLags', 'high_freq', 'low_freq', 'is_surprise',
                     'is_count_response')
    for field in copied_fields:
        setattr(agg, field, getattr(template, field))

    agg.output_nls = [fold.output_nl for fold in folds]

    # --- aggregate STRF: mean and std across entries ---
    strf_stack = np.array([fold.strf for fold in folds])
    agg.strf = strf_stack.mean(axis=0).squeeze()
    agg.strfStd = strf_stack.std(axis=0).squeeze()

    # Same statistics after convolving every STRF with a 2-D Gaussian kernel.
    kernel = gaussian_2d_kernel(1)
    smooth_stack = np.array([convolve2d(one_strf, kernel, mode='same')
                             for one_strf in strf_stack])
    agg.smoothedStrf = smooth_stack.mean(axis=0).squeeze()
    agg.smoothedStrfStd = smooth_stack.std(axis=0).squeeze()

    # Two-sided tail probability of |mean / std| under a t distribution
    # with (number of entries - 1) degrees of freedom.
    ratio = np.abs(agg.smoothedStrf / agg.smoothedStrfStd)
    dof = len(folds) - 1
    agg.smoothedStrfPvals = (1 - tdist.cdf(ratio, dof)) * 2

    # --- aggregate output nonlinearity ---
    # Common domain: from the largest minimum to the smallest maximum.
    domain_lows = np.array([np.min(nl.domain) for nl in agg.output_nls])
    domain_highs = np.array([np.max(nl.domain) for nl in agg.output_nls])
    minx = domain_lows.max()
    maxx = domain_highs.min()

    # 200-point grid with the first point and the last two points dropped.
    avg_x = np.linspace(minx, maxx, 200)[1:-2]

    y = np.zeros([len(agg.output_nls), len(avg_x)])
    if minx < maxx:
        for row, nl in enumerate(agg.output_nls):
            xnl = nl.domain.squeeze()
            ynl = nl.range.squeeze()
            # Entries that squeeze down to a scalar keep their all-zero row.
            if len(xnl.shape) == 0 or len(ynl.shape) == 0:
                continue
            y[row, :] = interp1d(xnl, ynl)(avg_x)

    mean_nl = OutputNL()
    mean_nl.domain = avg_x
    mean_nl.range = y.mean(axis=0)
    mean_nl.range_std = y.std(axis=0)
    agg.output_nl = mean_nl

    self.aggregateModel = agg
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Equal-variance (pooled) two-sample t-test on per-sample proportions.

    Each sample's proportion is ``seqGroup[i] / parentSeqGroup[i]``. A sample
    whose parent count is not positive contributes a proportion of 0.0 and
    sets a note.

    Parameters:
      seqGroup1, seqGroup2: per-sample counts for group 1 and group 2.
      parentSeqGroup1, parentSeqGroup2: per-sample parent counts, indexed in
        step with the matching seqGroup.
      confIntervMethod: not used by this method.
      coverage: confidence-interval coverage as a fraction (e.g. 0.95).

    Returns a tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference in mean proportions * 100, note)
    ``note`` is always a string: '' on success, otherwise a description of
    the degenerate case or the message of the error that stopped the test.
    When the test cannot be evaluated at all, the neutral result
    (0.5, 1.0, 0.0, 0.0, 0.0, note) is returned.
    """
    note = ''

    n1 = len(seqGroup1)
    n2 = len(seqGroup2)

    try:
        if n1 < 2 or n2 < 2:
            raise ValueError('degenerate case: both groups must contain at least 2 samples')

        # calculate proportions (range() works on both Python 2 and 3)
        propGroup1 = []
        for i in range(n1):
            if parentSeqGroup1[i] > 0:
                propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
            else:
                propGroup1.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        propGroup2 = []
        for i in range(n2):
            if parentSeqGroup2[i] > 0:
                propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
            else:
                propGroup2.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        # calculate statistics
        meanG1 = float(sum(propGroup1)) / n1
        meanG2 = float(sum(propGroup2)) / n2
        diff = meanG1 - meanG2

        # variance() is a helper defined elsewhere in this file.
        # NOTE(review): presumably the sample variance about the given mean - confirm.
        varG1 = variance(propGroup1, meanG1)
        varG2 = variance(propGroup2, meanG2)

        dof = n1 + n2 - 2
        pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / dof
        sqrtPooledVar = math.sqrt(pooledVar)
        denom = sqrtPooledVar * math.sqrt(1.0/n1 + 1.0/n2)

        if denom == 0:
            # Zero variance is tested for explicitly. It used to be signalled
            # by a ZeroDivisionError whose handler could never run, because
            # it was listed after the catch-all handler.
            if meanG1 != meanG2:
                # with no variance, any observed difference is taken as real
                pValue = 0.0
            else:
                pValue = 0.5

            lowerCI = diff
            upperCI = diff
            note = 'degenerate case: variance of both groups is zero'
        else:
            # p-value
            T_statistic = diff / denom
            pValue = t.cdf(T_statistic, dof)

            # CI: the 0.5 factor splits the uncovered mass between both tails
            tCritical = t.isf(0.5 * (1.0-coverage), dof)
            lowerCI = diff - tCritical*denom
            upperCI = diff + tCritical*denom

        dp = diff
    except Exception as err:
        # Best effort: report a neutral result and carry the reason in the
        # note. The exception is not bound to ``note`` directly because
        # Python 3 unbinds an ``except ... as`` target when the handler ends.
        note = str(err)
        pValue = 0.5
        lowerCI = 0.0
        upperCI = 0.0
        dp = 0.0

    return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
# Degrees of freedom: (n1 - 1) + (n2 - 1), i.e. n1 + n2 - 2.
# NOTE(review): n1, n2, x1, x2 and SE are defined before this fragment;
# presumably the two sample sizes, the two sample means and the standard
# error of their difference - confirm against the code above.
DF = (n1 - 1) + (n2 - 1)
print('SE=', SE, 'DF=', DF)

# t-score: absolute value of (x1 - x2), minus a hypothesised difference of 0,
# divided by SE.
tscore = np.abs(((x1 - x2) - 0) / SE)
print(tscore)

# Critical t-values come from the Student t distribution.
from scipy.stats.distributions import t

# Two-sided critical value for confidence level c1 = 0.95
# (the 1 - alpha/2 quantile with DF degrees of freedom).
c1 = 0.95
alpha = 1 - c1
t95 = t.ppf(1.0 - alpha / 2.0, DF)
print(t95)

# Same computation for confidence level c1 = 0.94.
# Note that the result is stored in t95 again, overwriting the 0.95 value,
# so from here on t95 holds the 94% critical value despite its name.
c1 = 0.94
alpha = 1 - c1
t95 = t.ppf(1.0 - alpha / 2.0, DF)
print(t95)

# Probability mass of the t distribution between -tscore and +tscore.
f = t.cdf(tscore, DF) - t.cdf(-tscore, DF)
print(f)
# Unexplained variation uv = (rd**2).sum(1) / (dx.shape[1] - 4) # (x'x)^{-1} = (vs^2v')^{-1} xtx = np.dot(vt.T / s**2, vt) # Standard error for the interaction term se = np.sqrt(uv * xtx[3, 3]) # Z-scores for the interaction term zs = params[:, 3] / se zs = zs.dropna() zsa = np.abs(zs) # P-values for the interaction term pv = student_t.cdf(-np.abs(zs), xmat.shape[0] - xmat.shape[1]) # Bonferroni threshold bt = norm.ppf(1 - 0.025 / zs.shape[0]) # Calculate the FDR for a range of threshold from 2 to 5. fdr = [] n = len(zs) for t in np.linspace(0, 6, 20): d = np.sum(zsa > t) f = 2 * n * norm.cdf(-t) / d fdr.append([t, f, d]) fdr = np.asarray(fdr) # Plots relating to FDR plt.clf()
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Welch's (unequal-variance) t-test on the per-sample proportions of two groups.

    Each sample's proportion is ``seqGroup[i] / parentSeqGroup[i]``. A sample
    whose parent count is not positive contributes a proportion of 0.0 and
    sets a note.

    Parameters:
      seqGroup1, seqGroup2: per-sample counts for group 1 and group 2.
      parentSeqGroup1, parentSeqGroup2: per-sample parent counts, indexed in
        step with the matching seqGroup.
      confIntervMethod: not used by this method.
      coverage: confidence-interval coverage as a fraction (e.g. 0.95).

    Returns a tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference in mean proportions * 100, note)
    where ``note`` is '' unless a degenerate case was encountered.
    """
    note = ''

    n1 = len(seqGroup1)
    n2 = len(seqGroup2)

    if n1 >= 2 and n2 >= 2:
        # calculate proportions (range() works on both Python 2 and 3,
        # whereas xrange() exists only on Python 2)
        propGroup1 = []
        for i in range(n1):
            if parentSeqGroup1[i] > 0:
                propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
            else:
                propGroup1.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        propGroup2 = []
        for i in range(n2):
            if parentSeqGroup2[i] > 0:
                propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
            else:
                propGroup2.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        # calculate p-value, effect size, and CI
        meanG1 = float(sum(propGroup1)) / n1
        meanG2 = float(sum(propGroup2)) / n2
        dp = meanG1 - meanG2

        # sample variances (ddof=1) of the proportions
        varG1 = var(propGroup1, ddof=1)
        varG2 = var(propGroup2, ddof=1)

        normVarG1 = varG1 / n1
        normVarG2 = varG2 / n2
        unpooledVar = normVarG1 + normVarG2
        sqrtUnpooledVar = math.sqrt(unpooledVar)

        if unpooledVar != 0:
            # p-value
            T_statistic = (meanG1 - meanG2) / sqrtUnpooledVar
            # Welch-Satterthwaite approximation of the degrees of freedom
            dof = (unpooledVar*unpooledVar) / ((normVarG1*normVarG1)/(n1-1) + (normVarG2*normVarG2)/(n2-1))
            pValue = t.cdf(T_statistic, dof)

            # CI: the 0.5 factor splits the uncovered mass between both tails
            tCritical = t.isf(0.5 * (1.0-coverage), dof)
            lowerCI = dp - tCritical*sqrtUnpooledVar
            upperCI = dp + tCritical*sqrtUnpooledVar
        else:
            if meanG1 != meanG2:
                # with no variance, any observed difference is taken as real
                pValue = 0.0
            else:
                pValue = 0.5

            lowerCI = dp
            upperCI = dp
            note = 'degenerate case: variance of both groups is zero'
    else:
        pValue = 0.5
        lowerCI = 0.0
        upperCI = 0.0
        dp = 0.0
        note = 'degenerate case: both groups must contain at least 2 samples'

    return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note