def calc_twosample_ts(propGroup1, propGroup2):
    """Compute an unpooled two-sample t-statistic for every feature.

    propGroup1 and propGroup2 are indexed as [feature][sample]. The number
    of samples in each group is taken from the first feature of that group,
    and the number of features from len(propGroup1).

    Returns three parallel lists with one entry per feature:
      T_statistics -- (mean1 - mean2) / sqrt(var1/n1 + var2/n2)
      effectSizes  -- (mean1 - mean2) * 100
      notes        -- '' normally, or a message flagging the zero-variance case

    The variances come from the `variance(values, mean)` helper defined
    elsewhere in this file. An empty propGroup1 yields three empty lists.
    """
    T_statistics = []
    effectSizes = []
    notes = []

    numFeatures = len(propGroup1)
    if numFeatures == 0:
        # No features at all: return before indexing propGroup1[0].
        return T_statistics, effectSizes, notes

    n1 = len(propGroup1[0])
    n2 = len(propGroup2[0])

    # range() instead of xrange() so the loop runs on Python 2 and 3 alike.
    for r in range(numFeatures):
        meanG1 = float(sum(propGroup1[r])) / n1
        varG1 = variance(propGroup1[r], meanG1)
        sqStdErrG1 = varG1 / n1  # squared standard error of the group 1 mean

        meanG2 = float(sum(propGroup2[r])) / n2
        varG2 = variance(propGroup2[r], meanG2)
        sqStdErrG2 = varG2 / n2  # squared standard error of the group 2 mean

        dp = meanG1 - meanG2
        effectSizes.append(dp * 100)

        denom = math.sqrt(sqStdErrG1 + sqStdErrG2)
        if denom == 0:
            # Both groups have zero variance; substitute a tiny denominator
            # so the statistic stays finite (and is 0 when the means agree).
            notes.append('degenerate case: zero variance for both groups; variance set to 1e-6.')
            T_statistics.append(dp / 1e-6)
        else:
            notes.append('')
            T_statistics.append(dp / denom)

    return T_statistics, effectSizes, notes
def calc_twosample_ts(propGroup1, propGroup2):
    """Compute an unpooled two-sample t-statistic for every feature.

    Both arguments are indexed as [feature][sample]. Sample counts are read
    from the first feature of each group; the feature count is
    len(propGroup1).

    Returns (T_statistics, effectSizes, notes), three lists with one entry
    per feature:
      T_statistics -- (mean1 - mean2) / sqrt(var1/n1 + var2/n2)
      effectSizes  -- (mean1 - mean2) * 100
      notes        -- '' normally, or a message flagging the zero-variance case

    Variances are obtained from the `variance(values, mean)` helper defined
    elsewhere in this file. When propGroup1 is empty, three empty lists are
    returned.
    """
    T_statistics = []
    effectSizes = []
    notes = []

    numFeatures = len(propGroup1)
    if numFeatures == 0:
        # Nothing to test; bail out before propGroup1[0] can raise IndexError.
        return T_statistics, effectSizes, notes

    n1 = len(propGroup1[0])
    n2 = len(propGroup2[0])

    # range() works on both Python 2 and Python 3; xrange() is Python 2 only.
    for r in range(numFeatures):
        meanG1 = float(sum(propGroup1[r])) / n1
        varG1 = variance(propGroup1[r], meanG1)
        sqStdErrG1 = varG1 / n1  # squared standard error of the group 1 mean

        meanG2 = float(sum(propGroup2[r])) / n2
        varG2 = variance(propGroup2[r], meanG2)
        sqStdErrG2 = varG2 / n2  # squared standard error of the group 2 mean

        dp = meanG1 - meanG2
        effectSizes.append(dp * 100)

        denom = math.sqrt(sqStdErrG1 + sqStdErrG2)
        if denom == 0:
            # Zero variance in both groups: divide by a tiny constant instead
            # of zero so a finite statistic is still reported.
            notes.append('degenerate case: zero variance for both groups; variance set to 1e-6.')
            T_statistics.append(dp / 1e-6)
        else:
            notes.append('')
            T_statistics.append(dp / denom)

    return T_statistics, effectSizes, notes
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Unequal-variance (Welch-style) two-sample t-test on per-sample proportions.

    Each proportion is seqGroupX[i] / parentSeqGroupX[i]. A parent count that
    is not positive contributes a proportion of 0.0 and sets a note instead
    of raising ZeroDivisionError.

    Parameters:
      seqGroup1, seqGroup2             -- per-sample counts for each group
      parentSeqGroup1, parentSeqGroup2 -- per-sample parent counts
      confIntervMethod                 -- not used by this implementation
      coverage                         -- confidence level of the interval, e.g. 0.95

    Returns a 6-tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference of mean proportions * 100, note)

    Degenerate inputs (fewer than 2 samples in a group, or zero variance in
    both groups) produce fixed fallback values and an explanatory note.
    `variance` is a helper defined elsewhere in this file; `t` is presumably
    scipy.stats.t -- confirm against the file's imports.
    """
    note = ''

    n1 = len(seqGroup1)
    n2 = len(seqGroup2)

    if n1 >= 2 and n2 >= 2:
        # calculate proportions, guarding against empty parent categories
        propGroup1 = []
        for i in range(n1):
            if parentSeqGroup1[i] > 0:
                propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
            else:
                propGroup1.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        propGroup2 = []
        for i in range(n2):
            if parentSeqGroup2[i] > 0:
                propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
            else:
                propGroup2.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        # calculate p-value, effect size, and CI
        meanG1 = float(sum(propGroup1)) / n1
        meanG2 = float(sum(propGroup2)) / n2
        dp = meanG1 - meanG2

        varG1 = variance(propGroup1, meanG1)
        varG2 = variance(propGroup2, meanG2)

        normVarG1 = varG1 / n1
        normVarG2 = varG2 / n2
        unpooledVar = normVarG1 + normVarG2
        sqrtUnpooledVar = math.sqrt(unpooledVar)

        if unpooledVar != 0:
            # p-value
            T_statistic = dp / sqrtUnpooledVar
            # Welch-Satterthwaite approximation of the degrees of freedom
            dof = (unpooledVar*unpooledVar) / ((normVarG1*normVarG1)/(n1-1) + (normVarG2*normVarG2)/(n2-1))
            pValue = t.cdf(T_statistic, dof)

            # CI
            tCritical = t.isf(0.5 * (1.0-coverage), dof)  # 0.5 factor accounts for the symmetric nature of the distribution
            lowerCI = dp - tCritical*sqrtUnpooledVar
            upperCI = dp + tCritical*sqrtUnpooledVar
        else:
            if meanG1 != meanG2:
                # the difference (at least according to these samples) must be true as there is no variance
                # NOTE(review): with pValue = 0.0 the first returned value (1.0 - pValue)
                # is 1.0 whichever group has the larger mean -- confirm this is intended.
                pValue = 0.0
            else:
                pValue = 0.5

            lowerCI = dp
            upperCI = dp
            note = 'degenerate case: variance of both groups is zero'
    else:
        pValue = 0.5
        lowerCI = 0.0
        upperCI = 0.0
        dp = 0.0
        note = 'degenerate case: both groups must contain at least 2 samples'

    return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Equal-variance (pooled) two-sample t-test on per-sample proportions.

    Each proportion is seqGroupX[i] / parentSeqGroupX[i]. A parent count that
    is not positive contributes a proportion of 0.0 and sets a note instead
    of raising ZeroDivisionError.

    Parameters:
      seqGroup1, seqGroup2             -- per-sample counts for each group
      parentSeqGroup1, parentSeqGroup2 -- per-sample parent counts
      confIntervMethod                 -- not used by this implementation
      coverage                         -- confidence level of the interval, e.g. 0.95

    Returns a 6-tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference of mean proportions * 100, note)
    where note is always a string.

    Degenerate inputs are detected with explicit checks rather than by
    catching exceptions: a generic `except Exception` placed ahead of
    `except ZeroDivisionError` made the zero-variance handler unreachable
    and turned `note` into an exception object. Unexpected errors (for
    example mismatched list lengths) now propagate to the caller.
    `variance` is a helper defined elsewhere in this file; `t` is presumably
    scipy.stats.t -- confirm against the file's imports.
    """
    note = ''

    n1 = len(seqGroup1)
    n2 = len(seqGroup2)

    if n1 < 2 or n2 < 2:
        pValue = 0.5
        lowerCI = 0.0
        upperCI = 0.0
        dp = 0.0
        note = 'degenerate case: both groups must contain at least 2 samples'
    else:
        # calculate proportions, guarding against empty parent categories
        propGroup1 = []
        for i in range(n1):
            if parentSeqGroup1[i] > 0:
                propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
            else:
                propGroup1.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        propGroup2 = []
        for i in range(n2):
            if parentSeqGroup2[i] > 0:
                propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
            else:
                propGroup2.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        # calculate statistics
        meanG1 = float(sum(propGroup1)) / n1
        meanG2 = float(sum(propGroup2)) / n2
        dp = meanG1 - meanG2

        varG1 = variance(propGroup1, meanG1)
        varG2 = variance(propGroup2, meanG2)

        dof = n1 + n2 - 2
        pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / (n1 + n2 - 2)
        sqrtPooledVar = math.sqrt(pooledVar)
        denom = sqrtPooledVar * math.sqrt(1.0/n1 + 1.0/n2)

        if denom != 0:
            # p-value
            T_statistic = dp / denom
            pValue = t.cdf(T_statistic, dof)

            # CI
            tCritical = t.isf(0.5 * (1.0-coverage), dof)  # 0.5 factor accounts for the symmetric nature of the distribution
            lowerCI = dp - tCritical*denom
            upperCI = dp + tCritical*denom
        else:
            if meanG1 != meanG2:
                # the difference (at least according to these samples) must be true as there is no variance
                # NOTE(review): with pValue = 0.0 the first returned value (1.0 - pValue)
                # is 1.0 whichever group has the larger mean -- confirm this is intended.
                pValue = 0.0
            else:
                pValue = 0.5

            lowerCI = dp
            upperCI = dp
            note = 'degenerate case: variance of both groups is zero'

    return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
#=======================================================================
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Equal-variance (pooled) two-sample t-test on per-sample proportions.

    Each proportion is seqGroupX[i] / parentSeqGroupX[i]; a parent count that
    is not positive contributes a proportion of 0.0 and sets a note.

    Parameters:
      seqGroup1, seqGroup2             -- per-sample counts for each group
      parentSeqGroup1, parentSeqGroup2 -- per-sample parent counts
      confIntervMethod                 -- not used by this implementation
      coverage                         -- confidence level of the interval, e.g. 0.95

    Returns a 6-tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference of mean proportions * 100, note)
    where note is always a string.

    Degenerate inputs are handled by explicit checks. The earlier
    try/except form listed `except Exception` before
    `except ZeroDivisionError`, so the zero-variance handler could never
    run and `note` ended up bound to an exception object. Unexpected errors
    (for example mismatched list lengths) now propagate to the caller.
    `variance` is a helper defined elsewhere in this file; `t` is presumably
    scipy.stats.t -- confirm against the file's imports.
    """
    note = ''

    n1 = len(seqGroup1)
    n2 = len(seqGroup2)

    if n1 < 2 or n2 < 2:
        pValue = 0.5
        lowerCI = 0.0
        upperCI = 0.0
        dp = 0.0
        note = 'degenerate case: both groups must contain at least 2 samples'
    else:
        # calculate proportions
        propGroup1 = []
        for i in range(n1):
            if parentSeqGroup1[i] > 0:
                propGroup1.append(float(seqGroup1[i]) / parentSeqGroup1[i])
            else:
                propGroup1.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        propGroup2 = []
        for i in range(n2):
            if parentSeqGroup2[i] > 0:
                propGroup2.append(float(seqGroup2[i]) / parentSeqGroup2[i])
            else:
                propGroup2.append(0.0)
                note = 'degenerate case: parent group had a count of zero'

        # calculate statistics
        meanG1 = float(sum(propGroup1)) / n1
        meanG2 = float(sum(propGroup2)) / n2
        dp = meanG1 - meanG2

        varG1 = variance(propGroup1, meanG1)
        varG2 = variance(propGroup2, meanG2)

        dof = n1 + n2 - 2
        pooledVar = ((n1 - 1)*varG1 + (n2 - 1)*varG2) / (n1 + n2 - 2)
        sqrtPooledVar = math.sqrt(pooledVar)
        denom = sqrtPooledVar * math.sqrt(1.0/n1 + 1.0/n2)

        if denom != 0:
            # p-value
            T_statistic = dp / denom
            pValue = t.cdf(T_statistic, dof)

            # CI
            tCritical = t.isf(0.5 * (1.0-coverage), dof)  # 0.5 factor accounts for the symmetric nature of the distribution
            lowerCI = dp - tCritical*denom
            upperCI = dp + tCritical*denom
        else:
            if meanG1 != meanG2:
                # the difference (at least according to these samples) must be true as there is no variance
                # NOTE(review): with pValue = 0.0 the first returned value (1.0 - pValue)
                # is 1.0 whichever group has the larger mean -- confirm this is intended.
                pValue = 0.0
            else:
                pValue = 0.5

            lowerCI = dp
            upperCI = dp
            note = 'degenerate case: variance of both groups is zero'

    return 1.0 - pValue, 2*min(pValue, 1.0 - pValue), lowerCI*100, upperCI*100, dp*100, note
def run(self, seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, confIntervMethod, coverage):
    """Unequal-variance two-sample t-test on per-sample proportions.

    A proportion is seqGroupX[i] / parentSeqGroupX[i]; when the parent count
    is not positive, 0.0 is used and a note is recorded.

    Parameters:
      seqGroup1, seqGroup2             -- per-sample counts for each group
      parentSeqGroup1, parentSeqGroup2 -- per-sample parent counts
      confIntervMethod                 -- not used by this implementation
      coverage                         -- confidence level of the interval, e.g. 0.95

    Returns a 6-tuple:
      (1 - cdf(T), two-sided p-value, lower CI * 100, upper CI * 100,
       difference of mean proportions * 100, note)

    `variance` is a helper defined elsewhere in this file; `t` is presumably
    scipy.stats.t -- confirm against the file's imports.
    """
    size1 = len(seqGroup1)
    size2 = len(seqGroup2)

    # Guard clause: the test is undefined with fewer than two samples per group.
    if size1 < 2 or size2 < 2:
        cdfValue = 0.5
        message = 'degenerate case: both groups must contain at least 2 samples'
        return 1.0 - cdfValue, 2 * min(cdfValue, 1.0 - cdfValue), 0.0 * 100, 0.0 * 100, 0.0 * 100, message

    message = ''

    # Per-sample proportions for group 1.
    fractions1 = []
    for idx, count in enumerate(seqGroup1):
        parent = parentSeqGroup1[idx]
        if parent > 0:
            fractions1.append(float(count) / parent)
            continue
        fractions1.append(0.0)
        message = 'degenerate case: parent group had a count of zero'

    # Per-sample proportions for group 2.
    fractions2 = []
    for idx, count in enumerate(seqGroup2):
        parent = parentSeqGroup2[idx]
        if parent > 0:
            fractions2.append(float(count) / parent)
            continue
        fractions2.append(0.0)
        message = 'degenerate case: parent group had a count of zero'

    # Means, their difference, and the variance of each mean.
    mean1 = float(sum(fractions1)) / size1
    mean2 = float(sum(fractions2)) / size2
    diff = mean1 - mean2

    meanVar1 = variance(fractions1, mean1) / size1
    meanVar2 = variance(fractions2, mean2) / size2
    totalVar = meanVar1 + meanVar2
    stdErr = math.sqrt(totalVar)

    if totalVar == 0:
        # No variance in either group: the interval collapses onto the difference.
        # NOTE(review): cdfValue = 0.0 makes the first returned value 1.0
        # whichever group has the larger mean -- confirm this is intended.
        cdfValue = 0.5 if mean1 == mean2 else 0.0
        ciLow = diff
        ciHigh = diff
        message = 'degenerate case: variance of both groups is zero'
    else:
        tStat = diff / stdErr

        # Welch-Satterthwaite approximation of the degrees of freedom.
        dofNumerator = totalVar * totalVar
        dofDenominator = (meanVar1 * meanVar1) / (size1 - 1) + (meanVar2 * meanVar2) / (size2 - 1)
        dof = dofNumerator / dofDenominator
        cdfValue = t.cdf(tStat, dof)

        # Halve the excluded probability because the interval is two-sided.
        tCritical = t.isf(0.5 * (1.0 - coverage), dof)
        ciLow = diff - tCritical * stdErr
        ciHigh = diff + tCritical * stdErr

    return 1.0 - cdfValue, 2 * min(cdfValue, 1.0 - cdfValue), ciLow * 100, ciHigh * 100, diff * 100, message