Пример #1
0
def detect_differentially_abundant_features(seqGroup1, seqGroup2,
                                            parentSeqGroup1, parentSeqGroup2,
                                            coverage, B, preferences,
                                            progress):
    """Compute per-feature statistics comparing two groups of samples.

    Each feature's counts are converted to proportions of the matching
    parent counts, a two-sample t-statistic is computed per feature, and
    p-values / confidence intervals are derived from permutations of that
    statistic. For sparse features (see the heuristic below) the results
    are overwritten with Fisher's exact test and an asymptotic
    difference-between-proportions confidence interval computed on the
    counts pooled over each group.

    Parameters:
        seqGroup1, seqGroup2: indexable as [feature][sample]; one row per
            feature. All rows of a group are read with the column count of
            that group's first row.
        parentSeqGroup1, parentSeqGroup2: same layout as the matching
            seqGroup; used as the denominators of the proportions.
            NOTE(review): a zero parent count would divide by zero here --
            confirm callers never pass one.
        coverage: forwarded unchanged to permuted_statistics and to
            DiffBetweenPropAsymptoticCC.run (presumably the nominal CI
            coverage -- confirm against those helpers).
        B: forwarded unchanged to permuted_statistics (presumably the
            number of permutations -- confirm).
        preferences: forwarded to the Fishers and
            DiffBetweenPropAsymptoticCC constructors.
        progress: None, or an object exposing wasCanceled().

    Returns:
        A 6-tuple (pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs,
        effectSizes, notes), each indexed by feature. Six empty lists are
        returned when there are no features or when progress reports a
        cancellation.
    """
    numFeatures = len(seqGroup1)
    if numFeatures == 0:
        # No features: nothing to test, and seqGroup1[0] below would raise.
        return [], [], [], [], [], []

    n1 = len(seqGroup1[0])
    n2 = len(seqGroup2[0])

    # convert to proportions
    propGroup1 = [[float(seqGroup1[r][c]) / parentSeqGroup1[r][c]
                   for c in xrange(n1)]
                  for r in xrange(numFeatures)]
    propGroup2 = [[float(seqGroup2[r][c]) / parentSeqGroup2[r][c]
                   for c in xrange(n2)]
                  for r in xrange(numFeatures)]

    # calculate t-statistics for unpooled variances for each feature
    T_statistics, effectSizes, notes = calc_twosample_ts(
        propGroup1, propGroup2)

    # generate statistics using non-parametric t-test based on permutations
    # of the t-statistic
    pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs = permuted_statistics(
        propGroup1, propGroup2, seqGroup1, seqGroup2, T_statistics, coverage,
        B, progress)
    if progress is not None and progress.wasCanceled():
        return [], [], [], [], [], []

    # generate p values for sparse data using fisher's exact test
    fishers = Fishers(preferences)
    diffBetweenProp = DiffBetweenPropAsymptoticCC(preferences)
    for r in xrange(numFeatures):
        total1 = sum(seqGroup1[r])
        total2 = sum(seqGroup2[r])

        # Sparse heuristic: both groups average less than one count per
        # sample for this feature.
        if total1 < n1 and total2 < n2:
            parentTotal1 = sum(parentSeqGroup1[r])
            parentTotal2 = sum(parentSeqGroup2[r])

            # The note returned by each helper is discarded; a fixed note
            # is recorded for the feature instead.
            p1, p2, _ = fishers.hypothesisTest(total1, total2,
                                               parentTotal1, parentTotal2)
            lowerCI, upperCI, es, _ = diffBetweenProp.run(total1, total2,
                                                          parentTotal1,
                                                          parentTotal2,
                                                          coverage)
            pValuesOneSided[r] = p1
            pValuesTwoSided[r] = p2
            lowerCIs[r] = lowerCI
            upperCIs[r] = upperCI
            effectSizes[r] = es
            notes[r] = "heuristic: statistics calculated with Fisher's test"

    return pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs, effectSizes, notes
Пример #2
0
	def testDiffBetweenPropAsymptoticCC(self):
		"""Verify computation of Difference between proportions asymptotic CI method with continuity correction"""
		from stamp.plugins.samples.confidenceIntervalMethods.DiffBetweenPropAsymptoticCC import DiffBetweenPropAsymptoticCC
		ciMethod = DiffBetweenPropAsymptoticCC(preferences)

		def checkTable(table, expectedLower, expectedUpper, expectedEffect):
			# Run the method on the first four entries of the table at 0.95
			# coverage and compare each result with its reference value.
			# The fourth return value is ignored.
			lowerCI, upperCI, effectSize, _ = ciMethod.run(table[0], table[1], table[2], table[3], 0.95)
			self.assertAlmostEqual(lowerCI, expectedLower)
			self.assertAlmostEqual(upperCI, expectedUpper)
			self.assertAlmostEqual(effectSize, expectedEffect)

		# Reference values for the two module-level tables.
		checkTable(table1, -13.3167148125733, 39.98338147924, 13.333333333)
		checkTable(table2, 0.271407084568653, 0.328592915431347, 0.3)
Пример #3
0
def detect_differentially_abundant_features(seqGroup1, seqGroup2, parentSeqGroup1, parentSeqGroup2, coverage, B, preferences, progress):
	"""Compute per-feature statistics comparing two groups of samples.

	Each feature's counts are converted to proportions of the matching
	parent counts, a two-sample t-statistic is computed per feature, and
	p-values / confidence intervals are derived from permutations of that
	statistic. For sparse features (see the heuristic below) the results
	are overwritten with Fisher's exact test and an asymptotic
	difference-between-proportions confidence interval computed on the
	counts pooled over each group.

	Parameters:
		seqGroup1, seqGroup2: indexable as [feature][sample]; one row per
			feature. All rows of a group are read with the column count of
			that group's first row.
		parentSeqGroup1, parentSeqGroup2: same layout as the matching
			seqGroup; used as the denominators of the proportions.
			NOTE(review): a zero parent count would divide by zero here --
			confirm callers never pass one.
		coverage: forwarded unchanged to permuted_statistics and to
			DiffBetweenPropAsymptoticCC.run (presumably the nominal CI
			coverage -- confirm against those helpers).
		B: forwarded unchanged to permuted_statistics (presumably the
			number of permutations -- confirm).
		preferences: forwarded to the Fishers and
			DiffBetweenPropAsymptoticCC constructors.
		progress: None, or an object exposing wasCanceled().

	Returns:
		A 6-tuple (pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs,
		effectSizes, notes), each indexed by feature. Six empty lists are
		returned when there are no features or when progress reports a
		cancellation.
	"""
	numFeatures = len(seqGroup1)
	if numFeatures == 0:
		# No features: nothing to test, and seqGroup1[0] below would raise.
		return [], [], [], [], [], []

	n1 = len(seqGroup1[0])
	n2 = len(seqGroup2[0])

	# convert to proportions
	propGroup1 = [[float(seqGroup1[r][c]) / parentSeqGroup1[r][c] for c in xrange(n1)] for r in xrange(numFeatures)]
	propGroup2 = [[float(seqGroup2[r][c]) / parentSeqGroup2[r][c] for c in xrange(n2)] for r in xrange(numFeatures)]

	# calculate t-statistics for unpooled variances for each feature
	T_statistics, effectSizes, notes = calc_twosample_ts(propGroup1, propGroup2)

	# generate statistics using non-parametric t-test based on permutations of the t-statistic
	pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs = permuted_statistics(propGroup1, propGroup2, seqGroup1, seqGroup2, T_statistics, coverage, B, progress)
	if progress is not None and progress.wasCanceled():
		return [], [], [], [], [], []

	# generate p values for sparse data using fisher's exact test
	fishers = Fishers(preferences)
	diffBetweenProp = DiffBetweenPropAsymptoticCC(preferences)
	for r in xrange(numFeatures):
		total1 = sum(seqGroup1[r])
		total2 = sum(seqGroup2[r])

		# Sparse heuristic: both groups average less than one count per
		# sample for this feature.
		if total1 < n1 and total2 < n2:
			parentTotal1 = sum(parentSeqGroup1[r])
			parentTotal2 = sum(parentSeqGroup2[r])

			# The note returned by each helper is discarded; a fixed note
			# is recorded for the feature instead.
			p1, p2, _ = fishers.hypothesisTest(total1, total2, parentTotal1, parentTotal2)
			lowerCI, upperCI, es, _ = diffBetweenProp.run(total1, total2, parentTotal1, parentTotal2, coverage)
			pValuesOneSided[r] = p1
			pValuesTwoSided[r] = p2
			lowerCIs[r] = lowerCI
			upperCIs[r] = upperCI
			effectSizes[r] = es
			notes[r] = "heuristic: statistics calculated with Fisher's test"

	return pValuesOneSided, pValuesTwoSided, lowerCIs, upperCIs, effectSizes, notes