예제 #1
0
def permutater(healthy, disease, tobs, pVal):
	"""Estimate a permutation-test p-value by walking group relabellings.

	The two samples are pooled, and every way of picking ``len(healthy)``
	pooled observations as the "healthy" group (the remainder forming the
	"disease" group) is visited in the order ``combinations`` yields them.
	After each relabelling the running estimate is the fraction of
	relabellings seen so far whose ``ttest`` value is at least as large in
	magnitude as ``tobs``. The walk stops as soon as that estimate lies
	within 0.01 of ``pVal``, or once every relabelling has been visited.

	Args:
		healthy: Sequence of observations for the first group.
		disease: Sequence of observations for the second group.
		tobs: Value of ``ttest`` on the original grouping.
		pVal: Reference p-value the running estimate is compared against.

	Returns:
		None. The final estimate and the number of relabellings evaluated
		are printed to standard output.
	"""
	# Pool both groups so that a relabelling is just a choice of indices.
	dataSet = np.append(healthy, disease)
	indices = range(len(dataSet))

	# Half-width of the acceptance band around pVal.
	tolerance = 0.01

	nSuccess = 0
	numRep = 0
	# combinations() is consumed lazily: the loop normally stops early, so
	# there is no point materialising every relabelling (and its complement)
	# before the first one is evaluated.
	for chosen in combinations(indices, len(healthy)):
		numRep += 1  # Count number of repetitions

		chosenSet = set(chosen)
		# Index with lists: NumPy would treat a tuple as a multi-dimensional
		# index rather than a selection of positions.
		healthyRange = list(chosen)
		diseaseRange = [i for i in indices if i not in chosenSet]
		randHealthy = dataSet[healthyRange]
		randDisease = dataSet[diseaseRange]

		# A relabelling counts as a success when its statistic is at least
		# as extreme as the observed one.
		trand = ttest(randHealthy, randDisease)
		if fabs(tobs) <= fabs(trand):
			nSuccess += 1

		pValue = nSuccess / float(numRep)

		# Stop once the running estimate is inside the band around pVal.
		if pVal - tolerance <= pValue <= pVal + tolerance:
			break

	print("Estimated p-value: {}".format(pValue))
	print("Number of Repetitions Required For Estimate: {}".format(numRep))
예제 #2
0
from ttest import ttest
from permutation import permutater

# Observations for the two groups being compared.
healthy = [
	1.0, 3.0, 1.6, 2.1,
	2.5, 2.7, 2.3, 1.5,
]
disease = [
	1.1, 0.5, 2.0, 2.0, 1.2,
	1.2, 1.3, 0.5, 1.4, 2.4,
]

# Value handed to permutater as its pVal argument.
REFERENCE_P_VALUE = 0.02291

# Statistic on the original grouping, then the permutation run.
tobs = ttest(healthy, disease)
permutater(healthy, disease, tobs, REFERENCE_P_VALUE)


예제 #3
0
#!/usr/bin/python

import sys
from ttest import print_ttest as ttest


def read(f):
	"""Parse whitespace-separated ``query score`` lines into a list of scores.

	Each non-blank line supplies a query name in its first field and a
	numeric score in its second; any further fields are ignored. Lines whose
	query is the literal ``'all'`` are dropped. When a query appears more
	than once, the last score seen wins. Blank and whitespace-only lines are
	skipped.

	Args:
		f: Iterable of text lines, e.g. an open file.

	Returns:
		The scores as floats, ordered by query name.

	Raises:
		IndexError: If a non-blank line has fewer than two fields.
		ValueError: If the second field is not a valid float.
	"""
	scores = {}
	for line in f:
		fields = line.split()
		if not fields:
			# A blank line (commonly the trailing newline at end of file)
			# carries no record; indexing it would raise IndexError.
			continue
		query, score = fields[0], float(fields[1])
		if query != 'all':
			scores[query] = score
	# Keys are unique, so ordering by key matches ordering the items.
	return [scores[query] for query in sorted(scores)]

if __name__ == '__main__':
	# Expects two command-line arguments: the paths of the two data files
	# to compare. Each file is parsed with read().
	if len(sys.argv) < 3:
		print('Supply two data files')
		# Use sys.exit rather than the bare exit() helper, which is injected
		# by the site module for interactive use and may be absent; a
		# non-zero status reports the usage error to the calling shell.
		sys.exit(1)

	d1_file = sys.argv[1]
	d2_file = sys.argv[2]

	with open(d1_file) as f:
		d1 = read(f)

	with open(d2_file) as f:
		d2 = read(f)

	# NOTE(review): the meaning of the third argument is defined by
	# print_ttest in the ttest module, which is not visible here.
	ttest(d1, d2, False)