def permutater(healthy, disease, tobs, pVal, tolerance=0.01):
    """Estimate a permutation-test p-value and print the result.

    The two samples are pooled and re-split into every healthy/disease
    relabelling that keeps the original group sizes, in the order produced
    by ``itertools.combinations``.  After each split the running p-value is
    the fraction of splits seen so far whose statistic is at least as large
    as ``tobs`` in absolute value.  Enumeration stops as soon as the running
    estimate lies within ``tolerance`` of ``pVal``, or when the splits are
    exhausted.

    Args:
        healthy: Sequence of measurements for the first group.
        disease: Sequence of measurements for the second group.
        tobs: Statistic observed on the original grouping.
        pVal: Reference p-value the running estimate is compared against.
        tolerance: Half-width of the acceptance window around ``pVal``.
            Defaults to 0.01, the value that was previously hard-coded.

    Returns:
        None. The estimate and the number of splits evaluated are printed.
    """
    # Combine both samples into one pooled dataset.
    dataSet = np.append(healthy, disease)
    allIndices = range(len(dataSet))

    nSuccess = 0
    numRep = 0

    # Walk the splits lazily: the loop can stop early, so materialising
    # every combination (and its complement) up front would be wasted work.
    for healthyRange in combinations(allIndices, len(healthy)):
        numRep += 1  # Count number of repetitions.

        # The "disease" indices are whatever the "healthy" draw left over;
        # a set keeps each membership test O(1).
        chosen = set(healthyRange)
        diseaseRange = [i for i in allIndices if i not in chosen]

        # A list (not a tuple) is required for element-wise index selection.
        randHealthy = dataSet[list(healthyRange)]
        randDisease = dataSet[diseaseRange]

        # Statistic for this relabelling.
        trand = ttest(randHealthy, randDisease)
        if fabs(tobs) <= fabs(trand):
            nSuccess += 1

        pValue = nSuccess / float(numRep)

        # Stop once the running estimate is inside the acceptance window.
        if pVal - tolerance <= pValue <= pVal + tolerance:
            break

    print("Estimated p-value: {}".format(pValue))
    print("Number of Repetitions Required For Estimate: {}".format(numRep))
# Driver script: run the permutation test on one fixed pair of samples.
from ttest import ttest
from permutation import permutater

# Measurements for the two groups being compared.
healthy = [1.0, 3.0, 1.6, 2.1, 2.5, 2.7, 2.3, 1.5]
disease = [1.1, 0.5, 2.0, 2.0, 1.2, 1.2, 1.3, 0.5, 1.4, 2.4]

# Statistic for the samples in their original grouping.
tobs = ttest(healthy, disease)

# 0.02291 is the reference p-value handed to permutater as ``pVal``.
permutater(healthy, disease, tobs, pVal=0.02291)
#!/usr/bin/python
"""Compare two per-query score files by passing their scores to ttest.

Usage: pass the paths of the two data files as the first two arguments.
"""
import sys

from ttest import print_ttest as ttest


def read(f):
    """Return the scores found in *f*, ordered by query name.

    Each non-blank line must hold a query identifier followed by a numeric
    score, separated by whitespace; any further fields are ignored.  Lines
    whose query is ``'all'`` are dropped, and when a query appears more
    than once the last score wins.

    Args:
        f: Iterable of text lines, such as an open file.

    Returns:
        A list of float scores sorted by their query identifier.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field is not a valid float.
    """
    scores = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        query, score = fields[0], float(fields[1])
        if query != 'all':
            scores[query] = score
    return [scores[query] for query in sorted(scores)]


if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('Supply two data files')
        # sys.exit is always available, unlike the site-provided exit();
        # a non-zero status tells the calling shell the run failed.
        sys.exit(1)

    d1_file = sys.argv[1]
    d2_file = sys.argv[2]

    with open(d1_file) as f:
        d1 = read(f)
    with open(d2_file) as f:
        d2 = read(f)

    ttest(d1, d2, False)