def analyzeColumn(
        summaryDataColumn, listPdbs, outputFileName, numTests=1000000):
    '''Permutation-test one column of summary data and write the result.

    The observed statistic is getDiffMean(summaryDataColumn, listPdbs)
    (presumably the difference between the means of the two pdb groups --
    confirm against getDiffMean).  listPdbs is then re-permuted numTests
    times with statistics.permuteLists and the statistic recomputed; the
    fraction of permutations at-or-above and at-or-below the observed value
    are reported as the two one-sided p-values.

    summaryDataColumn -- data passed through untouched to getDiffMean/getMean
    listPdbs          -- two groups; listPdbs[0] and listPdbs[1] are each
                         handed to getMean for the mean1/mean2 columns
    outputFileName    -- path of the tab-separated report (overwritten)
    numTests          -- number of permutations; must be non-zero because
                         the counts are divided by it

    Returns None; the only result is the file that gets written.
    '''
    origDiffMean = getDiffMean(summaryDataColumn, listPdbs)
    pValCounts = [0., 0.]  # [at-or-above, at-or-below]
    for _ in range(numTests):
        newLists = statistics.permuteLists(listPdbs)
        testMean = getDiffMean(summaryDataColumn, newLists)
        # A tie increments both counters, so the two p-values can sum to > 1.
        if testMean >= origDiffMean:
            pValCounts[0] += 1.
        if testMean <= origDiffMean:
            pValCounts[1] += 1.
    pVals = [pValCount / float(numTests) for pValCount in pValCounts]
    # Gather every field before touching the file: if getMean raises, no
    # half-written report is left behind and no handle is leaked.
    fields = [
        origDiffMean,
        getMean(summaryDataColumn, listPdbs[0]),
        getMean(summaryDataColumn, listPdbs[1]),
    ] + pVals
    with open(outputFileName, 'w') as outputFile:
        outputFile.write("origDiffMean\tmean1\tmean2\tpValAbove\tpValBelow\n")
        # Every field, including the last, is followed by a tab; existing
        # consumers of the report may rely on that trailing tab.
        outputFile.write("".join(str(field) + "\t" for field in fields))
        outputFile.write("\n")
def _computeResidueMeans(pdbGroups, residueNames):
    '''Compute per-residue and pooled means for the two groups of pdb data.

    pdbGroups[0] and pdbGroups[1] are each iterated as a sequence of dicts
    mapping residue code -> {atom name -> list of values}.

    Returns (means, betaMeans, totalMeans, totalBetaMeans).  means and
    betaMeans are two-element lists of {residue code: mean}; totalMeans and
    totalBetaMeans are two-element lists holding the mean over all residues
    pooled together.  The "beta" variants use only the atom named by
    carbonBetaCodes[code] for each residue.
    '''
    means = [{}, {}]
    betaMeans = [{}, {}]
    totalMeans, totalBetaMeans = [0., 0.], [0., 0.]
    for lindex in range(2):  # either a or b
        overallList, overallBetaList = [], []
        for code in residueNames:
            totalList, betaList = [], []
            for pdbResidues in pdbGroups[lindex]:
                if code in pdbResidues:
                    for atomValues in pdbResidues[code].values():
                        totalList.extend(atomValues)
                    betaList.extend(pdbResidues[code][carbonBetaCodes[code]])
            means[lindex][code] = statistics.computeMean(totalList)
            betaMeans[lindex][code] = statistics.computeMean(betaList)
            overallList.extend(totalList)
            overallBetaList.extend(betaList)
        totalMeans[lindex] = statistics.computeMean(overallList)
        totalBetaMeans[lindex] = statistics.computeMean(overallBetaList)
    return means, betaMeans, totalMeans, totalBetaMeans


def _tallyTails(counts, code, origMeanDiff, testMeanDiff):
    '''Bump counts[0][code] (above) / counts[1][code] (below); ties hit both.'''
    if origMeanDiff <= testMeanDiff:
        counts[0][code] += 1
    if origMeanDiff >= testMeanDiff:
        counts[1][code] += 1


def _countPermutationTails(
        pdbs, residueNames, numTests, observed, correctionAll, correctionBeta):
    '''Run numTests permutations of pdbs and count tail hits per residue.

    observed is the 4-tuple returned by _computeResidueMeans for the
    unpermuted data.  Returns (pValueCounts, pValueBetaCounts), each a
    two-element list [above, below] of {residue code or "ALL": count}.
    '''
    means, betaMeans, totalMeans, totalBetaMeans = observed
    # Counts start at 1 and the caller divides by 1 + numTests (the add-one
    # form of a permutation p-value), so a reported p-value is never zero.
    pValueCounts = [{}, {}]  # first is above, second is below
    pValueBetaCounts = [{}, {}]
    for code in residueNames + ["ALL"]:  # "ALL" is the pooled pseudo-residue
        for aboveBelow in range(2):
            pValueCounts[aboveBelow][code] = 1
            pValueBetaCounts[aboveBelow][code] = 1
    # The corrected observed differences do not change between permutations,
    # so compute them once.  The correction is always subtracted from the
    # OBSERVED difference; the pooled beta case previously subtracted it from
    # the permuted difference instead, which flipped its direction relative
    # to the other three comparisons whenever correctionBeta was non-zero.
    origDiffs, origBetaDiffs = {}, {}
    for code in residueNames:
        origDiffs[code] = means[0][code] - means[1][code] - correctionAll
        origBetaDiffs[code] = (
            betaMeans[0][code] - betaMeans[1][code] - correctionBeta)
    origDiffs["ALL"] = totalMeans[0] - totalMeans[1] - correctionAll
    origBetaDiffs["ALL"] = (
        totalBetaMeans[0] - totalBetaMeans[1] - correctionBeta)
    for _ in xrange(numTests):
        newPdbs = statistics.permuteLists(pdbs)
        testMeans, testBetaMeans, totalTestMeans, totalTestBetaMeans = \
            _computeResidueMeans(newPdbs, residueNames)
        for code in residueNames:  # calc pval for each residue
            _tallyTails(
                pValueCounts, code, origDiffs[code],
                testMeans[0][code] - testMeans[1][code])
            _tallyTails(
                pValueBetaCounts, code, origBetaDiffs[code],
                testBetaMeans[0][code] - testBetaMeans[1][code])
        _tallyTails(
            pValueCounts, "ALL", origDiffs["ALL"],
            totalTestMeans[0] - totalTestMeans[1])
        _tallyTails(
            pValueBetaCounts, "ALL", origBetaDiffs["ALL"],
            totalTestBetaMeans[0] - totalTestBetaMeans[1])
    return pValueCounts, pValueBetaCounts


def _writePValueRow(outFile, label, meanA, meanB, above, below, denominator):
    '''Write one space-separated report row (trailing space kept) to outFile.'''
    fields = [
        label, meanA - meanB, meanA, meanB,
        above / denominator, below / denominator]
    outFile.write("".join(str(field) + " " for field in fields) + "\n")


def makeCompareResidueReportAlternate(
        pdbs, outputFilename="residue.bfactor", numTests=9999,
        correctionAll=0., correctionBeta=0.):
    '''Per-residue permutation test that permutes whole pdb files.

    Different way to do p-vals: instead of permuting all data, permute the
    pairs of hyp/meso pdb files (via statistics.permuteLists) and recompute
    the difference of group means each time.

    pdbs           -- pdbs[0] and pdbs[1] are the two groups; each entry is a
                      dict mapping residue code -> {atom name -> values}
    outputFilename -- prefix; writes <prefix>.pvals.txt (all atoms) and
                      <prefix>.pvals.beta.txt (only the atom named by
                      carbonBetaCodes for each residue)
    numTests       -- number of permutations
    correctionAll  -- subtracted from the observed all-atom differences
                      before comparing against permuted differences
    correctionBeta -- same, for the beta-atom differences

    Residues are taken from aminoAcid3Codes plus a pooled "ALL" row.  The
    DiffMeans column written to the files is the uncorrected difference.

    Returns (all-atom pooled difference, beta pooled difference), both
    uncorrected.
    '''
    residueNames = aminoAcid3Codes  # for now ignore what is in the files
    header = "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n"
    # Open (and write headers) before the slow permutation loop so an
    # unwritable path fails immediately; `with` guarantees both handles are
    # closed even if the computation raises.
    with open(outputFilename + ".pvals.txt", 'w') as allFile:
        with open(outputFilename + ".pvals.beta.txt", 'w') as betaFile:
            allFile.write(header)
            betaFile.write(header)
            observed = _computeResidueMeans(pdbs, residueNames)
            means, betaMeans, totalMeans, totalBetaMeans = observed
            pValueCounts, pValueBetaCounts = _countPermutationTails(
                pdbs, residueNames, numTests, observed,
                correctionAll, correctionBeta)
            denominator = float(1 + numTests)
            for code in residueNames:  # output time
                _writePValueRow(
                    allFile, code, means[0][code], means[1][code],
                    pValueCounts[0][code], pValueCounts[1][code],
                    denominator)
                _writePValueRow(
                    betaFile, code, betaMeans[0][code], betaMeans[1][code],
                    pValueBetaCounts[0][code], pValueBetaCounts[1][code],
                    denominator)
            _writePValueRow(
                allFile, "ALL", totalMeans[0], totalMeans[1],
                pValueCounts[0]["ALL"], pValueCounts[1]["ALL"],
                denominator)
            _writePValueRow(
                betaFile, "ALL", totalBetaMeans[0], totalBetaMeans[1],
                pValueBetaCounts[0]["ALL"], pValueBetaCounts[1]["ALL"],
                denominator)
    return totalMeans[0]-totalMeans[1], totalBetaMeans[0]-totalBetaMeans[1]