def summarizeOneFeature(tmDataList, columnName, intervals=50, outName="a.txt"):
    '''Write a per-structure histogram table for one column to outName.

    The output is tab-separated: a header row (name, count, mean, stddev,
    then the lower edge of each histogram bin) followed by one row per entry
    of tmDataList.

    tmDataList -- sequence of objects providing titleToColumn(),
        getListColumn() and an inputFileName attribute.  The column number
        is looked up on the first entry only and reused for all entries.
    columnName -- title of the column to summarize.
    intervals -- number of histogram bins wanted, or the string "max" to get
        bins of width 1.
    outName -- path of the text file to write.

    Raises ValueError when the requested binning gives a non-positive bin
    width, and ZeroDivisionError when intervals is 0 (which includes "max"
    when no value in the column is above zero).
    '''
    columnNum = tmDataList[0].titleToColumn(columnName)
    # Read every column before touching the output file, so bad input does
    # not leave a truncated file behind.
    columns = []
    overallMax = 0.
    for tm3tree in tmDataList:
        data = tm3tree.getListColumn(columnNum)
        overallMax = max(overallMax, max(data))
        columns.append(data)
    if intervals == "max":
        intervals = overallMax  # 1 per
    # float() matters under Python 2: max() can hand back an int, and int/int
    # would truncate the bin width to 0 and make the header loop endless.
    interval = float(overallMax) / intervals
    if overallMax > 0 and interval <= 0:
        raise ValueError(
            "intervals must give a positive bin width, got " + str(intervals))
    with open(outName, 'w') as outFile:
        # header: fixed columns, then the lower edge of each bin
        outFile.write("name\tcount\tmean\tstddev\t")
        currentOut = 0.
        while currentOut < overallMax:
            outFile.write(str(currentOut) + "\t")
            currentOut += interval
        outFile.write("\n")
        for tm3tree, tm3data in zip(tmDataList, columns):
            avgData = statistics.computeMean(tm3data)
            stddevData = statistics.computeStdDev(tm3data, avgData)
            # the second return value is not needed here
            histo, _ = statistics.computeHistogram(
                tm3data, interval, overallMax)
            fields = [
                tm3tree.inputFileName, str(len(tm3data)), str(avgData),
                str(stddevData)]
            fields.extend(str(histoCount) for histoCount in histo)
            # every field, including the last, is followed by a tab
            outFile.write("\t".join(fields) + "\t\n")
def calcColumnsMeanStddev(columnList, tmDataList):
    """Pool each column over all data sets and compute its mean and stddev.

    Returns a pair of dicts keyed by the entries of columnList: the first
    maps each column to the mean of the pooled values, the second maps it to
    their standard deviation.
    """
    meanByColumn, stddevByColumn = {}, {}
    for column in columnList:
        # one flat list holding this column's values from every data set
        pooled = [
            value
            for tmData in tmDataList
            for value in tmData.getListColumn(column)]
        pooledMean = statistics.computeMean(pooled)
        pooledStddev = statistics.computeStdDev(pooled, pooledMean)
        meanByColumn[column] = pooledMean
        stddevByColumn[column] = pooledStddev
    return meanByColumn, stddevByColumn
def makeResidueReport(
        residueData, outputFilename="residue.bfactor", maxY=False,
        maxYBeta=False, runGraphs=False):
    """Write per-residue statistics to a text file and optionally plot them.

    For every code in aminoAcid3Codes, taken in sorted order, the values of
    all atoms of that residue are pooled and one line with mean, stddev, low,
    high and count is written to outputFilename + ".txt".  Residues without
    any values get 0.0 for low, high and count.

    residueData -- mapping of residue name to a mapping of atom name to a
        list of values.
    outputFilename -- prefix for the ".txt", ".png" and ".beta.png" outputs.
    maxY, maxYBeta -- False to auto-scale the y axis of the all-atom and
        carbon-beta plot respectively; otherwise the upper y limit, with the
        lower limit fixed at 0.
    runGraphs -- plots are only drawn when this is true and the module-level
        gnuplotAvailable flag is set.
    """
    # sorted() works on a copy.  Sorting aminoAcid3Codes itself in place
    # would silently reorder the shared module-level list for other users.
    residueNames = sorted(aminoAcid3Codes)
    averages, stddevs = {}, {}
    betaAverages, betaStddevs = {}, {}
    with open(outputFilename + ".txt", 'w') as fileTemp:
        fileTemp.write("ResidueName Mean StdDev Low High Count\n")
        for residueName in residueNames:
            if residueName in residueData:
                atomData = residueData[residueName]
            else:
                atomData = {}
            # assemble all atoms of this residue into one big list
            totalList = []
            for values in atomData.values():
                totalList.extend(values)
            # NOTE(review): the statistics helpers are called even when
            # totalList is empty; confirm they tolerate an empty list.
            average = statistics.computeMean(totalList)
            averages[residueName] = average
            stddev = statistics.computeStdDev(totalList, average)
            stddevs[residueName] = stddev
            betaList = []
            if residueName in residueData:
                betaName = carbonBetaCodes[residueName]
                # a residue may lack its carbon-beta atom in this data set
                if betaName in atomData:
                    betaList.extend(atomData[betaName])
            if len(betaList) > 0:
                betaAvg = statistics.computeMean(betaList)
                betaAverages[residueName] = betaAvg
                betaStddevs[residueName] = statistics.computeStdDev(
                    betaList, betaAvg)
            if len(totalList) > 0:
                tail = [
                    str(min(totalList)), str(max(totalList)),
                    str(len(totalList))]
            else:
                tail = [str(0.), str(0.), str(0.)]
            fileTemp.write(
                " ".join([residueName, str(average), str(stddev)] + tail) +
                "\n")
    if gnuplotAvailable and runGraphs:
        plotter = Gnuplot.Gnuplot(debug=0)
        # x tick labels of the form ("ALA" 0, "ARG" 1, ...)
        yLabels = '(' + ', '.join(
            '"' + str(code) + '" ' + str(index)
            for index, code in enumerate(residueNames)) + ')'
        yData, yError, yMin, yMax = [], [], 10, 0
        yBetaData, yBetaError, yBetaMin, yBetaMax = [], [], 10, 0
        for code in residueNames:
            # averages/stddevs hold an entry for every name in residueNames
            yData.append(averages[code])
            yError.append(stddevs[code])
            yMin = min(yMin, yData[-1] - yError[-1])
            yMax = max(yMax, yData[-1] + yError[-1])
            if code in betaAverages:
                yBetaData.append(betaAverages[code])
                yBetaError.append(betaStddevs[code])
                yBetaMin = min(yBetaMin, yBetaData[-1] - yBetaError[-1])
                yBetaMax = max(yBetaMax, yBetaData[-1] + yBetaError[-1])
            else:
                # no carbon-beta values for this residue: plot a zero
                # instead of raising KeyError
                yBetaData.append(0)
                yBetaError.append(0)
        xValues = list(range(len(residueNames)))
        graphData = Gnuplot.Data(xValues, yData, yError)
        plotter('set terminal png')
        plotter('set output "' + outputFilename + '.png"')
        plotter('set data style yerrorbars')
        plotter('set boxwidth 0.9 absolute')
        plotter('set xtics ' + yLabels)
        if maxY is False:
            plotter(
                'set yrange [' + str(yMin-0.2) + ':' + str(yMax+0.2) + ']')
        else:
            plotter('set yrange [0:' + str(maxY) + ']')
        plotter('set xrange [-1:' + str(len(residueNames)) + ']')
        plotter.xlabel('Residue')
        plotter.ylabel('Mean Travel In Distance')
        plotter.plot(graphData)
        # do another graph with just carbon-betas
        plotter('set output "' + outputFilename + '.beta.png"')
        graphDataBeta = Gnuplot.Data(xValues, yBetaData, yBetaError)
        plotter.ylabel('Mean Travel In Distance of Carbon Beta')
        if maxYBeta is False:
            plotter(
                'set yrange [' + str(yBetaMin-0.2) + ':' +
                str(yBetaMax+0.2) + ']')
        else:
            plotter('set yrange [0:' + str(maxYBeta) + ']')
        plotter.plot(graphDataBeta)
def makeAtomReport(residueData, outputFilename="atom.bfactor", runGraphs=True):
    """Write per-atom statistics to a text file and optionally plot them.

    One line per (residue, atom) pair, sorted by residue name and then by
    atom name, is written to outputFilename + ".txt" with mean, stddev, low,
    high and count of that atom's values.

    When runGraphs is true and the module-level gnuplotAvailable flag is
    set, two plots are produced over aminoAcid3Codes:
    outputFilename + ".png" compares backbone and sidechain means, and
    outputFilename + ".ab.png" compares C-alpha and C-beta means.
    makeAminoAcidHistogram is also called once for every residue that has
    any values, with the prefix outputFilename + "." + code.

    residueData -- mapping of residue name to a mapping of atom name to a
        list of values.
    """
    def meanOrZero(values):
        # mean of the values, or 0 when there are none to average
        if len(values) == 0:
            return 0
        return sum(values) / float(len(values))

    with open(outputFilename + '.txt', 'w') as fileTemp:
        fileTemp.write("ResidueName AtomName Mean StdDev Low High Count\n")
        for residueName in sorted(residueData.keys()):
            atomData = residueData[residueName]
            for atomName in sorted(atomData.keys()):
                totalList = atomData[atomName]
                average = statistics.computeMean(totalList)
                stddev = statistics.computeStdDev(totalList, average)
                fileTemp.write(" ".join([
                    residueName, atomName, str(average), str(stddev),
                    str(min(totalList)), str(max(totalList)),
                    str(len(totalList))]) + "\n")
    if gnuplotAvailable and runGraphs:
        # first make backbone-sidechain report
        plotter = Gnuplot.Gnuplot(debug=0)
        codes = list(aminoAcid3Codes)
        # x tick labels of the form ("ALA" 0, "ARG" 1, ...)
        yLabels = '(' + ', '.join(
            '"' + str(code) + '" ' + str(index)
            for index, code in enumerate(codes)) + ')'
        yDataBackbone, yDataSidechain = [], []
        yDataCa, yDataCb = [], []
        for code in codes:
            backValues, sideValues = [], []
            caValues, cbValues = [], []
            try:
                atomData = residueData[code]
            except KeyError:
                # sometimes one residue won't be represented, which is okay
                atomData = {}
            for key, values in atomData.items():
                atomCode = key.strip()
                if atomCode in backboneAtomCodes:
                    backValues.extend(values)
                else:
                    sideValues.extend(values)
                if atomCode == caCode:
                    caValues.extend(values)
                elif atomCode == cbCode:
                    cbValues.extend(values)
            yDataBackbone.append(meanOrZero(backValues))
            yDataSidechain.append(meanOrZero(sideValues))
            yDataCa.append(meanOrZero(caValues))
            yDataCb.append(meanOrZero(cbValues))
            if len(backValues + sideValues + caValues + cbValues) > 0:
                makeAminoAcidHistogram(
                    plotter, backValues, sideValues, caValues, cbValues,
                    outputFilename + "." + str(code))
        xValues = list(range(len(codes)))
        graphDataBackbone = Gnuplot.Data(
            xValues, yDataBackbone, title="Backbone")
        graphDataSidechain = Gnuplot.Data(
            xValues, yDataSidechain, title="Sidechain")
        graphDataCa = Gnuplot.Data(xValues, yDataCa, title="C-alpha")
        graphDataCb = Gnuplot.Data(xValues, yDataCb, title="C-beta")
        plotter('set terminal png')
        plotter('set output "' + outputFilename + '.png"')
        plotter('set data style points')
        plotter('set key right top')
        plotter('set xtics ' + yLabels)
        plotter(
            'set yrange [' + str(min(yDataBackbone + yDataSidechain) - 0.5) +
            ':' + str(max(yDataBackbone+yDataSidechain)+0.5) + ']')
        plotter('set xrange [-1:' + str(len(codes)) + ']')
        plotter.xlabel('Residue')
        plotter.ylabel('Mean Travel In Distance')
        plotter.plot(graphDataBackbone, graphDataSidechain)
        plotter('set output "' + outputFilename + '.ab.png"')
        if "buried" in outputFilename:
            # outputs with "buried" in the name get a fixed upper limit of 6
            plotter(
                'set yrange [' + str(min(yDataCa + yDataCb) - 0.5) + ':6.]')
        else:
            plotter(
                'set yrange [' + str(min(yDataCa + yDataCb) - 0.5) + ':' +
                str(max(yDataCa + yDataCb) + 0.5) + ']')
        plotter.plot(graphDataCa, graphDataCb)
def makeCompareResidueReport(
        residueBoth, outputFilename="residue.bfactor", maxY=False,
        maxYBeta=False, numTests=9, ranges=(-0.3, 0.6)):
    """Compare per-residue statistics of two data sets.

    Writes three text files and, when the module-level gnuplotAvailable flag
    is set, two plots:
      outputFilename + ".txt"            mean/stddev/low/high/count per
                                         residue and data set
      outputFilename + ".pvals.txt"      difference of all-atom means with
                                         the two values returned by
                                         statistics.pvalueDiffMeans
      outputFilename + ".pvals.beta.txt" the same for carbon-beta values
      outputFilename + ".png"            difference of all-atom means
      outputFilename + ".beta.png"       difference of carbon-beta means

    residueBoth -- pair of mappings, each residue name -> atom name -> list
        of values.
    maxY, maxYBeta -- only consulted when ranges is false: False auto-scales
        the y axis, any other value is the upper limit with a lower limit
        of 0.
    numTests -- passed through to statistics.pvalueDiffMeans.
    ranges -- fixed (low, high) y range used for both plots; pass a false
        value such as None to fall back to maxY / maxYBeta.

    A residue that is missing from either data set is left out of the
    p-value files and plotted as a difference of 0.
    """
    def writePvalueRow(outFile, code, means, lists):
        # one "code diff meanA meanB pAbove pBelow" line for a residue
        meanA, meanB = means[0][code], means[1][code]
        pvals = statistics.pvalueDiffMeans(
            lists[0][code], lists[1][code], meanA - meanB, numTests)
        outFile.write(" ".join([
            code, str(meanA - meanB), str(meanA), str(meanB),
            str(pvals[0]), str(pvals[1])]) + "\n")

    def meanDifference(means, code):
        # set 0 mean minus set 1 mean; 0. (and a note on stdout) when the
        # residue is missing from either data set
        if code in means[0] and code in means[1]:
            return means[0][code] - means[1][code]
        print(code)
        return 0.

    residueNames = sorted(set(residueBoth[0]) | set(residueBoth[1]))
    averages, betaAverages = ({}, {}), ({}, {})
    totalLists, betaLists = ({}, {}), ({}, {})
    with open(outputFilename + ".txt", 'w') as fileTemp:
        # the rows carry no atom name, so the header does not list one
        fileTemp.write("ResidueName Mean StdDev Low High Count\n")
        for residueName in residueNames:
            for indexSet, residueData in enumerate(residueBoth):
                totalList, betaList = [], []
                try:
                    atomData = residueData[residueName]
                    for data in atomData.values():
                        totalList.extend(data)
                    totalLists[indexSet][residueName] = totalList
                    average = statistics.computeMean(totalList)
                    averages[indexSet][residueName] = average
                    stddev = statistics.computeStdDev(totalList, average)
                    betaList.extend(atomData[carbonBetaCodes[residueName]])
                    betaLists[indexSet][residueName] = betaList
                    if len(betaList) > 0:
                        betaAvg = statistics.computeMean(betaList)
                        betaAverages[indexSet][residueName] = betaAvg
                        # result unused; the call is kept because an error
                        # raised here decides whether the row is written
                        statistics.computeStdDev(betaList, betaAvg)
                    # statistics of THIS data set's list, not of the pair
                    if len(totalList) > 0:
                        low, high = min(totalList), max(totalList)
                    else:
                        low, high = 0., 0.
                    fileTemp.write(" ".join([
                        residueName, str(average), str(stddev), str(low),
                        str(high), str(len(totalList))]) + "\n")
                except (ZeroDivisionError, KeyError):
                    pass  # probably don't really need this residue anyway
    # now do the pvalue tests
    with open(outputFilename + ".pvals.txt", 'w') as fileTemp2, \
            open(outputFilename + ".pvals.beta.txt", 'w') as fileTemp3:
        fileTemp2.write(
            "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n")
        fileTemp3.write(
            "ResidueName DiffMeans MeanA MeanB pValAbove pValBelow\n")
        for code in aminoAcid3Codes:
            # residues skipped above have no means to compare
            if code in averages[0] and code in averages[1]:
                writePvalueRow(fileTemp2, code, averages, totalLists)
            if code in betaAverages[0] and code in betaAverages[1]:
                writePvalueRow(fileTemp3, code, betaAverages, betaLists)
    if gnuplotAvailable:
        plotter = Gnuplot.Gnuplot(debug=0)
        codes = list(aminoAcid3Codes)
        # x tick labels of the form ("ALA" 0, "ARG" 1, ...)
        yLabels = '(' + ', '.join(
            '"' + str(code) + '" ' + str(index)
            for index, code in enumerate(codes)) + ')'
        yData, yMin, yMax = [], 10, 0
        yBetaData, yBetaMin, yBetaMax = [], 10, 0
        for code in codes:
            yData.append(meanDifference(averages, code))
            yMin = min(yMin, yData[-1])
            yMax = max(yMax, yData[-1])
            yBetaData.append(meanDifference(betaAverages, code))
            yBetaMin = min(yBetaMin, yBetaData[-1])
            yBetaMax = max(yBetaMax, yBetaData[-1])
        xValues = list(range(len(codes)))
        graphData = Gnuplot.Data(xValues, yData)
        plotter('set terminal png')
        plotter('set output "' + outputFilename + '.png"')
        plotter('set data style points')
        plotter('set boxwidth 0.9 absolute')
        plotter('set xtics ' + yLabels)
        if ranges:
            plotter(
                'set yrange [' + str(ranges[0]) + ':' + str(ranges[1]) + ']')
        elif maxY is False:
            plotter(
                'set yrange [' + str(yMin-0.2) + ':' + str(yMax+0.2) + ']')
        else:
            plotter('set yrange [0:' + str(maxY) + ']')
        plotter('set xrange [-1:' + str(len(codes)) + ']')
        plotter.xlabel('Residue')
        plotter.ylabel('Mean Travel In Distance')
        plotter.plot(graphData)
        # do another graph with just carbon-betas
        plotter('set output "' + outputFilename + '.beta.png"')
        graphDataBeta = Gnuplot.Data(xValues, yBetaData)
        plotter.ylabel('Mean Travel In Distance of Carbon Beta')
        if ranges:
            plotter(
                'set yrange [' + str(ranges[0]) + ':' + str(ranges[1]) + ']')
        elif maxYBeta is False:
            plotter(
                'set yrange [' + str(yBetaMin-0.2) + ':' +
                str(yBetaMax+0.2) + ']')
        else:
            plotter('set yrange [0:' + str(maxYBeta) + ']')
        plotter.plot(graphDataBeta)