Пример #1
0
def tstTravelFindHoles(
    tstFileName, phiFileName, debugOut=False, borderSize=2, nearbyDistance=4.):
  '''Find candidate paths through the holes of a surface and write reports.

  Reads the tst surface and the phi map, runs travel depth
  (tstTravelDepthMeshRun), computes the "surfout" travel distance on the
  resulting mesh, asks tstTopology.fillInHolesAndGrow for the loops/plugs and
  then meshData.getPaths for the candidate paths.  Candidate paths are ordered
  by decreasing minimum radius; every path for which paths.checkPath reports
  through-triangles is numbered (starting at 1, in that order), gets its own
  output files and one row in the log file.

  Arguments:
    tstFileName: name of the tst file; also the prefix of every output file.
    phiFileName: name of the phi map, handed to phi().
    debugOut: if set, additional files are created
        (".regular.loops.py" and "<n>.through.loop.py").
    borderSize: forwarded to tstTravelDepthMeshRun; can change the amount of
        extra space around the protein.
    nearbyDistance: forwarded to paths.outputNearbyResidues; changes the
        distance that nearby residues are gathered from.

  Files written (all prefixed with tstFileName):
    ".HOLE.start.txt", ".tree.py", ".outside.py", ".findholes.log" and, per
    accepted path n, ".n.pore.py", ".n.path.py", ".n.radii.txt" plus whatever
    paths.outputNearbyResidues writes for the prefix tstFileName + ".n".
    If the known-path file (tstFileName with ".nocav.tst" replaced by ".py")
    exists, each path is also compared against it with
    comparePathsManyMetrics, the log gets seven extra columns and
    ".known.best.txt" records the best value of each metric and the number of
    the path that achieved it.

  Returns nothing.
  '''
  print("reading tst and phi files")
  tstD = tstdata.tstData(
      tstFileName, necessaryKeys=tstdata.tstData.necessaryKeysForHoles)
  phiData = phi(phiFileName)  # read in the phimap if possible
  gridSize = 1.0 / phiData.scale  # needed later
  numberHandles = tstD.countHandles()
  #one pre-joined string so the same text is printed by python 2 and python 3
  print("there are  " + str(numberHandles) + "  holes in this structure")
  print("running travel depth now")
  phiTravelDepthGrid, phiTravelDepthData, meshData = tstTravelDepthMeshRun(
      tstD, phiData, tstFileName, borderSize=borderSize, threshold="auto")
  del phiData, phiTravelDepthGrid, phiTravelDepthData
  #not needed, reclaim memory
  print("calculating travel out distance")
  meshData.calculateTravelDistance("surfout", [3], [0, 2])
  print("finding holes")
  loopTrisSave, loopPointsSave, regLoopTris, regLoopPts, pointNeighbors, \
      pointNeighborsNodes, outsidePoints, outsidePointsNodes, possHoleStarts = \
      tstTopology.fillInHolesAndGrow(
          tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_TRIANGLE'],
          tstD.dict['POINT_XYZ'], tstD.dict['NORM_XYZ'], numberHandles,
          tstFileName, debugOut, meshData, "surfout")
  if debugOut:
    tstdebug.debugTriangleList(
        regLoopTris, tstD.dict['TRIANGLE_POINT'], tstD.dict['POINT_XYZ'],
        tstFileName + ".regular.loops.py")
  tstDPointXYZ = tstD.dict['POINT_XYZ']  # save for later
  tstDPdbRecord = tstD.dict['PDB_RECORD']  # save for later
  del tstD  # rest of tstD isn't needed, so free memory
  #output the possible places where HOLE could start... centers of regular plugs
  print("writing output files")
  paths.outputNodesText(possHoleStarts, tstFileName + ".HOLE.start.txt")
  pathsList = meshData.getPaths(
      "surfout", pointNeighbors, outsidePoints, possHoleStarts)
  del meshData  # deletes everything that has no other refs (from paths)
  allPoints = [point.pathXyz for point in pointNeighborsNodes.keys()]
  outsidePts = [point.pathXyz for point in outsidePointsNodes]
  tstdebug.pointDebug(allPoints, filename=tstFileName+".tree.py")
  tstdebug.pointDebug(
      outsidePts, filename=tstFileName+".outside.py", mainColor=(.9, .1, .1),
      radius=0.55)
  #very very specific... probably bad but nice...
  #can always rerun standalone later.
  pathFile = tstFileName.replace(".nocav.tst", ".py")
  knownPathExists = os.path.isfile(pathFile)
  #for each known-path metric: True when a smaller value is the better one
  #order is pRMSD coverage span wrmsd less1 lessrad radiicomp
  #NOTE(review): radiicomp keeps the maximum here (as the code always did),
  #while processData in this file treats that column as "min" -- confirm which
  #direction is intended before relying on .known.best.txt for radiicomp
  lowerIsBetter = (True, False, False, True, False, False, False)
  if knownPathExists:
    bestStats = ["err", "err", "err", "err", "err", "err", "err"]
    bestStatsPaths = [0, 0, 0, 0, 0, 0, 0]
  #build one record per candidate path, kept ordered by decreasing min radius
  sortedByMinRadiusPaths = []
  for outsideOne, outsideTwo, plugs, nodePath in pathsList:
    pointPath, spheres = [], []
    for node in nodePath:
      pointRadius = node.distances["surfout"]  # add 'radius' info
      if not pointRadius or pointRadius == 0.:
        pointRadius = .000000001  # very small number, on surface
      #pointPath entries are [radius, x, y, z], spheres are [x, y, z, radius]
      pointPath.append([pointRadius] + list(node.pathXyz))
      spheres.append(list(node.pathXyz) + [pointRadius])
    minRad = paths.pathMinRadius(pointPath)
    newTuple = (
        minRad, outsideOne, outsideTwo, plugs, nodePath, pointPath, spheres)
    #insertion sort into new list, a new entry goes in front of equal ones
    position = 0
    while position < len(sortedByMinRadiusPaths) and \
        sortedByMinRadiusPaths[position][0] > minRad:
      #only if list can handle it and already inserted are bigger
      position += 1
    sortedByMinRadiusPaths.insert(position, newTuple)
  header = (
      "number endNumOne endNumTwo plugs stepLength pathLength pathMinRadius " +
      "pathMaxInsideRadius endMinOne endMinTwo minimaCount travelDepthMax " +
      "windingMetric avgTheta ")
  if knownPathExists:
    header += "pRMSD coverage span wrmsd less1 lessrad radiicomp"
  print("output files for individual paths")
  foundPaths = 0
  logName = tstFileName + ".findholes.log"
  #'with' guarantees the log is closed even when a path fails half way
  with open(logName, 'w') as logFile:
    logFile.write(header + "\n")
    for newTuple in sortedByMinRadiusPaths:
      (minRad, outsideOne, outsideTwo, plugs, nodePath, pointPath, spheres) = \
          newTuple  # unpack the tuple into the various data
      throughTris, throughPts = paths.checkPath(
          pointPath, loopPointsSave, tstDPointXYZ)
      if not throughTris:
        continue  # path does not go through any loop, nothing to report
      #it worked... make some more debugging output
      foundPaths += 1
      outName = tstFileName + "." + str(foundPaths)
      if debugOut:
        tstdebug.debugTrianglesNotOrig(
            throughTris, tstDPointXYZ, outName+".through.loop.py",
            ptList=throughPts)
      #always do these 2
      tstdebug.debugSetGridSpheres(
          pointPath, gridSize, outName + ".pore.py", radius=True,
          mainColor=(0.01, 0.9, 0.05))
      tstdebug.debugSetGridSpheres(
          pointPath, gridSize, outName + ".path.py", mainColor=(.01, 0.95, 0.9))
      paths.outputRadiiTxt(pointPath, outName + ".radii.txt")
      paths.outputNearbyResidues(
          pointPath, outName, tstDPdbRecord, nearbyDistance)
      pathLen = paths.pathLength(pointPath)
      minimaCount = paths.pathMinimaCount(pointPath)
      maxRad, endMinOne, endMinTwo = paths.insideTwoMinimaRadiusMax(pointPath)
      travelDepthMax = paths.pathMaxDistance(nodePath, 'traveldepth')
      windingMetric = paths.computeWindingMetric(pointPath)
      thetas, avgTheta = paths.averageTheta(pointPath)
      #attempt to do pRMSD if possible... values stay "err" when it fails
      prmsd, coverage, span, wrmsd, less1, lessrad, radiicomp = \
          "err", "err", "err", "err", "err", "err", "err"
      if knownPathExists:
        try:
          sourcePath = [pathPt[1:4] for pathPt in pointPath]
          sourceRadii = [pathPt[0] for pathPt in pointPath]
          prmsd, coverage, span, wrmsd, less1, lessrad, radiicomp = \
              comparePathsManyMetrics(False, pathFile, sourcePath, sourceRadii)
          metrics = (prmsd, coverage, span, wrmsd, less1, lessrad, radiicomp)
          for statIndex, value in enumerate(metrics):
            current = bestStats[statIndex]
            if lowerIsBetter[statIndex]:
              improved = current == 'err' or current > value
            else:
              improved = current == 'err' or current < value
            if improved:
              bestStats[statIndex] = value
              bestStatsPaths[statIndex] = foundPaths
        except (IOError, TypeError):
          #if there is no known path  file, this should be the error
          #deliberately best effort: the row is still written below
          pass
      #now output data, every field is followed by one space
      row = [
          foundPaths, outsideOne, outsideTwo, plugs, len(pointPath), pathLen,
          minRad, maxRad, endMinOne, endMinTwo, minimaCount, travelDepthMax,
          windingMetric, avgTheta]
      if knownPathExists:
        row.extend([prmsd, coverage, span, wrmsd, less1, lessrad, radiicomp])
      logFile.write("".join([str(field) + " " for field in row]) + "\n")
  if knownPathExists:  # output bestStats and bestStatsPaths
    bestName = tstFileName + ".known.best.txt"
    with open(bestName, 'w') as bestFile:
      bestFile.write("pRMSD coverage span wrmsd less1 lessrad radiicomp ")
      bestFile.write(
          "pRMSD# coverage# span# wrmsd# less1# lessrad# radiicomp#\n")
      for stat in bestStats + bestStatsPaths:
        bestFile.write(str(stat) + " ")
      bestFile.write("\n")
  print("done with chunnel")
def processData(dataList, nameList, listPaths, outputFileName="processed.foundholes."):
    """Write summary, ranking and histogram outputs for a set of found-hole logs.

    Steps performed by the visible part of this function:

    1. For every comparison column in ``compCols`` the best value per entry of
       ``dataList`` is collected and written to
       ``outputFileName + "overall.best.log"`` (one row per name).
    2. For every comparison column, ``processDataOne`` is called per data set
       and its three results are written to ``*.best.rankings.log``,
       ``rankings.<colName>.<metric>.log`` and
       ``rankings.reverse.<colName>.<metric>.log``.
    3. For every data column 4..13 and every comparison column, histograms of
       all values, of the "best" values and (when available) of the values
       measured on ``listPaths`` are computed and, if ``gnuplotAvailable`` is
       true, plotted to a PNG through ``plotter``.

    Args:
        dataList: sequence of data tables, one per structure; each is handed
            as-is to getMinColumn/getMaxColumn/getOneColumn/getFromColumnIf*.
        nameList: names parallel to ``dataList`` (indexed with the same index).
        listPaths: iterable of paths; each is measured with ``len`` and the
            ``paths.path*`` helpers to fill the "drilled" reference data.
        outputFileName: prefix prepended to every file name written here.

    Relies on names that are not defined in this function (``colNames``,
    ``colNamesBonus``, ``gnuplotAvailable``, ``plotter``, ``Gnuplot``,
    ``statistics``, ``paths`` and the get*/processDataOne helpers).

    NOTE(review): the text available for this function ends inside the
    plotting branch; anything that follows is not described here.
    """
    # first do the one big summary output file
    bestLists = []
    # Data-table columns holding the known-path comparison metrics.  They are
    # written below under the header "pRMSD coverage span wRMSD less1 lessRad
    # radiicomp", in this order, so column 8 is pRMSD and column 14 radiicomp.
    compCols = [8, 9, 10, 11, 12, 13, 14]
    # Per comparison column: whether the best value is the minimum or maximum.
    comps = ["min", "max", "max", "min", "max", "max", "min"]
    # Per comparison column: threshold used for the "select" subset further
    # down (that subset is computed but only consumed by commented-out code).
    compThreshs = [5.0, 0.4, 0.8, 2.5, 0.5, 0.8, 2.5]
    for index, colIdx in enumerate(compCols):
        # bestList[i] is the best value of column colIdx within dataList[i]
        bestList = []
        for data in dataList:
            if comps[index] == "min":
                bestVal, bestIndex = getMinColumn(data, colIdx)  # colIdx is one of compCols (8 is the prmsd)
            else:
                bestVal, bestIndex = getMaxColumn(data, colIdx)  # colIdx is one of compCols (8 is the prmsd)
            bestList.append(bestVal)
        bestLists.append(bestList)
        # fileOut = open(outputFileName + colNamesBonus[colIdx-4] + ".best.log", 'w')
        # for index, name in enumerate(nameList):
        #  fileOut.write(name + " " + str(bestList[index]) + "\n")
        # fileOut.close()
    # One row per name: the name followed by its best value for each metric.
    fileOut = open(outputFileName + "overall.best.log", "w")
    fileOut.write("name pRMSD coverage span wRMSD less1 lessRad radiicomp\n")
    for index, name in enumerate(nameList):
        fileOut.write(name + " ")
        for index2 in range(len(compCols)):
            fileOut.write(str(bestLists[index2][index]) + " ")
        fileOut.write("\n")
    fileOut.close()
    # Ranking files: one set per comparison column.  Note that in this loop
    # colIdx is the position inside compCols and sortColNumber the data column.
    for colIdx, sortColNumber in enumerate(compCols):
        bestRankStrings, sortedEvals, backwardsEvals = [], [], []
        for index, data in enumerate(dataList):
            bestRankString, sortedEval, backEval = processDataOne(data, nameList[index], sortColNumber, comps[colIdx])
            bestRankStrings.append(bestRankString)
            sortedEvals.append(sortedEval)
            backwardsEvals.append(backEval)
        # colNamesBonus is indexed with a -4 offset relative to the data column
        fileOut = open(outputFileName + colNamesBonus[sortColNumber - 4] + ".best.rankings.log", "w")
        for bestRankStr in bestRankStrings:
            fileOut.write(bestRankStr + "\n")
        fileOut.close()
        # sortedEval/backEval hold one string per entry of colNames; each
        # colName gets a forward and a reverse file with one line per data set
        for index, colName in enumerate(colNames):
            fileOut = open(
                outputFileName + "rankings." + colName + "." + colNamesBonus[sortColNumber - 4] + ".log", "w"
            )
            for line in sortedEvals:
                fileOut.write(line[index] + "\n")
            fileOut.close()
            fileOut = open(
                outputFileName + "rankings.reverse." + colName + "." + colNamesBonus[sortColNumber - 4] + ".log", "w"
            )
            for line in backwardsEvals:
                fileOut.write(line[index] + "\n")
            fileOut.close()
    # Reference values measured on listPaths, stored at the same column index
    # as in the data tables.  Slots 0-3 are placeholders; only 4-7 are filled
    # here, so 8-13 stay empty and are skipped by the len(...) > 0 tests below.
    drilledData = [False, False, False, False, [], [], [], [], [], [], [], [], [], []]
    for drilledPath in listPaths:
        drilledData[4].append(float(len(drilledPath)))
        drilledData[5].append(float(paths.pathLength(drilledPath)))
        drilledData[6].append(float(paths.pathMinRadius(drilledPath)))
        drilledData[7].append(float(paths.pathMaxInsideRadius(drilledPath)))
    # Histogram every data column 4..13 (column 14 is never used as the
    # histogrammed column) against every comparison column.
    for column in range(4, 14):
        columnName = colNamesBonus[column - 4]
        for colIdx, sortColNumber in enumerate(compCols):
            # print columnName, colNamesBonus[sortColNumber - 4]
            columnData = []  # every value of `column` over all data sets
            selectColumnData = []  # values passing the compThreshs filter (unused below)
            bestColumnData = []  # values filtered by the per-data-set best value
            for data in dataList:
                if comps[colIdx] == "min":
                    bestVal, bestIndex = getMinColumn(data, sortColNumber)
                    selectColumnData.extend(getFromColumnIfMin(data, column, sortColNumber, compThreshs[colIdx]))
                    bestColumnData.extend(getFromColumnIfMin(data, column, sortColNumber, bestVal))
                else:
                    bestVal, bestIndex = getMaxColumn(data, sortColNumber)
                    selectColumnData.extend(getFromColumnIfMax(data, column, sortColNumber, compThreshs[colIdx]))
                    bestColumnData.extend(getFromColumnIfMax(data, column, sortColNumber, bestVal))
                columnData.extend(getOneColumn(data, column))
            # Upper bound of the histogram range: largest value seen (including
            # the drilled reference values when present) plus 1.0.
            # NOTE(review): max() raises ValueError if columnData is empty.
            if len(drilledData[column]) > 0:
                maxHere = max(max(columnData), max(drilledData[column])) + 1.0
            else:
                maxHere = max(columnData) + 1.0
            interval = maxHere / 40.0
            histogram = statistics.computeHistogram(columnData, interval, maxData=maxHere)
            # histogram[1] is fed back as maxData for the other histograms,
            # presumably so that all of them share the same bins -- TODO confirm
            # selectHistogram = statistics.computeHistogram(
            #    selectColumnData, interval, maxData=histogram[1])
            bestHistogram = statistics.computeHistogram(bestColumnData, interval, maxData=histogram[1])
            if len(drilledData[column]) > 0:
                realHistogram = statistics.computeHistogram(drilledData[column], interval, maxData=histogram[1])
            else:
                realHistogram = False
            # need to scale select part of data to be same height as histogram
            maxHeight = max(histogram[0])
            # maxSelectHeight = max(selectHistogram[0])
            maxBestHeight = max(bestHistogram[0])
            # NOTE(review): the divisions below raise ZeroDivisionError if the
            # tallest bin of the best/real histogram is 0 -- confirm that
            # cannot happen for the inputs used.
            if realHistogram:
                maxRealHeight = max(realHistogram[0])
                realScaledHistogram = [[], realHistogram[1]]
                for histPoint in realHistogram[0]:
                    realScaledHistogram[0].append(histPoint * maxHeight / maxRealHeight)
            # selectScaledHistogram = [[], selectHistogram[1]]
            bestScaledHistogram = [[], bestHistogram[1]]
            # for histPoint in selectHistogram[0]:
            # selectScaledHistogram[0].append(histPoint*maxHeight/maxSelectHeight)
            for histPoint in bestHistogram[0]:
                bestScaledHistogram[0].append(histPoint * maxHeight / maxBestHeight)
            # print histogram, len(histogram[0])
            # print selectHistogram, len(selectHistogram[0])
            # make gnuplot version if possible
            if gnuplotAvailable:
                # plotter = Gnuplot.Gnuplot(debug=0)
                # x values are multiples of the bin interval: 0, interval, ...
                xVals = []
                for cutoff in range(2 + int(histogram[1] / interval)):
                    xVals.append(cutoff * interval)
                graphData = Gnuplot.Data(xVals, histogram[0], title="All")
                # if comps[colIdx] == 'min':
                #  graphSelectData = Gnuplot.Data(
                #     xVals, selectScaledHistogram[0],
                #     title="<" + str(compThreshs[colIdx]) + " " +
                #     str(colNamesBonus[sortColNumber-4]) + " Scaled by " +
                #     str(maxHeight/maxSelectHeight))
                # else:
                #  graphSelectData = Gnuplot.Data(
                #      xVals, selectScaledHistogram[0], title=">" +
                #      str(compThreshs[colIdx]) + " " +
                #      str(colNamesBonus[sortColNumber-4]) + " Scaled by " +
                #      str(maxHeight/maxSelectHeight))
                graphBestData = Gnuplot.Data(
                    xVals,
                    bestScaledHistogram[0],
                    title="Best "
                    + str(colNamesBonus[sortColNumber - 4])
                    + " Scaled by "
                    + str(maxHeight / maxBestHeight),
                )
                if realHistogram:
                    graphRealData = Gnuplot.Data(
                        xVals, realScaledHistogram[0], title="Drilled Holes Scaled by " + str(maxHeight / maxRealHeight)
                    )
                # cumulative curve is built but not passed to any plot call in
                # the visible part of this function
                graphDataCum = Gnuplot.Data(xVals, statistics.computeCumulativeHistogram(histogram[0]))
                # one PNG per (histogrammed column, comparison column) pair
                plotter("set terminal png")
                plotter('set output "' + outputFileName + columnName + "." + colNamesBonus[sortColNumber - 4] + '.png"')
                plotter("set data style linespoints")
                plotter("set xrange [" + str(min(xVals) - 0.01) + ":" + str(max(xVals) + 0.01) + "]")
                plotter.xlabel(columnName)
                plotter.ylabel("Count")
                plotter("set multiplot")
                plotter("set key right top")
                if realHistogram:
                    # plotter.plot(
                    #    graphData, graphSelectData, graphBestData, graphRealData)
                    plotter.plot(graphData, graphBestData, graphRealData)
                else:
                    # plotter.plot(graphData, graphSelectData, graphBestData)
                    plotter.plot(graphData, graphBestData)
                plotter("unset multiplot")