def getJustMembranePdb(inputFileName, outputFileName):
    '''writes the residues of inputFileName that reach inside the barrier range.

    inputFileName -- pdb file that also contains the barrier markers: residues
      named "DUM" whose atom name starts with "O" (one barrier) or "N" (the
      other barrier).
    outputFileName -- file the reduced pdb is written to.

    a residue+chain is kept as soon as one of its atoms has abs(z) no larger
    than the higher of the two barrier z values. raises IndexError when either
    kind of barrier marker is absent from the input.
    '''
    pdbBarriers = pdb.pdbData(inputFileName)
    # collect the coordinates of the barrier marker atoms, one list per barrier
    barrierAtomList = [[], []]
    for index, resName in enumerate(pdbBarriers.resNames):
        if resName != "DUM":
            continue
        atomStart = pdbBarriers.atoms[index][0]
        if atomStart == "O":
            barrierAtomList[0].append(pdbBarriers.coords[index])
        elif atomStart == "N":
            barrierAtomList[1].append(pdbBarriers.coords[index])
    # only the first marker found for each barrier is consulted
    zCoord = max(barrierAtomList[0][0][2], barrierAtomList[1][0][2])
    goodResChain = []  # accepted keys, in first-seen order
    alreadyGood = set()  # same keys, for constant-time membership checks
    for index, thisResNum in enumerate(pdbBarriers.resNums):
        resChain = str(thisResNum) + str(pdbBarriers.chains[index])
        if resChain in alreadyGood:  # already accepted, no need to test again
            continue
        if abs(pdbBarriers.coords[index][2]) <= zCoord:
            alreadyGood.add(resChain)
            goodResChain.append(resChain)
    newPdb = pdbBarriers.getListResiduesChains(goodResChain)
    newPdb.write(outputFileName)
def pathLength(path):
    '''adds up the straight-line distances between consecutive path points.

    elements 1 through 3 of every path entry are used as the coordinates;
    element 0 is ignored here. a path with a single point has length 0 and an
    empty path raises IndexError.
    '''
    head = path[0]  # indexing first keeps the IndexError on an empty path
    points = [head[1:4]] + [entry[1:4] for entry in path[1:]]
    return sum(
        geometry.distL2(after, before)
        for before, after in zip(points, points[1:]))
def getResiduesBetweenPoints(pair, pdbD):
    '''finds the residues inside the sphere spanned by a pair of nodes.

    pair -- two objects offering getXYZ(); the sphere is centered on the
      midpoint of their coordinates and its radius is half their separation.
    pdbD -- pdb data handed on unchanged to getNearbyResidues.

    returns whatever getNearbyResidues returns for that single sphere.
    '''
    firstXYZ, secondXYZ = pair[0].getXYZ(), pair[1].getXYZ()
    halfSeparation = geometry.distL2(firstXYZ, secondXYZ)/2.
    midpoint = [(firstXYZ[axis]+secondXYZ[axis])/2. for axis in range(3)]
    # getNearbyResidues expects entries laid out as [radius, x, y, z]
    return getNearbyResidues([[halfSeparation] + midpoint], pdbD)
def tstTravelSurfInsideOld(tstFileName, phiFileName=False):
    '''does the old algorithm of just computing the shortest distance to any
    surface point from any atom by going through both lists the hard way.

    tstFileName -- tst file to read; the new record is appended to it and a
      pdb copy is written to tstFileName + ".old.atomdepth.pdb".
    phiFileName -- accepted but not used by this function.

    for every atom in the PDB_RECORD section the minimum distance to any of
    the points returned by cavity.assumeNoCavities is computed. the distance
    replaces the second entry of the atom's factors in the written pdb and is
    also stored as the ATOM_DEPTH_OLD record, in the tstData dictionary and
    at the end of the tst file.
    '''
    tstD = tstdata.tstData(tstFileName)  # read the file into the data structure
    # only the first returned list (allPoints) is needed here
    allPoints, _allTris, _cavPoints, _cavTris = cavity.assumeNoCavities(
        tstD.dict['POINT_XYZ'], tstD.dict['TRIANGLE_POINT'],
        tstD.dict['POINT_NEIGHBOR'])
    pointXyz = tstD.dict['POINT_XYZ']
    pdbD = pdb.pdbData()
    for line in tstD.dict['PDB_RECORD']:
        pdbD.processLine(line)
    # slice the surface coordinates once instead of once per atom
    surfaceXyz = [pointXyz[point-1][1:] for point in allPoints]
    atomTravelInDepths = [
        min(geometry.distL2(coord, xyz) for xyz in surfaceXyz)
        for coord in pdbD.coords]
    # make a pdb file with the bfactor replaced
    for index, atomTID in enumerate(atomTravelInDepths):
        pdbD.updateFactors(index, (pdbD.factors[index][0], atomTID))
    pdbD.write(tstFileName+".old.atomdepth.pdb")
    # also add record to tstdata, numbering the atoms from 1
    tstD.dict['ATOM_DEPTH_OLD'] = [
        [index+1, atomTID] for index, atomTID in enumerate(atomTravelInDepths)]
    # append the record to the tst file; the header now names the same record
    # as the END line (it used to say ATOM_TRAVEL_IN)
    with open(tstFileName, 'a') as tstFile:
        tstFile.write("ATOM_DEPTH_OLD\n")
        for record in tstD.dict['ATOM_DEPTH_OLD']:
            lineOut = "%8d" % record[0]
            lineOut += "".join("%+9.4f " % value for value in record[1:])
            # the forced sign keeps columns aligned; show "+" as a blank
            tstFile.write(lineOut.replace("+", " "))
            tstFile.write("\n")
        tstFile.write("END ATOM_DEPTH_OLD\n")
def getAllAdjacentBoxesOnce(curBox, lenX, lenY, lenZ, extraEdges=False):
    '''returns (box, distance) pairs for the neighbors of curBox, filtered so
    that calling this on all boxes is meant to yield each pair only once.

    the neighbors come from getAllAdjacentBoxes, each paired with its
    geometry.distL2 distance from curBox. when extraEdges has an entry for
    curBox, its (box, distance, gridDistance) triples add (box, distance)
    pairs. only pairs whose box has its first two coordinates not less than
    those of curBox are returned.
    NOTE(review): the filter looks at the first two coordinates only, so
    boxes that share them pass from both sides -- confirm this is intended.
    '''
    candidates = [
        (box, geometry.distL2(curBox, box))
        for box in getAllAdjacentBoxes(curBox, lenX, lenY, lenZ)]
    if extraEdges and curBox in extraEdges:
        # the third member of each triple (grid distance) is not needed here
        candidates.extend(
            (adjBox, adjDist) for adjBox, adjDist, _ in extraEdges[curBox])
    ownPrefix = curBox[0:2]
    return [pair for pair in candidates if ownPrefix <= pair[0][0:2]]
def tstEdgeCurvature(trianglePoint, pointXyz, pointTriangle, pointNeighbor):
    '''computes an angle for every edge from the two triangles around it, then
    averages those angles for each point.

    trianglePoint -- records of [triangle number, point numbers...]
    pointXyz -- records of [point number, coordinates...]
    pointTriangle -- records whose entries from index 2 on are triangle numbers
    pointNeighbor -- records of [point number, count, neighbor numbers...]
    point and triangle numbers are 1-based indices into these lists.

    returns (edgeAngle, pointMeanAngle, pointWeightedMeanAngle): a dictionary
    from sorted point pairs to the edge angle in degrees (negated when
    geometry.checkPlaneSide is true, which the code treats as concave), and
    two lists of [point, mean] records, the second computed from the angles
    multiplied by the edge lengths. only the first two triangles shared by an
    edge are used.
    '''
    # triangle number -> coordinates of its corners
    triXyz = {}
    for triRecord in trianglePoint:
        triXyz[triRecord[0]] = [pointXyz[pt-1][1:] for pt in triRecord[1:]]
    edgeAngle = {}  # filled as edges are met so each is computed only once
    pointMeanAngle = []
    pointWeightedMeanAngle = []
    for neighborRecord in pointNeighbor:
        mainPt = neighborRecord[0]
        angles, weightedAngles = [], []
        for otherPt in neighborRecord[2:]:  # entry 1 is the neighbor count
            edgeKey = tuple(sorted([mainPt, otherPt]))  # canonical ordering
            edgeLength = geometry.distL2(
                pointXyz[mainPt-1][1:], pointXyz[otherPt-1][1:])
            if edgeKey not in edgeAngle:  # first visit, have to compute it
                mainTris = set(pointTriangle[mainPt-1][2:])
                otherTris = set(pointTriangle[otherPt-1][2:])
                sharedTris = list(mainTris.intersection(otherTris))
                # will almost always be 2, for now assume only 2
                triA, triB = sharedTris[0], sharedTris[1]
                normalA = geometry.getTriNormalList(triXyz[triA])
                normalB = geometry.getTriNormalList(triXyz[triB])
                degrees = geometry.getAngle(normalA, normalB) * 180 / math.pi
                planeA = geometry.calculatePlaneD(
                    normalA, geometry.getAverage(triXyz[triA]))
                # the corner of triangle B that is not on the shared edge
                offEdge = set(trianglePoint[triB-1][1:]).difference(
                    set(edgeKey))
                otherB = pointXyz[list(offEdge)[0]-1][1:]
                if geometry.checkPlaneSide(normalA+[planeA], otherB):
                    degrees = -degrees  # concave negative, convex positive
                edgeAngle[edgeKey] = degrees
            angle = edgeAngle[edgeKey]
            angles.append(angle)
            weightedAngles.append(angle * edgeLength)
        pointMeanAngle.append([mainPt, statistics.computeMean(angles)])
        pointWeightedMeanAngle.append(
            [mainPt, statistics.computeMean(weightedAngles)])
    return edgeAngle, pointMeanAngle, pointWeightedMeanAngle
def checkPath(path, loopPointsList, pointXYZ, xyzStart=1):
    '''checks to see if the path intersects any topological loop on the surf.

    path -- sequence of points; the 3 coordinates of each start at xyzStart.
    loopPointsList -- loops, each turned into triangles by
      tstdata.trianglinizeLoop.
    pointXYZ -- records indexed by point number - 1 whose coordinates start
      at index 1.
    xyzStart -- index of the first coordinate inside each path entry.

    returns (triangles, loopPts) for the first loop whose triangles are
    crossed an odd number of times by the path segments, otherwise
    (False, False). prints a message and exits the program with status 1 if
    a triangle has to be perturbed more than 5000 times for one segment.
    '''
    pathThrough = False, False  # return a tuple... false is for failure
    for loopPts in loopPointsList:
        triangles = tstdata.trianglinizeLoop(loopPts)
        numberIntersects = 0
        #do intersection checks... carefully
        for triangle in triangles:
            lastPathPt = path[0][xyzStart:xyzStart + 3]  # init for loop
            for nextPathPtRad in path[1:]:
                nextPathPt = nextPathPtRad[xyzStart:xyzStart+3]
                # fresh copies of the corners for every segment, so an
                # earlier perturbation does not carry over
                triPts0 = pointXYZ[triangle[0]-1][1:]
                triPts1 = pointXYZ[triangle[1]-1][1:]
                triPts2 = pointXYZ[triangle[2]-1][1:]
                posPt, maxIt = False, 5000
                # a False result is retried with a perturbed triangle until
                # an intersection point comes back
                while False == posPt:
                    posPt = geometry.linePlaneIntersectionNumeric(
                        triPts0, triPts1, triPts2, lastPathPt, nextPathPt)
                    if False == posPt:
                        triPts0, triPts1, triPts2 = geometry.perturbTriangle(
                            triPts0, triPts1, triPts2)
                        maxIt -= 1
                        if maxIt < 0:
                            print "had to perturb points 5000 times", triPts0, triPts1,
                            print triPts2, lastPathPt, nextPathPt, "giving up"
                            sys.exit(1)
                if posPt is not False:
                    # the intersection only counts when it is no farther from
                    # either end of the segment than the segment is long,
                    # i.e. it lies on the segment and not just on its line
                    if geometry.distL2(lastPathPt, nextPathPt) >= \
                            geometry.distL2(lastPathPt, posPt) and \
                            geometry.distL2(lastPathPt, nextPathPt) >= \
                            geometry.distL2(nextPathPt, posPt):
                        if geometry.intPointInsideTri(triPts0, triPts1, triPts2, posPt):
                            numberIntersects += 1
                lastPathPt = nextPathPt  # for next loop
        #print numberIntersects  # for debugging...
        if 1 == numberIntersects % 2:  # if intersects odd number of times
            pathThrough = triangles, loopPts
            break  # no need to do more checks, one is good enough
    return pathThrough  # in case caller wants to do something with it.
def outputRadiiTxt(origPath, txtfile):
    '''writes the radius along a path as a function of distance travelled.

    origPath -- path entries laid out as [radius, coordinates...]; must hold
      at least one entry.
    txtfile -- name of the text file to (over)write.

    one line is written per path point, "distance, radius " where distance is
    the cumulative geometry.distL2 length from the first point (0 for the
    first point itself). the file is closed even if writing fails.
    '''
    distanceRadiusPairs = [(0, origPath[0][0])]  # first pair is 0, first radius
    lastPt, lastDist = origPath[0][1:], 0
    for pt in origPath[1:]:  # all but first
        lastDist += geometry.distL2(pt[1:], lastPt)
        lastPt = pt[1:]
        distanceRadiusPairs.append((lastDist, pt[0]))
    with open(txtfile, 'w') as outputFile:
        for distance, radius in distanceRadiusPairs:
            outputFile.write(str(distance) + ", " + str(radius) + " \n")
def buildNeighbors(self, allPoints):
    '''returns the neighbor dictionary, building it first when it is missing
    or empty.

    allPoints -- point numbers (1-based) to build entries for.

    the dictionary maps each point number to a list of [neighbor, distance]
    records, taken from the POINT_NEIGHBOR and POINT_XYZ records of self.dict.
    a non-empty dictionary built earlier is returned as is, whatever
    allPoints contains.
    '''
    cached = self.__neighbors
    if cached and len(cached) != 0:  # already built, reuse it
        return cached
    table = {}
    self.__neighbors = table  # stored before filling, same object throughout
    xyzRecords = self.dict['POINT_XYZ'] if allPoints else None
    for pointStart in allPoints:
        neighborRecord = self.dict['POINT_NEIGHBOR'][pointStart-1]
        startXYZ = xyzRecords[pointStart-1][1:]
        # the first 2 entries of a neighbor record are the point and a count
        table[pointStart] = [
            [neighborPoint,
             geometry.distL2(startXYZ, xyzRecords[neighborPoint-1][1:])]
            for neighborPoint in neighborRecord[2:]]
    return self.__neighbors
def getNearbyResidues(pointPath, pdbD, nearbyDistance=0.):
    '''lists the residues of pdbD that have an atom close to the path.

    pointPath -- entries laid out as [radius, x, y, z, ...].
    pdbD -- pdb data offering parallel coords, resNums and chains lists.
    nearbyDistance -- extra margin added to the radius of every path point.

    an atom counts when its distance to a path point is less than that
    point's radius plus nearbyDistance. returns the sorted, duplicate-free
    list of residue number + chain strings of all such atoms.
    '''
    found = set()  # a set guarantees uniqueness
    for pathPt in pointPath:
        for index, coord in enumerate(pdbD.coords):
            if geometry.distL2(pathPt[1:4], coord) >= pathPt[0] + nearbyDistance:
                continue  # too far from this path point
            found.add(str(pdbD.resNums[index]) + str(pdbD.chains[index]))
    return sorted(found)
def checkPathBarriers(prefix):
    '''classifies every pore path of a structure against its two barriers and
    then writes out the side branches of the good paths.

    prefix -- name stem; the files read are prefix + ".nocav.tst.findholes.log",
      "planes_" + prefix + ".pdb" and the numbered pore files
      prefix + ".nocav.tst.<n>.pore.py", starting at n = 1 and stopping at
      the first number with no file.

    the barriers are taken from the "DUM" residues of the planes pdb, atoms
    whose name starts with "O" for one and "N" for the other; only the z
    coordinate of the first atom of each is used. for every path the number
    of crossings of each barrier (countCrossingsZ) and the position of its
    two end points (below both, between, above both barriers) are written to
    the .sideshole.log file, and the path is also listed in exactly one of
    the good/side/bad logs. afterwards every side path sharing an end with a
    good path has the good paths subtracted from it, what is left is written
    as a numbered .branch.py file and listed in the .branchholes.log file,
    and addFoundHoleStats.redoFindholes is run on those branches.
    '''
    tstName = prefix + ".nocav.tst"
    findHolesName = tstName + ".findholes.log"
    findHolesFile = open(findHolesName, 'r')
    findHolesLines = findHolesFile.readlines()
    findHolesFile.close()
    HolesName = tstName + ".sideshole.log"  # holds all the output
    goodHolesName = tstName + ".good.sideshole.log"  # just the 1 1 0 1 1
    sideHolesName = tstName + ".side.sideshole.log"  # just the * * 1 * *
    badHolesName = tstName + ".bad.sideshole.log"  # all others
    pdbWithBarriersFileName = "planes_" + prefix + ".pdb"
    pdbBarriers = pdb.pdbData(pdbWithBarriersFileName)
    #get the barriers read in and defined
    barrierAtomList = [[], []]
    for index, resName in enumerate(pdbBarriers.resNames):
        if resName == "DUM":
            if pdbBarriers.atoms[index][0] == "O":
                barrierAtomList[0].append(pdbBarriers.coords[index])
            elif pdbBarriers.atoms[index][0] == "N":
                barrierAtomList[1].append(pdbBarriers.coords[index])
    # raises IndexError here when either kind of barrier atom is missing
    barrierZ = [barrierAtomList[0][0][2], barrierAtomList[1][0][2]]
    barrierZ.sort()
    barrierSep = geometry.distL2(barrierAtomList[0][0], barrierAtomList[1][0])
    #barrier is just Z coordinate
    #setup for main loop over paths
    poreSuffix = ".pore.py"
    logFile = open(HolesName, 'w')
    goodLogFile = open(goodHolesName, 'w')
    sideLogFile = open(sideHolesName, 'w')
    badLogFile = open(badHolesName, 'w')
    #the following 5 things are calculated and written for each path, headers
    #the 6th, barrier separation, is really the same for each structure
    logFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    logFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    # the other three logs repeat the findholes header in front of the columns
    goodLogFile.write("prefix ")
    goodLogFile.write(string.strip(findHolesLines[0]) + " ")
    goodLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    goodLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    sideLogFile.write("prefix ")
    sideLogFile.write(string.strip(findHolesLines[0]) + " ")
    sideLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    sideLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    badLogFile.write("prefix ")
    badLogFile.write(string.strip(findHolesLines[0]) + " ")
    badLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    badLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    holeNumber = 1
    poreFile = tstName + "." + str(holeNumber) + poreSuffix
    print poreFile
    paths = []  # paths with radius, indexed by path number (holeNumber - 1)
    sides, goods = [], []  # path numbers classified as side / good
    endsToPaths = {}  # end label -> path numbers using it
    pathsToEnds = {}  # path number -> its end labels
    while os.path.exists(poreFile):
        path = comparePaths.readCGOPath(poreFile)
        pathRad = comparePaths.readCGOPathWithRadius(poreFile)
        paths.append(pathRad)
        pathNum = len(paths) - 1
        # columns 1 and 2 of this hole's findholes line are used as its two
        # end labels -- presumably the start and end of the path, TODO confirm
        for end in string.split(findHolesLines[holeNumber])[1:3]:
            if pathNum not in pathsToEnds:
                pathsToEnds[pathNum] = []
            pathsToEnds[pathNum].append(end)
            if end not in endsToPaths:
                endsToPaths[end] = []
            endsToPaths[end].append(pathNum)
        intersections = [0, 0]
        for index, barrier in enumerate(barrierZ):
            intersections[index] = countCrossingsZ(path, barrier)
        # ends[0]: below both barriers, ends[1]: between, ends[2]: above both
        ends = [0, 0, 0]
        for endPoint in [path[0], path[-1]]:
            endPointZ = endPoint[2]
            if endPointZ < barrierZ[0] and endPointZ < barrierZ[1]:
                ends[0] += 1
            elif endPointZ >= barrierZ[0] and endPointZ <= barrierZ[1]:
                ends[1] += 1
            elif endPointZ > barrierZ[0] and endPointZ > barrierZ[1]:
                ends[2] += 1
        outputThisTime = str(ends[0]) + " " + str(intersections[0]) + " " + \
            str(ends[1]) + " " + str(intersections[1]) + " " + \
            str(ends[2]) + " " + str(barrierSep) + " "
        logFile.write(outputThisTime)
        logFile.write("\n")
        if ends[0] + ends[1] + ends[2] != 2:
            print "problems sorting out the ends"
        if ends[0] == 1 and ends[2] == 1 and intersections == [1, 1]:
            #it is 'good'
            goods.append(pathNum)
            goodLogFile.write(prefix + " ")
            goodLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            goodLogFile.write(outputThisTime + "\n")
        elif ends[1] == 1:
            # exactly one end lies between the barriers
            sides.append(pathNum)
            sideLogFile.write(prefix + " ")
            sideLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            sideLogFile.write(outputThisTime + "\n")
        else:
            badLogFile.write(prefix + " ")
            badLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            badLogFile.write(outputThisTime + "\n")
        #and that is it for this path
        holeNumber += 1  # get set up for next pass
        poreFile = tstName + "." + str(holeNumber) + poreSuffix
    logFile.close()
    goodLogFile.close()
    sideLogFile.close()
    badLogFile.close()
    #next lines are for debugging the new data structures
    # (disabled: the statements sit inside a string literal and never run)
    ''' print sides print goods print endsToPaths print pathsToEnds '''
    #now want to find side branches of good paths
    branches = 0
    branchSuffix = ".branch.py"
    branchFile = tstName + "." + str(branches) + branchSuffix
    branchLog = open(tstName + ".branchholes.log", 'w')
    branchLog.write(string.strip(findHolesLines[0]) + "\n")
    for side in sides:
        # good paths that share an end label with this side path; a good path
        # is listed once for every matching pair of ends
        foundGoods = []
        for sideEnd in pathsToEnds[side]:
            for good in goods:
                for goodEnd in pathsToEnds[good]:
                    if goodEnd == sideEnd:
                        foundGoods.append(good)
        if len(foundGoods) > 0:
            branchedPath = paths[side]  # start with whole path
            for good in foundGoods:  # remove physiological intersecting paths
                branchedPath = pathsModule.subtractPaths(branchedPath, paths[good])
            if len(branchedPath) > 0:  # has to have some length remaining
                branches += 1
                branchFile = tstName + "." + str(branches) + branchSuffix
                print branches, side, foundGoods
                tstdebug.debugSetGridSpheres(
                    branchedPath, 0.5, branchFile, radius=True,
                    mainColor=(0.01, 0.9, 0.35))
                branchLog.write(str(branches) + " ")
                branchLog.write(str(pathsToEnds[side][0]) + " ")
                branchLog.write(str(pathsToEnds[side][1]) + " ")
                branchLog.write("- ")  # dummy, not real
                # the remaining columns are written as zero placeholders
                branchLog.write("0. 0. 0. 0. 0. 0. 0. 0. 0. 0. \n")
    branchLog.close()
    addFoundHoleStats.redoFindholes(
        prefix, nearbyDistance=4., logExt=".branchholes.log",
        poreSuffix=".branch.py", nearbyName=".branch")
def comparePathsManyMetrics(
        source=False, target=False, sourceDataManual=False,
        sourceRadiiManual=False, core=100):
    '''do lots of different metrics to compare the paths.

    source, target -- file names; a name containing "py" is read with
      readCGOPathRadius, otherwise one containing "sph" is read with
      readSphPathRadius.
    sourceDataManual, sourceRadiiManual -- source points and radii to use
      when no source data was read from a file.
    core -- percentage of the source path, centered, that takes part; the
      remaining points are split evenly between both ends and skipped.

    every counted source point is matched to its closest target point (the
    first one on ties). returns the tuple
    (prmsd, coverage, span, wrmsd, less1, lessrad, radiicomp):
      prmsd -- root mean square of the matched distances
      coverage -- fraction of target points matched at least once
      span -- fraction of the target from first to last matched point
      wrmsd -- like prmsd with each squared distance divided by the radius
        of the matched target point
      less1 -- fraction of counted points matched within a distance of 1
      lessrad -- fraction matched within the radius of the target point
      radiicomp -- root mean square difference of matched radii, 0 when no
        source radii are available
    all seven are the string "err" when no source point was counted.
    '''
    #open and read in both first
    sourceData, sourceRadii, targetData, targetRadii = False, False, False, False
    if source:
        if "py" in source:
            sourceData, sourceRadii = readCGOPathRadius(source)
        elif "sph" in source:
            sourceData, sourceRadii = readSphPathRadius(source)
    if sourceData is False:
        # nothing was read from a file, fall back on the manual data
        sourceData, sourceRadii = sourceDataManual, sourceRadiiManual
    if target:
        if "py" in target:
            targetData, targetRadii = readCGOPathRadius(target)
        elif "sph" in target:
            targetData, targetRadii = readSphPathRadius(target)
    #now have the data... now do the one-sided RMSD thing
    sumDistanceSquared, sumWeighted = 0.0, 0.0
    sumRadiiDiff = 0.0
    #figure out which % of the center (core) to use
    outside = (100. - core)/200.
    dataCounted = 0.
    mappedIndices = set()  # indices of the target points matched so far
    withinOne, withinRadius = 0, 0
    sourceCount = float(len(sourceData))
    for index, sourceDatum in enumerate(sourceData):
        #make sure in 'core'
        fraction = float(index)/sourceCount
        if fraction < outside or fraction >= (1. - outside):
            continue
        dataCounted += 1.
        #find match; strict < keeps the first of equally close points
        closestIndex = 0
        distance = geometry.distL2(targetData[0], sourceDatum)
        for tarIndex, targetDatum in enumerate(targetData):
            thisDist = geometry.distL2(targetDatum, sourceDatum)
            if thisDist < distance:
                closestIndex, distance = tarIndex, thisDist
        # always the radius of the matched point; this used to stay at a
        # placeholder of 1. whenever the first target point was the closest
        tarRad = targetRadii[closestIndex]
        if distance < 1.:
            withinOne += 1
        if distance < tarRad:
            withinRadius += 1
        if sourceRadii:
            sumRadiiDiff += abs(tarRad - sourceRadii[index])**2.
        mappedIndices.add(closestIndex)
        sumDistanceSquared += distance**2.
        # tiny offset avoids dividing by a zero radius
        sumWeighted += distance**2.*(1./(tarRad+.0000000001))
    if dataCounted > 0.:
        targetCount = float(len(targetData))
        prmsd = (sumDistanceSquared/dataCounted)**0.5
        radiicomp = (sumRadiiDiff/dataCounted)**0.5
        coverage = float(len(mappedIndices)) / targetCount
        #percentage of length of target from first covered to last covered
        span = float(max(mappedIndices) - min(mappedIndices) + 1.) / targetCount
        wrmsd = (sumWeighted/dataCounted)**0.5
        less1 = float(withinOne)/dataCounted
        lessrad = float(withinRadius)/dataCounted
    else:
        prmsd = "err"
        radiicomp = "err"
        coverage = "err"
        span = "err"
        wrmsd = "err"
        less1 = "err"
        lessrad = "err"
    return prmsd, coverage, span, wrmsd, less1, lessrad, radiicomp
def checkPathBarriers(prefix):
    '''classifies every pore path of a structure against its two barriers.

    prefix -- name stem; the files read are prefix + ".nocav.tst.findholes.log",
      "planes_" + prefix + ".pdb" and the numbered pore files
      prefix + ".nocav.tst.<n>.pore.py", starting at n = 1 and stopping at
      the first number with no file.

    the barriers are taken from the "DUM" residues of the planes pdb, atoms
    whose name starts with "O" for one and "N" for the other; only the z
    coordinate of the first atom of each is used. for every path the number
    of crossings of each barrier (countCrossingsZ) and the position of its
    two end points (below both, between, above both barriers) are written to
    the .membranehole.log file, and the path is also listed in exactly one
    of the good/side/bad logs. nothing is returned; the list of side path
    numbers is printed.
    '''
    tstName = prefix + ".nocav.tst"
    findHolesName = tstName + ".findholes.log"
    findHolesFile = open(findHolesName, 'r')
    findHolesLines = findHolesFile.readlines()
    findHolesFile.close()
    HolesName = tstName + ".membranehole.log"  # holds all the output
    goodHolesName = tstName + ".good.membranehole.log"  # just the 1 1 0 1 1
    sideHolesName = tstName + ".side.membranehole.log"  # both ends between the barriers (* * 2 * *)
    badHolesName = tstName + ".bad.membranehole.log"  # all others
    pdbWithBarriersFileName = "planes_" + prefix + ".pdb"
    pdbBarriers = pdb.pdbData(pdbWithBarriersFileName)
    #get the barriers read in and defined
    barrierAtomList = [[], []]
    for index, resName in enumerate(pdbBarriers.resNames):
        if resName == "DUM":
            if pdbBarriers.atoms[index][0] == "O":
                barrierAtomList[0].append(pdbBarriers.coords[index])
            elif pdbBarriers.atoms[index][0] == "N":
                barrierAtomList[1].append(pdbBarriers.coords[index])
    # raises IndexError here when either kind of barrier atom is missing
    barrierZ = [barrierAtomList[0][0][2], barrierAtomList[1][0][2]]
    barrierZ.sort()
    barrierSep = geometry.distL2(barrierAtomList[0][0], barrierAtomList[1][0])
    #barrier is just Z coordinate
    #setup for main loop over paths
    poreSuffix = ".pore.py"
    logFile = open(HolesName, 'w')
    goodLogFile = open(goodHolesName, 'w')
    sideLogFile = open(sideHolesName, 'w')
    badLogFile = open(badHolesName, 'w')
    #the following 5 things are calculated and written for each path, headers
    #the 6th, barrier separation, is really the same for each structure
    logFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    logFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    # the other three logs repeat the findholes header in front of the columns
    goodLogFile.write("prefix ")
    goodLogFile.write(string.strip(findHolesLines[0]) + " ")
    goodLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    goodLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    sideLogFile.write("prefix ")
    sideLogFile.write(string.strip(findHolesLines[0]) + " ")
    sideLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    sideLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    badLogFile.write("prefix ")
    badLogFile.write(string.strip(findHolesLines[0]) + " ")
    badLogFile.write("endsBeyond1count barrier1count endsBetweenCount ")
    badLogFile.write("barrier2count endsBeyond2count barrierSeparation\n")
    holeNumber = 1
    poreFile = tstName + "." + str(holeNumber) + poreSuffix
    print poreFile
    paths = []  # every path read, indexed by holeNumber - 1
    sides = []  # indices into paths of the paths classified as side
    while os.path.exists(poreFile):
        path = comparePaths.readCGOPath(poreFile)
        paths.append(path)
        intersections = [0, 0]
        for index, barrier in enumerate(barrierZ):
            intersections[index] = countCrossingsZ(path, barrier)
        # ends[0]: below both barriers, ends[1]: between, ends[2]: above both
        ends = [0, 0, 0]
        for endPoint in [path[0], path[-1]]:
            endPointZ = endPoint[2]
            if endPointZ < barrierZ[0] and endPointZ < barrierZ[1]:
                ends[0] += 1
            elif endPointZ >= barrierZ[0] and endPointZ <= barrierZ[1]:
                ends[1] += 1
            elif endPointZ > barrierZ[0] and endPointZ > barrierZ[1]:
                ends[2] += 1
        outputThisTime = str(ends[0]) + " " + str(intersections[0]) + " " + \
            str(ends[1]) + " " + str(intersections[1]) + " " + \
            str(ends[2]) + " " + str(barrierSep) + " "
        logFile.write(outputThisTime)
        logFile.write("\n")
        if ends[0] + ends[1] + ends[2] != 2:
            print "problems sorting out the ends"
        if ends[0] == 1 and ends[2] == 1 and intersections == [1, 1]:
            # it is 'good'
            goodLogFile.write(prefix + " ")
            goodLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            goodLogFile.write(outputThisTime + "\n")
        elif ends[1] == 2:
            # both ends lie between the barriers
            sides.append(len(paths) - 1)
            sideLogFile.write(prefix + " ")
            sideLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            sideLogFile.write(outputThisTime + "\n")
        else:
            badLogFile.write(prefix + " ")
            badLogFile.write(string.strip(findHolesLines[holeNumber]) + " ")
            badLogFile.write(outputThisTime + "\n")
        #and that is it for this path
        holeNumber += 1  # get set up for next pass
        poreFile = tstName + "." + str(holeNumber) + poreSuffix
    print sides
    logFile.close()
    goodLogFile.close()
    sideLogFile.close()
    badLogFile.close()
def pathCrowFliesLength(path):
    '''gives the straight-line distance between the two ends of the path.

    elements 1 through 3 of the first and of the last path entry are used as
    the coordinates; an empty path raises IndexError.
    '''
    startPt, finishPt = [path[which][1:4] for which in (0, -1)]
    return geometry.distL2(startPt, finishPt)