def _countPositionsFewPoints(self, xyzData, tolerance):
    '''Count how many distinct positions each atom takes.

    xyzData is a list of lists of xyz data: the outer list is indexed by
    position (one entry per mol2 record), the inner list by atom. Two
    positions of the same atom are treated as equal when their squared
    euclidean distance is below tolerance squared. Equal positions are
    merged with a unionFind, so equality is transitive (clustering).

    Nothing is returned; the results are stored on self, one entry per atom:
      posCount         -- number of clusters found
      posClusters      -- the unionFind object itself
      posClusterLists  -- the clusters as returned by unionFind.toLists()
    '''
    self.posCount = []
    self.posClusters = []  # kept since the work was done anyway
    self.posClusterLists = []  # kept since the work was done anyway
    squaredTolerance = tolerance ** 2.  # compared against squared distances
    positionTotal = len(xyzData)
    for atomIndex in range(len(xyzData[0])):  # 0 to atom count
        atomClusters = unionFind()
        atomCoords = []
        for positionIndex, position in enumerate(xyzData):
            atomClusters.find(positionIndex)  # initiate each position
            atomCoords.append(position[atomIndex])
        # compare every unordered pair of positions exactly once
        for firstIndex, firstCoord in enumerate(atomCoords):
            for secondIndex in range(firstIndex + 1, positionTotal):
                squaredDistance = geometry_basic.distL2Squared3(
                    firstCoord, atomCoords[secondIndex])
                if squaredDistance < squaredTolerance:
                    atomClusters.union(firstIndex, secondIndex)
        groupedPositions = atomClusters.toLists()
        self.posCount.append(len(groupedPositions))
        self.posClusters.append(atomClusters)
        self.posClusterLists.append(groupedPositions)
def getRMSD(self, xyzOne, xyzTwo):
    '''Return the RMSD between two conformations.

    xyzOne and xyzTwo are keys/indices into self.atomXyz; each entry is a
    sequence of per-atom coordinates. Atoms are paired by index, and the
    atom count is taken from the xyzOne conformation. Raises
    ZeroDivisionError if that conformation has no atoms.
    '''
    atomCount = len(self.atomXyz[xyzOne])
    # start from 0.0 so the running total is a float, summed in atom order
    squaredTotal = sum(
        (geometry_basic.distL2Squared3(
            self.atomXyz[xyzOne][atomIndex], self.atomXyz[xyzTwo][atomIndex])
         for atomIndex in range(atomCount)),
        0.0)
    return (squaredTotal / atomCount) ** 0.5
def getWithin(self):
    '''Return the set of point-index pairs that lie within the tolerance.

    Only points sharing a bucket in self.possiblyNearbyPoints are compared.
    Each bucket is a sequence of indices into self.pointList. A pair is
    reported when distL2Squared3 of the two points is below self.tolerance2.
    Every pair is returned as a tuple (smallerIndex, largerIndex), so the
    same pair found in several buckets is stored only once.

    Slower than the union-find based variant but needs no unionfind data
    structure; kept for testing, etc.
    '''
    returnPairs = set()
    for bucket in self.possiblyNearbyPoints:
        for oneIndex, oneXyzIndex in enumerate(bucket):
            oneXyz = self.pointList[oneXyzIndex]
            for twoIndex in range(oneIndex + 1, len(bucket)):
                twoXyzIndex = bucket[twoIndex]
                twoXyz = self.pointList[twoXyzIndex]
                if distL2Squared3(oneXyz, twoXyz) < self.tolerance2:
                    # Order the pair without rebinding oneXyzIndex: swapping
                    # the loop variables in place would make every later
                    # match for this reference point use the wrong index.
                    if twoXyzIndex < oneXyzIndex:
                        returnPairs.add((twoXyzIndex, oneXyzIndex))
                    else:
                        returnPairs.add((oneXyzIndex, twoXyzIndex))
    return returnPairs
def decide(self, mol2data, xyzData):
    '''Return True (clashed) or False (not clashed) according to self.rules.

    mol2data is the mol2.Mol2 object; this method reads mol2data.atomType
    and calls mol2data.bondsBetweenActual(i, j). xyzData is a list of
    coords, one per atom.

    Rule fields used here (NOTE(review): inferred from how they are read,
    confirm against the rule parser):
      rule[0]  "max" or "min" -- kind of distance constraint
      rule[1]  bond count that the actual bond distance is compared to
      rule[2]  required three-way comparison result (-1, 0 or 1)
      rule[3]  atom type prefix for the first atom, or "*" for any
      rule[4]  atom type prefix for the second atom, or "*" for any
      rule[6]  distance limit, compared to the squared euclidean distance

    Returns as soon as the first broken rule is found.
    '''
    atomCount = len(xyzData)
    # atomNum -> list of (otherNum, squared distance, bondDist); every pair
    # is stored under both of its atoms
    dists = defaultdict(list)
    for atomNumOne in range(atomCount):
        for atomNumTwo in range(atomNumOne + 1, atomCount):
            thisDist = distL2Squared3(
                xyzData[atomNumOne], xyzData[atomNumTwo])
            bondDist = mol2data.bondsBetweenActual(atomNumOne, atomNumTwo)
            dists[atomNumOne].append((atomNumTwo, thisDist, bondDist))
            dists[atomNumTwo].append((atomNumOne, thisDist, bondDist))
    for rule in self.rules:
        for atomNum in range(atomCount):
            # match atom types first; the type must start with the pattern
            if rule[3] != "*" and \
                    not mol2data.atomType[atomNum].startswith(rule[3]):
                continue
            for otherNum, distSquared, bondDist in dists[atomNum]:
                if rule[4] != "*" and \
                        not mol2data.atomType[otherNum].startswith(rule[4]):
                    continue
                if rule[0] == "max":  # is a max distance constraint
                    brokeRule = distSquared > rule[6]
                elif rule[0] == "min":  # is a min distance constraint
                    brokeRule = distSquared < rule[6]
                else:
                    brokeRule = False
                if not brokeRule:
                    continue
                # the rule only applies when the right number of bonds lie
                # between the two atoms; this is the classic three-way
                # comparison (-1, 0, 1) of bondDist against rule[1]
                bondCompare = (bondDist > rule[1]) - (bondDist < rule[1])
                if bondCompare == rule[2]:
                    return True  # can quit after first broken rule
    # if everything passed, return False indicating no clashes
    return False
def getWithinCluster(self, clusters): '''souped up for speed version of code. puts nearby points into the unionfind data structure 'clusters'. does every possible shortcut i can think of for now. super fast now.''' #for bucket in self.possiblyNearbyPoints: # print len(bucket), #print "bucket lengths" for bucket in self.possiblyNearbyPoints: #print len(bucket), len(self.pointList) indicesLeft = set(xrange(len(bucket))) while len(indicesLeft) > 0: oneIndex = indicesLeft.pop() oneXyzIndex = bucket[oneIndex] if len(bucket) > self.bigBucket: thisCluster = clusters.getList(oneXyzIndex) #this is O(n), don't do lots #print "trying to skip", len(thisCluster), len(bucket) if len(thisCluster) >= len(bucket): #means we should at least quit #doing this bucket, nothing left to union break oneXyz = self.pointList[oneXyzIndex] for twoIndex in xrange(len(bucket)): twoXyzIndex = bucket[twoIndex] if len(bucket) > self.bigBucket: if twoXyzIndex in thisCluster: continue #skip this iteration of the twoIndex for loop twoXyz = self.pointList[twoXyzIndex] if distL2Squared3(oneXyz, twoXyz) < self.tolerance2: clusters.union(oneXyzIndex, twoXyzIndex) try: indicesLeft.remove(twoIndex) except KeyError: pass #really quite okay if len(bucket) == len(self.pointList): #might be able to quit now if all #unioned together already after a single pass. clusterList = clusters.toLists() if len(clusterList) == 1: #only one cluster means quit now return None #just quit entirely