-
Notifications
You must be signed in to change notification settings - Fork 2
/
hierarchy.py
executable file
·625 lines (594 loc) · 27.9 KB
/
hierarchy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
#!/usr/bin/env python2.7
#Ryan G. Coleman, Brian K. Shoichet Lab
#uses mol2 file to generate a hierarchy
import string, sys
from unionfind2 import unionFind
import geometry_basic
import buckets
import gzip
import operator
import math
import time
import shortestpaths
def printClusterHelper(clusterList):
    '''debugging helper, prints one "pymol out.???.mol2 ..." line per cluster.

    clusterList is an iterable of clusters, each cluster an iterable of
    conformation numbers. The numbers are zero-padded to 3 digits to form the
    out.???.mol2 file names, so the printed lines can be copy/pasted and run
    to see what the clusters are.
    run mol2hydroxyls.py -r and mol2tomultimol2.py first to get out.???.mol2
    files.
    '''
    for clusters in clusterList:
        # Each piece keeps its own trailing blank and the pieces are joined
        # with one more blank; this reproduces exactly what the old
        # comma-terminated print statements emitted.
        pieces = ["pymol "]
        for conf in clusters:
            # str.zfill replaces the deprecated string.zfill module function
            pieces.append("out." + str(conf).zfill(3) + ".mol2 ")
        pieces.append(" ")
        print(" ".join(pieces))
def computeBreaks(limitError, options):
    '''returns how many pieces the input must be broken into.

    There are 3 different requirements (sets, confs, coords); the largest
    piece count is returned so that all of them are met.

    limitError  object with getSets(), getConfs() and getCoords() giving the
                observed counts (any of them may be None).
    options     object with limitset, limitconf and limitcoord attributes
                giving the allowed maximum for each (None means no limit).

    The result is never smaller than 1.
    '''
    def piecesNeeded(count, limit):
        '''pieces needed so that count / pieces <= limit'''
        try:
            return int(math.ceil(count / float(limit)))
        except TypeError:  # means None was used for the count or the limit
            return 1

    # have to break the atomXyz into multiple sets so the hierarchy isn't
    # too big
    breaksS = piecesNeeded(limitError.getSets(), options.limitset)
    breaksC = piecesNeeded(limitError.getConfs(), options.limitconf)
    breaksX = piecesNeeded(limitError.getCoords(), options.limitcoord)
    # use the max of any of these, but never less than one piece (a count of
    # 0 would otherwise give 0 pieces)
    return max(breaksS, breaksC, breaksX, 1)
class TooBigError(Exception):
    '''error raised when the hierarchy has too many conformations of input
    after the hydroxyls have been rotated.

    Carries the three counts (confs, sets, coords) that were observed when a
    limit was hit; any of them may be None when it was not known yet.
    '''

    def __init__(self, confs, sets, coords):
        # Hand the values to Exception as well so that args is populated;
        # without this the exception cannot be copied or pickled on python2.
        super(TooBigError, self).__init__(confs, sets, coords)
        self.confs = confs
        self.coords = coords
        self.sets = sets

    def __str__(self):
        # note the order printed is confs, coords, sets
        return ", ".join(
            [repr(self.confs), repr(self.coords), repr(self.sets)])

    def getConfs(self):
        '''actually used to figure out how many sub-groups to split input
        confs'''
        return self.confs

    def getCoords(self):
        '''actually used to figure out how many sub-groups to split input
        confs'''
        return self.coords

    def getSets(self):
        '''actually used to figure out how many sub-groups to split input
        confs'''
        return self.sets
class Hierarchy(object):
    '''uses data from a mol2 file to make a hierarchy of conformations.

    The constructor does all of the analysis: it counts the distinct
    positions of every atom, finds the rigid component, groups the remaining
    atoms into conformations, groups conformations into sets (one per input
    conformation), marks clashed sets and finally clusters the sets into
    clouds. write() and writeMol2() then output the result.
    '''
    # the following constants are used when writing out the confs/groups and
    # are based on the 80 character limit in fortran. yeah seriously.
    # they might change if something serious happens but it is better that
    # they are here than hardcoded several times later.
    # these are floats so that the division works
    grGrPerLine = 17.  # group -> group children per line in output
    grCoPerLine = 9.  # group -> conf
    coCoPerLine = 9.  # conf -> conf
    coSePerLine = 8.  # conf -> set

    def __init__(self, mol2data, clashDecider,
                 tolerance=0.001, verbose=False, timeit=False,
                 limitset=9999999999,
                 limitconf=9999999999, limitcoord=9999999999, solvdata=None):
        '''takes a mol2data class as input. makes a hierarchy.

        mol2data      mol2 data object; atomXyz, atomBonds and atomType are
                      read here and the object is saved as self.mol2data
                      because it is needed during output.
        clashDecider  object whose decide(mol2data, xyz) method says whether
                      one input set is clashed/broken.
        tolerance     handed to the position counting step, which is the only
                      place it is used.
        verbose       print progress information.
        timeit        print the time each stage took.
        limitset, limitconf, limitcoord
                      upper limits on the number of input sets, conformations
                      and total coordinates. None means no limit.
        solvdata      solvation data; only stored when it is not None.

        raises TooBigError when one of the limits is exceeded; this breaks
        out of the init stage, fewer confs need to be passed in.
        '''
        if solvdata is not None:
            self.solvdata = solvdata
        lastStamp = [time.time()]  # a list so the helper below can update it

        def reportTime(label):
            '''prints the time since the previous report (only if timeit)'''
            if timeit:
                now = time.time()
                print("%s %s" % (label, now - lastStamp[0]))
                lastStamp[0] = now

        def overLimit(count, limit):
            '''a limit of None means unlimited, as it does in computeBreaks'''
            return limit is not None and count > limit

        xyzData = mol2data.atomXyz
        setCount = len(xyzData)
        # first step is to count the number of positions each atom has.
        # the tolerance is taken into account here and only here.
        totalCoords = setCount * len(xyzData[0])
        if verbose:
            print("total number of sets (complete confs): %d" % setCount)
        if overLimit(setCount, limitset):  # quit now, way too many sets
            raise TooBigError(None, setCount, totalCoords)
        if setCount > 50:
            if verbose:
                print("using faster count positions algorithm for large data")
            self._countPositions(xyzData, tolerance, verbose)
        else:
            if verbose:
                print(
                    "using default count positions algorithm for smaller data")
            self._countPositionsFewPoints(xyzData, tolerance)
        reportTime("time to count unique positions:")
        if verbose:
            print("unique positions, atoms: %s %s" % (self.posCount, setCount))
        if overLimit(totalCoords, limitcoord):
            raise TooBigError(None, setCount, totalCoords)
        # the rigid component is the biggest set of bonded non-moving atoms
        self._findRigidComponent(mol2data.atomBonds)  # also uses posCount
        reportTime("time to find rigid component:")
        if verbose:
            print("rigid atoms, others: %s %s" % (
                self.rigidComponent, self.atomsNotAssigned))
        # find bonded atoms that move together, put in conformations
        self._findRigidHeavy(mol2data.atomType)
        self.heavyAtomNums = None
        self._setHeavy(mol2data.atomType)
        self._findConformations(mol2data.atomBonds, xyzData)
        self._findSets()  # puts conformations in sets
        reportTime("time to find flexible components:")
        if verbose:
            print("total number of confs: %s" % (self.confNums[-1],))
        if overLimit(self.confNums[-1], limitconf):
            raise TooBigError(self.confNums[-1], setCount, totalCoords)
        # now want to actually put atom positions into hierarchy groups
        self._assignCoords(xyzData)
        reportTime("time to assign coords:")
        self._identifyClashSetnums(clashDecider, mol2data)
        reportTime("time to identify clash sets:")
        if verbose:
            print("number of broken/clashed sets: %d" % len(self.brokenSets))
        # the mol2data is needed during output so save it.
        self.mol2data = mol2data
        reportTime("time to identify conf atoms:")
        self.clusters = None  # used to detect if clustering/clouding was done
        self._makeClouds()  # highest level of ligand sampling
        reportTime("time to make clouds:")

    def _recordPositions(self, clusters):
        '''saves the position clusters found for one atom (in atom order)'''
        tempLists = clusters.toLists()
        self.posCount.append(len(tempLists))
        self.posClusters.append(clusters)  # save all the data since we made it
        self.posClusterLists.append(tempLists)

    def _countPositions(self, xyzData, tolerance, verbose=False):
        '''for a list of list of xyz data, count the number of positions each
        atom takes. variant used for many input conformations.

        xyzData is indexed [input conformation][atom]. For every atom its
        positions from all inputs are clustered with a unionfind data
        structure; the clustering itself is done by buckets.Bucket3d, which
        is given the (unsquared) tolerance.
        Results go into self.posCount, self.posClusters and
        self.posClusterLists, one entry per atom. verbose is not used.
        '''
        self.posCount = []
        self.posClusters = []
        self.posClusterLists = []
        for atomIndex in range(len(xyzData[0])):  # goes from 0 to atom count
            clusters = unionFind()
            xyzList = []
            for inputIndex, oneInput in enumerate(xyzData):
                clusters.find(inputIndex)  # initiate each position
                xyzList.append(oneInput[atomIndex])
            bucket = buckets.Bucket3d(xyzList, tolerance)  # made to be fast
            bucket.getWithinCluster(clusters)
            self._recordPositions(clusters)

    def _countPositionsFewPoints(self, xyzData, tolerance):
        '''for a list of list of xyz data, count the number of positions each
        atom takes. all-pairs variant used for few input conformations.

        xyzData is indexed [input conformation][atom]. Two positions of an
        atom are the same position when their squared euclidean distance is
        below the squared tolerance; equal positions are merged with a
        unionfind data structure.
        Results go into self.posCount, self.posClusters and
        self.posClusterLists, one entry per atom.
        '''
        self.posCount = []
        self.posClusters = []
        self.posClusterLists = []
        tolerance2 = tolerance ** 2.  # squared since it is compared to dist^2
        for atomIndex in range(len(xyzData[0])):  # goes from 0 to atom count
            clusters = unionFind()
            xyzList = []
            for inputIndex, oneInput in enumerate(xyzData):
                clusters.find(inputIndex)  # initiate each position
                xyzList.append(oneInput[atomIndex])
            for oneIndex, oneXyz in enumerate(xyzList):
                for twoIndex in range(oneIndex + 1, len(xyzList)):
                    distance2 = geometry_basic.distL2Squared3(
                        oneXyz, xyzList[twoIndex])
                    if distance2 < tolerance2:
                        clusters.union(oneIndex, twoIndex)
            self._recordPositions(clusters)

    def _findRigidComponent(self, atomBonds):
        '''uses bond and position count information to find largest set of
        bonded atoms that don't move. this is the rigid component.

        atomBonds[atomNum] is iterated as (otherAtomNum, bondType) pairs.
        Sets self.rigidComponent (list of atom numbers), self.atomsAssigned
        (the same as a set) and self.atomsNotAssigned (all other atoms).
        '''
        clusters = unionFind()
        for atomNum, count in enumerate(self.posCount):
            if 1 == count:
                for otherNum, bondType in atomBonds[atomNum]:
                    if 1 == self.posCount[otherNum]:
                        clusters.union(atomNum, otherNum)
        maxSize = 0
        maxCluster = None
        for clusterList in clusters.toLists():
            if len(clusterList) > maxSize:  # first of the biggest ones wins
                maxSize = len(clusterList)
                maxCluster = clusterList
        # NOTE(review): when no two bonded atoms are both fixed there is no
        # cluster at all, maxCluster stays None and set(None) below raises
        # TypeError. Behavior kept; decide what such input should do.
        self.rigidComponent = maxCluster
        self.atomsAssigned = set(self.rigidComponent)
        self.atomsNotAssigned = set()
        for atomNum in range(len(self.posCount)):
            if atomNum not in self.atomsAssigned:  # set lookup, not list scan
                self.atomsNotAssigned.add(atomNum)

    def _findRigidHeavy(self, atomTypes):
        '''finds the heavy atoms in the rigid component, puts the atom
        numbers in self.heavyRigidAtomNums and how many there are in
        self.heavyRigidCount. an atom is heavy when its type has no H in it.
        '''
        self.heavyRigidAtomNums = [
            atomNum for atomNum in self.atomsAssigned
            if atomTypes[atomNum].find('H') == -1]
        self.heavyRigidCount = len(self.heavyRigidAtomNums)

    def _setHeavy(self, atomTypes):
        '''for all atoms, finds the heavy ones (type has no H in it), put in
        self.heavyAtomNums, return them.'''
        if self.heavyAtomNums is None:  # only do this once, it never changes
            self.heavyAtomNums = [
                atomNum for atomNum, atomType in enumerate(atomTypes)
                if atomType.find('H') == -1]
        return self.heavyAtomNums

    def _findConformations(self, atomBonds, xyzData):
        '''uses bond and xyzs to figure out what sets of neighboring atoms
        move together and assign them to conformations.

        Conformation 1 is the rigid component and belongs to every input.
        For the other atoms, every group of inputs in which an atom has the
        same position gets its own unionfind; bonded atoms that share exactly
        that group of inputs are merged. Each resulting group of atoms
        becomes one conformation.
        Sets self.confNums (list of conf numbers), self.confAtoms (conf ->
        atom numbers) and self.confInput (conf -> inputs it occurs in).
        '''
        # self.rigidComponent is the list of atom numbers for the rigid comp
        # self.atomsAssigned is the set of atom numbers for the rigid comp
        # self.atomsNotAssigned is the rest of the atom numbers
        self.confNums = [1]  # rigid starts
        self.confAtoms = {1: list(self.atomsAssigned)}
        self.confInput = {1: list(range(len(xyzData)))}
        confClusters = {}
        for atomNum in self.atomsNotAssigned:
            for listInputs in self.posClusterLists[atomNum]:
                tupleInputs = tuple(listInputs)  # can't use lists as keys
                if tupleInputs not in confClusters:
                    confClusters[tupleInputs] = unionFind()
                atomGroups = confClusters[tupleInputs]
                atomGroups.find(atomNum)  # in case of singletons
                for otherNum, bondType in atomBonds[atomNum]:
                    if listInputs in self.posClusterLists[otherNum]:
                        atomGroups.union(atomNum, otherNum)
        for tupleInputs, clusters in confClusters.items():
            for atomLists in clusters.toLists():
                # make a conf for each
                thisConf = self.confNums[-1] + 1
                self.confAtoms[thisConf] = atomLists
                self.confInput[thisConf] = tupleInputs
                self.confNums.append(thisConf)
        # that's it, confs have been built

    def _findSets(self):
        '''puts conformations together into sets. self.setToConfs maps each
        input number to the list of confs that occur in that input.'''
        self.setToConfs = {}
        for confNum in self.confNums:
            for inputNum in self.confInput[confNum]:
                self.setToConfs.setdefault(inputNum, []).append(confNum)

    def _assignCoords(self, xyzData):
        '''for each conf (including rigid) give every atom an output atom
        number, counting from 1.

        Sets self.outAtoms (how many there are) and the maps
        self.outAtomOrigAtom (output atom -> original atom number),
        self.outAtomInputConf (output atom -> first input the conf occurs
        in, used later to look up the xyz), self.outAtomConfNum (output atom
        -> conf) and self.confNumAtomList (conf -> output atoms).
        xyzData is not used.
        '''
        self.outAtoms = 0  # counter to indicate how many there are
        self.outAtomOrigAtom = {}  # maps to original atom numbers from mol2
        self.outAtomInputConf = {}
        self.outAtomConfNum = {}
        self.confNumAtomList = {}
        for confNum in self.confNums:
            firstInput = self.confInput[confNum][0]
            confOutAtoms = []
            for atomNum in self.confAtoms[confNum]:
                self.outAtoms += 1
                globalAtomNum = self.outAtoms
                self.outAtomOrigAtom[globalAtomNum] = atomNum
                self.outAtomInputConf[globalAtomNum] = firstInput
                self.outAtomConfNum[globalAtomNum] = confNum
                confOutAtoms.append(globalAtomNum)
            self.confNumAtomList[confNum] = confOutAtoms

    def _identifyClashSetnums(self, clashDecider, mol2data):
        '''for each set decide if it is broken/clashed and add it to the
        self.brokenSets list if it is.

        clashDecider.decide(mol2data, xyz) is called with the coordinates of
        each set and returns true when there is a clash. mol2data is passed
        through to it and supplies atomXyz.
        '''
        self.brokenSets = [
            aSet for aSet in self.setToConfs
            if clashDecider.decide(mol2data, mol2data.atomXyz[aSet])]

    def _initClusters(self, clusters):
        '''initializes or reinitializes the clusters of conformations.

        clusters is a sequence of clusters, each a sequence of set names.
        Sets are renumbered from 1 in the order they appear, so each cluster
        covers a contiguous range of new names. Sets self.clusters (index ->
        tuple of sets), self.setNameRemap (old name -> new name),
        self.setNameOutOrder (old names in output order) and
        self.setNameFirst / self.setNameLast (index -> first / last new name
        of the cluster, inclusive).
        '''
        self.clusters = {}
        self.setNameRemap = {}  # maps old sets to new names
        self.setNameOutOrder = []
        self.setNameFirst = {}
        self.setNameLast = {}
        curSetName = 1
        for clusterIndex, cluster in enumerate(clusters):  # save each cluster
            self.clusters[clusterIndex] = tuple(cluster)
            self.setNameFirst[clusterIndex] = curSetName
            for setName in cluster:
                self.setNameRemap[setName] = curSetName  # map from old to new
                self.setNameOutOrder.append(setName)
                curSetName += 1  # advance counter
            self.setNameLast[clusterIndex] = curSetName - 1  # doing inclusive

    def _findAdditionalMatchSpheres(self, numSpheres=5, cutoff=2.5):
        '''for each cluster, find a couple matching spheres for distant atoms
        that are relatively localized in space.

        data ends up in dict self.clusterSpheres, indexed like self.clusters,
        as lists of (atom number, average xyz).
        numSpheres is the max# of spheres to add for each cluster. will not
        always find as many as requested.
        cutoff is compared to the mean distance returned by
        geometry_basic.longestAndMeanDist for the positions of an atom within
        the cluster; atoms above it get no sphere.
        '''
        atomDists = self.mol2data.distFromAtoms(self.rigidComponent)
        possibleAtoms = set(self.heavyAtomNums)  # only heavy can be matching
        possibleAtoms.difference_update(self.rigidComponent)  # no repeats
        possAtomDist = [(atom, atomDists[atom]) for atom in possibleAtoms]
        # candidates with the largest distance value are tried first
        possAtomDist.sort(key=operator.itemgetter(1), reverse=True)
        self.clusterSpheres = {}
        for clusterIndex, cluster in self.clusters.items():
            spheres = []  # cluster is a tuple of conformations
            for possibleAtom, atomDist in possAtomDist:
                xyzPositions = self.mol2data.getXyzManyConfs(
                    cluster, possibleAtom)
                if 1 == len(xyzPositions):  # singleton cluster, always okay
                    okayToAdd = True
                else:
                    longDist, meanDist = geometry_basic.longestAndMeanDist(
                        xyzPositions)
                    okayToAdd = meanDist <= cutoff
                if okayToAdd:  # either singleton or passes cutoff
                    avgPoint = geometry_basic.getAverage(xyzPositions)
                    spheres.append((possibleAtom, avgPoint))
                    if len(spheres) == numSpheres:  # done
                        break  # no need to go on
            self.clusterSpheres[clusterIndex] = spheres

    def _makeClouds(self):
        '''highest level of hierarchical ligand sampling, breaks the input
        sets into a few clouds representing gross levels of similar
        conformations. the clustering itself is mol2data.divisiveClustering.
        '''
        # NOTE(review): the result of this call was never used here. The call
        # is kept in case distFromAtoms has side effects the clustering
        # relies on - confirm and drop it if it has none.
        self.mol2data.distFromAtoms(self.rigidComponent)
        # needs switched to divisive bisecting k-means clustering to be fast.
        clusters = self.mol2data.divisiveClustering()
        self._initClusters(clusters)
        # now that we have clusters, want to find additional matching spheres
        # (with colors even though coloring is bad)
        # data ends up in dict self.clusterSpheres
        self._findAdditionalMatchSpheres()

    def _colorWriter(self, outFile, mol2data):
        '''writes the color table (T lines, ordered by color number) if it
        was changed from the default'''
        converter = mol2data.colorConverter
        if converter.colorInts != converter.colorIntsDefault:
            colors = sorted(
                converter.colorInts.items(), key=operator.itemgetter(1))
            for colorName, colorKey in colors:
                outFile.write('T %2d %8s\n' % (colorKey, colorName))

    def _allButSetWriter(self, outFile, mol2data, solvdata, setsTotal,
                         clustersTotal=0):
        '''writes the M A B X R and C lines.

        outFile is any object with a write method. setsTotal and
        clustersTotal are the counts reported on the first M line.
        '''
        # now the molecule section, facts about the whole molecule, 5 lines
        # NOTE(review): the meaning of the literal 5 is not visible here.
        outFile.write('M %16s %9s %3d %3d %6d %6d %6d %6d %6d %6d\n' % (
            mol2data.name[-16:], mol2data.protName[-9:],
            len(mol2data.atomNum), len(mol2data.bondStart),
            self.outAtoms, self.confNums[-1], setsTotal,
            self.heavyRigidCount, 5, clustersTotal))
        # second molecule line, solvation and charge data
        outFile.write('M %+9.4f %+10.3f %+10.3f %+10.3f %9.3f\n' % (
            solvdata.totalCharge, solvdata.totalPolarSolv,
            solvdata.totalApolarSolv, solvdata.totalSolv,
            solvdata.totalSurface))
        # smiles and long version of name
        outFile.write('M %-76s\n' % (mol2data.smiles[-76:]))
        outFile.write('M %-76s\n' % (mol2data.longname[-76:]))
        # best dud energy, computed and put in later. idea is to store the
        # best energy that can be found using the old DOCK/db methods and
        # make sure we aren't totally missing the ball.
        outFile.write('M %+10.4f\n' % 999.999)
        # atom line, 1 per atom
        for atomNum in range(len(mol2data.atomNum)):
            outFile.write(
                'A %3d %-4s %-5s %2d %2d %+9.4f %+10.3f %+10.3f %+10.3f %9.3f\n'
                % (mol2data.atomNum[atomNum], mol2data.atomName[atomNum],
                   mol2data.atomType[atomNum],
                   mol2data.dockNum[atomNum], mol2data.colorNum[atomNum],
                   solvdata.charge[atomNum], solvdata.polarSolv[atomNum],
                   solvdata.apolarSolv[atomNum], solvdata.solv[atomNum],
                   solvdata.surface[atomNum]))
        # now all the bonds.
        for bondNum in range(len(mol2data.bondStart)):
            outFile.write('B %3d %3d %3d %-2s\n' % (
                mol2data.bondNum[bondNum], mol2data.bondStart[bondNum],
                mol2data.bondEnd[bondNum], mol2data.bondType[bondNum]))
        # now all the coordinates. this section is complex to output since
        # not all atoms*input coordinates are output.
        for xyzNum in range(1, self.outAtoms + 1):  # output atoms count from 1
            atomNum = self.outAtomOrigAtom[xyzNum]
            inputConfNum = self.outAtomInputConf[xyzNum]
            confNum = self.outAtomConfNum[xyzNum]
            xyz = self.mol2data.atomXyz[inputConfNum][atomNum]
            # atomnum needs incremented by 1 to match the input atom#
            # amazingly these coordinates are not converted to integers.
            outFile.write('X %9d %3d %6d %+9.4f %+9.4f %+9.4f\n' % (
                xyzNum, atomNum + 1, confNum, xyz[0], xyz[1], xyz[2]))
        # rigid xyzs, or really just the ligand xyzs to be used for matching
        self.rigidNumSeen = 0
        for rigidNum in self.heavyRigidAtomNums:
            self.rigidNumSeen += 1
            atomColor = mol2data.colorNum[rigidNum]
            xyz = self.mol2data.atomXyz[0][rigidNum]
            outFile.write('R %6d %2d %+9.4f %+9.4f %+9.4f\n' % (
                self.rigidNumSeen, atomColor, xyz[0], xyz[1], xyz[2]))
        # conformations...
        for confNum in self.confNums:
            coordStart = min(self.confNumAtomList[confNum])
            coordEnd = max(self.confNumAtomList[confNum])
            outFile.write('C %6d %9d %9d\n' % (confNum, coordStart, coordEnd))

    def _setWriter(self, outFile, mol2data, solvdata):
        '''writes the S lines. no more limit here.

        Each set gets one header line (set number, number of lines that
        follow, number of confs, broken flag, mol2data.inputHydrogens value,
        energy relative to the lowest input energy) followed by lines listing
        its confs, at most coSePerLine per line.
        When clusters were made the sets are written in cluster order with
        their remapped names, so that the first/last set numbers written on
        the D lines refer to the right sets. solvdata is not used.
        '''
        perLine = int(self.coSePerLine)
        # the condition used to be inverted ("is not None"), which wrote
        # sorted order whenever clusters existed and could only fail without
        # them since setNameOutOrder does not exist then.
        clustersMade = self.clusters is not None
        if clustersMade:
            curSets = self.setNameOutOrder
        else:
            curSets = sorted(self.setToConfs)  # this order is fine
        if not curSets:
            return  # nothing to write
        brokenSets = set(self.brokenSets)  # fast membership test
        lowestEnergy = min(mol2data.inputEnergy)  # same for every set
        fullLineFormat = 'S %6d %6d %1d' + ' %6d' * perLine + '\n'
        for setIndex, curSet in enumerate(curSets):  # all sets
            if clustersMade:
                outSetNum = self.setNameRemap[curSet]
            else:
                outSetNum = setIndex + 1  # 1 index since it is fortran
            curConfs = self.setToConfs[curSet]
            totalConfs = len(curConfs)
            if 0 == totalConfs:  # no children, this shouldn't happen
                print("set %s has no conformations in it. %s" % (
                    curSet, curConfs))
                sys.exit(1)
            totalLines = int(math.ceil(totalConfs / self.coSePerLine))
            # a remainder of 0 means the last line is a full one
            lastLineLen = totalConfs % perLine or perLine
            # this makes the confEnergy a mmff internal energy, ignoring
            # hydroxyls that have been rotated for now.
            confEnergy = mol2data.inputEnergy[curSet] - lowestEnergy
            outHydro = mol2data.inputHydrogens[curSet]
            brokenSet = 1 if curSet in brokenSets else 0
            # the first line says how many more are coming and has data
            outFile.write('S %6d %6d %3d %1d %1d %+11.3f\n' % (
                outSetNum, totalLines, totalConfs, brokenSet,
                outHydro, confEnergy))
            for lineNum in range(totalLines - 1):  # each full line
                start = lineNum * perLine
                outData = [outSetNum, lineNum + 1, perLine]
                outData.extend(curConfs[start:start + perLine])
                outFile.write(fullLineFormat % tuple(outData))
            # now write last line separately and carefully
            start = (totalLines - 1) * perLine
            outData = [outSetNum, totalLines, lastLineLen]
            outData.extend(curConfs[start:start + lastLineLen])
            partLineFormat = 'S %6d %6d %1d' + ' %6d' * lastLineLen + '\n'
            outFile.write(partLineFormat % tuple(outData))

    def _cloudWriter(self, outFile, mol2data):
        '''write the cloud data (D lines).

        For each cluster one header line (cluster number, first and last set
        number, sphere count, first and last sphere number) followed by one
        line per matching sphere (sphere number, color, xyz). Sphere numbers
        run on across clusters and are counted in self.cloudNumSeen.
        '''
        self.cloudNumSeen = 0
        for clusterId in sorted(self.clusters):
            spheres = self.clusterSpheres[clusterId]
            countSph = len(spheres)
            firstSph = self.cloudNumSeen + 1
            # gets around a bug produced when countSph is 0
            maxSphCount = max(self.cloudNumSeen + countSph, firstSph)
            outFile.write('D %6d %6d %6d %3d %3d %3d\n' % (
                clusterId + 1, self.setNameFirst[clusterId],
                self.setNameLast[clusterId], countSph, firstSph,
                maxSphCount))
            for matchAtom, matchXyz in spheres:
                self.cloudNumSeen += 1  # advance counter
                atomColor = mol2data.colorNum[matchAtom]
                outFile.write('D %6d %2d %+9.4f %+9.4f %+9.4f\n' % (
                    self.cloudNumSeen, atomColor,
                    matchXyz[0], matchXyz[1], matchXyz[2]))

    def write(self, db2gzFileName, verbose=False, timeit=False,
              limitset=9999999, writeMode='w'):
        '''writes to the new db2 file format. already gzipped.

        writeMode allows append instead of write(over). verbose prints a
        message once the file is written. timeit and limitset are accepted
        but not used here.
        Prints a message and exits with status 1 when mol2data or solvdata
        is missing or when the file cannot be opened/written.
        '''
        # check the data first so that a missing piece does not leave a
        # freshly truncated output file behind
        try:
            mol2data = self.mol2data
        except AttributeError:
            print('mol2data missing when output stage encountered.(3)')
            sys.exit(1)
        try:
            solvdata = self.solvdata
        except AttributeError:
            print('solvdata missing when output stage encountered.(4)')
            sys.exit(1)
        clustersTotal = 0
        if self.clusters is not None:  # if makeclouds was run
            clustersTotal = len(self.clusters)
        try:  # to open the file
            outFile = gzip.GzipFile(db2gzFileName, writeMode)
            try:
                # check if default colors changed, write if they have.
                self._colorWriter(outFile, mol2data)
                self._allButSetWriter(
                    outFile, mol2data, solvdata,
                    len(self.setToConfs), clustersTotal)
                self._setWriter(outFile, mol2data, solvdata)
                if self.clusters is not None:  # if makeclouds was run
                    self._cloudWriter(outFile, mol2data)
                outFile.write('E\n')  # write the E line here
            finally:
                outFile.close()  # also closes when writing failed part way
        except IOError:
            print("error opening output file %s" % (db2gzFileName,))
            sys.exit(1)
        if verbose:
            print(db2gzFileName + " file written out")

    def writeMol2(self, mol2fileName, verbose=False, timeit=False,
                  separateClusters=True):
        '''writes multi-mol2 files instead of db2 files. useful for debugging
        the clustering (or other procedures).

        With separateClusters (and only if clusters were made) each cluster
        is written to its own file, named mol2fileName with a prefix of
        cluster.00001. etc. Otherwise everything is written to mol2fileName.
        verbose prints the name of each file written. timeit is not used.
        Prints a message and exits with status 1 when mol2data or solvdata
        is missing or when a file cannot be opened/written.
        '''
        if self.clusters is None:
            separateClusters = False  # don't write non-existent clusters
        try:
            mol2data = self.mol2data
        except AttributeError:
            print('mol2data missing when output stage encountered.(1)')
            sys.exit(1)
        try:
            # not used below, but its absence has always been an error here
            solvdata = self.solvdata
        except AttributeError:
            print('solvdata missing when output stage encountered.(2)')
            sys.exit(1)
        currentName = mol2fileName
        try:  # to open the file(s)
            if separateClusters:
                for clusterId in sorted(self.clusters):
                    currentName = "cluster." + str(clusterId + 1).zfill(5) \
                        + "." + mol2fileName
                    # set names are 1-indexed (for fortran), the list is not
                    outNums = [
                        self.setNameOutOrder[setName - 1]
                        for setName in range(self.setNameFirst[clusterId],
                                             self.setNameLast[clusterId] + 1)]
                    outFile = open(currentName, 'w')
                    try:
                        mol2data.writeMol2File(outFile, outNums)
                    finally:
                        outFile.close()
                    if verbose:
                        print(currentName + " file written out")
            else:
                outFile = open(currentName, 'w')
                try:
                    mol2data.writeMol2File(outFile)  # just write them all
                finally:
                    outFile.close()
                if verbose:
                    print(currentName + " file written out")
        except IOError:
            print("error opening output file %s" % (currentName,))
            sys.exit(1)
if __name__ == "__main__":
    # nothing to do if called from commandline, this module only provides
    # the classes and functions above for other scripts to import
    pass