def writeModel(topology, positions, file=sys.stdout, modelIndex=1, keepIds=False):
    """Print one ATOM record per atom of a model in PDBx/mmCIF form.

    Parameters
    ----------
    topology : Topology
        The Topology whose chains, residues and atoms are written
    positions : list
        One position per atom, consumed in the order atoms are visited
        (chain by chain, residue by residue).  A Quantity is converted to
        angstroms first; any other sequence is used as given.
    file : file=stdout
        Destination the records are printed to
    modelIndex : int=1
        The model number written as the last field of every record
    keepIds : bool=False
        If True, the chain and residue IDs stored in the Topology are
        written unchanged.  If False, chains are lettered 'A'..'Z'
        (wrapping after 26 chains) and residues are numbered from 1 within
        each chain.  Warning: with keepIds=True it is up to the caller to
        make sure the IDs satisfy the requirements of the PDBx/mmCIF
        format.  Otherwise, the output file will be invalid.

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms, or if
        the norm of any position is NaN or infinite.
    """
    if sum(1 for _ in topology.atoms()) != len(positions):
        raise ValueError('The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(angstroms)
    # Reject unusable coordinates before anything is printed.
    checks = ((math.isnan, 'Particle position is NaN'),
              (math.isinf, 'Particle position is infinite'))
    for isBad, message in checks:
        if any(isBad(norm(pos)) for pos in positions):
            raise ValueError(message)

    record = "ATOM %5d %-3s %-4s . %-4s %s ? %5s . %10.4f %10.4f %10.4f 0.0 0.0 ? ? ? ? ? . %5s %4s %s %4s %5d"
    written = 0  # atoms emitted so far; also the index of the next position
    for chainNumber, chain in enumerate(topology.chains()):
        chainName = chain.id if keepIds else chr(ord('A') + chainNumber % 26)
        for sequenceNumber, res in enumerate(list(chain.residues()), 1):
            resId = res.id if keepIds else sequenceNumber
            for atom in res.atoms():
                coords = positions[written]
                symbol = '?' if atom.element is None else atom.element.symbol
                fields = (written + 1, symbol, atom.name, res.name, chainName, resId,
                          coords[0], coords[1], coords[2],
                          resId, res.name, chainName, atom.name, modelIndex)
                print(record % fields, file=file)
                written += 1
def isHbond(d, h, a):
    """Apply a distance-and-angle test to three positions.

    Returns False when d and a are more than 0.35 nm apart.  Otherwise
    returns True when the angle between the direction d->h and the
    direction h->a is below 50 degrees.
    NOTE(review): d, h, a are presumably donor, hydrogen and acceptor
    positions -- confirm against the callers.
    """
    tooFarApart = norm(d - a) > 0.35 * nanometer
    if tooFarApart:
        return False
    # Unit vectors along d->h and h->a.
    fromDtoH = h - d
    fromHtoA = a - h
    fromDtoH /= norm(fromDtoH)
    fromHtoA /= norm(fromHtoA)
    bendAngle = acos(dot(fromDtoH, fromHtoA))
    return bendAngle < 50 * degree
def isHbond(d, h, a):
    """Return True if three positions pass a distance and an angle cutoff.

    The d-a separation must not exceed 0.35 nm, and the angle between the
    normalized vectors d->h and h->a must be smaller than 50 degrees.
    NOTE(review): the argument names suggest donor / hydrogen / acceptor;
    verify against the callers.
    """
    if norm(d-a) <= 0.35*nanometer:
        # Normalize both bond directions before measuring the angle.
        firstLeg = h-d
        firstLeg /= norm(firstLeg)
        secondLeg = a-h
        secondLeg /= norm(secondLeg)
        return acos(dot(firstLeg, secondLeg)) < 50*degree
    else:
        return False
def writeModel(topology, positions, file=sys.stdout, modelIndex=None):
    """Write out a model to a PDB file.

    Chains are lettered 'A'..'Z' (wrapping after 26 chains) and residues are
    numbered from 1 within each chain.  A TER record is written after the
    last residue of every chain and consumes one serial number.

    Parameters:
     - topology (Topology) The Topology defining the model to write
     - positions (list) The list of atomic positions to write.  A Quantity is
       converted to angstroms; any other sequence is used as given.
     - file (file=stdout) A file to write the model to
     - modelIndex (int=None) If not None, the model will be surrounded by
       MODEL/ENDMDL records with this index

    Raises:
     - ValueError if the number of positions differs from the number of
       atoms, or if the norm of any position is NaN or infinite
     - AssertionError if a formatted ATOM record is not exactly 80 characters
    """
    if len(list(topology.atoms())) != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(angstroms)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    atomIndex = 1
    posIndex = 0
    # file.write() is used instead of the print statement so the function
    # behaves the same under Python 2 and Python 3.
    if modelIndex is not None:
        file.write("MODEL     %4d\n" % modelIndex)
    for (chainIndex, chain) in enumerate(topology.chains()):
        chainName = chr(ord('A') + chainIndex % 26)
        residues = list(chain.residues())
        for (resIndex, res) in enumerate(residues):
            # The residue name column is three characters wide.
            resName = res.name[:3]
            for atom in res.atoms():
                # Short names that start with a letter are shifted right by
                # one column unless the element symbol has two letters.
                if len(atom.name) < 4 and atom.name[:1].isalpha() and (
                        atom.element is None or len(atom.element.symbol) < 2):
                    atomName = ' ' + atom.name
                elif len(atom.name) > 4:
                    atomName = atom.name[:4]
                else:
                    atomName = atom.name
                coords = positions[posIndex]
                if atom.element is not None:
                    symbol = atom.element.symbol
                else:
                    symbol = ' '
                # Serial and residue numbers wrap around so they never
                # outgrow their fixed-width columns.
                line = "ATOM  %5d %-4s %3s %s%4d    %s%s%s  1.00  0.00          %2s  " % (
                    atomIndex % 100000, atomName, resName, chainName,
                    (resIndex + 1) % 10000, _format_83(coords[0]),
                    _format_83(coords[1]), _format_83(coords[2]), symbol)
                assert len(line) == 80, 'Fixed width overflow detected'
                file.write(line + "\n")
                posIndex += 1
                atomIndex += 1
            if resIndex == len(residues) - 1:
                # Terminate the chain; wrap the numbers exactly like ATOM.
                file.write("TER   %5d      %3s %s%4d\n" % (
                    atomIndex % 100000, resName, chainName,
                    (resIndex + 1) % 10000))
                atomIndex += 1
    if modelIndex is not None:
        file.write("ENDMDL\n")
def writeModel(self, positions, unitCellDimensions=None, periodicBoxVectors=None):
    """Append one model (frame) to the DCD file and refresh its header.

    The periodic box can be given either as unit cell dimensions (for a
    rectangular box) or as the full set of box vectors (for an arbitrary
    triclinic box); the box vectors win when both are given.  If neither is
    specified, the box vectors stored in the Topology are used.  Whatever is
    passed in, no box record is written when the Topology has no periodic
    box vectors.

    Parameters:
     - positions (list) The list of atomic positions to write.  A Quantity is
       converted to nanometers; values are scaled by 10 when written.
     - unitCellDimensions (Vec3=None) The dimensions of the crystallographic
       unit cell, as a Quantity or as plain numbers taken to be nanometers.
     - periodicBoxVectors (tuple of Vec3=None) The vectors defining the
       periodic box.

    Raises:
     - ValueError if the number of positions differs from the number of
       atoms, or if the norm of any position is NaN or infinite
    """
    expectedAtoms = len(list(self._topology.atoms()))
    if expectedAtoms != len(positions):
        raise ValueError('The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    out = self._file

    # Rewrite the two header integers that depend on the number of models:
    # the model count at byte offset 8 and the step value at byte offset 20.
    self._modelCount += 1
    currentStep = self._firstStep+self._modelCount*self._interval
    out.seek(8, os.SEEK_SET)
    out.write(struct.pack('<i', self._modelCount))
    out.seek(20, os.SEEK_SET)
    out.write(struct.pack('<i', currentStep))

    # Everything else is appended at the end of the file.
    out.seek(0, os.SEEK_END)
    boxVectors = self._topology.getPeriodicBoxVectors()
    if boxVectors is not None:
        if periodicBoxVectors is not None:
            boxVectors = periodicBoxVectors
        elif unitCellDimensions is not None:
            if is_quantity(unitCellDimensions):
                unitCellDimensions = unitCellDimensions.value_in_unit(nanometers)
            # A rectangular box expressed as three axis-aligned vectors.
            boxVectors = (Vec3(unitCellDimensions[0], 0, 0),
                          Vec3(0, unitCellDimensions[1], 0),
                          Vec3(0, 0, unitCellDimensions[2]))*nanometers
        (a_nm, b_nm, c_nm, alpha, beta, gamma) = computeLengthsAndAngles(boxVectors)
        # computeLengthsAndAngles returns unitless nanometers, but angstroms are needed here.
        a_length = a_nm * 10.
        b_length = b_nm * 10.
        c_length = c_nm * 10.
        # The angles are stored as sin(pi/2 - angle).
        storedAngles = [math.sin(math.pi/2-angle) for angle in (gamma, beta, alpha)]
        out.write(struct.pack('<i6di', 48, a_length, storedAngles[0], b_length,
                              storedAngles[1], storedAngles[2], c_length, 48))

    # One length-delimited float32 record per coordinate axis.
    recordMarker = struct.pack('<i', 4*len(positions))
    for axis in range(3):
        out.write(recordMarker)
        array.array('f', (10*x[axis] for x in positions)).tofile(out)
        out.write(recordMarker)
def writeModel(topology, positions, file=sys.stdout, modelIndex=None):
    """Write out a model to a PDB file.

    Chains are lettered 'A'..'Z' (wrapping after 26 chains) and residues are
    numbered from 1 within each chain.  A TER record is written after the
    last residue of every chain and consumes one serial number.

    Parameters:
     - topology (Topology) The Topology defining the model to write
     - positions (list) The list of atomic positions to write.  A Quantity is
       converted to angstroms; any other sequence is used as given.
     - file (file=stdout) A file to write the model to
     - modelIndex (int=None) If not None, the model will be surrounded by
       MODEL/ENDMDL records with this index

    Raises:
     - ValueError if the number of positions differs from the number of
       atoms, or if the norm of any position is NaN or infinite
     - AssertionError if a formatted ATOM record is not exactly 80 characters
    """
    if len(list(topology.atoms())) != len(positions):
        raise ValueError('The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(angstroms)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    atomIndex = 1
    posIndex = 0
    # file.write() replaces the Python 2 print statement so the function
    # works unchanged under both Python 2 and Python 3.
    if modelIndex is not None:
        file.write("MODEL     %4d\n" % modelIndex)
    for (chainIndex, chain) in enumerate(topology.chains()):
        chainName = chr(ord('A')+chainIndex%26)
        residues = list(chain.residues())
        for (resIndex, res) in enumerate(residues):
            # The residue name column holds at most three characters.
            resName = res.name[:3]
            for atom in res.atoms():
                # Short names that start with a letter are shifted right by
                # one column unless the element symbol has two letters.
                if len(atom.name) < 4 and atom.name[:1].isalpha() and (atom.element is None or len(atom.element.symbol) < 2):
                    atomName = ' '+atom.name
                elif len(atom.name) > 4:
                    atomName = atom.name[:4]
                else:
                    atomName = atom.name
                coords = positions[posIndex]
                if atom.element is not None:
                    symbol = atom.element.symbol
                else:
                    symbol = ' '
                # Numbers wrap so they always fit their fixed-width columns.
                line = "ATOM  %5d %-4s %3s %s%4d    %s%s%s  1.00  0.00          %2s  " % (
                    atomIndex%100000, atomName, resName, chainName, (resIndex+1)%10000,
                    _format_83(coords[0]), _format_83(coords[1]), _format_83(coords[2]), symbol)
                assert len(line) == 80, 'Fixed width overflow detected'
                file.write(line+"\n")
                posIndex += 1
                atomIndex += 1
            if resIndex == len(residues)-1:
                # Close the chain, wrapping the numbers the same way as ATOM.
                file.write("TER   %5d      %3s %s%4d\n" % (atomIndex%100000, resName, chainName, (resIndex+1)%10000))
                atomIndex += 1
    if modelIndex is not None:
        file.write("ENDMDL\n")
def writeModel(topology, positions, file=sys.stdout, modelIndex=1, keepIds=False):
    """Print the ATOM records of one model in PDBx/mmCIF form.

    Parameters
    ----------
    topology : Topology
        The Topology supplying the chains, residues and atoms to write
    positions : list
        One position per atom, read in the order atoms are visited (chain
        by chain, residue by residue).  A Quantity is converted to
        angstroms; any other sequence is used as given.
    file : file=stdout
        Where the records are printed
    modelIndex : int=1
        The model number placed in the last field of every record
    keepIds : bool=False
        If True, the chain IDs, residue IDs and residue insertion codes
        from the Topology are written (an empty insertion code becomes
        '.').  If False, chains are lettered 'A'..'Z' (wrapping after 26
        chains), residues are numbered from 1 within each chain and the
        insertion code is always '.'.  Warning: with keepIds=True it is up
        to the caller to make sure the IDs satisfy the requirements of the
        PDBx/mmCIF format.  Otherwise, the output file will be invalid.

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms, or if
        the norm of any position is NaN or infinite.
    """
    numAtoms = len(list(topology.atoms()))
    if numAtoms != len(positions):
        raise ValueError('The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(angstroms)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')

    template = "ATOM %5d %-3s %-4s . %-4s %s ? %5s %s %10.4f %10.4f %10.4f 0.0 0.0 ? ? ? ? ? . %5s %4s %s %4s %5d"
    serial = 1  # record serial number; the matching position is serial - 1
    for chainPosition, chain in enumerate(topology.chains()):
        if keepIds:
            chainName = chain.id
        else:
            chainName = chr(ord('A')+chainPosition%26)
        for residuePosition, res in enumerate(list(chain.residues())):
            if keepIds:
                resId = res.id
                resIC = res.insertionCode if len(res.insertionCode) > 0 else '.'
            else:
                resId = residuePosition + 1
                resIC = '.'
            for atom in res.atoms():
                coords = positions[serial - 1]
                element = atom.element
                symbol = element.symbol if element is not None else '?'
                values = (serial, symbol, atom.name, res.name, chainName, resId, resIC,
                          coords[0], coords[1], coords[2],
                          resId, res.name, chainName, atom.name, modelIndex)
                print(template % values, file=file)
                serial += 1
def _findUnoccupiedDirection(point, positions):
    """Given a point in space and a list of atom positions, find the direction in which the local density of atoms is lowest.

    Every position farther than 0.1 nm from the point pushes the result away
    from itself with a weight that falls off with distance (delta / d**4).
    The accumulated vector is returned normalized to unit length.

    If nothing contributes (no positions, all of them within 0.1 nm, or the
    contributions cancel exactly) the zero vector is returned instead of
    dividing by zero, so callers can test the length of the result.

    Both arguments must be Quantities convertible to nanometers; the result
    is a unitless mm.Vec3.
    """
    point = point.value_in_unit(unit.nanometers)
    direction = mm.Vec3(0, 0, 0)
    for pos in positions.value_in_unit(unit.nanometers):
        delta = pos-point
        distance = unit.norm(delta)
        # Skip positions that (nearly) coincide with the point itself.
        if distance > 0.1:
            distance2 = distance*distance
            direction -= delta/(distance2*distance2)
    length = unit.norm(direction)
    if length > 0:
        direction /= length
    return direction
def writeModel(topology, positions, file=sys.stdout):
    """Write the atom count and one fixed-width record per atom of a model.

    The first line printed is the number of positions.  Each following line
    holds, in order: the residue ID converted to an integer, the residue
    name (first 4 characters), the atom name (first 5 characters), a running
    atom number starting at 1, and the x, y and z coordinates with three
    decimals.
    NOTE(review): this layout resembles the atom lines of a GROMACS .gro
    file rather than PDB ATOM records -- confirm against the enclosing
    module.

    Parameters
    ----------
    topology : Topology
        The Topology defining the model to write
    positions : list
        The list of atomic positions to write.  A Quantity is converted to
        nanometers; any other sequence is used as given.
    file : file=stdout
        A file to write the model to

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms, if the
        norm of any position is NaN or infinite, or (from int()) if a
        residue ID cannot be parsed as an integer.
    """
    if len(list(topology.atoms())) != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometer)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    print('%i' % len(positions), file=file)
    atomIndex = 1
    for chain in topology.chains():
        for res in chain.residues():
            resName = res.name[:4]
            resId = res.id
            for atom in res.atoms():
                atomName = atom.name[:5]
                # atomIndex is 1-based; positions are 0-based.
                coords = positions[atomIndex - 1]
                line = '%5i%5s%5s%5i%8.3f%8.3f%8.3f' % (
                    int(resId), resName, atomName, atomIndex, coords[0],
                    coords[1], coords[2])
                print(line, file=file)
                atomIndex += 1
def computeLengthsAndAngles(periodicBoxVectors):
    """Turn three periodic box vectors into edge lengths and angles.

    A Quantity is converted to nanometers first; any other sequence of
    three vectors is used as given.  Returns the tuple
    (a_length, b_length, c_length, alpha, beta, gamma), where alpha is the
    angle between b and c, beta between c and a, and gamma between a and b.
    Lengths are plain numbers (nanometers for Quantity input) and angles
    are in radians.
    """
    if is_quantity(periodicBoxVectors):
        vectors = periodicBoxVectors.value_in_unit(nanometers)
    else:
        vectors = periodicBoxVectors
    (a, b, c) = vectors
    a_length, b_length, c_length = norm(a), norm(b), norm(c)

    # Each angle comes from the normalized dot product of the two vectors
    # that enclose it.
    cosAlpha = dot(b, c) / (b_length * c_length)
    cosBeta = dot(c, a) / (c_length * a_length)
    cosGamma = dot(a, b) / (a_length * b_length)
    return (a_length, b_length, c_length,
            math.acos(cosAlpha), math.acos(cosBeta), math.acos(cosGamma))
def computeLengthsAndAngles(periodicBoxVectors):
    """Convert periodic box vectors to lengths and angles.

    Quantity input is converted to nanometers; other input is unpacked as
    three vectors and used as is.  The result is the 6-tuple
    (a_length, b_length, c_length, alpha, beta, gamma): the three vector
    norms followed by the angles b^c, c^a and a^b in radians.
    """
    a, b, c = (periodicBoxVectors.value_in_unit(nanometers)
               if is_quantity(periodicBoxVectors) else periodicBoxVectors)
    a_length = norm(a)
    b_length = norm(b)
    c_length = norm(c)
    # (first vector, second vector, their lengths) for alpha, beta, gamma.
    pairs = ((b, c, b_length, c_length),
             (c, a, c_length, a_length),
             (a, b, a_length, b_length))
    alpha, beta, gamma = [math.acos(dot(v, w)/(vLength*wLength))
                          for (v, w, vLength, wLength) in pairs]
    return (a_length, b_length, c_length, alpha, beta, gamma)
def writeModel(self, positions, unitCellDimensions=None):
    """Write out a model to the DCD file.

    The header integers at byte offsets 8 and 20 are rewritten with the new
    model count and step value, then the frame is appended to the file.

    Parameters:
     - positions (list) The list of atomic positions to write.  A Quantity is
       converted to nanometers; values are scaled by 10 when written.
     - unitCellDimensions (Vec3=None) The dimensions of the crystallographic
       unit cell, given either as a Quantity or as a unitless Vec3 that is
       interpreted as nanometers.  If None, the dimensions specified in the
       Topology will be used.  Regardless of the value specified, no
       dimensions will be written if the Topology does not represent a
       periodic system.

    Raises:
     - ValueError if the number of positions differs from the number of
       atoms, or if the norm of any position is NaN or infinite
    """
    if len(list(self._topology.atoms())) != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    file = self._file

    # Update the header.

    self._modelCount += 1
    file.seek(8, os.SEEK_SET)
    file.write(struct.pack('<i', self._modelCount))
    file.seek(20, os.SEEK_SET)
    file.write(
        struct.pack('<i',
                    self._firstStep + self._modelCount * self._interval))

    # Write the data.

    file.seek(0, os.SEEK_END)
    boxSize = self._topology.getUnitCellDimensions()
    if boxSize is not None:
        if unitCellDimensions is not None:
            boxSize = unitCellDimensions
        if is_quantity(boxSize):
            size = boxSize.value_in_unit(angstroms)
        else:
            # A unitless Vec3 is taken to be in nanometers (the same
            # convention applied to unitless positions below), so scale it
            # to angstroms instead of calling .value_in_unit on it.
            size = [10 * boxSize[i] for i in range(3)]
        file.write(
            struct.pack('<i6di', 48, size[0], 0, size[1], 0, 0, size[2], 48))
    # One length-delimited float32 record per coordinate axis.
    length = struct.pack('<i', 4 * len(positions))
    for i in range(3):
        file.write(length)
        data = array.array('f', (10 * x[i] for x in positions))
        data.tofile(file)
        file.write(length)
def avgAbsCaDeviation(contacts, positions):
    """Return the mean absolute deviation of contact distances from their references.

    contacts is an iterable of (a1, a2, d) triples: two indices into
    positions and a reference distance d given as a plain number.
    positions is indexable by a1 and a2.

    For every contact the distance between the two positions is measured.
    If the measurement carries units, its numeric value in its own unit is
    used, so d must be expressed in that same unit.  The result is
    np.mean of |distance - d| over all contacts.
    """
    dev = []
    for a1, a2, d in contacts:
        distance = unit.norm(positions[a1] - positions[a2])
        # Strip the unit directly instead of parsing the printed form.
        if unit.is_quantity(distance):
            distance = distance.value_in_unit(distance.unit)
        dev.append(np.abs(float(distance) - d))
    return np.mean(dev)
def addHarmonicConstraint(harmonicforce, pairlist, positions, threshold, k):
    """Add a harmonic bond for every listed pair closer than ``threshold``.

    For each (i, j) in pairlist the distance between positions[i] and
    positions[j] is measured.  When it is below threshold,
    harmonicforce.addBond is called with the two indices, that distance in
    nanometers and k converted to kJ/nm^2/mol.  The force constant and
    every bond added are reported on standard output.
    """
    print('Constraint force constant =', k)
    for first, second in pairlist:
        separation = unit.norm(positions[first]-positions[second])
        if not separation < threshold:
            continue
        restLength = separation.value_in_unit(unit.nanometer)
        forceConstant = k.value_in_unit(unit.kilojoule/unit.nanometer**2/unit.mole)
        harmonicforce.addBond(first, second, restLength, forceConstant)
        print("added harmonic bond between", first, second, 'with distance', separation)
def writeModel(self, positions, unitCellDimensions=None):
    """Write out a model to the DCD file.

    The header integers at byte offsets 8 and 20 are rewritten with the new
    model count and step value, then the frame is appended to the file.

    Parameters:
     - positions (list) The list of atomic positions to write.  A Quantity is
       converted to nanometers; values are scaled by 10 when written.
     - unitCellDimensions (Vec3=None) The dimensions of the crystallographic
       unit cell, given either as a Quantity or as a unitless Vec3 that is
       interpreted as nanometers.  If None, the dimensions specified in the
       Topology will be used.  Regardless of the value specified, no
       dimensions will be written if the Topology does not represent a
       periodic system.

    Raises:
     - ValueError if the number of positions differs from the number of
       atoms, or if the norm of any position is NaN or infinite
    """
    if len(list(self._topology.atoms())) != len(positions):
        raise ValueError('The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    file = self._file

    # Update the header.

    self._modelCount += 1
    file.seek(8, os.SEEK_SET)
    file.write(struct.pack('<i', self._modelCount))
    file.seek(20, os.SEEK_SET)
    file.write(struct.pack('<i', self._firstStep+self._modelCount*self._interval))

    # Write the data.

    file.seek(0, os.SEEK_END)
    boxSize = self._topology.getUnitCellDimensions()
    if boxSize is not None:
        if unitCellDimensions is not None:
            boxSize = unitCellDimensions
        if is_quantity(boxSize):
            size = boxSize.value_in_unit(angstroms)
        else:
            # A unitless Vec3 is taken to be in nanometers (the same
            # convention applied to unitless positions below), so scale it
            # to angstroms instead of calling .value_in_unit on it.
            size = [10*boxSize[i] for i in range(3)]
        file.write(struct.pack('<i6di', 48, size[0], 0, size[1], 0, 0, size[2], 48))
    # One length-delimited float32 record per coordinate axis.
    length = struct.pack('<i', 4*len(positions))
    for i in range(3):
        file.write(length)
        data = array.array('f', (10*x[i] for x in positions))
        data.tofile(file)
        file.write(length)
def writeHeader(topology, file=sys.stdout):
    """Write out the header for a PDB file.

    A REMARK record naming the OpenMM version and today's date is always
    written.  If the Topology has periodic box vectors, a CRYST1 record
    follows with the three edge lengths in angstroms, the three angles in
    degrees, space group "P 1" and a Z value of 1.

    Parameters:
     - topology (Topology) The Topology defining the molecular system being written
     - file (file=stdout) A file to write the file to
    """
    # file.write() is used instead of the Python 2 print statement so the
    # function works unchanged under both Python 2 and Python 3.
    file.write("REMARK   1 CREATED WITH OPENMM %s, %s\n" % (
        Platform.getOpenMMVersion(), str(date.today())))
    vectors = topology.getPeriodicBoxVectors()
    if vectors is not None:
        (a, b, c) = vectors.value_in_unit(angstroms)
        a_length = norm(a)
        b_length = norm(b)
        c_length = norm(c)
        # alpha is the angle between b and c, beta between c and a, and
        # gamma between a and b, converted from radians to degrees.
        alpha = math.acos(dot(b, c) / (b_length * c_length)) * 180.0 / math.pi
        beta = math.acos(dot(c, a) / (c_length * a_length)) * 180.0 / math.pi
        gamma = math.acos(dot(a, b) / (a_length * b_length)) * 180.0 / math.pi
        # The space group occupies columns 56-66 and Z columns 67-70.
        file.write("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1 \n" % (
            a_length, b_length, c_length, alpha, beta, gamma))
def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosition, loopDirection, orientTo, newAtoms, newPositions, firstIndex):
    """Build a run of residues from templates and append them to ``chain``.

    The residues named in residueNames are laid out along an arc from
    startPosition to endPosition that bulges in loopDirection (no bulge
    when loopDirection is None).  Each one is rotated so that its template
    atoms line up with the same-named atoms of the orientTo residue, and is
    given the ID (firstIndex + i) % 10000.  Every atom created is appended
    to newAtoms and its position to newPositions.  If a new residue is the
    first residue of the chain, the template atoms named P, OP1 and OP2 are
    left out.
    """
    referencePositions = dict((atom.name, self.positions[atom.index]) for atom in orientTo.atoms())
    if loopDirection is None:
        loopDirection = mm.Vec3(0, 0, 0)

    # The arc is flat when the gap is already longer than 0.3 nm per
    # residue; otherwise it rises by half of the missing length.
    gap = unit.norm(endPosition-startPosition)
    numResidues = len(residueNames)
    if gap > numResidues*0.3*unit.nanometers:
        loopHeight = 0*unit.nanometers
    else:
        loopHeight = (numResidues*0.3*unit.nanometers-gap)/2

    for offset, residueName in enumerate(residueNames):
        template = self.templates[residueName]

        # Pair up template atoms with the reference atoms of the same name
        # and compute the rotation that best overlays them.
        shared = [atom for atom in template.topology.atoms() if atom.name in referencePositions]
        points1 = [referencePositions[atom.name].value_in_unit(unit.nanometer) for atom in shared]
        points2 = [template.positions[atom.index].value_in_unit(unit.nanometer) for atom in shared]
        (_translate2, rotate, _translate1) = _overlayPoints(points1, points2)

        # Create the residue and find where along the arc it sits.
        residueId = "%d" % ((firstIndex+offset)%10000)
        newResidue = chain.topology.addResidue(residueName, chain, residueId)
        fraction = (offset+1.0)/(numResidues+1.0)
        translate = startPosition + (endPosition-startPosition)*fraction + loopHeight*math.sin(fraction*math.pi)*loopDirection

        templateAtoms = list(template.topology.atoms())
        startsChain = newResidue == next(chain.residues())
        for atom in templateAtoms:
            if startsChain and atom.name in ('P', 'OP1', 'OP2'):
                continue
            newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue)
            newAtoms.append(newAtom)
            templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
            rotated = mm.Vec3(*np.dot(rotate, templatePosition))
            newPositions.append(rotated*unit.nanometer+translate)
def testUnitMathModule(self):
    """Check the unit-aware math helpers on Quantities and on plain numbers."""
    # Square roots of Quantities and of a bare scalar.
    self.assertEqual(u.sqrt(1.0*u.kilogram*u.joule), 1.0*u.kilogram*u.meter/u.second)
    self.assertEqual(u.sqrt(1.0*u.kilogram*u.calorie), math.sqrt(4.184)*u.kilogram*u.meter/u.second)
    self.assertEqual(u.sqrt(9), 3)

    # Trigonometric functions take degrees, radians, or unitless radians.
    for rightAngle in (90*u.degrees, math.pi/2*u.radians, math.pi/2):
        self.assertEqual(u.sin(rightAngle), 1)
    for straightAngle in (180*u.degrees, math.pi*u.radians, math.pi):
        self.assertEqual(u.cos(straightAngle), -1)
    for halfRightAngle in (45*u.degrees, math.pi/4*u.radians, math.pi/4):
        self.assertAlmostEqual(u.tan(halfRightAngle), 1)

    # Inverse trigonometric functions return Quantities in radians.
    arcCos = u.acos(1.0)
    arcSin = u.asin(1.0)
    arcTan = u.atan(1.0)
    inverseResults = (arcCos, arcSin, arcTan)
    for result in inverseResults:
        self.assertTrue(u.is_quantity(result))
    for result in inverseResults:
        self.assertEqual(result.unit, u.radians)
    self.assertEqual(arcCos.value_in_unit(u.degrees), 0)
    self.assertEqual(arcCos / u.radians, 0)
    self.assertEqual(arcSin.value_in_unit(u.degrees), 90)
    self.assertEqual(arcSin / u.radians, math.pi/2)
    self.assertAlmostEqual(arcTan.value_in_unit(u.degrees), 45)
    self.assertAlmostEqual(arcTan / u.radians, math.pi/4)

    # Reductions over a sequence Quantity.
    lengths = [1, 2, 3, 4] * u.meters
    self.assertEqual(u.sum(lengths), 10*u.meters)
    self.assertEqual(u.dot(lengths, lengths), (1+4+9+16)*u.meters**2)
    self.assertEqual(u.norm(lengths), math.sqrt(30)*u.meters)
def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
    """Create a new Topology in which missing atoms have been added.

    The existing chains are copied one residue at a time.  Along the way
    the residues listed in self.missingResidues are inserted, the atoms
    listed in self.missingAtoms are placed by overlaying the residue's
    template on its existing atoms, and missing terminal OXT atoms are
    added.  Finally the unit cell dimensions are copied over and standard
    and disulfide bonds are created on the new Topology.

    Parameters
    ----------
    heavyAtomsOnly : bool
        If True, only heavy atoms will be added to the topology: existing
        atoms with no element, or whose element is hydrogen, are not copied.
    omitUnknownMolecules : bool
        If True, unknown molecules will be omitted from the topology: a
        chain is skipped when none of its residue names is in self.templates.

    Returns
    -------
    newTopology : simtk.openmm.app.Topology
        A new Topology object containing atoms from the old.
    newPositions : list of simtk.unit.Quantity with units compatible with nanometers
        Atom positions for the new Topology object.
    newAtoms : simtk.openmm.app.Topology.Atom
        New atom objects (the atoms that were added rather than copied).
    existingAtomMap : dict
        Mapping from old atoms to new atoms.
    """

    newTopology = app.Topology()
    newPositions = []*unit.nanometer
    newAtoms = []
    existingAtomMap = {}
    # NOTE(review): addedAtomMap and addedOXT are filled in below but are
    # neither returned nor read anywhere else in this function.
    addedAtomMap = {}
    addedOXT = []
    # Residue centers are what _findUnoccupiedDirection steers away from
    # when placing inserted residues.
    residueCenters = [self._computeResidueCenter(res).value_in_unit(unit.nanometers) for res in self.topology.residues()]*unit.nanometers
    for chain in self.topology.chains():
        if omitUnknownMolecules and not any(residue.name in self.templates for residue in chain.residues()):
            continue
        chainResidues = list(chain.residues())
        newChain = newTopology.addChain(chain.id)
        for indexInChain, residue in enumerate(chain.residues()):

            # Insert missing residues here.
            # self.missingResidues is keyed by (chain index, position in
            # chain); its value is passed on as the residues to insert
            # before the residue at that position.

            if (chain.index, indexInChain) in self.missingResidues:
                insertHere = self.missingResidues[(chain.index, indexInChain)]
                endPosition = self._computeResidueCenter(residue)
                if indexInChain > 0:
                    # Bridge the gap between the previous residue and this one.
                    startPosition = self._computeResidueCenter(chainResidues[indexInChain-1])
                    loopDirection = _findUnoccupiedDirection((startPosition+endPosition)/2, residueCenters)
                else:
                    # Nothing precedes the first residue, so the start point
                    # is placed 0.5 nm per inserted residue away from it,
                    # toward the least occupied direction.
                    outward = _findUnoccupiedDirection(endPosition, residueCenters)*unit.nanometers
                    norm = unit.norm(outward)
                    if norm > 0*unit.nanometer:
                        outward *= len(insertHere)*0.5*unit.nanometer/norm
                    startPosition = endPosition+outward
                    loopDirection = None
                # Number the inserted residues so they end just before this
                # residue's ID (which must parse as an integer).
                firstIndex = int(residue.id)-len(insertHere)
                self._addMissingResiduesToChain(newChain, insertHere, startPosition, endPosition, loopDirection, residue, newAtoms, newPositions, firstIndex)

            # Create the new residue and add existing heavy atoms.

            newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
            # NOTE(review): addResiduesAfter is assigned but not read in
            # this function.
            addResiduesAfter = (residue == chainResidues[-1] and (chain.index, indexInChain+1) in self.missingResidues)
            for atom in residue.atoms():
                if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen):
                    if atom.name == 'OXT' and (chain.index, indexInChain+1) in self.missingResidues:
                        continue # Remove terminal oxygen, since we'll add more residues after this one
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    existingAtomMap[atom] = newAtom
                    newPositions.append(self.positions[atom.index])
            if residue in self.missingAtoms:

                # Find corresponding atoms in the residue and the template.

                template = self.templates[residue.name]
                atomPositions = dict((atom.name, self.positions[atom.index]) for atom in residue.atoms())
                points1 = []
                points2 = []
                for atom in template.topology.atoms():
                    if atom.name in atomPositions:
                        points1.append(atomPositions[atom.name].value_in_unit(unit.nanometer))
                        points2.append(template.positions[atom.index].value_in_unit(unit.nanometer))

                # Compute the optimal transform to overlay them.

                (translate2, rotate, translate1) = _overlayPoints(points1, points2)

                # Add the missing atoms.
                # Each template position is shifted by translate2, rotated,
                # then shifted by translate1.

                addedAtomMap[residue] = {}
                for atom in self.missingAtoms[residue]:
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    newAtoms.append(newAtom)
                    addedAtomMap[residue][atom] = newAtom
                    templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
                    newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer)

            if residue in self.missingTerminals:
                terminalsToAdd = self.missingTerminals[residue]
            else:
                terminalsToAdd = None

            # If this is the end of the chain, add any missing residues that come after it.

            if residue == chainResidues[-1] and (chain.index, indexInChain+1) in self.missingResidues:
                insertHere = self.missingResidues[(chain.index, indexInChain+1)]
                if len(insertHere) > 0:
                    startPosition = self._computeResidueCenter(residue)
                    # The end point lies 0.5 nm per inserted residue away
                    # from this residue, toward the least occupied direction.
                    outward = _findUnoccupiedDirection(startPosition, residueCenters)*unit.nanometers
                    norm = unit.norm(outward)
                    if norm > 0*unit.nanometer:
                        outward *= len(insertHere)*0.5*unit.nanometer/norm
                    endPosition = startPosition+outward
                    firstIndex = int(residue.id)+1
                    self._addMissingResiduesToChain(newChain, insertHere, startPosition, endPosition, None, residue, newAtoms, newPositions, firstIndex)
                    # The chain now ends with the last inserted residue, so
                    # the terminal decision is made again for that residue.
                    newResidue = list(newChain.residues())[-1]
                    if newResidue.name in proteinResidues:
                        terminalsToAdd = ['OXT']
                    else:
                        terminalsToAdd = None

            # If a terminal OXT is missing, add it.

            if terminalsToAdd is not None:
                atomPositions = dict((atom.name, newPositions[atom.index].value_in_unit(unit.nanometer)) for atom in newResidue.atoms())
                if 'OXT' in terminalsToAdd:
                    newAtom = newTopology.addAtom('OXT', oxygen, newResidue)
                    newAtoms.append(newAtom)
                    addedOXT.append(newAtom)
                    # v is the part of CA->O perpendicular to the unit
                    # vector along CA->C; OXT is placed at O + 2*v.
                    d_ca_o = atomPositions['O']-atomPositions['CA']
                    d_ca_c = atomPositions['C']-atomPositions['CA']
                    d_ca_c /= unit.sqrt(unit.dot(d_ca_c, d_ca_c))
                    v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o)
                    newPositions.append((atomPositions['O']+2*v)*unit.nanometer)
    newTopology.setUnitCellDimensions(self.topology.getUnitCellDimensions())
    newTopology.createStandardBonds()
    newTopology.createDisulfideBonds(newPositions)

    # Return the results.

    return (newTopology, newPositions, newAtoms, existingAtomMap)
def addSolvent(self, forcefield, model='tip3p', boxSize=None, padding=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*molar): """Add solvent (both water and ions) to the model to fill a rectangular box. The algorithm works as follows: 1. Water molecules are added to fill the box. 2. Water molecules are removed if their distance to any solute atom is less than the sum of their van der Waals radii. 3. If the solute is charged, enough positive or negative ions are added to neutralize it. Each ion is added by randomly selecting a water molecule and replacing it with the ion. 4. Ion pairs are added to give the requested total ionic strength. The box size can be specified in three ways. First, you can explicitly give a box size to use. Alternatively, you can give a padding distance. The largest dimension of the solute (along the x, y, or z axis) is determined, and a cubic box of size (largest dimension)+2*padding is used. Finally, if neither a box size nor a padding distance is specified, the existing Topology's unit cell dimensions are used. Parameters: - forcefield (ForceField) the ForceField to use for determining van der Waals radii and atomic charges - model (string='tip3p') the water model to use. Supported values are 'tip3p', 'spce', 'tip4pew', and 'tip5p'. - boxSize (Vec3=None) the size of the box to fill with water - padding (distance=None) the padding distance to use - positiveIon (string='Na+') the type of positive ion to add. Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+' - negativeIon (string='Cl-') the type of negative ion to add. Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'. Be aware that not all force fields support all ion types. - ionicString (concentration=0*molar) the total concentration of ions (both positive and negative) to add. This does not include ions that are added to neutralize the system. """ # Pick a unit cell size. 
if boxSize is not None: if is_quantity(boxSize): boxSize = boxSize.value_in_unit(nanometer) box = Vec3(boxSize[0], boxSize[1], boxSize[2])*nanometer elif padding is not None: maxSize = max(max((pos[i] for pos in self.positions))-min((pos[i] for pos in self.positions)) for i in range(3)) box = (maxSize+2*padding)*Vec3(1, 1, 1) else: box = self.topology.getUnitCellDimensions() if box is None: raise ValueError('Neither the box size nor padding was specified, and the Topology does not define unit cell dimensions') box = box.value_in_unit(nanometer) invBox = Vec3(1.0/box[0], 1.0/box[1], 1.0/box[2]) # Identify the ion types. posIonElements = {'Cs+':elem.cesium, 'K+':elem.potassium, 'Li+':elem.lithium, 'Na+':elem.sodium, 'Rb+':elem.rubidium} negIonElements = {'Cl-':elem.chlorine, 'Br-':elem.bromine, 'F-':elem.fluorine, 'I-':elem.iodine} if positiveIon not in posIonElements: raise ValueError('Illegal value for positive ion: %s' % positiveIon) if negativeIon not in negIonElements: raise ValueError('Illegal value for negative ion: %s' % negativeIon) positiveElement = posIonElements[positiveIon] negativeElement = negIonElements[negativeIon] # Load the pre-equilibrated water box. vdwRadiusPerSigma = 0.5612310241546864907 if model == 'tip3p': waterRadius = 0.31507524065751241*vdwRadiusPerSigma elif model == 'spce': waterRadius = 0.31657195050398818*vdwRadiusPerSigma elif model == 'tip4pew': waterRadius = 0.315365*vdwRadiusPerSigma elif model == 'tip5p': waterRadius = 0.312*vdwRadiusPerSigma else: raise ValueError('Unknown water model: %s' % model) pdb = PDBFile(os.path.join(os.path.dirname(__file__), 'data', model+'.pdb')) pdbTopology = pdb.getTopology() pdbPositions = pdb.getPositions().value_in_unit(nanometer) pdbResidues = list(pdbTopology.residues()) pdbBoxSize = pdbTopology.getUnitCellDimensions().value_in_unit(nanometer) # Have the ForceField build a System for the solute from which we can determine van der Waals radii. 
system = forcefield.createSystem(self.topology) nonbonded = None for i in range(system.getNumForces()): if isinstance(system.getForce(i), NonbondedForce): nonbonded = system.getForce(i) if nonbonded is None: raise ValueError('The ForceField does not specify a NonbondedForce') cutoff = [nonbonded.getParticleParameters(i)[1].value_in_unit(nanometer)*vdwRadiusPerSigma+waterRadius for i in range(system.getNumParticles())] waterCutoff = waterRadius if len(cutoff) == 0: maxCutoff = waterCutoff else: maxCutoff = max(waterCutoff, max(cutoff)) # Copy the solute over. newTopology = Topology() newTopology.setUnitCellDimensions(box) newAtoms = {} newPositions = []*nanometer for chain in self.topology.chains(): newChain = newTopology.addChain() for residue in chain.residues(): newResidue = newTopology.addResidue(residue.name, newChain) for atom in residue.atoms(): newAtom = newTopology.addAtom(atom.name, atom.element, newResidue) newAtoms[atom] = newAtom newPositions.append(deepcopy(self.positions[atom.index])) for bond in self.topology.bonds(): newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]]) # Sort the solute atoms into cells for fast lookup. if len(self.positions) == 0: positions = [] else: positions = self.positions.value_in_unit(nanometer) cells = {} numCells = tuple((max(1, int(floor(box[i]/maxCutoff))) for i in range(3))) cellSize = tuple((box[i]/numCells[i] for i in range(3))) for i in range(len(positions)): cell = tuple((int(floor(positions[i][j]/cellSize[j]))%numCells[j] for j in range(3))) if cell in cells: cells[cell].append(i) else: cells[cell] = [i] # Create a generator that loops over atoms close to a position. 
def neighbors(pos): centralCell = tuple((int(floor(pos[i]/cellSize[i])) for i in range(3))) offsets = (-1, 0, 1) for i in offsets: for j in offsets: for k in offsets: cell = ((centralCell[0]+i+numCells[0])%numCells[0], (centralCell[1]+j+numCells[1])%numCells[1], (centralCell[2]+k+numCells[2])%numCells[2]) if cell in cells: for atom in cells[cell]: yield atom # Define a function to compute the distance between two points, taking periodic boundary conditions into account. def periodicDistance(pos1, pos2): delta = pos1-pos2 delta = [delta[i]-floor(delta[i]*invBox[i]+0.5)*box[i] for i in range(3)] return norm(delta) # Find the list of water molecules to add. newChain = newTopology.addChain() if len(positions) == 0: center = Vec3(0, 0, 0) else: center = [(max((pos[i] for pos in positions))+min((pos[i] for pos in positions)))/2 for i in range(3)] center = Vec3(center[0], center[1], center[2]) numBoxes = [int(ceil(box[i]/pdbBoxSize[i])) for i in range(3)] addedWaters = [] for boxx in range(numBoxes[0]): for boxy in range(numBoxes[1]): for boxz in range(numBoxes[2]): offset = Vec3(boxx*pdbBoxSize[0], boxy*pdbBoxSize[1], boxz*pdbBoxSize[2]) for residue in pdbResidues: oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0] atomPos = pdbPositions[oxygen.index]+offset if not any((atomPos[i] > box[i] for i in range(3))): # This molecule is inside the box, so see how close to it is to the solute. atomPos += center-box/2 for i in neighbors(atomPos): if periodicDistance(atomPos, positions[i]) < cutoff[i]: break else: # Record this water molecule as one to add. addedWaters.append((residue.index, atomPos)) # There could be clashes between water molecules at the box edges. Find ones to remove. 
upperCutoff = center+box/2-Vec3(waterCutoff, waterCutoff, waterCutoff) lowerCutoff = center-box/2+Vec3(waterCutoff, waterCutoff, waterCutoff) lowerSkinPositions = [pos for index, pos in addedWaters if pos[0] < lowerCutoff[0] or pos[1] < lowerCutoff[1] or pos[2] < lowerCutoff[2]] filteredWaters = [] cells = {} for i in range(len(lowerSkinPositions)): cell = tuple((int(floor(lowerSkinPositions[i][j]/cellSize[j]))%numCells[j] for j in range(3))) if cell in cells: cells[cell].append(i) else: cells[cell] = [i] for entry in addedWaters: pos = entry[1] if pos[0] < upperCutoff[0] and pos[1] < upperCutoff[1] and pos[2] < upperCutoff[2]: filteredWaters.append(entry) else: if not any((periodicDistance(lowerSkinPositions[i], pos) < waterCutoff and norm(lowerSkinPositions[i]-pos) > waterCutoff for i in neighbors(pos))): filteredWaters.append(entry) addedWaters = filteredWaters # Add ions to neutralize the system. totalCharge = int(floor(0.5+sum((nonbonded.getParticleParameters(i)[0].value_in_unit(elementary_charge) for i in range(system.getNumParticles()))))) if abs(totalCharge) > len(addedWaters): raise Exception('Cannot neutralize the system because the charge is greater than the number of available positions for ions') def addIon(element): # Replace a water by an ion. index = random.randint(0, len(addedWaters)-1) newResidue = newTopology.addResidue(element.symbol.upper(), newChain) newTopology.addAtom(element.symbol, element, newResidue) newPositions.append(addedWaters[index][1]*nanometer) del addedWaters[index] for i in range(abs(totalCharge)): addIon(positiveElement if totalCharge < 0 else negativeElement) # Add ions based on the desired ionic strength. numIons = len(addedWaters)*ionicStrength/(55.4*molar) # Pure water is about 55.4 molar (depending on temperature) numPairs = int(floor(numIons/2+0.5)) for i in range(numPairs): addIon(positiveElement) for i in range(numPairs): addIon(negativeElement) # Add the water molecules. 
for index, pos in addedWaters: newResidue = newTopology.addResidue(residue.name, newChain) residue = pdbResidues[index] oxygen = [atom for atom in residue.atoms() if atom.element == elem.oxygen][0] oPos = pdbPositions[oxygen.index] molAtoms = [] for atom in residue.atoms(): molAtoms.append(newTopology.addAtom(atom.name, atom.element, newResidue)) newPositions.append((pos+pdbPositions[atom.index]-oPos)*nanometer) for atom1 in molAtoms: if atom1.element == elem.oxygen: for atom2 in molAtoms: if atom2.element == elem.hydrogen: newTopology.addBond(atom1, atom2) newTopology.setUnitCellDimensions(deepcopy(box)*nanometer) self.topology = newTopology self.positions = newPositions
def periodicDistance(pos1, pos2):
    """Return the distance between two points under periodic boundary conditions.

    Each component of the separation vector is shifted by a whole number of
    box lengths, using the ``box`` and ``invBox`` values from the surrounding
    scope, before the norm of the wrapped vector is taken.
    """
    separation = pos1-pos2
    wrapped = []
    for axis in range(3):
        # Number of whole box lengths to subtract along this axis.
        numShifts = floor(separation[axis]*invBox[axis]+0.5)
        wrapped.append(separation[axis]-numShifts*box[axis])
    return norm(wrapped)
def createRigidBodies(system, positions, bodies):
    """Modify a System to turn specified sets of particles into rigid bodies.

    For every rigid body, four particles are selected as "real" particles whose
    positions are integrated.  Constraints are added between them to make them
    move as a rigid body.  All other particles in the body are then turned into
    virtual sites whose positions are computed based on the "real" particles.

    Because virtual sites are massless, the mass properties of the rigid bodies
    will be slightly different from the corresponding sets of particles in the
    original system.  The masses of the non-virtual particles are chosen to
    guarantee that the total mass and center of mass of each rigid body exactly
    match those of the original particles.  The moment of inertia will be
    similar to that of the original particles, but not identical.

    Care is needed when using constraints, since virtual particles cannot
    participate in constraints.  If the input system includes any constraints,
    this function will automatically remove ones that connect two particles in
    the same rigid body.  But if there is a constraint between a particle in a
    rigid body and another particle not in that body, it will likely lead to an
    exception when you try to create a context.

    Bodies with fewer than five particles keep all of their particles as
    "real" particles: they only receive pairwise constraints and no virtual
    sites.

    Parameters
    ----------
    system : System
        the System to modify
    positions : list
        the positions of all particles in the system
    bodies : list
        each element of this list defines one rigid body.  Each element should
        itself be a list of the indices of all particles that make up that
        rigid body.

    Raises
    ------
    ValueError
        If a body with five or more particles contains no set of four
        particles that can all be assigned positive masses, or if no three
        "real" particles span a plane from which virtual sites can be defined.
    """
    # Remove any constraints involving particles in rigid bodies.  Iterate
    # backwards so that removing a constraint does not shift the indices of
    # the ones still to be examined.
    for i in range(system.getNumConstraints() - 1, -1, -1):
        p1, p2, distance = system.getConstraintParameters(i)
        if any(p1 in body and p2 in body for body in bodies):
            system.removeConstraint(i)

    # Loop over rigid bodies and process them.
    for particles in bodies:
        if len(particles) < 5:
            # All the particles will be "real" particles.
            realParticles = particles
            realParticleMasses = [system.getParticleMass(i) for i in particles]
        else:
            # Select four particles to use as the "real" particles.  All
            # others will be virtual sites.
            pos = [positions[i] for i in particles]
            mass = [system.getParticleMass(i) for i in particles]
            totalMass = unit.sum(mass)
            cm = unit.sum([p * m for p, m in zip(pos, mass)]) / totalMass
            r = [p - cm for p in pos]
            avgR = unit.sqrt(
                unit.sum([unit.dot(x, x) for x in r]) / len(particles))
            # Try first the particles whose distance from the center of mass
            # is closest to the average distance.
            rank = sorted(range(len(particles)),
                          key=lambda i: abs(unit.norm(r[i]) - avgR))
            # The right hand side is the same for every candidate set: zero
            # first moment about the center of mass, and the full total mass.
            rhs = np.array(
                [0.0, 0.0, 0.0, totalMass.value_in_unit(unit.amu)])
            for p in combinations(rank, 4):
                # Select masses for the "real" particles.  If any is negative,
                # reject this set of particles and keep going.
                matrix = np.zeros((4, 4))
                for i in range(4):
                    particleR = r[p[i]].value_in_unit(unit.nanometers)
                    matrix[0][i] = particleR[0]
                    matrix[1][i] = particleR[1]
                    matrix[2][i] = particleR[2]
                    matrix[3][i] = 1.0
                weights = lin.solve(matrix, rhs)
                if all(w > 0.0 for w in weights):
                    # We have a good set of particles.
                    realParticles = [particles[i] for i in p]
                    realParticleMasses = [float(w) for w in weights] * unit.amu
                    break
            else:
                # Without this check the code would fail with an unbound
                # name, or silently reuse the particles of the previous body.
                raise ValueError(
                    'Could not find four particles with positive masses to '
                    'represent a rigid body')

        # Set particle masses.
        for i, m in zip(realParticles, realParticleMasses):
            system.setParticleMass(i, m)

        # Add constraints between the real particles.
        for p1, p2 in combinations(realParticles, 2):
            distance = unit.norm(positions[p1] - positions[p2])
            system.addConstraint(p1, p2, distance)

        # Nothing more to do if every particle in the body is a real particle.
        virtualParticles = [i for i in particles if i not in realParticles]
        if not virtualParticles:
            continue

        # Select which three particles to use for defining virtual sites: the
        # triple whose edge vectors have the largest cross product.
        bestNorm = 0
        vsiteParticles = None
        for p1, p2, p3 in combinations(realParticles, 3):
            d12 = (positions[p2] - positions[p1]).value_in_unit(unit.nanometer)
            d13 = (positions[p3] - positions[p1]).value_in_unit(unit.nanometer)
            crossNorm = unit.norm((d12[1] * d13[2] - d12[2] * d13[1],
                                   d12[2] * d13[0] - d12[0] * d13[2],
                                   d12[0] * d13[1] - d12[1] * d13[0]))
            if crossNorm > bestNorm:
                bestNorm = crossNorm
                vsiteParticles = (p1, p2, p3)
        if vsiteParticles is None:
            raise ValueError(
                'Could not find three non-collinear particles to define the '
                'virtual sites of a rigid body')

        # Create virtual sites.  Each virtual particle is expressed in the
        # basis (d12, d13, d12 x d13) anchored at the first site particle.
        d12 = (positions[vsiteParticles[1]] -
               positions[vsiteParticles[0]]).value_in_unit(unit.nanometer)
        d13 = (positions[vsiteParticles[2]] -
               positions[vsiteParticles[0]]).value_in_unit(unit.nanometer)
        cross = mm.Vec3(d12[1] * d13[2] - d12[2] * d13[1],
                        d12[2] * d13[0] - d12[0] * d13[2],
                        d12[0] * d13[1] - d12[1] * d13[0])
        matrix = np.zeros((3, 3))
        for i in range(3):
            matrix[i][0] = d12[i]
            matrix[i][1] = d13[i]
            matrix[i][2] = cross[i]
        for i in virtualParticles:
            system.setParticleMass(i, 0)
            rhs = np.array((positions[i] -
                            positions[vsiteParticles[0]]).value_in_unit(
                                unit.nanometer))
            weights = lin.solve(matrix, rhs)
            system.setVirtualSite(
                i,
                mm.OutOfPlaneSite(vsiteParticles[0], vsiteParticles[1],
                                  vsiteParticles[2], weights[0], weights[1],
                                  weights[2]))
def writeModel(topology, positions, file=sys.stdout, modelIndex=None, keepIds=False, extraParticleIdentifier=' '):
    """Write out a model to a PDB file.

    Parameters
    ----------
    topology : Topology
        The Topology defining the model to write
    positions : list
        The list of atomic positions to write
    file : file=stdout
        A file to write the model to
    modelIndex : int=None
        If not None, the model will be surrounded by MODEL/ENDMDL records with
        this index
    keepIds : bool=False
        If True, keep the residue and chain IDs specified in the Topology
        rather than generating new ones.  Warning: It is up to the caller to
        make sure these are valid IDs that satisfy the requirements of the PDB
        format.  No guarantees are made about what will happen if they are not,
        and the output file could be invalid.
    extraParticleIdentifier : string=' '
        String to write in the element column of the ATOM records for atoms
        whose element is None (extra particles)

    Raises
    ------
    ValueError
        If the number of positions does not match the number of atoms, if any
        position is NaN or infinite, or if a formatted atom record is not
        exactly 80 characters long.
    """
    if len(list(topology.atoms())) != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(angstroms)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    # Residues in this list are written as ATOM records; everything else,
    # including water, is written as HETATM.
    nonHeterogens = PDBFile._standardResidues[:]
    nonHeterogens.remove('HOH')
    atomIndex = 1
    posIndex = 0
    if modelIndex is not None:
        print("MODEL %4d" % modelIndex, file=file)
    for (chainIndex, chain) in enumerate(topology.chains()):
        if keepIds and len(chain.id) == 1:
            chainName = chain.id
        else:
            # Generated chain names cycle through A-Z.
            chainName = chr(ord('A') + chainIndex % 26)
        residues = list(chain.residues())
        for (resIndex, res) in enumerate(residues):
            if len(res.name) > 3:
                resName = res.name[:3]
            else:
                resName = res.name
            if keepIds and len(res.id) < 5:
                resId = res.id
            else:
                resId = "%4d" % ((resIndex + 1) % 10000)
            if len(res.insertionCode) == 1:
                resIC = res.insertionCode
            else:
                resIC = " "
            if res.name in nonHeterogens:
                recordName = "ATOM "
            else:
                recordName = "HETATM"
            for atom in res.atoms():
                if atom.element is not None:
                    symbol = atom.element.symbol
                else:
                    symbol = extraParticleIdentifier
                if len(atom.name) < 4 and atom.name[:1].isalpha() and len(symbol) < 2:
                    atomName = ' ' + atom.name
                elif len(atom.name) > 4:
                    atomName = atom.name[:4]
                else:
                    atomName = atom.name
                coords = positions[posIndex]
                line = "%s%5d %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s " % (
                    recordName, atomIndex % 100000, atomName, resName,
                    chainName, resId, resIC, _format_83(coords[0]),
                    _format_83(coords[1]), _format_83(coords[2]), symbol)
                if len(line) != 80:
                    raise ValueError('Fixed width overflow detected')
                print(line, file=file)
                posIndex += 1
                atomIndex += 1
            if resIndex == len(residues) - 1:
                # Wrap the serial number exactly as the atom records do, so
                # the TER record keeps its fixed width past 99999 atoms.
                print("TER %5d %3s %s%4s" % (atomIndex % 100000, resName, chainName, resId), file=file)
                atomIndex += 1
    if modelIndex is not None:
        print("ENDMDL", file=file)
def addHydrogens(self, forcefield, pH=7.0, variants=None, platform=None):
    """Add missing hydrogens to the model.

    Some residues can exist in multiple forms depending on the pH and
    properties of the local environment.  These variants differ in the
    presence or absence of particular hydrogens.  In particular, the following
    variants are supported:

    Aspartic acid:
        ASH: Neutral form with a hydrogen on one of the delta oxygens
        ASP: Negatively charged form without a hydrogen on either delta oxygen

    Cysteine:
        CYS: Neutral form with a hydrogen on the sulfur
        CYX: No hydrogen on the sulfur (either negatively charged, or part of
             a disulfide bond)

    Glutamic acid:
        GLH: Neutral form with a hydrogen on one of the epsilon oxygens
        GLU: Negatively charged form without a hydrogen on either epsilon
             oxygen

    Histidine:
        HID: Neutral form with a hydrogen on the ND1 atom
        HIE: Neutral form with a hydrogen on the NE2 atom
        HIP: Positively charged form with hydrogens on both ND1 and NE2

    Lysine:
        LYN: Neutral form with two hydrogens on the zeta nitrogen
        LYS: Positively charged form with three hydrogens on the zeta nitrogen

    The variant to use for each residue is determined by the following rules:

    1. The most common variant at the specified pH is selected.
    2. Any Cysteine that participates in a disulfide bond uses the CYX variant
       regardless of pH.
    3. For a neutral Histidine residue, the HID or HIE variant is selected
       based on which one forms a better hydrogen bond.

    You can override these rules by explicitly specifying a variant for any
    residue.  Also keep in mind that this function will only add hydrogens.
    It will never remove ones that are already present in the model,
    regardless of the specified pH.

    Definitions for standard amino acids and nucleotides are built in.  You
    can call loadHydrogenDefinitions() to load additional definitions for
    other residue types.

    On return, self.topology and self.positions have been replaced by the new
    topology and the energy-minimized positions.

    Parameters:
     - forcefield (ForceField) the ForceField to use for determining the
       positions of hydrogens
     - pH (float=7.0) the pH based on which to select variants
     - variants (list=None) an optional list of variants to use.  If this is
       specified, its length must equal the number of residues in the model.
       variants[i] is the name of the variant to use for residue i (indexed
       starting at 0).  If an element is None, the standard rules will be
       followed to select a variant for that residue.
     - platform (Platform=None) the Platform to use when computing the
       hydrogen atom positions.  If this is None, the default Platform will be
       used.
    Returns: a list of what variant was actually selected for each residue, in
    the same format as the variants parameter
    Raises: ValueError if the variants list has the wrong length, if a HIS
    residue does not contain exactly one ND1 and one NE2 atom, or if a
    selected variant is not defined for its residue type
    """
    # Check the list of variants.

    residues = list(self.topology.residues())
    if variants is not None:
        if len(variants) != len(residues):
            raise ValueError("The length of the variants list must equal the number of residues")
    else:
        variants = [None]*len(residues)
    actualVariants = [None]*len(residues)

    # Load the residue specifications.

    if not Modeller._hasLoadedStandardHydrogens:
        Modeller.loadHydrogenDefinitions(os.path.join(os.path.dirname(__file__), 'data', 'hydrogens.xml'))

    # Make a list of atoms bonded to each atom.

    bonded = {}
    for atom in self.topology.atoms():
        bonded[atom] = []
    for atom1, atom2 in self.topology.bonds():
        bonded[atom1].append(atom2)
        bonded[atom2].append(atom1)

    # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.

    def isHbond(d, h, a):
        # d, h and a are the positions of the donor, hydrogen and acceptor.
        # Accept when donor and acceptor are within 0.35 nm and the angle
        # between the D->H and H->A directions is below 50 degrees.
        if norm(d-a) > 0.35*nanometer:
            return False
        deltaDH = h-d
        deltaHA = a-h
        deltaDH /= norm(deltaDH)
        deltaHA /= norm(deltaHA)
        return acos(dot(deltaDH, deltaHA)) < 50*degree

    # Loop over residues.

    newTopology = Topology()
    newTopology.setUnitCellDimensions(deepcopy(self.topology.getUnitCellDimensions()))
    newAtoms = {}
    newPositions = []*nanometer
    newIndices = []
    acceptors = [atom for atom in self.topology.atoms() if atom.element in (elem.oxygen, elem.nitrogen)]
    for chain in self.topology.chains():
        newChain = newTopology.addChain()
        for residue in chain.residues():
            newResidue = newTopology.addResidue(residue.name, newChain)
            isNTerminal = (residue == chain._residues[0])
            isCTerminal = (residue == chain._residues[-1])
            if residue.name in Modeller._residueHydrogens:
                # Add hydrogens.  First select which variant to use.

                spec = Modeller._residueHydrogens[residue.name]
                variant = variants[residue.index]
                if variant is None:
                    if residue.name == 'CYS':
                        # If this is part of a disulfide, use CYX.

                        sulfur = [atom for atom in residue.atoms() if atom.element == elem.sulfur]
                        if len(sulfur) == 1 and any((atom.residue != residue for atom in bonded[sulfur[0]])):
                            variant = 'CYX'
                    if residue.name == 'HIS' and pH > 6.5:
                        # See if either nitrogen already has a hydrogen attached.

                        nd1 = [atom for atom in residue.atoms() if atom.name == 'ND1']
                        ne2 = [atom for atom in residue.atoms() if atom.name == 'NE2']
                        if len(nd1) != 1 or len(ne2) != 1:
                            raise ValueError('HIS residue (%d) has the wrong set of atoms' % residue.index)
                        nd1 = nd1[0]
                        ne2 = ne2[0]
                        nd1HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[nd1]))
                        ne2HasHydrogen = any((atom.element == elem.hydrogen for atom in bonded[ne2]))
                        if nd1HasHydrogen and ne2HasHydrogen:
                            variant = 'HIP'
                        elif nd1HasHydrogen:
                            variant = 'HID'
                        elif ne2HasHydrogen:
                            variant = 'HIE'
                        else:
                            # Estimate the hydrogen positions: place each
                            # candidate hydrogen 0.1 nm from its nitrogen,
                            # pointing away from the nitrogen's bonded atoms.

                            nd1Pos = self.positions[nd1.index]
                            ne2Pos = self.positions[ne2.index]
                            hd1Delta = Vec3(0, 0, 0)*nanometer
                            for other in bonded[nd1]:
                                hd1Delta += nd1Pos-self.positions[other.index]
                            hd1Delta *= 0.1*nanometer/norm(hd1Delta)
                            hd1Pos = nd1Pos+hd1Delta
                            he2Delta = Vec3(0, 0, 0)*nanometer
                            for other in bonded[ne2]:
                                he2Delta += ne2Pos-self.positions[other.index]
                            he2Delta *= 0.1*nanometer/norm(he2Delta)
                            he2Pos = ne2Pos+he2Delta

                            # See whether either hydrogen would form a hydrogen bond.

                            nd1IsBonded = False
                            ne2IsBonded = False
                            for acceptor in acceptors:
                                if acceptor.residue != residue:
                                    acceptorPos = self.positions[acceptor.index]
                                    if isHbond(nd1Pos, hd1Pos, acceptorPos):
                                        # An ND1 hydrogen bond settles the
                                        # choice (HID), so stop searching.
                                        nd1IsBonded = True
                                        break
                                    if isHbond(ne2Pos, he2Pos, acceptorPos):
                                        ne2IsBonded = True
                            if ne2IsBonded and not nd1IsBonded:
                                variant = 'HIE'
                            else:
                                variant = 'HID'
                    elif residue.name == 'HIS':
                        # pH <= 6.5: use the doubly protonated form.
                        variant = 'HIP'
                if variant is not None and variant not in spec.variants:
                    raise ValueError('Illegal variant for %s residue: %s' % (residue.name, variant))
                actualVariants[residue.index] = variant

                # Make a list of hydrogens that should be present in the residue.

                parents = [atom for atom in residue.atoms() if atom.element != elem.hydrogen]
                parentNames = [atom.name for atom in parents]
                hydrogens = [h for h in spec.hydrogens if (variant is None and pH <= h.maxph) or (h.variants is None and pH <= h.maxph) or (h.variants is not None and variant in h.variants)]
                hydrogens = [h for h in hydrogens if h.terminal is None or (isNTerminal and h.terminal == 'N') or (isCTerminal and h.terminal == 'C')]
                hydrogens = [h for h in hydrogens if h.parent in parentNames]

                # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.

                for parent in residue.atoms():
                    # Add the atom.

                    newAtom = newTopology.addAtom(parent.name, parent.element, newResidue)
                    newAtoms[parent] = newAtom
                    newPositions.append(deepcopy(self.positions[parent.index]))
                    if parent in parents:
                        # Match expected hydrogens with existing ones and find which ones need to be added.

                        existing = [atom for atom in bonded[parent] if atom.element == elem.hydrogen]
                        expected = [h for h in hydrogens if h.parent == parent.name]
                        if len(existing) < len(expected):
                            # Try to match up existing hydrogens to expected ones.

                            matches = []
                            for e in existing:
                                match = [h for h in expected if h.name == e.name]
                                if len(match) > 0:
                                    matches.append(match[0])
                                    expected.remove(match[0])
                                else:
                                    matches.append(None)

                            # If any hydrogens couldn't be matched by name, just match them arbitrarily.

                            for i in range(len(matches)):
                                if matches[i] is None:
                                    matches[i] = expected[-1]
                                    expected.remove(expected[-1])

                            # Add the missing hydrogens.  Whatever is left in
                            # 'expected' has no existing counterpart.

                            for h in expected:
                                newH = newTopology.addAtom(h.name, elem.hydrogen, newResidue)
                                newIndices.append(newH.index)
                                delta = Vec3(0, 0, 0)*nanometer
                                if len(bonded[parent]) > 0:
                                    # Point away from the parent's bonded atoms.
                                    for other in bonded[parent]:
                                        delta += self.positions[parent.index]-self.positions[other.index]
                                else:
                                    delta = Vec3(random.random(), random.random(), random.random())*nanometer
                                # Scale to 0.1 nm, add a random perturbation,
                                # then rescale to 0.1 nm again.
                                delta *= 0.1*nanometer/norm(delta)
                                delta += 0.05*Vec3(random.random(), random.random(), random.random())*nanometer
                                delta *= 0.1*nanometer/norm(delta)
                                newPositions.append(self.positions[parent.index]+delta)
                                newTopology.addBond(newAtom, newH)
            else:
                # Just copy over the residue.

                for atom in residue.atoms():
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
    for bond in self.topology.bonds():
        if bond[0] in newAtoms and bond[1] in newAtoms:
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])

    # The hydrogens were added at random positions.  Now use the ForceField to fix them up.

    system = forcefield.createSystem(newTopology, rigidWater=False)
    atoms = list(newTopology.atoms())
    for i in range(system.getNumParticles()):
        if atoms[i].element != elem.hydrogen:
            # This is a heavy atom, so make it immobile.
            system.setParticleMass(i, 0)
    if platform is None:
        context = Context(system, VerletIntegrator(0.0))
    else:
        context = Context(system, VerletIntegrator(0.0), platform)
    context.setPositions(newPositions)
    LocalEnergyMinimizer.minimize(context)
    self.topology = newTopology
    self.positions = context.getState(getPositions=True).getPositions()
    return actualVariants
def writeModel(self, positions, unitCellDimensions=None, periodicBoxVectors=None):
    """Write out a model to the DCD file.

    The periodic box can be specified either by the unit cell dimensions
    (for a rectangular box), or the full set of box vectors (for an
    arbitrary triclinic box).  If neither is specified, the box vectors
    specified in the Topology will be used.  Regardless of the value
    specified, no dimensions will be written if the Topology does not
    represent a periodic system.

    Parameters
    ----------
    positions : list
        The list of atomic positions to write
    unitCellDimensions : Vec3=None
        The dimensions of the crystallographic unit cell.
    periodicBoxVectors : tuple of Vec3=None
        The vectors defining the periodic box.

    Raises
    ------
    ValueError
        If the number of positions does not match the number of atoms, or
        if any position is NaN or infinite.
    """
    if len(list(self._topology.atoms())) != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    file = self._file

    self._modelCount += 1
    lastStep = self._firstStep + self._modelCount * self._interval
    # The largest value a signed 32 bit field can hold is 2**31 - 1, so a
    # step count of exactly 2**31 must already trigger the rescaling.
    if self._interval > 1 and lastStep >= (1 << 31):
        # This will exceed the range of a 32 bit integer.  To avoid crashing or producing a corrupt file,
        # update the header to say the trajectory consisted of a smaller number of larger steps (so the
        # total trajectory length remains correct).
        self._firstStep //= self._interval
        self._dt *= self._interval
        self._interval = 1
        file.seek(0, os.SEEK_SET)
        file.write(
            struct.pack('<i4c9if', 84, b'C', b'O', b'R', b'D', 0,
                        self._firstStep, self._interval, 0, 0, 0, 0, 0, 0,
                        self._dt))

    # Update the header: the frame count at offset 8 and the last step at
    # offset 20.

    file.seek(8, os.SEEK_SET)
    file.write(struct.pack('<i', self._modelCount))
    file.seek(20, os.SEEK_SET)
    file.write(
        struct.pack('<i',
                    self._firstStep + self._modelCount * self._interval))

    # Write the data.

    file.seek(0, os.SEEK_END)
    boxVectors = self._topology.getPeriodicBoxVectors()
    if boxVectors is not None:
        # Explicit arguments take precedence over the Topology's box.
        if periodicBoxVectors is not None:
            boxVectors = periodicBoxVectors
        elif unitCellDimensions is not None:
            if is_quantity(unitCellDimensions):
                unitCellDimensions = unitCellDimensions.value_in_unit(
                    nanometers)
            boxVectors = (Vec3(unitCellDimensions[0], 0, 0),
                          Vec3(0, unitCellDimensions[1], 0),
                          Vec3(0, 0, unitCellDimensions[2])) * nanometers
        (a_length, b_length, c_length, alpha, beta,
         gamma) = computeLengthsAndAngles(boxVectors)
        # computeLengthsAndAngles returns unitless nanometers, but need angstroms here.
        a_length = a_length * 10.
        b_length = b_length * 10.
        c_length = c_length * 10.
        angle1 = math.sin(math.pi / 2 - gamma)
        angle2 = math.sin(math.pi / 2 - beta)
        angle3 = math.sin(math.pi / 2 - alpha)
        file.write(
            struct.pack('<i6di', 48, a_length, angle1, b_length, angle2,
                        angle3, c_length, 48))
    # Each coordinate axis is written as its own block of 32 bit floats,
    # preceded and followed by the block length in bytes.
    length = struct.pack('<i', 4 * len(positions))
    for i in range(3):
        file.write(length)
        data = array.array('f', (10 * x[i] for x in positions))
        data.tofile(file)
        file.write(length)
    try:
        file.flush()
    except AttributeError:
        # The file object has no flush() method; nothing to do.
        pass
def addHydrogens(self, forcefield=None, pH=None, variants=None, platform=None):
    """Add missing hydrogens to the model.

    This function automatically changes compatible residues into their
    constant-pH variant if no variant is specified:

    Aspartic acid:
        AS4: Form with 2 hydrogens on each one of the delta oxygens
             (syn, anti).  It has 5 titration states.

        Alternative:
        AS2: Has 2 hydrogens (syn, anti) on one of the delta oxygens.
             It has 3 titration states.

    Cysteine:
        CYS: Neutral form with a hydrogen on the sulfur
        CYX: No hydrogen on the sulfur (either negatively charged, or part of
             a disulfide bond)

    Glutamic acid:
        GL4: Form with 2 hydrogens on each one of the epsilon oxygens
             (syn, anti).  It has 5 titration states.

    Histidine:
        HIP: Positively charged form with hydrogens on both ND1 and NE2.
             It has 3 titration states.

    The variant to use for each residue is determined by the following rules:

    1. Any Cysteine that participates in a disulfide bond uses the CYX variant
       regardless of pH.
    2. Other residues are all set to maximally protonated state, which can be
       updated using a proton drive

    You can override these rules by explicitly specifying a variant for any
    residue.  To do that, provide a list for the 'variants' parameter, and set
    the corresponding element to the name of the variant to use.

    A special case is when the model already contains a hydrogen that should
    not be present in the desired variant.  If you explicitly specify a
    variant using the 'variants' parameter, the residue will be modified to
    match the desired variant, removing hydrogens if necessary.  On the other
    hand, for residues whose variant is selected automatically, this function
    will only add hydrogens.  It will never remove ones that are already
    present in the model.

    Definitions for standard amino acids and nucleotides are built in.  You
    can call loadHydrogenDefinitions() to load additional definitions for
    other residue types.

    On return, self.topology and self.positions have been replaced by the new
    topology and the energy-minimized positions.

    Parameters
    ----------
    forcefield : ForceField=None
        the ForceField to use for determining the positions of hydrogens.  If
        this is None, positions will be picked which are generally reasonable
        but not optimized for any particular ForceField.
    pH : None
        Kept for compatibility reasons.  Has no effect, other than printing a
        notice when a value is supplied.
    variants : list=None
        an optional list of variants to use.  If this is specified, its length
        must equal the number of residues in the model.  variants[i] is the
        name of the variant to use for residue i (indexed starting at 0).  If
        an element is None, the standard rules will be followed to select a
        variant for that residue.
    platform : Platform=None
        the Platform to use when computing the hydrogen atom positions.  If
        this is None, the default Platform will be used.

    Returns
    -------
    list
        a list of what variant was actually selected for each residue, in the
        same format as the variants parameter

    Raises
    ------
    ValueError
        If the variants list has the wrong length, or if a selected variant
        is not defined for its residue type.

    Notes
    -----
    This function does not use a pH specification.  The argument is kept for
    compatibility reasons.
    """
    # The pH argument is accepted but ignored; tell the caller so.
    if pH is not None:
        print("Ignored pH argument provided for constant-pH residues.")

    # Check the list of variants.
    residues = list(self.topology.residues())
    if variants is not None:
        if len(variants) != len(residues):
            raise ValueError(
                "The length of the variants list must equal the number of residues"
            )
    else:
        variants = [None] * len(residues)
    actualVariants = [None] * len(residues)

    # Load the residue specifications.

    if not Modeller._hasLoadedStandardHydrogens:
        Modeller.loadHydrogenDefinitions(
            os.path.join(os.path.dirname(__file__), "data",
                         "hydrogens-amber10-constph.xml"))

    # Make a list of atoms bonded to each atom.

    bonded = {}
    for atom in self.topology.atoms():
        bonded[atom] = []
    for atom1, atom2 in self.topology.bonds():
        bonded[atom1].append(atom2)
        bonded[atom2].append(atom1)

    # Define a function that decides whether a set of atoms form a hydrogen bond, using fairly tolerant criteria.
    # NOTE(review): this helper is not called anywhere in the visible body.

    def isHbond(d, h, a):
        if norm(d - a) > 0.35 * nanometer:
            return False
        deltaDH = h - d
        deltaHA = a - h
        deltaDH /= norm(deltaDH)
        deltaHA /= norm(deltaHA)
        return acos(dot(deltaDH, deltaHA)) < 50 * degree

    # Loop over residues.

    newTopology = Topology()
    newTopology.setPeriodicBoxVectors(
        self.topology.getPeriodicBoxVectors())
    newAtoms = {}
    newPositions = [] * nanometer
    newIndices = []
    acceptors = [
        atom for atom in self.topology.atoms()
        if atom.element in (elem.oxygen, elem.nitrogen)
    ]
    for chain in self.topology.chains():
        newChain = newTopology.addChain(chain.id)
        for residue in chain.residues():
            newResidue = newTopology.addResidue(residue.name, newChain,
                                                residue.id)
            isNTerminal = residue == chain._residues[0]
            isCTerminal = residue == chain._residues[-1]
            if residue.name in Modeller._residueHydrogens:
                # Add hydrogens.  First select which variant to use.

                spec = Modeller._residueHydrogens[residue.name]
                variant = variants[residue.index]
                if variant is None:
                    if residue.name == "CYS":
                        # If this is part of a disulfide, use CYX.

                        sulfur = [
                            atom for atom in residue.atoms()
                            if atom.element == elem.sulfur
                        ]
                        if len(sulfur) == 1 and any(
                            (atom.residue != residue
                             for atom in bonded[sulfur[0]])):
                            variant = "CYX"
                    # Titratable residues default to their constant-pH form.
                    if residue.name == "HIS":
                        variant = "HIP"
                    if residue.name == "GLU":
                        variant = "GL4"
                    if residue.name == "ASP":
                        variant = "AS4"
                if variant is not None and variant not in spec.variants:
                    raise ValueError("Illegal variant for %s residue: %s" %
                                     (residue.name, variant))
                actualVariants[residue.index] = variant
                # Hydrogens are only removed when the caller explicitly
                # requested a variant for this residue.
                removeExtraHydrogens = variants[residue.index] is not None

                # Make a list of hydrogens that should be present in the residue.

                parents = [
                    atom for atom in residue.atoms()
                    if atom.element != elem.hydrogen
                ]
                parentNames = [atom.name for atom in parents]
                hydrogens = [
                    h for h in spec.hydrogens
                    if (variant is None) or (h.variants is None) or (
                        h.variants is not None and variant in h.variants)
                ]
                hydrogens = [
                    h for h in hydrogens
                    if h.terminal is None or (
                        isNTerminal and h.terminal == "N") or (
                            isCTerminal and h.terminal == "C")
                ]
                hydrogens = [
                    h for h in hydrogens if h.parent in parentNames
                ]

                # Loop over atoms in the residue, adding them to the new topology along with required hydrogens.

                for parent in residue.atoms():
                    # Check whether this is a hydrogen that should be removed.

                    if (removeExtraHydrogens
                            and parent.element == elem.hydrogen
                            and not any(parent.name == h.name
                                        for h in hydrogens)):
                        continue

                    # Add the atom.

                    newAtom = newTopology.addAtom(parent.name,
                                                  parent.element,
                                                  newResidue)
                    newAtoms[parent] = newAtom
                    newPositions.append(
                        deepcopy(self.positions[parent.index]))
                    if parent in parents:
                        # Match expected hydrogens with existing ones and find which ones need to be added.

                        existing = [
                            atom for atom in bonded[parent]
                            if atom.element == elem.hydrogen
                        ]
                        expected = [
                            h for h in hydrogens if h.parent == parent.name
                        ]
                        if len(existing) < len(expected):
                            # Try to match up existing hydrogens to expected ones.

                            matches = []
                            for e in existing:
                                match = [
                                    h for h in expected if h.name == e.name
                                ]
                                if len(match) > 0:
                                    matches.append(match[0])
                                    expected.remove(match[0])
                                else:
                                    matches.append(None)

                            # If any hydrogens couldn't be matched by name, just match them arbitrarily.

                            for i in range(len(matches)):
                                if matches[i] is None:
                                    matches[i] = expected[-1]
                                    expected.remove(expected[-1])

                            # Add the missing hydrogens.  Whatever is left in
                            # 'expected' has no existing counterpart.

                            for h in expected:
                                newH = newTopology.addAtom(
                                    h.name, elem.hydrogen, newResidue)
                                newIndices.append(newH.index)
                                delta = Vec3(0, 0, 0) * nanometer
                                if len(bonded[parent]) > 0:
                                    # Point away from the parent's bonded atoms.
                                    for other in bonded[parent]:
                                        delta += (
                                            self.positions[parent.index] -
                                            self.positions[other.index])
                                else:
                                    delta = (Vec3(
                                        random.random(),
                                        random.random(),
                                        random.random(),
                                    ) * nanometer)
                                # Scale to 0.1 nm, add a random perturbation,
                                # then rescale to 0.1 nm again.
                                delta *= 0.1 * nanometer / norm(delta)
                                delta += (0.05 * Vec3(
                                    random.random(),
                                    random.random(),
                                    random.random(),
                                ) * nanometer)
                                delta *= 0.1 * nanometer / norm(delta)
                                newPositions.append(
                                    self.positions[parent.index] + delta)
                                newTopology.addBond(newAtom, newH)
            else:
                # Just copy over the residue.

                for atom in residue.atoms():
                    newAtom = newTopology.addAtom(atom.name, atom.element,
                                                  newResidue)
                    newAtoms[atom] = newAtom
                    newPositions.append(
                        deepcopy(self.positions[atom.index]))
    # Copy over the bonds whose atoms both survived into the new topology.
    for bond in self.topology.bonds():
        if bond[0] in newAtoms and bond[1] in newAtoms:
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])

    # The hydrogens were added at random positions.  Now perform an energy minimization to fix them up.

    if forcefield is not None:
        # Use the ForceField the user specified.

        system = forcefield.createSystem(newTopology, rigidWater=False)
        atoms = list(newTopology.atoms())
        for i in range(system.getNumParticles()):
            if atoms[i].element != elem.hydrogen:
                # This is a heavy atom, so make it immobile.
                system.setParticleMass(i, 0)
    else:
        # Create a System that restrains the distance of each hydrogen from its parent atom
        # and causes hydrogens to spread out evenly.

        system = System()
        nonbonded = CustomNonbondedForce("100/((r/0.1)^4+1)")
        bonds = HarmonicBondForce()
        angles = HarmonicAngleForce()
        system.addForce(nonbonded)
        system.addForce(bonds)
        system.addForce(angles)
        bondedTo = []
        for atom in newTopology.atoms():
            nonbonded.addParticle([])
            # Heavy atoms get zero mass; hydrogens get mass 1.0.
            if atom.element != elem.hydrogen:
                system.addParticle(0.0)
            else:
                system.addParticle(1.0)
            bondedTo.append([])
        for atom1, atom2 in newTopology.bonds():
            # Only bonds involving a hydrogen get a harmonic restraint, but
            # every bond is recorded in bondedTo for the angle terms below.
            if atom1.element == elem.hydrogen or atom2.element == elem.hydrogen:
                bonds.addBond(atom1.index, atom2.index, 0.1, 100_000.0)
            bondedTo[atom1.index].append(atom2)
            bondedTo[atom2.index].append(atom1)
        for residue in newTopology.residues():
            if residue.name == "HOH":
                # Add an angle term to make the water geometry correct.

                atoms = list(residue.atoms())
                oindex = [
                    i for i in range(len(atoms))
                    if atoms[i].element == elem.oxygen
                ]
                if len(atoms) == 3 and len(oindex) == 1:
                    hindex = list(set([0, 1, 2]) - set(oindex))
                    angles.addAngle(
                        atoms[hindex[0]].index,
                        atoms[oindex[0]].index,
                        atoms[hindex[1]].index,
                        1.824,
                        836.8,
                    )
            else:
                # Add angle terms for any hydroxyls.

                for atom in residue.atoms():
                    index = atom.index
                    if (atom.element == elem.oxygen
                            and len(bondedTo[index]) == 2
                            and elem.hydrogen in (a.element
                                                  for a in bondedTo[index])):
                        angles.addAngle(
                            bondedTo[index][0].index,
                            index,
                            bondedTo[index][1].index,
                            1.894,
                            460.24,
                        )

    if platform is None:
        context = Context(system, VerletIntegrator(0.0))
    else:
        context = Context(system, VerletIntegrator(0.0), platform)
    context.setPositions(newPositions)
    LocalEnergyMinimizer.minimize(context, 1.0, 50)
    self.topology = newTopology
    self.positions = context.getState(getPositions=True).getPositions()
    del context
    return actualVariants
def writeModel(self, positions, unitCellDimensions=None, periodicBoxVectors=None):
    """Write out a model to the DCD file.

    The periodic box can be specified either by the unit cell dimensions
    (for a rectangular box), or the full set of box vectors (for an
    arbitrary triclinic box).  If neither is specified, the box vectors
    specified in the Topology will be used.  Regardless of the value
    specified, no dimensions will be written if the Topology does not
    represent a periodic system.

    Parameters
    ----------
    positions : list
        The list of atomic positions to write
    unitCellDimensions : Vec3=None
        The dimensions of the crystallographic unit cell.
    periodicBoxVectors : tuple of Vec3=None
        The vectors defining the periodic box.

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms in the
        Topology, or if any position is NaN or infinite.
    """
    if len(list(self._topology.atoms())) != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    file = self._file

    # Update the header: the model count lives at byte offset 8 and the
    # step number of the last model at byte offset 20.
    self._modelCount += 1
    file.seek(8, os.SEEK_SET)
    file.write(struct.pack('<i', self._modelCount))
    file.seek(20, os.SEEK_SET)
    file.write(
        struct.pack('<i', self._firstStep + self._modelCount * self._interval))

    # Write the data.
    file.seek(0, os.SEEK_END)
    boxVectors = self._topology.getPeriodicBoxVectors()
    if boxVectors is not None:
        # Explicit arguments override the Topology's box, with the full
        # box vectors taking precedence over the rectangular dimensions.
        if periodicBoxVectors is not None:
            boxVectors = periodicBoxVectors
        elif unitCellDimensions is not None:
            if is_quantity(unitCellDimensions):
                unitCellDimensions = unitCellDimensions.value_in_unit(
                    nanometers)
            boxVectors = (Vec3(unitCellDimensions[0], 0, 0),
                          Vec3(0, unitCellDimensions[1], 0),
                          Vec3(0, 0, unitCellDimensions[2])) * nanometers
        (a_length, b_length, c_length, alpha, beta,
         gamma) = computeLengthsAndAngles(boxVectors)

        # The file stores lengths in angstroms.  Accept either Quantities or
        # bare numbers from computeLengthsAndAngles.
        # NOTE(review): bare numbers are assumed to be in nanometers -- confirm
        # against computeLengthsAndAngles.
        lengths = []
        for length in (a_length, b_length, c_length):
            if is_quantity(length):
                lengths.append(length.value_in_unit(angstroms))
            else:
                lengths.append(length * 10.0)
        a_length, b_length, c_length = lengths

        # sin(pi/2 - x) == cos(x): the angles are stored as their cosines.
        angle1 = math.sin(math.pi / 2 - gamma)
        angle2 = math.sin(math.pi / 2 - beta)
        angle3 = math.sin(math.pi / 2 - alpha)
        # Six doubles (48 bytes) framed by their byte count on both sides.
        file.write(
            struct.pack('<i6di', 48, a_length, angle1, b_length, angle2,
                        angle3, c_length, 48))

    # One record per Cartesian component (x, then y, then z), each framed by
    # its byte count.  Positions are in nanometers here, so 10* gives angstroms.
    length = struct.pack('<i', 4 * len(positions))
    for i in range(3):
        file.write(length)
        data = array.array('f', (10 * x[i] for x in positions))
        data.tofile(file)
        file.write(length)

    # Push the frame to disk when the file object supports it.
    try:
        file.flush()
    except AttributeError:
        pass
def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
    """Create a new Topology in which missing atoms have been added.

    Parameters
    ----------
    heavyAtomsOnly : bool
        If True, existing atoms that are hydrogens or have no element are
        not copied into the new Topology.
    omitUnknownMolecules : bool
        If True, chains in which no residue name appears in
        ``self.templates`` are skipped entirely.

    Returns
    -------
    tuple
        ``(newTopology, newPositions, newAtoms, existingAtomMap)`` where
        ``newAtoms`` lists the atoms created here (missing atoms and added
        OXT atoms; the same list is also handed to
        ``_addMissingResiduesToChain``) and ``existingAtomMap`` maps each
        copied original atom to its counterpart in the new Topology.
    """
    newTopology = app.Topology()
    newPositions = []*unit.nanometer
    newAtoms = []
    existingAtomMap = {}
    for chain in self.topology.chains():
        chainResidues = list(chain.residues())
        if omitUnknownMolecules and not any(residue.name in self.templates for residue in chainResidues):
            continue
        newChain = newTopology.addChain()
        for indexInChain, residue in enumerate(chainResidues):

            # Insert missing residues here.

            if (chain.index, indexInChain) in self.missingResidues:
                insertHere = self.missingResidues[(chain.index, indexInChain)]
                endPosition = self._computeResidueCenter(residue)
                if indexInChain > 0:
                    startPosition = self._computeResidueCenter(chainResidues[indexInChain-1])
                else:
                    # No preceding residue: extend away from the centroid,
                    # 0.5 nm per inserted residue.
                    outward = endPosition-self.centroid
                    outwardLength = unit.norm(outward)
                    if outwardLength > 0*unit.nanometer:
                        outward *= len(insertHere)*0.5*unit.nanometer/outwardLength
                    startPosition = endPosition+outward
                self._addMissingResiduesToChain(newChain, insertHere, startPosition, endPosition, residue, newAtoms, newPositions)

            # Create the new residue and add existing heavy atoms.

            newResidue = newTopology.addResidue(residue.name, newChain)
            for atom in residue.atoms():
                if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen):
                    if atom.name == 'OXT' and (chain.index, indexInChain+1) in self.missingResidues:
                        continue # Remove terminal oxygen, since we'll add more residues after this one
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    existingAtomMap[atom] = newAtom
                    newPositions.append(self.positions[atom.index])
            if residue in self.missingAtoms:

                # Find corresponding atoms in the residue and the template.

                template = self.templates[residue.name]
                atomPositions = {atom.name: self.positions[atom.index] for atom in residue.atoms()}
                points1 = []
                points2 = []
                for atom in template.topology.atoms():
                    if atom.name in atomPositions:
                        points1.append(atomPositions[atom.name].value_in_unit(unit.nanometer))
                        points2.append(template.positions[atom.index].value_in_unit(unit.nanometer))

                # Compute the optimal transform to overlay them.

                (translate2, rotate, translate1) = _overlayPoints(points1, points2)

                # Add the missing atoms, mapping each template position into
                # the residue's frame with the transform found above.

                for atom in self.missingAtoms[residue]:
                    newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
                    newAtoms.append(newAtom)
                    templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
                    newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer)
            if residue in self.missingTerminals:
                terminalsToAdd = self.missingTerminals[residue]
            else:
                terminalsToAdd = None

            # If this is the end of the chain, add any missing residues that come after it.

            if residue == chainResidues[-1] and (chain.index, indexInChain+1) in self.missingResidues:
                insertHere = self.missingResidues[(chain.index, indexInChain+1)]
                if len(insertHere) > 0:
                    startPosition = self._computeResidueCenter(residue)
                    outward = startPosition-self.centroid
                    outwardLength = unit.norm(outward)
                    if outwardLength > 0*unit.nanometer:
                        outward *= len(insertHere)*0.5*unit.nanometer/outwardLength
                    endPosition = startPosition+outward
                    self._addMissingResiduesToChain(newChain, insertHere, startPosition, endPosition, residue, newAtoms, newPositions)
                    # The chain now ends with the last inserted residue, so
                    # that is the one that may need a terminal oxygen.
                    newResidue = list(newChain.residues())[-1]
                    if newResidue.name in proteinResidues:
                        terminalsToAdd = ['OXT']
                    else:
                        terminalsToAdd = None

            # If a terminal OXT is missing, add it.

            if terminalsToAdd is not None:
                atomPositions = {atom.name: newPositions[atom.index].value_in_unit(unit.nanometer) for atom in newResidue.atoms()}
                if 'OXT' in terminalsToAdd:
                    newAtom = newTopology.addAtom('OXT', oxygen, newResidue)
                    newAtoms.append(newAtom)
                    d_ca_o = atomPositions['O']-atomPositions['CA']
                    d_ca_c = atomPositions['C']-atomPositions['CA']
                    d_ca_c /= unit.sqrt(unit.dot(d_ca_c, d_ca_c))
                    # v is the part of CA->O perpendicular to the CA->C direction.
                    v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o)
                    newPositions.append((atomPositions['O']+2*v)*unit.nanometer)
    newTopology.setUnitCellDimensions(self.topology.getUnitCellDimensions())
    newTopology.createStandardBonds()
    newTopology.createDisulfideBonds(newPositions)

    # Return the results.

    return (newTopology, newPositions, newAtoms, existingAtomMap)
def writeModel(topology, positions, file=sys.stdout, modelIndex=None, keepIds=False):
    """Write out a model to a PDB file.

    Parameters
    ----------
    topology : Topology
        The Topology defining the model to write
    positions : list
        The list of atomic positions to write
    file : file=stdout
        A file to write the model to
    modelIndex : int=None
        If not None, the model will be surrounded by MODEL/ENDMDL records
        with this index
    keepIds : bool=False
        If True, keep the residue and chain IDs specified in the Topology
        rather than generating new ones.  Warning: It is up to the caller to
        make sure these are valid IDs that satisfy the requirements of the
        PDB format.  Otherwise, the output file will be invalid.

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms, or if
        any position is NaN or infinite.
    AssertionError
        If a formatted ATOM record is not exactly 80 characters wide.
    """
    if len(list(topology.atoms())) != len(positions):
        raise ValueError(
            'The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(angstroms)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    atomIndex = 1
    posIndex = 0
    if modelIndex is not None:
        # Model serial number occupies columns 11-14.
        print("MODEL     %4d" % modelIndex, file=file)
    for (chainIndex, chain) in enumerate(topology.chains()):
        if keepIds:
            chainName = chain.id
        else:
            # Generated chain names cycle through A-Z.
            chainName = chr(ord('A') + chainIndex % 26)
        residues = list(chain.residues())
        for (resIndex, res) in enumerate(residues):
            # The residue name field is three characters wide.
            resName = res.name[:3]
            if keepIds:
                resId = res.id
            else:
                resId = "%4d" % ((resIndex + 1) % 10000)
            for atom in res.atoms():
                if len(atom.name) < 4 and atom.name[:1].isalpha() and (
                        atom.element is None or len(atom.element.symbol) < 2):
                    # Short names for one-letter elements start in the
                    # second column of the atom name field.
                    atomName = ' ' + atom.name
                elif len(atom.name) > 4:
                    atomName = atom.name[:4]
                else:
                    atomName = atom.name
                coords = positions[posIndex]
                if atom.element is not None:
                    symbol = atom.element.symbol
                else:
                    symbol = ' '
                # Fixed-column ATOM record: serial 7-11, name 13-16,
                # resName 18-20, chain 22, resSeq 23-26, x/y/z 31-54,
                # occupancy 55-60, tempFactor 61-66, element 77-78.
                line = "ATOM  %5d %-4s %3s %s%4s    %s%s%s  1.00  0.00          %2s  " % (
                    atomIndex % 100000, atomName, resName, chainName, resId,
                    _format_83(coords[0]), _format_83(coords[1]),
                    _format_83(coords[2]), symbol)
                assert len(line) == 80, 'Fixed width overflow detected'
                print(line, file=file)
                posIndex += 1
                atomIndex += 1
            if resIndex == len(residues) - 1:
                # Terminate the chain.  The serial is wrapped like the ATOM
                # serial so that it cannot overflow its five columns.
                print("TER   %5d      %3s %s%4s" %
                      (atomIndex % 100000, resName, chainName, resId),
                      file=file)
                atomIndex += 1
    if modelIndex is not None:
        print("ENDMDL", file=file)
def addHydrogens(self, forcefield=None, pH=None, variants=None, platform=None):
    """Add missing hydrogens to the model.

    This function automatically changes compatible residues into their
    constant-pH variant if no variant is specified:

    Aspartic acid:
        AS4: Form with 2 hydrogens on each one of the delta oxygens
        (syn, anti).  It has 5 titration states.

        Alternative:
        AS2: Has 2 hydrogens (syn, anti) on one of the delta oxygens.
        It has 3 titration states.

    Cysteine:
        CYS: Neutral form with a hydrogen on the sulfur
        CYX: No hydrogen on the sulfur (either negatively charged, or part
        of a disulfide bond)

    Glutamic acid:
        GL4: Form with 2 hydrogens on each one of the epsilon oxygens
        (syn, anti).  It has 5 titration states.

    Histidine:
        HIP: Positively charged form with hydrogens on both ND1 and NE2.
        It has 3 titration states.

    The variant to use for each residue is determined by the following rules:

    1. Any Cysteine that participates in a disulfide bond uses the CYX
       variant regardless of pH.
    2. Other residues are all set to the maximally protonated state, which
       can be updated using a proton drive.

    You can override these rules by explicitly specifying a variant for any
    residue.  To do that, provide a list for the 'variants' parameter, and
    set the corresponding element to the name of the variant to use.

    A special case is when the model already contains a hydrogen that should
    not be present in the desired variant.  If you explicitly specify a
    variant using the 'variants' parameter, the residue will be modified to
    match the desired variant, removing hydrogens if necessary.  On the other
    hand, for residues whose variant is selected automatically, this function
    will only add hydrogens.  It will never remove ones that are already
    present in the model.

    Definitions for standard amino acids and nucleotides are built in.  You
    can call loadHydrogenDefinitions() to load additional definitions for
    other residue types.

    Parameters
    ----------
    forcefield : ForceField=None
        the ForceField to use for determining the positions of hydrogens.
        If this is None, positions will be picked which are generally
        reasonable but not optimized for any particular ForceField.
    pH : None
        Kept for compatibility reasons.  Has no effect.
    variants : list=None
        an optional list of variants to use.  If this is specified, its
        length must equal the number of residues in the model.  variants[i]
        is the name of the variant to use for residue i (indexed starting
        at 0).  If an element is None, the standard rules will be followed
        to select a variant for that residue.
    platform : Platform=None
        the Platform to use when computing the hydrogen atom positions.  If
        this is None, the default Platform will be used.

    Returns
    -------
    list
        a list of what variant was actually selected for each residue, in
        the same format as the variants parameter

    Raises
    ------
    ValueError
        If the variants list has the wrong length, or names a variant that
        is not defined for the corresponding residue.

    Notes
    -----
    This function does not use a pH specification.  The argument is kept
    for compatibility reasons.

    On return, self.topology and self.positions have been replaced by the
    new Topology and the minimized positions.
    """
    # Check the list of variants.
    if pH is not None:
        print("Ignored pH argument provided for constant-pH residues.")
    residues = list(self.topology.residues())
    if variants is not None:
        if len(variants) != len(residues):
            raise ValueError(
                "The length of the variants list must equal the number of residues"
            )
    else:
        variants = [None] * len(residues)
    actualVariants = [None] * len(residues)

    # Load the residue specifications.
    if not Modeller._hasLoadedStandardHydrogens:
        Modeller.loadHydrogenDefinitions(
            os.path.join(
                os.path.dirname(__file__), "data", "hydrogens-amber10-constph.xml"
            )
        )

    # Make a list of atoms bonded to each atom.
    bonded = {}
    for atom in self.topology.atoms():
        bonded[atom] = []
    for atom1, atom2 in self.topology.bonds():
        bonded[atom1].append(atom2)
        bonded[atom2].append(atom1)

    # Loop over residues.
    newTopology = Topology()
    newTopology.setPeriodicBoxVectors(self.topology.getPeriodicBoxVectors())
    newAtoms = {}
    newPositions = [] * nanometer
    for chain in self.topology.chains():
        newChain = newTopology.addChain(chain.id)
        for residue in chain.residues():
            newResidue = newTopology.addResidue(residue.name, newChain, residue.id)
            isNTerminal = residue == chain._residues[0]
            isCTerminal = residue == chain._residues[-1]
            if residue.name in Modeller._residueHydrogens:
                # Add hydrogens.  First select which variant to use.
                spec = Modeller._residueHydrogens[residue.name]
                variant = variants[residue.index]
                if variant is None:
                    if residue.name == "CYS":
                        # If this is part of a disulfide, use CYX.
                        sulfur = [
                            atom
                            for atom in residue.atoms()
                            if atom.element == elem.sulfur
                        ]
                        if len(sulfur) == 1 and any(
                            (atom.residue != residue for atom in bonded[sulfur[0]])
                        ):
                            variant = "CYX"
                    if residue.name == "HIS":
                        variant = "HIP"
                    if residue.name == "GLU":
                        variant = "GL4"
                    if residue.name == "ASP":
                        variant = "AS4"
                if variant is not None and variant not in spec.variants:
                    raise ValueError(
                        "Illegal variant for %s residue: %s" % (residue.name, variant)
                    )
                actualVariants[residue.index] = variant
                # Only strip hydrogens when the caller chose the variant.
                removeExtraHydrogens = variants[residue.index] is not None

                # Make a list of hydrogens that should be present in the residue.
                parents = [
                    atom for atom in residue.atoms() if atom.element != elem.hydrogen
                ]
                parentNames = [atom.name for atom in parents]
                hydrogens = [
                    h
                    for h in spec.hydrogens
                    if (variant is None)
                    or (h.variants is None)
                    or (h.variants is not None and variant in h.variants)
                ]
                hydrogens = [
                    h
                    for h in hydrogens
                    if h.terminal is None
                    or (isNTerminal and h.terminal == "N")
                    or (isCTerminal and h.terminal == "C")
                ]
                hydrogens = [h for h in hydrogens if h.parent in parentNames]

                # Loop over atoms in the residue, adding them to the new topology
                # along with required hydrogens.
                for parent in residue.atoms():
                    # Check whether this is a hydrogen that should be removed.
                    if (
                        removeExtraHydrogens
                        and parent.element == elem.hydrogen
                        and not any(parent.name == h.name for h in hydrogens)
                    ):
                        continue

                    # Add the atom.
                    newAtom = newTopology.addAtom(
                        parent.name, parent.element, newResidue
                    )
                    newAtoms[parent] = newAtom
                    newPositions.append(deepcopy(self.positions[parent.index]))
                    if parent in parents:
                        # Match expected hydrogens with existing ones and find
                        # which ones need to be added.
                        existing = [
                            atom
                            for atom in bonded[parent]
                            if atom.element == elem.hydrogen
                        ]
                        expected = [h for h in hydrogens if h.parent == parent.name]
                        if len(existing) < len(expected):
                            # Try to match up existing hydrogens to expected ones.
                            # Every match is removed from `expected`, so what is
                            # left afterwards is exactly what must be added.
                            matches = []
                            for e in existing:
                                match = [h for h in expected if h.name == e.name]
                                if len(match) > 0:
                                    matches.append(match[0])
                                    expected.remove(match[0])
                                else:
                                    matches.append(None)

                            # If any hydrogens couldn't be matched by name, just
                            # match them arbitrarily.
                            for i in range(len(matches)):
                                if matches[i] is None:
                                    matches[i] = expected[-1]
                                    expected.remove(expected[-1])

                            # Add the missing hydrogens.
                            for h in expected:
                                newH = newTopology.addAtom(
                                    h.name, elem.hydrogen, newResidue
                                )
                                # Start by pointing away from the atoms already
                                # bonded to the parent, or in a random direction
                                # if there are none.
                                delta = Vec3(0, 0, 0) * nanometer
                                if len(bonded[parent]) > 0:
                                    for other in bonded[parent]:
                                        delta += (
                                            self.positions[parent.index]
                                            - self.positions[other.index]
                                        )
                                else:
                                    delta = (
                                        Vec3(
                                            random.random(),
                                            random.random(),
                                            random.random(),
                                        )
                                        * nanometer
                                    )
                                # Scale to 0.1 nm, add a random offset, then
                                # rescale to 0.1 nm again.
                                delta *= 0.1 * nanometer / norm(delta)
                                delta += (
                                    0.05
                                    * Vec3(
                                        random.random(),
                                        random.random(),
                                        random.random(),
                                    )
                                    * nanometer
                                )
                                delta *= 0.1 * nanometer / norm(delta)
                                newPositions.append(
                                    self.positions[parent.index] + delta
                                )
                                newTopology.addBond(newAtom, newH)
            else:
                # Just copy over the residue.
                for atom in residue.atoms():
                    newAtom = newTopology.addAtom(
                        atom.name, atom.element, newResidue
                    )
                    newAtoms[atom] = newAtom
                    newPositions.append(deepcopy(self.positions[atom.index]))
    for bond in self.topology.bonds():
        if bond[0] in newAtoms and bond[1] in newAtoms:
            newTopology.addBond(newAtoms[bond[0]], newAtoms[bond[1]])

    # The hydrogens were added at random positions.  Now perform an energy
    # minimization to fix them up.
    if forcefield is not None:
        # Use the ForceField the user specified.
        system = forcefield.createSystem(newTopology, rigidWater=False)
        atoms = list(newTopology.atoms())
        for i in range(system.getNumParticles()):
            if atoms[i].element != elem.hydrogen:
                # This is a heavy atom, so make it immobile.
                system.setParticleMass(i, 0)
    else:
        # Create a System that restrains the distance of each hydrogen from its
        # parent atom and causes hydrogens to spread out evenly.
        system = System()
        nonbonded = CustomNonbondedForce("100/((r/0.1)^4+1)")
        bonds = HarmonicBondForce()
        angles = HarmonicAngleForce()
        system.addForce(nonbonded)
        system.addForce(bonds)
        system.addForce(angles)
        bondedTo = []
        for atom in newTopology.atoms():
            nonbonded.addParticle([])
            # Heavy atoms get mass 0, hydrogens mass 1.
            if atom.element != elem.hydrogen:
                system.addParticle(0.0)
            else:
                system.addParticle(1.0)
            bondedTo.append([])
        for atom1, atom2 in newTopology.bonds():
            if atom1.element == elem.hydrogen or atom2.element == elem.hydrogen:
                bonds.addBond(atom1.index, atom2.index, 0.1, 100_000.0)
            bondedTo[atom1.index].append(atom2)
            bondedTo[atom2.index].append(atom1)
        for residue in newTopology.residues():
            if residue.name == "HOH":
                # Add an angle term to make the water geometry correct.
                atoms = list(residue.atoms())
                oindex = [
                    i for i in range(len(atoms)) if atoms[i].element == elem.oxygen
                ]
                if len(atoms) == 3 and len(oindex) == 1:
                    hindex = list(set([0, 1, 2]) - set(oindex))
                    angles.addAngle(
                        atoms[hindex[0]].index,
                        atoms[oindex[0]].index,
                        atoms[hindex[1]].index,
                        1.824,
                        836.8,
                    )
            else:
                # Add angle terms for any hydroxyls.
                for atom in residue.atoms():
                    index = atom.index
                    if (
                        atom.element == elem.oxygen
                        and len(bondedTo[index]) == 2
                        and elem.hydrogen in (a.element for a in bondedTo[index])
                    ):
                        angles.addAngle(
                            bondedTo[index][0].index,
                            index,
                            bondedTo[index][1].index,
                            1.894,
                            460.24,
                        )

    if platform is None:
        context = Context(system, VerletIntegrator(0.0))
    else:
        context = Context(system, VerletIntegrator(0.0), platform)
    context.setPositions(newPositions)
    LocalEnergyMinimizer.minimize(context, 1.0, 50)
    self.topology = newTopology
    self.positions = context.getState(getPositions=True).getPositions()
    del context
    return actualVariants
def writeModel(topology, positions, file=sys.stdout, modelIndex=None, keepIds=False, extraParticleIdentifier=' '):
    """Write out a model to a PDB file.

    Parameters
    ----------
    topology : Topology
        The Topology defining the model to write
    positions : list
        The list of atomic positions to write
    file : file=stdout
        A file to write the model to
    modelIndex : int=None
        If not None, the model will be surrounded by MODEL/ENDMDL records
        with this index
    keepIds : bool=False
        If True, keep the residue and chain IDs specified in the Topology
        rather than generating new ones.  Warning: It is up to the caller to
        make sure these are valid IDs that satisfy the requirements of the
        PDB format.  Otherwise, the output file will be invalid.
    extraParticleIdentifier : string=' '
        String to write in the element column of the ATOM records for atoms
        whose element is None (extra particles)

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms, or if
        any position is NaN or infinite.
    AssertionError
        If a formatted coordinate record is not exactly 80 characters wide.
    """
    if len(list(topology.atoms())) != len(positions):
        raise ValueError('The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(angstroms)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    # Everything that is not a standard residue, plus water, is written as a
    # HETATM record.  Work on a copy so the class-level list is not modified.
    nonHeterogens = PDBFile._standardResidues[:]
    nonHeterogens.remove('HOH')
    atomIndex = 1
    posIndex = 0
    if modelIndex is not None:
        # Model serial number occupies columns 11-14.
        print("MODEL     %4d" % modelIndex, file=file)
    for (chainIndex, chain) in enumerate(topology.chains()):
        if keepIds:
            chainName = chain.id
        else:
            # Generated chain names cycle through A-Z.
            chainName = chr(ord('A')+chainIndex%26)
        residues = list(chain.residues())
        for (resIndex, res) in enumerate(residues):
            # The residue name field is three characters wide.
            resName = res.name[:3]
            if keepIds:
                resId = res.id
            else:
                resId = "%4d" % ((resIndex+1)%10000)
            # Record names are six characters wide.
            if res.name in nonHeterogens:
                recordName = "ATOM  "
            else:
                recordName = "HETATM"
            for atom in res.atoms():
                if atom.element is not None:
                    symbol = atom.element.symbol
                else:
                    symbol = extraParticleIdentifier
                if len(atom.name) < 4 and atom.name[:1].isalpha() and len(symbol) < 2:
                    # Short names for one-letter elements start in the
                    # second column of the atom name field.
                    atomName = ' '+atom.name
                elif len(atom.name) > 4:
                    atomName = atom.name[:4]
                else:
                    atomName = atom.name
                coords = positions[posIndex]
                # Fixed-column record: serial 7-11, name 13-16, resName 18-20,
                # chain 22, resSeq 23-26, x/y/z 31-54, occupancy 55-60,
                # tempFactor 61-66, element 77-78.
                line = "%s%5d %-4s %3s %s%4s    %s%s%s  1.00  0.00          %2s  " % (
                    recordName, atomIndex%100000, atomName, resName, chainName, resId,
                    _format_83(coords[0]), _format_83(coords[1]), _format_83(coords[2]), symbol)
                assert len(line) == 80, 'Fixed width overflow detected'
                print(line, file=file)
                posIndex += 1
                atomIndex += 1
            if resIndex == len(residues)-1:
                # Terminate the chain.  The serial is wrapped like the ATOM
                # serial so that it cannot overflow its five columns.
                print("TER   %5d      %3s %s%4s" % (atomIndex%100000, resName, chainName, resId), file=file)
                atomIndex += 1
    if modelIndex is not None:
        print("ENDMDL", file=file)
def writeModel(self, positions, unitCellDimensions=None, periodicBoxVectors=None):
    """Write out a model to the DCD file.

    The periodic box can be specified either by the unit cell dimensions
    (for a rectangular box), or the full set of box vectors (for an
    arbitrary triclinic box).  If neither is specified, the box vectors
    specified in the Topology will be used.  Regardless of the value
    specified, no dimensions will be written if the Topology does not
    represent a periodic system.

    Parameters
    ----------
    positions : list
        The list of atomic positions to write
    unitCellDimensions : Vec3=None
        The dimensions of the crystallographic unit cell.
    periodicBoxVectors : tuple of Vec3=None
        The vectors defining the periodic box.

    Raises
    ------
    ValueError
        If the number of positions differs from the number of atoms in the
        Topology, or if any position is NaN or infinite.
    """
    if len(list(self._topology.atoms())) != len(positions):
        raise ValueError('The number of positions must match the number of atoms')
    if is_quantity(positions):
        positions = positions.value_in_unit(nanometers)
    if any(math.isnan(norm(pos)) for pos in positions):
        raise ValueError('Particle position is NaN')
    if any(math.isinf(norm(pos)) for pos in positions):
        raise ValueError('Particle position is infinite')
    file = self._file

    self._modelCount += 1
    # The largest value a signed 32 bit field can hold is (1<<31)-1, so the
    # guard must also fire when the step number is exactly 1<<31.
    if self._interval > 1 and self._firstStep+self._modelCount*self._interval >= 1<<31:
        # This will exceed the range of a 32 bit integer.  To avoid crashing or producing a corrupt file,
        # update the header to say the trajectory consisted of a smaller number of larger steps (so the
        # total trajectory length remains correct).
        self._firstStep //= self._interval
        self._dt *= self._interval
        self._interval = 1
        file.seek(0, os.SEEK_SET)
        file.write(struct.pack('<i4c9if', 84, b'C', b'O', b'R', b'D', 0, self._firstStep, self._interval, 0, 0, 0, 0, 0, 0, self._dt))

    # Update the header: the model count lives at byte offset 8 and the
    # step number of the last model at byte offset 20.

    file.seek(8, os.SEEK_SET)
    file.write(struct.pack('<i', self._modelCount))
    file.seek(20, os.SEEK_SET)
    file.write(struct.pack('<i', self._firstStep+self._modelCount*self._interval))

    # Write the data.

    file.seek(0, os.SEEK_END)
    boxVectors = self._topology.getPeriodicBoxVectors()
    if boxVectors is not None:
        # Explicit arguments override the Topology's box, with the full
        # box vectors taking precedence over the rectangular dimensions.
        if periodicBoxVectors is not None:
            boxVectors = periodicBoxVectors
        elif unitCellDimensions is not None:
            if is_quantity(unitCellDimensions):
                unitCellDimensions = unitCellDimensions.value_in_unit(nanometers)
            boxVectors = (Vec3(unitCellDimensions[0], 0, 0), Vec3(0, unitCellDimensions[1], 0), Vec3(0, 0, unitCellDimensions[2]))*nanometers
        (a_length, b_length, c_length, alpha, beta, gamma) = computeLengthsAndAngles(boxVectors)
        # computeLengthsAndAngles returns unitless nanometers, but need angstroms here.
        a_length = a_length * 10.
        b_length = b_length * 10.
        c_length = c_length * 10.
        # sin(pi/2 - x) == cos(x): the angles are stored as their cosines.
        angle1 = math.sin(math.pi/2-gamma)
        angle2 = math.sin(math.pi/2-beta)
        angle3 = math.sin(math.pi/2-alpha)
        # Six doubles (48 bytes) framed by their byte count on both sides.
        file.write(struct.pack('<i6di', 48, a_length, angle1, b_length, angle2, angle3, c_length, 48))

    # One record per Cartesian component (x, then y, then z), each framed by
    # its byte count.  Positions are in nanometers here, so 10* gives angstroms.
    length = struct.pack('<i', 4*len(positions))
    for i in range(3):
        file.write(length)
        data = array.array('f', (10*x[i] for x in positions))
        data.tofile(file)
        file.write(length)

    # Push the frame to disk when the file object supports it.
    try:
        file.flush()
    except AttributeError:
        pass