def _write_residue(dest, res):
    """
    Writes a residue to an open file handle

    Parameters
    ----------
    dest : file-like
        File object to write the residue information to
    res : :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
        The residue template (or template container) to write to the file

    Raises
    ------
    ValueError
        If the unit has a box whose three angles are not all equal

    Notes
    -----
    Bond indices in the connectivity table are written relative to the whole
    unit: each residue's template-local atom index is shifted by the number
    of atoms in the residues that precede it. The residueconnect table has
    exactly six columns, so at most four entries of ``connections`` are
    written per residue (after the head and tail columns).
    """
    if isinstance(res, ResidueTemplate):
        # Put it into a template container with the same name
        tmp = ResidueTemplateContainer(res.name)
        tmp.append(res)
        res = tmp
    dest.write('!entry.%s.unit.atoms table str name str type int typex '
               'int resx int flags int seq int elmnt dbl chg\n' % res.name)
    for i, r in enumerate(res):
        for atom in r:
            dest.write(' "%s" "%s" 0 %d 131072 %d %d %.6f\n' %
                       (atom.name, atom.type, i+1, atom.idx+1,
                        atom.atomic_number, atom.charge))
    dest.write('!entry.%s.unit.atomspertinfo table str pname str ptype '
               'int ptypex int pelmnt dbl pchg\n' % res.name)
    for r in res:
        for atom in r:
            dest.write(' "%s" "%s" 0 -1 0.0\n' % (atom.name, atom.type))
    dest.write('!entry.%s.unit.boundbox array dbl\n' % res.name)
    if res.box is None:
        dest.write((' -1.000000\n' + ' 0.0\n' * 4))
    else:
        dest.write(' 1.000000\n')
        # Only a single angle can be stored, so all three must agree
        if res.box[3] == res.box[4] == res.box[5]:
            dest.write(' %f\n' % res.box[3])
        else:
            raise ValueError('Cannot write boxes with different angles')
        dest.write(' %f\n' % res.box[0])
        dest.write(' %f\n' % res.box[1])
        dest.write(' %f\n' % res.box[2])
    dest.write('!entry.%s.unit.childsequence single int\n %d\n' %
               (res.name, len(res)+1))
    dest.write('!entry.%s.unit.connect array int\n' % res.name)
    if len(res) > 1:
        # No unit-level head/tail is written for multi-residue units
        dest.write(' 0\n 0\n')
    else:
        if res[0].head is not None:
            dest.write(' %d\n' % (res[0].head.idx + 1))
        else:
            dest.write(' 0\n')
        if res[0].tail is not None:
            dest.write(' %d\n' % (res[0].tail.idx + 1))
        else:
            dest.write(' 0\n')
    dest.write('!entry.%s.unit.connectivity table int atom1x int atom2x '
               'int flags\n' % res.name)
    # Atom indices are local to each residue template, so shift them by the
    # (1-based) position of the residue's first atom within the whole unit
    base = 1
    for r in res:
        for bond in r.bonds:
            dest.write(' %d %d 1\n' % (bond.atom1.idx+base,
                                       bond.atom2.idx+base))
        base += len(r)
    dest.write('!entry.%s.unit.hierarchy table str abovetype int '
               'abovex str belowtype int belowx\n' % res.name)
    c = 1
    for i, r in enumerate(res):
        dest.write(' "U" 0 "R" %d\n' % (i+1))
        for atom in r:
            dest.write(' "R" %d "A" %d\n' % (i+1, c))
            c += 1
    dest.write('!entry.%s.unit.name single str\n' % res.name)
    dest.write(' "%s"\n' % res.name)
    dest.write('!entry.%s.unit.positions table dbl x dbl y dbl z\n' % res.name)
    for r in res:
        for atom in r:
            dest.write(' %.6g %.6g %.6g\n' % (atom.xx, atom.xy, atom.xz))
    dest.write('!entry.%s.unit.residueconnect table int c1x int c2x '
               'int c3x int c4x int c5x int c6x\n' % res.name)
    for r in res:
        # Make the CONECT1 and 0 default to 1 so that the TREE gets set
        # correctly by tleap. Not used for anything else...
        conn = [1, 1, 0, 0, 0, 0]
        if r.head is not None:
            conn[0] = r.head.idx + 1
        if r.tail is not None:
            conn[1] = r.tail.idx + 1
        # Only columns c3x..c6x are available; anything past the fourth
        # extra connection cannot be represented and would overflow `conn`
        for i, at in enumerate(r.connections[:4]):
            conn[i+2] = at.idx + 1
        dest.write(' %d %d %d %d %d %d\n' % tuple(conn))
    dest.write('!entry.%s.unit.residues table str name int seq int '
               'childseq int startatomx str restype int imagingx\n' % res.name)
    c = 1
    for i, r in enumerate(res):
        if r.type is PROTEIN:
            typ = 'p'
        elif r.type is NUCLEIC:
            typ = 'n'
        elif r.type is SOLVENT:
            typ = 'w'
        elif r.type is UNKNOWN:
            typ = '?'
        else:
            warnings.warn('Unrecognized residue type %r' % r.type,
                          AmberWarning)
            typ = '?'
        dest.write(' "%s" %d %d %d "%s" %d\n' %
                   (r.name, i+1, 1+len(r), c, typ, _imaging_atom(r)))
        c += len(r)
    dest.write('!entry.%s.unit.residuesPdbSequenceNumber array int\n' %
               res.name)
    for i, r in enumerate(res):
        if len(res) == 1:
            dest.write(' 0\n')
        else:
            dest.write(' %d\n' % (i+1))
    dest.write('!entry.%s.unit.solventcap array dbl\n' % res.name)
    dest.write(' -1.000000\n' + ' 0.0\n' * 4)
    dest.write('!entry.%s.unit.velocities table dbl x dbl y dbl z\n' %
               res.name)
    for r in res:
        for atom in r:
            # Atoms without velocity attributes are written as stationary
            try:
                s = ' %g %g %g\n' % (atom.vx, atom.vy, atom.vz)
            except AttributeError:
                dest.write(' 0.0 0.0 0.0\n')
            else:
                dest.write(s)
def _parse_residue(fileobj, name):
    """
    Parses the residue information out of the OFF file assuming the file is
    pointed at the first line of an atoms table section of the OFF file

    Parameters
    ----------
    fileobj : file-like
        Assumed to be open for read. Lines are consumed from the atoms table
        through the end of the unit's velocities table
    name : str
        The name of the residue being processed right now

    Returns
    -------
    residue : :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
        The single template if the unit contains one residue, otherwise the
        container holding every residue of the unit

    Raises
    ------
    RuntimeError
        If a section header is missing, belongs to a different unit, the
        boundbox/connect entries are not numeric, or the file ends in the
        middle of a table
    """
    def check_header(regex, header, errmsg):
        # Make sure `header` opens the expected section of *this* unit
        rematch = regex.match(header)
        if not rematch:
            raise RuntimeError(errmsg)
        if rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))

    def at_next_section(text):
        # readline() returns '' only at EOF; indexing it would raise an
        # unhelpful IndexError, so report the truncated file explicitly
        if not text:
            raise RuntimeError('Unexpected EOF in Amber OFF library')
        return text[0] == '!'

    container = ResidueTemplateContainer(name)
    nres = 1
    templ = ResidueTemplate(name)
    line = fileobj.readline()
    while not at_next_section(line):
        nam, typ, typx, resx, flags, seq, elmnt, chg = line.split()
        nam = _strip_enveloping_quotes(nam)
        typ = _strip_enveloping_quotes(typ)
        # typx, flags and seq are not used afterwards, but converting them
        # still rejects malformed rows
        typx = int(typx)
        resx = int(resx)
        flags = int(flags)
        seq = int(seq)
        elmnt = int(elmnt)
        chg = float(chg)
        atom = Atom(atomic_number=elmnt, type=typ, name=nam, charge=chg)
        if resx == nres + 1:
            # First atom of the next residue: close out the current template
            container.append(templ)
            nres += 1
            templ = ResidueTemplate(name)
        templ.add_atom(atom)
        line = fileobj.readline()
    container.append(templ)
    if nres > 1:
        # 0-based index of the first atom of every residue within the unit
        start_atoms = []
        runsum = 0
        for res in container:
            start_atoms.append(runsum)
            runsum += len(res)
    # Make sure we get the next section
    check_header(AmberOFFLibrary._sec2re, line,
                 'Expected pertinfo table not found')
    line = fileobj.readline()
    while not at_next_section(line):
        # Not used, just skip
        # TODO sanity check
        line = fileobj.readline()
    check_header(AmberOFFLibrary._sec3re, line,
                 'Expected boundbox table not found')
    # Only 5 lines
    try:
        hasbox = float(fileobj.readline().strip())
        angle = float(fileobj.readline().strip())
        a = float(fileobj.readline().strip())
        b = float(fileobj.readline().strip())
        c = float(fileobj.readline().strip())
    except ValueError:
        raise RuntimeError('Error processing boundbox table entries')
    else:
        if hasbox > 0:
            angle *= RAD_TO_DEG
            container.box = [a, b, c, angle, angle, angle]
    # Get the child sequence entry
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec4re, line,
                 'Expected childsequence table not found')
    n = int(fileobj.readline().strip())
    if nres + 1 != n:
        warnings.warn('Unexpected childsequence (%d); expected %d for '
                      'residue %s' % (n, nres+1, name), AmberWarning)
    elif not isinstance(templ, ResidueTemplate) and n != len(templ) + 1:
        # NOTE(review): templ is always created as a ResidueTemplate above,
        # so this branch looks unreachable -- confirm before removing
        raise RuntimeError('child sequence must be 1 greater than the '
                           'number of residues in the unit')
    # Get the CONNECT array to set head and tail
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec5re, line,
                 'Expected connect array not found')
    try:
        head = int(fileobj.readline().strip())
        tail = int(fileobj.readline().strip())
    except ValueError:
        raise RuntimeError('Error processing connect table entries')
    if head > 0 and nres == 1:
        templ.head = templ[head-1]
    elif head > 0 and nres > 1:
        if head < sum(len(r) for r in container):
            raise RuntimeError('HEAD on multi-residue unit not supported')
    if tail > 0 and nres == 1:
        templ.tail = templ[tail-1]
    elif tail > 0 and nres > 1:
        if tail < sum(len(r) for r in container):
            warnings.warn('TAIL on multi-residue unit not supported (%s). '
                          'Ignored...' % name, AmberWarning)
    # Get the connectivity array to set bonds
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec6re, line,
                 'Expected connectivity table not found')
    line = fileobj.readline()
    while not at_next_section(line):
        i, j, flag = line.split()
        line = fileobj.readline()
        if nres > 1:
            # Find which residue we belong in
            i = int(i) - 1
            j = int(j) - 1
            for ii, idx in enumerate(start_atoms):
                if idx > i:
                    ii -= 1
                    break
            start_idx = start_atoms[ii]
            container[ii].add_bond(i-start_idx, j-start_idx)
        else:
            templ.add_bond(int(i)-1, int(j)-1)
    # Get the hierarchy table
    check_header(AmberOFFLibrary._sec7re, line,
                 'Expected hierarchy table not found')
    line = fileobj.readline()
    while not at_next_section(line):
        # Skip this section... not used
        # TODO turn this into a sanity check
        line = fileobj.readline()
    # Get the unit name
    check_header(AmberOFFLibrary._sec8re, line,
                 'Expected unit name string not found')
    fileobj.readline() # Skip this... not used
    line = fileobj.readline()
    # Get the atomic positions
    check_header(AmberOFFLibrary._sec9re, line,
                 'Expected unit positions table not found')
    for res in container:
        for atom in res:
            x, y, z = fileobj.readline().split()
            atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
    line = fileobj.readline()
    # Get the residueconnect table
    check_header(AmberOFFLibrary._sec10re, line,
                 'Expected unit residueconnect table not found')
    # NOTE(review): every row is applied to `templ`, which for multi-residue
    # units is the last residue read -- confirm this is intended
    for i in range(nres):
        c1, c2, c3, c4, c5, c6 = [int(x) for x in fileobj.readline().split()]
        # 0 means "no atom"; without the guard templ[-1] would be compared
        # against head/tail and trigger a spurious warning
        if (c1 > 0 and templ.head is not None and
                templ.head is not templ[c1-1]):
            warnings.warn('HEAD atom is not connect0')
        if (c2 > 0 and templ.tail is not None and
                templ.tail is not templ[c2-1]):
            warnings.warn('TAIL atom is not connect1')
        for cx in (c3, c4, c5, c6):
            if cx == 0:
                continue
            templ.connections.append(templ[cx-1])
    # Get the residues table
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec11re, line,
                 'Expected unit residues table not found')
    for i in range(nres):
        resname, resid, childseq, start, typ, img = fileobj.readline().split()
        resname = _strip_enveloping_quotes(resname)
        resid = int(resid)
        start = int(start)
        childseq = int(childseq)
        typ = _strip_enveloping_quotes(typ)
        img = int(img)
        if childseq - start != len(container[i]):
            warnings.warn('residue table predicted %d, not %d atoms for '
                          'residue %s' % (childseq-start, len(container[i]),
                                          name), AmberWarning)
        if typ == 'p':
            container[i].type = PROTEIN
        elif typ == 'n':
            container[i].type = NUCLEIC
        elif typ == 'w':
            container[i].type = SOLVENT
        elif typ != '?':
            warnings.warn('Unknown residue type "%s"' % typ, AmberWarning)
        if nres > 1:
            container[i].name = resname
    # Get the residues sequence table
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec12re, line,
                 'Expected residue sequence number not found')
    for i in range(nres):
        #TODO sanity check
        fileobj.readline()
    line = fileobj.readline()
    # Get the solventcap array
    check_header(AmberOFFLibrary._sec13re, line,
                 'Expected unit solventcap array not found')
    # Ignore the solvent cap (always 5 lines)
    for _ in range(5):
        fileobj.readline()
    # Velocities
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec14re, line,
                 'Expected unit velocities table not found')
    for res in container:
        for atom in res:
            vx, vy, vz = [float(x) for x in fileobj.readline().split()]
            atom.vx, atom.vy, atom.vz = vx, vy, vz

    if nres > 1:
        return container
    return templ
def _write_residue(dest, res):
    """
    Serializes one unit (a residue or a group of residues) to an open file
    handle as a sequence of ``!entry.<name>.unit.*`` sections

    Parameters
    ----------
    dest : file-like
        Open, writeable file object that receives the text
    res : :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
        What to write. A lone template is first wrapped in a container that
        carries the same name

    Raises
    ------
    RuntimeError
        If the unit has a box whose three angles are not all equal
    """
    if isinstance(res, ResidueTemplate):
        # Wrap the template so everything below only deals with containers
        wrapper = ResidueTemplateContainer(res.name)
        wrapper.append(res)
        res = wrapper
    emit = dest.write
    unit = res.name

    # --- atoms
    emit('!entry.%s.unit.atoms table str name str type int typex '
         'int resx int flags int seq int elmnt dbl chg\n' % unit)
    for resnum, residue in enumerate(res, 1):
        for atom in residue:
            emit(' "%s" "%s" 0 %d 131072 %d %d %.6f\n' %
                 (atom.name, atom.type, resnum, atom.idx+1,
                  atom.atomic_number, atom.charge))

    # --- perturbation info (placeholder values only)
    emit('!entry.%s.unit.atomspertinfo table str pname str ptype '
         'int ptypex int pelmnt dbl pchg\n' % unit)
    for residue in res:
        for atom in residue:
            emit(' "%s" "%s" 0 -1 0.0\n' % (atom.name, atom.type))

    # --- bounding box: flag, angle, then the three lengths
    emit('!entry.%s.unit.boundbox array dbl\n' % unit)
    box = res.box
    if box is None:
        emit((' -1.000000\n' + ' 0.0\n' * 4))
    else:
        emit(' 1.000000\n')
        # A single angle is stored, so the three must be identical
        if not (box[3] == box[4] == box[5]):
            raise RuntimeError('Cannot write boxes with different angles')
        emit(' %f\n' % box[3])
        for dim in (0, 1, 2):
            emit(' %f\n' % box[dim])

    emit('!entry.%s.unit.childsequence single int\n %d\n' %
         (unit, len(res)+1))

    # --- unit-level head/tail (zeroed for multi-residue units)
    emit('!entry.%s.unit.connect array int\n' % unit)
    if len(res) > 1:
        emit(' 0\n 0\n')
    else:
        head = res[0].head
        emit(' 0\n' if head is None else ' %d\n' % (head.idx + 1))
        tail = res[0].tail
        emit(' 0\n' if tail is None else ' %d\n' % (tail.idx + 1))

    # --- bonds; the table is left out when no residue has 2+ atoms
    if any(len(residue) > 1 for residue in res):
        emit('!entry.%s.unit.connectivity table int atom1x '
             'int atom2x int flags\n' % unit)
        offset = 1
        for residue in res:
            for bond in residue.bonds:
                emit(' %d %d 1\n' % (bond.atom1.idx+offset,
                                     bond.atom2.idx+offset))
            offset += len(residue)

    # --- hierarchy: unit -> residue rows, then residue -> atom rows
    emit('!entry.%s.unit.hierarchy table str abovetype int '
         'abovex str belowtype int belowx\n' % unit)
    serial = 1
    for resnum, residue in enumerate(res, 1):
        emit(' "U" 0 "R" %d\n' % resnum)
        for atom in residue:
            emit(' "R" %d "A" %d\n' % (resnum, serial))
            serial += 1

    emit('!entry.%s.unit.name single str\n' % unit)
    emit(' "%s"\n' % unit)

    emit('!entry.%s.unit.positions table dbl x dbl y dbl z\n' % unit)
    for residue in res:
        for atom in residue:
            emit(' %.6g %.6g %.6g\n' % (atom.xx, atom.xy, atom.xz))

    # --- per-residue connection atoms
    emit('!entry.%s.unit.residueconnect table int c1x int c2x '
         'int c3x int c4x int c5x int c6x\n' % unit)
    first = 1
    for residue in res:
        # The first two columns fall back to the residue's first and last
        # atom so that tleap sets the TREE correctly. Not used for anything
        # else...
        row = [first, first+len(residue)-1, 0, 0, 0, 0]
        if residue.head is not None:
            row[0] = residue.head.idx + 1
        if residue.tail is not None:
            row[1] = residue.tail.idx + 1
        extras = [at.idx + 1 for at in residue.connections[:4]]
        row[2:2+len(extras)] = extras
        emit(' %d %d %d %d %d %d\n' % tuple(row))
        first += len(residue)

    # --- residue table
    emit('!entry.%s.unit.residues table str name int seq int '
         'childseq int startatomx str restype int imagingx\n' % unit)
    type_codes = ((PROTEIN, 'p'), (NUCLEIC, 'n'), (SOLVENT, 'w'),
                  (UNKNOWN, '?'))
    first = 1
    for resnum, residue in enumerate(res, 1):
        for kind, code in type_codes:
            if residue.type is kind:
                typ = code
                break
        else:
            warnings.warn('Unrecognized residue type %r' % residue.type,
                          AmberWarning)
            typ = '?'
        emit(' "%s" %d %d %d "%s" %d\n' %
             (residue.name, resnum, 1+len(residue), first, typ,
              _imaging_atom(residue)+first))
        first += len(residue)

    # --- PDB sequence numbers: 0 for a lone residue, else 1..N
    emit('!entry.%s.unit.residuesPdbSequenceNumber array int\n' % unit)
    lone = len(res) == 1
    for resnum, _ in enumerate(res, 1):
        emit(' 0\n' if lone else ' %d\n' % resnum)

    # --- solvent cap is never written
    emit('!entry.%s.unit.solventcap array dbl\n' % unit)
    emit(' -1.000000\n' + ' 0.0\n' * 4)

    # --- velocities; atoms lacking them are written as zeros
    emit('!entry.%s.unit.velocities table dbl x dbl y dbl z\n' % unit)
    for residue in res:
        for atom in residue:
            try:
                text = ' %g %g %g\n' % (atom.vx, atom.vy, atom.vz)
            except AttributeError:
                text = ' 0.0 0.0 0.0\n'
            emit(text)
def _parse_residue(fileobj, name):
    """
    Parses the residue information out of the OFF file assuming the file is
    pointed at the first line of an atoms table section of the OFF file

    Parameters
    ----------
    fileobj : file-like
        Assumed to be open for read. Lines are consumed from the atoms table
        through the end of the unit's velocities table
    name : str
        The name of the residue being processed right now

    Returns
    -------
    residue : :class:`ResidueTemplate` or :class:`ResidueTemplateContainer`
        The single template if the unit contains one residue, otherwise the
        container holding every residue of the unit

    Raises
    ------
    RuntimeError
        If a section header is missing, belongs to a different unit, the
        boundbox/connect entries are not numeric, the head/tail atoms
        disagree with the residueconnect table, or the file ends in the
        middle of a table
    """
    def check_header(regex, header, errmsg):
        # Make sure `header` opens the expected section of *this* unit
        rematch = regex.match(header)
        if not rematch:
            raise RuntimeError(errmsg)
        if rematch.groups()[0] != name:
            raise RuntimeError('Found residue %s while processing residue %s' %
                               (rematch.groups()[0], name))

    def at_next_section(text):
        # readline() returns '' only at EOF; indexing it would raise an
        # unhelpful IndexError, so report the truncated file explicitly
        if not text:
            raise RuntimeError('Unexpected EOF in Amber OFF library')
        return text[0] == '!'

    container = ResidueTemplateContainer(name)
    nres = 1
    templ = ResidueTemplate(name)
    line = fileobj.readline()
    while not at_next_section(line):
        nam, typ, typx, resx, flags, seq, elmnt, chg = line.split()
        nam = _strip_enveloping_quotes(nam)
        typ = _strip_enveloping_quotes(typ)
        # typx, flags and seq are not used afterwards, but converting them
        # still rejects malformed rows
        typx = int(typx)
        resx = int(resx)
        flags = int(flags)
        seq = int(seq)
        elmnt = int(elmnt)
        chg = float(chg)
        atom = Atom(atomic_number=elmnt, type=typ, name=nam, charge=chg)
        if resx == nres + 1:
            # First atom of the next residue: close out the current template
            container.append(templ)
            nres += 1
            templ = ResidueTemplate(name)
        templ.add_atom(atom)
        line = fileobj.readline()
        # Skip blank lines
        while line and not line.strip():
            line = fileobj.readline()
    container.append(templ)
    if nres > 1:
        # 0-based index of the first atom of every residue within the unit
        start_atoms = []
        runsum = 0
        for res in container:
            start_atoms.append(runsum)
            runsum += len(res)
    # Make sure we get the next section
    check_header(AmberOFFLibrary._sec2re, line,
                 'Expected pertinfo table not found')
    line = fileobj.readline()
    while not at_next_section(line):
        # Not used, just skip
        # TODO sanity check
        line = fileobj.readline()
    check_header(AmberOFFLibrary._sec3re, line,
                 'Expected boundbox table not found')
    # Only 5 lines
    try:
        hasbox = float(fileobj.readline().strip())
        angle = float(fileobj.readline().strip())
        a = float(fileobj.readline().strip())
        b = float(fileobj.readline().strip())
        c = float(fileobj.readline().strip())
    except ValueError:
        raise RuntimeError('Error processing boundbox table entries')
    else:
        if hasbox > 0:
            if angle < 3.15:
                # No box is this acute -- must be in radians
                angle *= RAD_TO_DEG
            container.box = [a, b, c, angle, angle, angle]
    # Get the child sequence entry
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec4re, line,
                 'Expected childsequence table not found')
    n = int(fileobj.readline().strip())
    if nres + 1 != n:
        warnings.warn('Unexpected childsequence (%d); expected %d for '
                      'residue %s' % (n, nres+1, name), AmberWarning)
    elif not isinstance(templ, ResidueTemplate) and n != len(templ) + 1:
        # NOTE(review): templ is always created as a ResidueTemplate above,
        # so this branch looks unreachable -- confirm before removing
        raise RuntimeError('child sequence must be 1 greater than the '
                           'number of residues in the unit')
    # Get the CONNECT array to set head and tail
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec5re, line,
                 'Expected connect array not found')
    try:
        head = int(fileobj.readline().strip())
        tail = int(fileobj.readline().strip())
    except ValueError:
        raise RuntimeError('Error processing connect table entries')
    if head > 0 and nres == 1:
        templ.head = templ[head-1]
    elif head > 0 and nres > 1:
        if head < sum(len(r) for r in container):
            raise RuntimeError('HEAD on multi-residue unit not supported')
    if tail > 0 and nres == 1:
        templ.tail = templ[tail-1]
    elif tail > 0 and nres > 1:
        if tail < sum(len(r) for r in container):
            warnings.warn('TAIL on multi-residue unit not supported (%s). '
                          'Ignored...' % name, AmberWarning)
    # Get the connectivity array to set bonds
    line = fileobj.readline()
    # The connectivity table is only looked for when the last residue read
    # has more than one atom; otherwise `line` should already hold the
    # hierarchy header
    if len(templ.atoms) > 1:
        check_header(AmberOFFLibrary._sec6re, line,
                     'Expected connectivity table not found')
        line = fileobj.readline()
        while not at_next_section(line):
            i, j, flag = line.split()
            line = fileobj.readline()
            if nres > 1:
                # Find which residue we belong in
                i = int(i) - 1
                j = int(j) - 1
                for ii, idx in enumerate(start_atoms):
                    if idx > i:
                        ii -= 1
                        break
                start_idx = start_atoms[ii]
                container[ii].add_bond(i-start_idx, j-start_idx)
            else:
                templ.add_bond(int(i)-1, int(j)-1)
    # Get the hierarchy table
    check_header(AmberOFFLibrary._sec7re, line,
                 'Expected hierarchy table not found')
    line = fileobj.readline()
    while not at_next_section(line):
        # Skip this section... not used
        # TODO turn this into a sanity check
        line = fileobj.readline()
    # Get the unit name
    check_header(AmberOFFLibrary._sec8re, line,
                 'Expected unit name string not found')
    fileobj.readline() # Skip this... not used
    line = fileobj.readline()
    # Get the atomic positions
    check_header(AmberOFFLibrary._sec9re, line,
                 'Expected unit positions table not found')
    for res in container:
        for atom in res:
            x, y, z = fileobj.readline().split()
            atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
    line = fileobj.readline()
    # Get the residueconnect table
    check_header(AmberOFFLibrary._sec10re, line,
                 'Expected unit residueconnect table not found')
    # NOTE(review): every row is applied to `templ`, which for multi-residue
    # units is the last residue read -- confirm this is intended
    for i in range(nres):
        c1, c2, c3, c4, c5, c6 = (int(x) for x in fileobj.readline().split())
        if (c1 > 0 and templ.head is not None and
                templ.head is not templ[c1-1]):
            raise RuntimeError('HEAD atom is not connect0')
        if (c2 > 0 and templ.tail is not None and
                templ.tail is not templ[c2-1]):
            raise RuntimeError('TAIL atom is not connect1')
        for cx in (c3, c4, c5, c6):
            if cx == 0:
                continue
            templ.connections.append(templ[cx-1])
    # Get the residues table
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec11re, line,
                 'Expected unit residues table not found')
    for i in range(nres):
        resname, resid, childseq, start, typ, img = fileobj.readline().split()
        resname = _strip_enveloping_quotes(resname)
        resid = int(resid)
        start = int(start)
        childseq = int(childseq)
        typ = _strip_enveloping_quotes(typ)
        img = int(img)
        if childseq - start != len(container[i]):
            warnings.warn('residue table predicted %d, not %d atoms for '
                          'residue %s' % (childseq-start, len(container[i]),
                                          name), AmberWarning)
        if typ == 'p':
            container[i].type = PROTEIN
        elif typ == 'n':
            container[i].type = NUCLEIC
        elif typ == 'w':
            container[i].type = SOLVENT
        elif typ != '?':
            warnings.warn('Unknown residue type "%s"' % typ, AmberWarning)
        if nres > 1:
            container[i].name = resname
    # Get the residues sequence table
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec12re, line,
                 'Expected residue sequence number not found')
    for i in range(nres):
        #TODO sanity check
        fileobj.readline()
    line = fileobj.readline()
    # Get the solventcap array
    check_header(AmberOFFLibrary._sec13re, line,
                 'Expected unit solventcap array not found')
    # Ignore the solvent cap (always 5 lines)
    for _ in range(5):
        fileobj.readline()
    # Velocities
    line = fileobj.readline()
    check_header(AmberOFFLibrary._sec14re, line,
                 'Expected unit velocities table not found')
    for res in container:
        for atom in res:
            vx, vy, vz = (float(x) for x in fileobj.readline().split())
            atom.vx, atom.vy, atom.vz = vx, vy, vz

    if nres > 1:
        return container
    return templ
def parse(filename, structure=False):
    """
    Parses a mol2 (or mol3) file

    Parameters
    ----------
    filename : str or file-like
        Name of the file to parse or file-like object to parse from
    structure : bool, optional
        If True, the return value is a :class:`Structure` instance. If
        False, it is either a :class:`ResidueTemplate` or
        :class:`ResidueTemplateContainer` instance, depending on whether
        there is one or more than one residue defined in it. Default is
        False

    Returns
    -------
    molecule : :class:`Structure`, :class:`ResidueTemplate`, or
               :class:`ResidueTemplateContainer`
        The molecule defined by this mol2 file

    Raises
    ------
    Mol2Error
        If the file format is not recognized or non-numeric values are
        present where integers or floating point numbers are expected. Also
        raises Mol2Error if you try to parse a mol2 file that has multiple
        @<MOLECULE> entries with ``structure=True``, or if a bond refers to
        a residue that has not been read.
    """
    if isinstance(filename, string_types):
        f = genopen(filename, 'r')
        own_handle = True
    else:
        f = filename
        own_handle = False
    rescont = ResidueTemplateContainer()
    struct = Structure()
    restemp = ResidueTemplate()
    mol_info = []
    multires_structure = False
    try:
        section = None
        last_residue = None
        headtail = 'head'
        for line in f:
            if line.startswith('#'):
                continue
            if not line.strip() and section is None:
                continue
            if line.startswith('@<TRIPOS>'):
                section = line[9:].strip()
                if section == 'MOLECULE' and (restemp.atoms or rescont):
                    if structure:
                        raise Mol2Error('Cannot convert MOL2 with multiple '
                                        '@<MOLECULE>s to a Structure')
                    # Set the residue name from the MOL2 title if the
                    # molecule had only 1 residue and it was given a name in
                    # the title (mol_info is empty if no title was read)
                    if not multires_structure and mol_info and mol_info[0]:
                        restemp.name = mol_info[0]
                    multires_structure = False
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    struct = Structure()
                    last_residue = None
                    mol_info = []
                continue
            if section is None:
                raise Mol2Error('Bad mol2 file format')
            if section == 'MOLECULE':
                # Section formatted as follows:
                #   mol_name
                #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                #   mol_type
                #   charge_type
                #   [status_bits]
                #   [mol_comment]
                # TODO: Do something with the name.
                if len(mol_info) == 0:
                    mol_info.append(line.strip())
                elif len(mol_info) == 1:
                    mol_info.append([int(x) for x in line.split()])
                elif len(mol_info) == 2:
                    mol_info.append(line.strip())
                elif len(mol_info) == 3:
                    mol_info.append(line.strip())
                # Ignore the rest
                continue
            if section == 'ATOM':
                # Section formatted as follows:
                #   atom_id -- serial number of atom
                #   atom_name -- name of the atom
                #   x -- X-coordinate of the atom
                #   y -- Y-coordinate of the atom
                #   z -- Z-coordinate of the atom
                #   atom_type -- type of the atom
                #   subst_id -- Residue serial number
                #   subst_name -- Residue name
                #   charge -- partial atomic charge
                #   status_bit -- ignored
                words = line.split()
                if not words:
                    # Blank line; nothing to parse
                    continue
                atom_id = int(words[0])
                name = words[1]
                x = float(words[2])
                y = float(words[3])
                z = float(words[4])
                typ = words[5]
                try:
                    resid = int(words[6])
                except IndexError:
                    resid = 0
                try:
                    resname = words[7]
                except IndexError:
                    resname = 'UNK'
                if 'NO_CHARGES' not in mol_info:
                    try:
                        charge = float(words[8])
                    except IndexError:
                        charge = 0
                else:
                    charge = 0
                if last_residue is None:
                    last_residue = (resid, resname)
                    restemp.name = resname
                atom = Atom(name=name, type=typ, number=atom_id,
                            charge=charge)
                atom.xx, atom.xy, atom.xz = x, y, z
                struct.add_atom(atom, resname, resid)
                if last_residue != (resid, resname):
                    # A new residue started: file the finished template away
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    restemp.name = resname
                    last_residue = (resid, resname)
                    multires_structure = True
                try:
                    restemp.add_atom(copy.copy(atom))
                except ValueError:
                    # Allow mol2 files being parsed as a Structure to have
                    # duplicate atom names
                    if not structure:
                        raise
                continue
            if section == 'BOND':
                # Section formatted as follows:
                #   bond_id -- serial number of bond (ignored)
                #   origin_atom_id -- serial number of first atom in bond
                #   target_atom_id -- serial number of other atom in bond
                #   bond_type -- string describing bond type (ignored)
                #   status_bits -- ignored
                words = line.split()
                if not words:
                    # Blank line; nothing to parse
                    continue
                int(words[0]) # Bond serial number... redundant and ignored
                a1 = int(words[1])
                a2 = int(words[2])
                atom1 = struct.atoms.find_original_index(a1)
                atom2 = struct.atoms.find_original_index(a2)
                struct.bonds.append(Bond(atom1, atom2))
                # Now add it to our residue container
                # See if it's a head/tail connection
                if atom1.residue is not atom2.residue:
                    # A residue index equal to len(rescont) is the template
                    # still being built; smaller ones are already stored
                    if atom1.residue.idx == len(rescont):
                        res1 = restemp
                    elif atom1.residue.idx < len(rescont):
                        res1 = rescont[atom1.residue.idx]
                    else:
                        raise Mol2Error('Bad bond!')
                    if atom2.residue.idx == len(rescont):
                        res2 = restemp
                    elif atom2.residue.idx < len(rescont):
                        res2 = rescont[atom2.residue.idx]
                    else:
                        raise Mol2Error('Bad bond!')
                    if res1 is res2:
                        raise Mol2Error('BAD identical residues')
                    idx1 = atom1.idx - atom1.residue[0].idx
                    idx2 = atom2.idx - atom2.residue[0].idx
                    if atom1.residue.idx < atom2.residue.idx:
                        res1.tail = res1[idx1]
                        res2.head = res2[idx2]
                    else:
                        res1.head = res1[idx1]
                        res2.tail = res2[idx2]
                elif not multires_structure:
                    if not structure:
                        restemp.add_bond(a1-1, a2-1)
                else:
                    # Same residue, add the bond
                    offset = atom1.residue[0].idx
                    if atom1.residue.idx == len(rescont):
                        res = restemp
                    else:
                        res = rescont[atom1.residue.idx]
                    res.add_bond(atom1.idx-offset, atom2.idx-offset)
                continue
            if section == 'CRYSIN':
                # Section formatted as follows:
                #   a -- length of first unit cell vector
                #   b -- length of second unit cell vector
                #   c -- length of third unit cell vector
                #   alpha -- angle b/w b and c
                #   beta -- angle b/w a and c
                #   gamma -- angle b/w a and b
                #   space group -- number of space group (ignored)
                #   space group setting -- ignored
                words = line.split()
                box = [float(w) for w in words[:6]]
                if len(box) != 6:
                    raise ValueError('%d box dimensions found; needed 6' %
                                     len(box))
                struct.box = copy.copy(box)
                rescont.box = copy.copy(box)
                continue
            if section == 'SUBSTRUCTURE':
                # Section formatted as follows:
                #   subst_id -- residue number
                #   subst_name -- residue name
                #   root_atom -- first atom of residue
                #   subst_type -- ignored (usually 'RESIDUE')
                #   dict_type -- type of substructure (ignored)
                #   chain -- chain ID of residue
                #   sub_type -- type of the chain
                #   inter_bonds -- # of inter-substructure bonds
                #   status -- ignored
                #   comment -- ignored
                words = line.split()
                if not words:
                    continue
                subst_id = int(words[0])
                resname = words[1]
                try:
                    chain = words[5]
                except IndexError:
                    chain = ''
                # Set the chain ID
                for res in struct.residues:
                    if res.number == subst_id and res.name == resname:
                        res.chain = chain
                continue
            # MOL3 sections
            if section == 'HEADTAIL':
                atname, residx = line.split()
                residx = int(residx)
                if residx in (0, 1) or residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in head/tail')
                # Entries alternate head, tail, head, ...; the toggle flips
                # whether or not the named atom is found
                for atom in res:
                    if atom.name == atname:
                        if headtail == 'head':
                            res.head = atom
                            headtail = 'tail'
                        else:
                            res.tail = atom
                            headtail = 'head'
                        break
                else:
                    if headtail == 'head':
                        headtail = 'tail'
                    else:
                        headtail = 'head'
                continue
            if section == 'RESIDUECONNECT':
                words = line.split()
                if not words:
                    # Blank line; nothing to parse
                    continue
                residx = int(words[0])
                if residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in '
                                    'residueconnect')
                for a in words[3:]:
                    if a == '0':
                        continue
                    for atom in res:
                        if atom.name == a:
                            res.connections.append(atom)
                            break
                    else:
                        raise Mol2Error('Residue connection atom %s not '
                                        'found in residue %d' % (a, residx))
        if structure:
            return struct
        elif len(rescont) > 0:
            if not multires_structure and mol_info and mol_info[0]:
                restemp.name = mol_info[0]
            rescont.append(restemp)
            return rescont
        else:
            return restemp
    except ValueError as e:
        raise Mol2Error('String conversion trouble: %s' % e)
    finally:
        if own_handle:
            f.close()
def write(struct, dest, mol3=False, split=False):
    """ Writes a mol2 file from a structure or residue template

    Parameters
    ----------
    struct : :class:`Structure` or :class:`ResidueTemplate` or
             :class:`ResidueTemplateContainer`
        The input structure to write the mol2 file from
    dest : str or file-like obj
        Name of the file to write or open file handle to write to. Anything
        that lacks a ``write`` attribute is treated as a file name; it is
        opened here and closed again before returning (also on error).
        Handles passed in by the caller are left open.
    mol3 : bool, optional
        If True, the HEADTAIL and RESIDUECONNECT sections are written after
        the SUBSTRUCTURE section. Default is False
    split : bool, optional
        If True and ``struct`` is a ResidueTemplateContainer or a Structure
        with multiple residues, each residue is printed in a separate
        @<MOLECULE> section that appear sequentially in the output file

    Notes
    -----
    Atom and residue indices are written 1-based. For a
    ResidueTemplateContainer, the bond list also contains one bond between
    the tail of each residue and the head of the following residue when both
    are set.
    """
    own_handle = not hasattr(dest, 'write')
    if own_handle:
        dest = genopen(dest, 'w')
    # A single try/finally guards every exit path (including the early
    # return taken when splitting), so a handle we opened is always closed.
    try:
        if split:
            # Write sequentially if it is a multi-residue container or
            # Structure; anything else falls through to the normal path
            is_container = isinstance(struct, ResidueTemplateContainer)
            if is_container or (isinstance(struct, Structure) and
                                len(struct.residues) > 1):
                if is_container:
                    pieces = struct
                else:
                    pieces = ResidueTemplateContainer.from_structure(struct)
                for piece in pieces:
                    Mol2File.write(piece, dest, mol3)
                return

        if isinstance(struct, ResidueTemplateContainer):
            natom = sum(len(res) for res in struct)
            # bases[i] is the 1-based serial number of the first atom of
            # residue i in the combined output
            bases = []
            next_base = 1
            for res in struct:
                bases.append(next_base)
                next_base += len(res)
            # The bond list is every intra-residue bond plus the bonds that
            # would be formed by "stitching" consecutive residues together
            # via their tail and head atoms
            bonds = []
            charges = []
            last = len(struct) - 1
            for i, res in enumerate(struct):
                base = bases[i]
                for bond in res.bonds:
                    bonds.append((bond.atom1.idx+base, bond.atom2.idx+base))
                if (i < last and res.tail is not None and
                        struct[i+1].head is not None):
                    bonds.append((res.tail.idx+base,
                                  struct[i+1].head.idx+bases[i+1]))
                charges.extend(a.charge for a in res)
            residues = struct
            name = struct.name or struct[0].name
        else:
            natom = len(struct.atoms)
            bonds = [(b.atom1.idx+1, b.atom2.idx+1) for b in struct.bonds]
            if isinstance(struct, ResidueTemplate):
                residues = [struct]
                name = struct.name
            else:
                residues = struct.residues
                name = struct.residues[0].name
            charges = [a.charge for a in struct.atoms]

        dest.write('@<TRIPOS>MOLECULE\n')
        dest.write('%s\n' % name)
        dest.write('%d %d %d 0 1\n' % (natom, len(bonds), len(residues)))
        # Molecule type: the first residue with a recognized amino or nucleic
        # acid name decides; multi-residue systems without one are BIOPOLYMER
        if len(residues) == 1:
            dest.write('SMALL\n')
        else:
            for residue in residues:
                if AminoAcidResidue.has(residue.name):
                    dest.write('PROTEIN\n')
                    break
                if (RNAResidue.has(residue.name) or
                        DNAResidue.has(residue.name)):
                    dest.write('NUCLEIC\n')
                    break
            else:
                dest.write('BIOPOLYMER\n')
        # The charge column is only printed if some charge is non-zero
        printchg = any(charges)
        if printchg:
            dest.write('USER_CHARGES\n')
        else:
            dest.write('NO_CHARGES\n')

        # See if we want to print box info
        box = getattr(struct, 'box', None)
        if box is not None:
            dest.write('@<TRIPOS>CRYSIN\n')
            dest.write('%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f 1 1\n' %
                       (box[0], box[1], box[2], box[3], box[4], box[5]))

        # Now do ATOM section
        dest.write('@<TRIPOS>ATOM\n')
        serial = 1
        for i, res in enumerate(residues):
            for atom in res:
                # Atoms without coordinates are written at the origin
                x = getattr(atom, 'xx', 0)
                y = getattr(atom, 'xy', 0)
                z = getattr(atom, 'xz', 0)
                # Fall back on the atom name if the type is blank
                dest.write('%8d %-8s %10.4f %10.4f %10.4f %-8s %6d %-8s' % (
                           serial, atom.name, x, y, z,
                           atom.type.strip() or atom.name, i+1, res.name))
                if printchg:
                    dest.write(' %10.6f\n' % atom.charge)
                else:
                    dest.write('\n')
                serial += 1

        dest.write('@<TRIPOS>BOND\n')
        for i, (idx1, idx2) in enumerate(bonds):
            dest.write('%8d %8d %8d 1\n' % (i+1, idx1, idx2))

        dest.write('@<TRIPOS>SUBSTRUCTURE\n')
        first_atom = 0
        nres = len(residues)
        for i, res in enumerate(residues):
            # Missing or empty chain IDs are written as '****'
            chain = getattr(res, 'chain', None) or '****'
            intresbonds = 0
            if isinstance(res, ResidueTemplate):
                # Templates carry no inter-residue bonds, so count the
                # tail->head and head<-tail links to the neighboring residues
                if (i != nres-1 and res.tail is not None and
                        residues[i+1].head is not None):
                    intresbonds += 1
                if (i != 0 and res.head is not None and
                        residues[i-1].tail is not None):
                    intresbonds += 1
            else:
                intresbonds = sum(1 for atom in res
                                  for a2 in atom.bond_partners
                                  if a2.residue is not res)
            dest.write('%8d %-8s %8d RESIDUE %4d %-4s ROOT %6d\n' %
                       (i+1, res.name, first_atom+1, 0, chain[:4],
                        intresbonds))
            first_atom += len(res)

        if mol3:
            dest.write('@<TRIPOS>HEADTAIL\n')
            for i, res in enumerate(residues):
                if isinstance(res, ResidueTemplate):
                    head, tail = res.head, res.tail
                else:
                    # Head/tail are the atoms bonded to the previous/next
                    # residue (by residue index); the last match wins
                    head = tail = None
                    for atom in res:
                        for a2 in atom.bond_partners:
                            if a2.residue.idx == res.idx - 1:
                                head = atom
                            if a2.residue.idx == res.idx + 1:
                                tail = atom
                # Always two lines per residue: head first, then tail
                for terminus in (head, tail):
                    if terminus is not None:
                        dest.write('%s %d\n' % (terminus.name, i+1))
                    else:
                        dest.write('0 0\n')

            dest.write('@<TRIPOS>RESIDUECONNECT\n')
            for i, res in enumerate(residues):
                if isinstance(res, ResidueTemplate):
                    con = [res.head, res.tail, None, None, None, None]
                    # A separate index name is essential here: reusing ``i``
                    # would clobber the residue index printed below
                    for k, a in enumerate(res.connections):
                        con[k+2] = a
                else:
                    con = [None, None, None, None, None, None]
                    ncon = 2
                    for atom in res:
                        for a2 in atom.bond_partners:
                            if a2.residue.idx == res.idx - 1:
                                con[0] = atom
                            elif a2.residue.idx == res.idx + 1:
                                con[1] = atom
                            elif a2.residue.idx != res.idx:
                                con[ncon] = atom
                                ncon += 1
                # Residue number followed by six slots; empty slots are 0
                dest.write('%d' % (i+1))
                for a in con:
                    if a is not None:
                        dest.write(' %s' % a.name)
                    else:
                        dest.write(' 0')
                dest.write('\n')
    finally:
        if own_handle:
            dest.close()