def id_format(filename):
    """
    Identifies the file type as either Amber-format file (like prmtop) or an
    old-style topology file.

    Only the first five lines of the file are read.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber-style format, False otherwise
    """
    f = io.genopen(filename, 'r')
    try:
        lines = [f.readline().decode() for i in range(5)]
    finally:
        # Close the handle even if reading or decoding fails
        f.close()
    if lines[0].startswith('%VERSION'):
        return True
    # Try old-style format
    try:
        return AmberFormat().rdparm_old(lines, check=True)
    except ValueError:
        return False
def id_format(filename):
    """
    Identifies the file type as an Amber mdcrd file

    Only the first five lines are read. The first line is not inspected;
    each of the next four must begin with ten fixed-width %8.3f fields.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber mdcrd file. False otherwise
    """
    f = io.genopen(filename, 'r')
    try:
        lines = [f.readline().decode() for i in xrange(5)]
    finally:
        # Close the handle even if reading or decoding fails
        f.close()
    # Next 4 lines, make sure we have %8.3f format
    try:
        for line in lines[1:5]:
            for j in xrange(10):
                start = j * 8
                # In %8.3f the decimal point is the 5th character of a field
                if line[start + 4] != '.':
                    return False
                float(line[start:start + 8])
                # ...and the field must end in a digit
                if line[start + 7] not in '0123456789':
                    return False
    except (IndexError, ValueError):
        # Line too short or field not numeric
        return False
    # Must be a mdcrd
    return True
def id_format(filename):
    """
    Identifies the file type as an Amber mdcrd file

    Only the first five lines are read. The first line is not inspected;
    each of the next four must begin with ten fixed-width %8.3f fields.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber mdcrd file. False otherwise
    """
    f = io.genopen(filename, 'r')
    try:
        lines = [f.readline().decode() for i in xrange(5)]
    finally:
        # Close the handle even if reading or decoding fails
        f.close()
    # Next 4 lines, make sure we have %8.3f format
    try:
        for line in lines[1:5]:
            for j in xrange(10):
                start = j * 8
                # In %8.3f the decimal point is the 5th character of a field
                if line[start + 4] != '.':
                    return False
                float(line[start:start + 8])
                # ...and the field must end in a digit
                if line[start + 7] not in '0123456789':
                    return False
    except (IndexError, ValueError):
        # Line too short or field not numeric
        return False
    # Must be a mdcrd
    return True
def id_format(filename):
    """
    Identifies the file type as a CHARMM coordinate file

    The first non-blank line is assumed to start the title. Title lines
    (starting with ``*``) are skipped, then the atom count line and up to
    three atom records are checked for the expected numeric fields.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is a CHARMM coordinate file. False otherwise, including
        when the file ends before an atom count line is found.
    """
    f = io.genopen(filename)
    try:
        line = f.readline().decode()
        # Skip leading blank lines, as a precaution. readline() returns an
        # empty string at EOF, so stop there instead of looping forever.
        while line and not line.strip():
            line = f.readline().decode()
        if not line:
            return False
        # Consume the title: keep reading until a blank line or a line that
        # does not begin with '*'
        while True:
            line = f.readline().decode()
            stripped = line.strip()
            if not stripped or stripped[0] != '*':
                break
        # Skip blank lines between the title and the atom count
        while line and not line.strip():
            line = f.readline().decode()
        if not line:
            return False
        try:
            natom = int(line.split()[0])
            for row in xrange(min(natom, 3)):
                words = f.readline().decode().split()
                int(words[0])
                int(words[1])
                float(words[4])
                float(words[5])
                float(words[6])
                int(words[8])
                float(words[9])
        except (IndexError, ValueError):
            return False
        return True
    finally:
        f.close()
def id_format(filename):
    """
    Identifies the file type as a CHARMM PSF file

    Only the first line of the file is read.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is a CHARMM or Xplor-style PSF file
    """
    f = genopen(filename, 'r')
    try:
        line = f.readline().decode()
    finally:
        # Close the handle even if reading or decoding fails
        f.close()
    return line.strip().startswith('PSF')
def id_format(filename):
    """
    Identifies the file type as a CHARMM restart file

    Only the first line of the file is read.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is a CHARMM restart file
    """
    f = io.genopen(filename)
    try:
        line = f.readline().decode()
    finally:
        # Close the handle even if reading or decoding fails
        f.close()
    return line.startswith('REST')
def id_format(filename):
    """
    Checks whether a file looks like an OFF library file.

    The first line of the file is matched against the OFF header pattern.

    Parameters
    ----------
    filename : str
        The name of the file to see if it is an OFF file format

    Returns
    -------
    is_fmt : bool
        True if it is recognized as OFF, False otherwise
    """
    handle = io.genopen(filename, 'r')
    try:
        first_line = handle.readline().decode()
        matched = AmberOFFLibrary._headerre.match(first_line)
        return bool(matched)
    finally:
        handle.close()
def id_format(filename):
    """
    Decide whether a file is in the Mol2 (or Mol3) format

    The verdict is based on the first line that is neither a ``#`` comment
    nor blank.

    Parameters
    ----------
    filename : str
        Name of the file to test whether or not it is a mol2 file

    Returns
    -------
    is_fmt : bool
        True if it is a mol2 (or mol3) file, False otherwise
    """
    handle = TextToBinaryFile(genopen(filename, 'r'))
    try:
        for text in handle:
            is_comment = text.startswith('#')
            if is_comment or not text.strip():
                continue
            # First meaningful line settles it
            return text.startswith('@<TRIPOS>')
        # Nothing but comments and blank lines
        return False
    finally:
        handle.close()
def id_format(filename):
    """
    Identifies the file type as an Amber restart/inpcrd file

    Only the first five lines are read. The first line is not inspected,
    the second must start with an integer, and each of the next three must
    begin with six fixed-width %12.7f fields.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber restart/inpcrd file. False otherwise
    """
    f = io.genopen(filename, 'r')
    try:
        lines = [f.readline().decode() for i in xrange(5)]
    finally:
        # Close the handle even if reading or decoding fails
        f.close()
    # Look for natom
    try:
        int(lines[1].split()[0])
    except (ValueError, IndexError):
        return False
    # Next 3 lines, make sure we have %12.7f format
    try:
        for line in lines[2:5]:
            for j in xrange(6):
                start = j * 12
                # In %12.7f the decimal point is the 5th character of a field
                if line[start + 4] != '.':
                    return False
                float(line[start:start + 12])
                # ...and the field must end in a digit
                if line[start + 11] not in '0123456789':
                    return False
    except (IndexError, ValueError):
        # Line too short or field not numeric
        return False
    # Must be a restart...
    return True
def id_format(filename):
    """
    Identifies the file type as an Amber restart/inpcrd file

    Only the first five lines are read. The first line is not inspected,
    the second must start with an integer, and each of the next three must
    begin with six fixed-width %12.7f fields.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber restart/inpcrd file. False otherwise
    """
    f = io.genopen(filename, 'r')
    try:
        lines = [f.readline().decode() for i in xrange(5)]
    finally:
        # Close the handle even if reading or decoding fails
        f.close()
    # Look for natom
    try:
        int(lines[1].split()[0])
    except (ValueError, IndexError):
        return False
    # Next 3 lines, make sure we have %12.7f format
    try:
        for line in lines[2:5]:
            for j in xrange(6):
                start = j * 12
                # In %12.7f the decimal point is the 5th character of a field
                if line[start + 4] != '.':
                    return False
                float(line[start:start + 12])
                # ...and the field must end in a digit
                if line[start + 11] not in '0123456789':
                    return False
    except (IndexError, ValueError):
        # Line too short or field not numeric
        return False
    # Must be a restart...
    return True
def write(struct, dest, vmd=False):
    """
    Writes a PSF file from the stored molecule

    Parameters
    ----------
    struct : :class:`Structure`
        The Structure instance from which the PSF should be written
    dest : str or file-like
        The place to write the output PSF file.  If it has a "write"
        attribute, it will be used to print the PSF file. Otherwise, it will
        be treated like a string and a file will be opened, printed, then
        closed
    vmd : bool
        If True, it will write out a PSF in the format that VMD prints it in
        (i.e., no NUMLP/NUMLPH or MOLNT sections)

    Examples
    --------
    >>> cs = CharmmPsfFile('testfiles/test.psf')
    >>> cs.write_psf('testfiles/test2.psf')
    """
    # See if this is an extended format
    try:
        ext = 'EXT' in struct.flags
    except AttributeError:
        ext = True
    own_handle = not hasattr(dest, 'write')
    if own_handle:
        dest = TextToBinaryFile(genopen(dest, 'w'))
    try:
        # Assign the formats we need to write with
        if ext:
            atmfmt1 = ('%10d %-8s %-8i %-8s %-8s %4d %10.6f %13.4f' + 11 * ' ')
            atmfmt2 = ('%10d %-8s %-8i %-8s %-8s %-4s %10.6f %13.4f' + 11 * ' ')
            intfmt = '%10d'  # For pointers
        else:
            atmfmt1 = ('%8d %-4s %-4i %-4s %-4s %4d %10.6f %13.4f' + 11 * ' ')
            atmfmt2 = ('%8d %-4s %-4i %-4s %-4s %-4s %10.6f %13.4f' + 11 * ' ')
            intfmt = '%8d'  # For pointers

        def write_records(header, records, per_line, pad_when_empty=True):
            # Write a header line, then each integer tuple in ``records``
            # with ``per_line`` tuples per output line, then a blank line.
            dest.write(header)
            nrec = 0
            for rec in records:
                dest.write((intfmt * len(rec)) % rec)
                nrec += 1
                if nrec % per_line == 0:
                    dest.write('\n')
            # Terminate a partially filled (or, optionally, empty) last line
            if nrec % per_line != 0 or (pad_when_empty and nrec == 0):
                dest.write('\n')
            dest.write('\n')

        # Now print the header then the title
        dest.write('PSF ')
        if hasattr(struct, 'flags'):
            dest.write(' '.join(struct.flags))
        else:
            dest.write('EXT')  # EXT is always active
        dest.write('\n\n')
        dest.write(intfmt % len(struct.title) + ' !NTITLE\n')
        dest.write('\n'.join(struct.title) + '\n\n')
        # Now time for the atoms
        dest.write(intfmt % len(struct.atoms) + ' !NATOM\n')
        # atmfmt1 is for CHARMM format (i.e., atom types are integers)
        # atmfmt2 is for XPLOR format (i.e., atom types are strings)
        for i, atom in enumerate(struct.atoms):
            typ = atom.type
            if isinstance(atom.type, str):
                fmt = atmfmt2
                if not atom.type:
                    typ = atom.name
            else:
                fmt = atmfmt1
            segid = getattr(atom, 'segid', 'SYS')
            atmstr = fmt % (i + 1, segid, atom.residue.number,
                            atom.residue.name, atom.name, typ,
                            atom.charge, atom.mass)
            if hasattr(atom, 'props'):
                dest.write(atmstr + ' '.join(atom.props) + '\n')
            else:
                # The atom record must still be written when there are no
                # extra properties to append
                dest.write(atmstr + '\n')
        dest.write('\n')
        # Bonds (4 per line)
        write_records(
            intfmt % len(struct.bonds) + ' !NBOND: bonds\n',
            ((b.atom1.idx + 1, b.atom2.idx + 1) for b in struct.bonds), 4)
        # Angles (3 per line)
        write_records(
            intfmt % len(struct.angles) + ' !NTHETA: angles\n',
            ((a.atom1.idx + 1, a.atom2.idx + 1, a.atom3.idx + 1)
             for a in struct.angles), 3)
        # Dihedrals (2 per line)
        write_records(
            intfmt % len(struct.dihedrals) + ' !NPHI: dihedrals\n',
            ((d.atom1.idx + 1, d.atom2.idx + 1, d.atom3.idx + 1,
              d.atom4.idx + 1) for d in struct.dihedrals), 2)
        # Impropers (2 per line)
        write_records(
            intfmt % len(struct.impropers) + ' !NIMPHI: impropers\n',
            ((m.atom1.idx + 1, m.atom2.idx + 1, m.atom3.idx + 1,
              m.atom4.idx + 1) for m in struct.impropers), 2)
        # Donor section (4 per line)
        write_records(
            intfmt % len(struct.donors) + ' !NDON: donors\n',
            ((d.atom1.idx + 1, d.atom2.idx + 1) for d in struct.donors), 4)
        # Acceptor section (4 per line)
        write_records(
            intfmt % len(struct.acceptors) + ' !NACC: acceptors\n',
            ((a.atom1.idx + 1, a.atom2.idx + 1) for a in struct.acceptors), 4)
        # NNB section: one zero per atom, 8 per line
        write_records(intfmt % 0 + ' !NNB\n\n',
                      ((0,) for atom in struct.atoms), 8,
                      pad_when_empty=False)
        # Group section (3 per line)
        write_records(
            (intfmt * 2) % (len(struct.groups), struct.groups.nst2)
            + ' !NGRP NST2\n',
            ((gp.bs, gp.type, gp.move) for gp in struct.groups), 3)
        # The next two sections are never found in VMD prmtops...
        if not vmd:
            # Molecule section; first set molecularity
            set_molecules(struct.atoms)
            mollist = [a.marked for a in struct.atoms]
            # max() raises on an empty sequence; no atoms means no molecules
            nmol = max(mollist) if mollist else 0
            write_records(intfmt % nmol + ' !MOLNT\n',
                          ((m,) for m in mollist), 8, pad_when_empty=False)
            # NUMLP/NUMLPH section
            dest.write((intfmt * 2) % (0, 0) + ' !NUMLP NUMLPH\n')
            dest.write('\n')
        # CMAP section: one cross-term (two overlapping dihedrals) per line
        dest.write(intfmt % len(struct.cmaps) + ' !NCRTERM: cross-terms\n')
        for cmap in struct.cmaps:
            dest.write((intfmt * 8) % (
                cmap.atom1.idx + 1, cmap.atom2.idx + 1,
                cmap.atom3.idx + 1, cmap.atom4.idx + 1,
                cmap.atom2.idx + 1, cmap.atom3.idx + 1,
                cmap.atom4.idx + 1, cmap.atom5.idx + 1))
            dest.write('\n')
        # Done!
    finally:
        # If we opened our own handle, close it (even on error)
        if own_handle:
            dest.close()
def write(struct, dest, vmd=False):
    """
    Writes a PSF file from the stored molecule

    Parameters
    ----------
    struct : :class:`Structure`
        The Structure instance from which the PSF should be written
    dest : str or file-like
        The place to write the output PSF file.  If it has a "write"
        attribute, it will be used to print the PSF file. Otherwise, it will
        be treated like a string and a file will be opened, printed, then
        closed
    vmd : bool
        If True, it will write out a PSF in the format that VMD prints it in
        (i.e., no NUMLP/NUMLPH or MOLNT sections)

    Examples
    --------
    >>> cs = CharmmPsfFile('testfiles/test.psf')
    >>> cs.write_psf('testfiles/test2.psf')
    """
    # See if this is an extended format
    try:
        ext = 'EXT' in struct.flags
    except AttributeError:
        ext = True
    own_handle = not hasattr(dest, 'write')
    if own_handle:
        dest = TextToBinaryFile(genopen(dest, 'w'))
    try:
        # Assign the formats we need to write with
        if ext:
            atmfmt1 = ('%10d %-8s %-8i %-8s %-8s %4d %10.6f %13.4f' + 11*' ')
            atmfmt2 = ('%10d %-8s %-8i %-8s %-8s %-4s %10.6f %13.4f' + 11*' ')
            intfmt = '%10d'  # For pointers
        else:
            atmfmt1 = ('%8d %-4s %-4i %-4s %-4s %4d %10.6f %13.4f' + 11*' ')
            atmfmt2 = ('%8d %-4s %-4i %-4s %-4s %-4s %10.6f %13.4f' + 11*' ')
            intfmt = '%8d'  # For pointers

        def write_records(header, records, per_line, pad_when_empty=True):
            # Write a header line, then each integer tuple in ``records``
            # with ``per_line`` tuples per output line, then a blank line.
            dest.write(header)
            nrec = 0
            for rec in records:
                dest.write((intfmt*len(rec)) % rec)
                nrec += 1
                if nrec % per_line == 0:
                    dest.write('\n')
            # Terminate a partially filled (or, optionally, empty) last line
            if nrec % per_line != 0 or (pad_when_empty and nrec == 0):
                dest.write('\n')
            dest.write('\n')

        # Now print the header then the title
        dest.write('PSF ')
        if hasattr(struct, 'flags'):
            dest.write(' '.join(struct.flags))
        else:
            dest.write('EXT')  # EXT is always active
        dest.write('\n\n')
        dest.write(intfmt % len(struct.title) + ' !NTITLE\n')
        dest.write('\n'.join(struct.title) + '\n\n')
        # Now time for the atoms
        dest.write(intfmt % len(struct.atoms) + ' !NATOM\n')
        # atmfmt1 is for CHARMM format (i.e., atom types are integers)
        # atmfmt2 is for XPLOR format (i.e., atom types are strings)
        for i, atom in enumerate(struct.atoms):
            typ = atom.type
            if isinstance(atom.type, str):
                fmt = atmfmt2
                if not atom.type:
                    typ = atom.name
            else:
                fmt = atmfmt1
            segid = getattr(atom, 'segid', 'SYS')
            atmstr = fmt % (i+1, segid, atom.residue.number,
                            atom.residue.name, atom.name, typ,
                            atom.charge, atom.mass)
            if hasattr(atom, 'props'):
                dest.write(atmstr + ' '.join(atom.props) + '\n')
            else:
                # The atom record must still be written when there are no
                # extra properties to append
                dest.write(atmstr + '\n')
        dest.write('\n')
        # Bonds (4 per line)
        write_records(
            intfmt % len(struct.bonds) + ' !NBOND: bonds\n',
            ((b.atom1.idx+1, b.atom2.idx+1) for b in struct.bonds), 4)
        # Angles (3 per line)
        write_records(
            intfmt % len(struct.angles) + ' !NTHETA: angles\n',
            ((a.atom1.idx+1, a.atom2.idx+1, a.atom3.idx+1)
             for a in struct.angles), 3)
        # Dihedrals (2 per line)
        write_records(
            intfmt % len(struct.dihedrals) + ' !NPHI: dihedrals\n',
            ((d.atom1.idx+1, d.atom2.idx+1, d.atom3.idx+1, d.atom4.idx+1)
             for d in struct.dihedrals), 2)
        # Impropers (2 per line)
        write_records(
            intfmt % len(struct.impropers) + ' !NIMPHI: impropers\n',
            ((m.atom1.idx+1, m.atom2.idx+1, m.atom3.idx+1, m.atom4.idx+1)
             for m in struct.impropers), 2)
        # Donor section (4 per line)
        write_records(
            intfmt % len(struct.donors) + ' !NDON: donors\n',
            ((d.atom1.idx+1, d.atom2.idx+1) for d in struct.donors), 4)
        # Acceptor section (4 per line)
        write_records(
            intfmt % len(struct.acceptors) + ' !NACC: acceptors\n',
            ((a.atom1.idx+1, a.atom2.idx+1) for a in struct.acceptors), 4)
        # NNB section: one zero per atom, 8 per line
        write_records(intfmt % 0 + ' !NNB\n\n',
                      ((0,) for atom in struct.atoms), 8,
                      pad_when_empty=False)
        # Group section (3 per line)
        write_records(
            (intfmt*2) % (len(struct.groups), struct.groups.nst2)
            + ' !NGRP NST2\n',
            ((gp.bs, gp.type, gp.move) for gp in struct.groups), 3)
        # The next two sections are never found in VMD prmtops...
        if not vmd:
            # Molecule section; first set molecularity
            set_molecules(struct.atoms)
            mollist = [a.marked for a in struct.atoms]
            # max() raises on an empty sequence; no atoms means no molecules
            nmol = max(mollist) if mollist else 0
            write_records(intfmt % nmol + ' !MOLNT\n',
                          ((m,) for m in mollist), 8, pad_when_empty=False)
            # NUMLP/NUMLPH section
            dest.write((intfmt*2) % (0, 0) + ' !NUMLP NUMLPH\n')
            dest.write('\n')
        # CMAP section: one cross-term (two overlapping dihedrals) per line
        dest.write(intfmt % len(struct.cmaps) + ' !NCRTERM: cross-terms\n')
        for cmap in struct.cmaps:
            dest.write((intfmt*8) % (
                cmap.atom1.idx+1, cmap.atom2.idx+1,
                cmap.atom3.idx+1, cmap.atom4.idx+1,
                cmap.atom2.idx+1, cmap.atom3.idx+1,
                cmap.atom4.idx+1, cmap.atom5.idx+1))
            dest.write('\n')
        # Done!
    finally:
        # If we opened our own handle, close it (even on error)
        if own_handle:
            dest.close()
def write(struct, dest, mol3=False):
    """
    Writes a mol2 file from a structure or residue template

    Parameters
    ----------
    struct : :class:`Structure` or :class:`ResidueTemplate` or
             :class:`ResidueTemplateContainer`
        The input structure to write the mol2 file from
    dest : str or file-like obj
        Name of the file to write or open file handle to write to
    mol3 : bool, optional
        If True, the mol3 HEADTAIL and RESIDUECONNECT sections are written
        after the standard sections. Default is False
    """
    own_handle = False
    if not hasattr(dest, 'write'):
        own_handle = True
        dest = TextToBinaryFile(genopen(dest, 'w'))
    try:
        if isinstance(struct, ResidueTemplateContainer):
            natom = sum([len(c) for c in struct])
            # To find the number of bonds, we need to total number of bonds
            # + the number of bonds that would be formed by "stitching"
            # together residues via their head and tail
            bonds = []
            charges = []
            # bases[i] is the 1-based serial number of residue i's first atom
            bases = [1 for res in struct]
            for i, res in enumerate(struct):
                if i < len(struct) - 1:
                    bases[i+1] = bases[i] + len(res)
            for i, res in enumerate(struct):
                for bond in res.bonds:
                    bonds.append((bond.atom1.idx+bases[i],
                                  bond.atom2.idx+bases[i]))
                if i < len(struct)-1 and (res.tail is not None and
                                          struct[i+1].head is not None):
                    bonds.append((res.tail.idx+bases[i],
                                  struct[i+1].head.idx+bases[i+1]))
                charges.extend([a.charge for a in res])
            residues = struct
        else:
            natom = len(struct.atoms)
            bonds = [(b.atom1.idx+1, b.atom2.idx+1) for b in struct.bonds]
            if isinstance(struct, ResidueTemplate):
                residues = [struct]
            else:
                residues = struct.residues
            charges = [a.charge for a in struct.atoms]
        dest.write('@<TRIPOS>MOLECULE\n')
        dest.write('\n')
        dest.write('%d %d %d 0 1\n' % (natom, len(bonds), len(residues)))
        if len(residues) == 1:
            dest.write('SMALL\n')
        else:
            # The first recognized residue decides the molecule type
            for residue in residues:
                if AminoAcidResidue.has(residue.name):
                    dest.write('PROTEIN\n')
                    break
                if (RNAResidue.has(residue.name) or
                        DNAResidue.has(residue.name)):
                    dest.write('NUCLEIC\n')
                    break
            else:
                dest.write('BIOPOLYMER\n')
        if not any(charges):
            dest.write('NO_CHARGES\n')
            printchg = False
        else:
            dest.write('USER_CHARGES\n')
            printchg = True
        # Now do ATOM section
        dest.write('@<TRIPOS>ATOM\n')
        j = 1
        for i, res in enumerate(residues):
            for atom in res:
                # Atoms without coordinates are written at the origin
                x = getattr(atom, 'xx', 0)
                y = getattr(atom, 'xy', 0)
                z = getattr(atom, 'xz', 0)
                dest.write('%d %s %.4f %.4f %.4f %s %d %s' % (
                    j, atom.name, x, y, z, atom.type, i+1, res.name))
                if printchg:
                    dest.write(' %.4f\n' % atom.charge)
                else:
                    dest.write('\n')
                j += 1
        dest.write('@<TRIPOS>BOND\n')
        for i, bond in enumerate(bonds):
            dest.write('%d %d %d 1\n' % (i+1, bond[0], bond[1]))
        dest.write('@<TRIPOS>SUBSTRUCTURE\n')
        first_atom = 0
        for i, res in enumerate(residues):
            if not hasattr(res, 'chain') or not res.chain:
                chain = '****'
            else:
                chain = res.chain
            # Count the bonds this residue makes to other residues
            intresbonds = 0
            if isinstance(res, ResidueTemplate):
                if i != len(residues)-1 and (res.tail is not None and
                                             residues[i+1].head is not None):
                    intresbonds += 1
                if i != 0 and (res.head is not None and
                               residues[i-1].tail is not None):
                    intresbonds += 1
            else:
                for atom in res:
                    for a2 in atom.bond_partners:
                        if a2.residue is not res:
                            intresbonds += 1
            dest.write('%d %s %d RESIDUE %d %s ROOT %d\n' % (
                i+1, res.name, first_atom+1, 0, chain[:4], intresbonds))
            first_atom += len(res)
        if mol3:
            dest.write('@<TRIPOS>HEADTAIL\n')
            for i, res in enumerate(residues):
                if isinstance(res, ResidueTemplate):
                    head, tail = res.head, res.tail
                else:
                    # Infer head/tail from bonds to the neighboring residues
                    head = tail = None
                    for atom in res:
                        for a2 in atom.bond_partners:
                            if a2.residue.idx == res.idx - 1:
                                head = atom
                            if a2.residue.idx == res.idx + 1:
                                tail = atom
                # One line for the head then one for the tail; "0 0" marks
                # a missing one
                for end in (head, tail):
                    if end is not None:
                        dest.write('%s %d\n' % (end.name, i+1))
                    else:
                        dest.write('0 0\n')
            dest.write('@<TRIPOS>RESIDUECONNECT\n')
            for i, res in enumerate(residues):
                if isinstance(res, ResidueTemplate):
                    con = [res.head, res.tail, None, None, None, None]
                    # Use a separate index so the residue number ``i``
                    # written below is not overwritten
                    for k, a in enumerate(res.connections):
                        con[k+2] = a
                else:
                    con = [None, None, None, None, None, None]
                    ncon = 2
                    for atom in res:
                        for a2 in atom.bond_partners:
                            if a2.residue.idx == res.idx - 1:
                                con[0] = atom
                            elif a2.residue.idx == res.idx + 1:
                                con[1] = atom
                            elif a2.residue.idx != res.idx:
                                con[ncon] = atom
                                ncon += 1
                dest.write('%d' % (i+1))
                for a in con:
                    if a is not None:
                        dest.write(' %s' % a.name)
                    else:
                        dest.write(' 0')
                dest.write('\n')
    finally:
        if own_handle:
            dest.close()
def parse(filename, structure=False):
    """
    Parses a mol2 file (or mol3) file

    Parameters
    ----------
    filename : str
        Name of the file to parse
    structure : bool, optional
        If True, the return value is a :class:`Structure` instance. If
        False, it is either a :class:`ResidueTemplate` or
        :class:`ResidueTemplateContainer` instance, depending on whether
        there is one or more than one residue defined in it. Default is
        False

    Returns
    -------
    molecule : :class:`Structure`, :class:`ResidueTemplate`, or
               :class:`ResidueTemplateContainer`
        The molecule defined by this mol2 file

    Raises
    ------
    Mol2Error
        If the file format is not recognized or non-numeric values are
        present where integers or floating point numbers are expected
    """
    rescont = ResidueTemplateContainer()
    struct = Structure()
    restemp = ResidueTemplate()
    mol_info = []
    f = TextToBinaryFile(genopen(filename, 'r'))
    try:
        section = None
        last_residue = None
        # HEADTAIL lines alternate head, tail, head, tail, ...
        headtail = 'head'
        for line in f:
            if line.startswith('#'):
                continue
            if not line.strip() and section is None:
                continue
            if line.startswith('@<TRIPOS>'):
                section = line[9:].strip()
                continue
            if section is None:
                raise Mol2Error('Bad mol2 file format')
            if section == 'MOLECULE':
                # Section formatted as follows:
                #   mol_name
                #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                #   mol_type
                #   charge_type
                #   [status_bits]
                #   [mol_comment]
                if len(mol_info) == 0:
                    mol_info.append(line.strip())
                elif len(mol_info) == 1:
                    mol_info.append([int(x) for x in line.split()])
                elif len(mol_info) == 2:
                    mol_info.append(line.strip())
                elif len(mol_info) == 3:
                    mol_info.append(line.strip())
                # Ignore the rest
                continue
            if section == 'ATOM':
                # Section formatted as follows:
                #   atom_id -- serial number of atom
                #   atom_name -- name of the atom
                #   x -- X-coordinate of the atom
                #   y -- Y-coordinate of the atom
                #   z -- Z-coordinate of the atom
                #   atom_type -- type of the atom
                #   subst_id -- Residue serial number
                #   subst_name -- Residue name
                #   charge -- partial atomic charge
                #   status_bit -- ignored
                words = line.split()
                atom_id = int(words[0])
                name = words[1]
                x = float(words[2])
                y = float(words[3])
                z = float(words[4])
                typ = words[5]
                try:
                    resid = int(words[6])
                except IndexError:
                    resid = 0
                try:
                    resname = words[7]
                except IndexError:
                    resname = 'UNK'
                # Default to zero so ``charge`` is always bound, including
                # for NO_CHARGES molecules
                charge = 0
                if 'NO_CHARGES' not in mol_info:
                    try:
                        charge = float(words[8])
                    except IndexError:
                        charge = 0
                if last_residue is None:
                    last_residue = (resid, resname)
                    restemp.name = resname
                atom = Atom(name=name, type=typ, number=atom_id,
                            charge=charge)
                atom.xx, atom.xy, atom.xz = x, y, z
                struct.add_atom(atom, resname, resid)
                if last_residue != (resid, resname):
                    # A new residue started: store the finished template
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    restemp.name = resname
                    last_residue = (resid, resname)
                restemp.add_atom(copy.copy(atom))
                continue
            if section == 'BOND':
                # Section formatted as follows:
                #   bond_id -- serial number of bond (ignored)
                #   origin_atom_id -- serial number of first atom in bond
                #   target_atom_id -- serial number of other atom in bond
                #   bond_type -- string describing bond type (ignored)
                #   status_bits -- ignored
                words = line.split()
                int(words[0])  # Bond serial number... redundant and ignored
                a1 = int(words[1])
                a2 = int(words[2])
                atom1 = struct.atoms.find_original_index(a1)
                atom2 = struct.atoms.find_original_index(a2)
                struct.bonds.append(Bond(atom1, atom2))
                # Now add it to our residue container
                # See if it's a head/tail connection. The residue still
                # being built (restemp) has index len(rescont).
                if atom1.residue is not atom2.residue:
                    if atom1.residue.idx == len(rescont):
                        res1 = restemp
                    elif atom1.residue.idx < len(rescont):
                        res1 = rescont[atom1.residue.idx]
                    else:
                        raise Mol2Error('Bad bonding pattern detected')
                    if atom2.residue.idx == len(rescont):
                        res2 = restemp
                    elif atom2.residue.idx < len(rescont):
                        res2 = rescont[atom2.residue.idx]
                    else:
                        raise Mol2Error('Bad bonding pattern detected')
                    assert res1 is not res2, 'BAD identical residues'
                    idx1 = atom1.idx - atom1.residue[0].idx
                    idx2 = atom2.idx - atom2.residue[0].idx
                    if atom1.residue.idx < atom2.residue.idx:
                        res1.tail = res1[idx1]
                        res2.head = res2[idx2]
                    else:
                        res1.head = res1[idx1]
                        res2.tail = res2[idx2]
                else:
                    # Same residue, add the bond
                    offset = atom1.residue[0].idx
                    if atom1.residue.idx == len(rescont):
                        res = restemp
                    else:
                        res = rescont[atom1.residue.idx]
                    res.add_bond(atom1.idx-offset, atom2.idx-offset)
                continue
            if section == 'CRYSIN':
                # Section formatted as follows:
                #   a -- length of first unit cell vector
                #   b -- length of second unit cell vector
                #   c -- length of third unit cell vector
                #   alpha -- angle b/w b and c
                #   beta -- angle b/w a and c
                #   gamma -- angle b/w a and b
                #   space group -- number of space group (ignored)
                #   space group setting -- ignored
                words = line.split()
                box = [float(x) for x in words[:6]]
                if len(box) != 6:
                    raise ValueError('%d box dimensions found; needed 6' %
                                     len(box))
                struct.box = copy.copy(box)
                rescont.box = copy.copy(box)
                continue
            if section == 'SUBSTRUCTURE':
                # Section formatted as follows:
                #   subst_id -- residue number
                #   subst_name -- residue name
                #   root_atom -- first atom of residue
                #   subst_type -- ignored (usually 'RESIDUE')
                #   dict_type -- type of substructure (ignored)
                #   chain -- chain ID of residue
                #   sub_type -- type of the chain
                #   inter_bonds -- # of inter-substructure bonds
                #   status -- ignored
                #   comment -- ignored
                words = line.split()
                subst_id = int(words[0])
                resname = words[1]
                int(words[2])  # root_atom: validated but otherwise unused
                try:
                    chain = words[5]
                except IndexError:
                    chain = ''
                # Set the chain ID
                for res in struct.residues:
                    if res.number == subst_id and res.name == resname:
                        res.chain = chain
                continue
            # MOL3 sections
            if section == 'HEADTAIL':
                atname, residx = line.split()
                residx = int(residx)
                if residx - 1 == len(rescont):
                    res = restemp
                elif 0 <= residx - 1 < len(rescont):
                    res = rescont[residx-1]
                elif residx == 0:
                    # "0 0" placeholder: this residue has no head (or tail)
                    # atom, so there is nothing to search
                    res = ()
                else:
                    raise Mol2Error('Residue out of range in head/tail')
                for atom in res:
                    if atom.name == atname:
                        if headtail == 'head':
                            res.head = atom
                            headtail = 'tail'
                        else:
                            res.tail = atom
                            headtail = 'head'
                        break
                else:
                    # No matching atom: still advance the head/tail toggle
                    if headtail == 'head':
                        headtail = 'tail'
                    else:
                        headtail = 'head'
                continue
            if section == 'RESIDUECONNECT':
                words = line.split()
                residx = int(words[0])
                if residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in '
                                    'residueconnect')
                # words[1] and words[2] are the head and tail; the rest are
                # additional connection atoms ('0' means unused)
                for a in words[3:]:
                    if a == '0':
                        continue
                    for atom in res:
                        if atom.name == a:
                            # Connections belong to the residue template
                            res.connections.append(atom)
                            break
                    else:
                        raise Mol2Error('Residue connection atom %s not '
                                        'found in residue %d' % (a, residx))
        if structure:
            return struct
        elif len(rescont) > 0:
            # Multiple residues: add the last one still being built
            rescont.append(restemp)
            return rescont
        else:
            return restemp
    except ValueError as e:
        raise Mol2Error('String conversion trouble: %s' % e)
    finally:
        f.close()