def write(self, selection, frame=None):
    """Write selection at current trajectory frame to file.

    write(selection, frame=FRAME)

    selection         MDAnalysis AtomGroup
    frame             optionally move to frame FRAME
    """
    u = selection.universe
    if frame is not None:
        u.trajectory[frame]  # advance to frame
    else:
        try:
            frame = u.trajectory.ts.frame
        except AttributeError:
            frame = 1  # should catch cases when we are analyzing a single PDB (?)

    atoms = selection.atoms  # make sure to use atoms (Issue 46)
    coor = atoms.coordinates()  # can write from selection == Universe (Issue 49)

    with util.openany(self.filename, 'w') as self.crd:
        self._TITLE("FRAME " + str(frame) + " FROM " + str(u.trajectory.filename))
        self._TITLE("")
        self._NUMATOMS(len(atoms))
        current_resid = 0
        for i, atom in enumerate(atoms):
            if atoms[i].resid != atoms[i - 1].resid:
                # note that this compares first and LAST atom on first iteration... but it works
                current_resid += 1
            self._ATOM(serial=i + 1, resSeq=atom.resid, resName=atom.resname, name=atom.name,
                       x=coor[i, 0], y=coor[i, 1], z=coor[i, 2], chainID=atom.segid,
                       tempFactor=atom.bfactor, TotRes=current_resid, numatoms=len(atoms))
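# Usage sketch for the write() method above. The Universe construction, the selection
# string, and the CRDWriter class/import path are assumptions for illustration only;
# the same write(selection, frame=...) pattern applies to the GRO and PQR writers below.
import MDAnalysis
from MDAnalysis.coordinates.CRD import CRDWriter   # assumed import path

u = MDAnalysis.Universe("system.psf", "trajectory.dcd")  # placeholder file names
protein = u.selectAtoms("protein")                        # old-style (pre-0.11) selection API assumed

w = CRDWriter("protein.crd")
w.write(protein, frame=0)   # write the selection at trajectory frame 0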
def parse(self): """Parse CHARMM/NAMD/XPLOR PSF_ file *filename*. :Returns: MDAnalysis internal *structure* dict as defined here. """ # Open and check psf validity with openany(self.filename, 'r') as psffile: header = psffile.next() if header[:3] != "PSF": err = ("{} is not valid PSF file (header = {})" "".format(self.filename, header)) logger.error(err) raise ValueError(err) header_flags = header[3:].split() if "NAMD" in header_flags: self._format = "NAMD" # NAMD/VMD elif "EXT" in header_flags: self._format = "EXTENDED" # CHARMM else: self._format = "STANDARD" # CHARMM psffile.next() title = psffile.next().split() if not (title[1] == "!NTITLE"): err = "{} is not a valid PSF file".format(psffile.name) logger.error(err) raise ValueError(err) # psfremarks = [psffile.next() for i in range(int(title[0]))] for _ in range(int(title[0])): psffile.next() logger.debug("PSF file {}: format {}" "".format(psffile.name, self._format)) structure = {} sections = ( ("_atoms", ("NATOM", 1, 1, self._parseatoms)), ("_bonds", ("NBOND", 2, 4, self._parsesection)), ("_angles", ("NTHETA", 3, 3, self._parsesection)), ("_dihe", ("NPHI", 4, 2, self._parsesection)), ("_impr", ("NIMPHI", 4, 2, self._parsesection)), ("_donors", ("NDON", 2, 4, self._parsesection)), ("_acceptors", ("NACC", 2, 4, self._parsesection)) ) try: for attr, info in sections: psffile.next() structure[attr] = self._parse_sec(psffile, info) except StopIteration: # Reached the end of the file before we expected if "_atoms" not in structure: err = ("The PSF file didn't contain the required" " section of NATOM") logger.error(err) raise ValueError(err) # Who cares about the rest return structure
def __init__(self, crdfilename, convert_units=None, **kwargs):
    # EXT:
    #   (i10,2x,a)  natoms,'EXT'
    #   (2I10,2X,A8,2X,A8,3F20.10,2X,A8,2X,A8,F20.10)
    #   iatom,ires,resn,typr,x,y,z,segid,rid,wmain
    # standard:
    #   (i5) natoms
    #   (2I5,1X,A4,1X,A4,3F10.5,1X,A4,1X,A4,F10.5)
    #   iatom,ires,resn,typr,x,y,z,segid,orig_resid,wmain
    self.crdfilename = crdfilename
    self.filename = self.crdfilename
    if convert_units is None:
        # Note: not used at the moment in CRDReader/Writer
        convert_units = MDAnalysis.core.flags['convert_lengths']
    self.convert_units = convert_units  # convert length and time to base units

    coords_list = []
    with util.openany(crdfilename, 'r') as crdfile:
        extended = False
        natoms = 0
        for linenum, line in enumerate(crdfile):
            if line.strip().startswith('*') or line.strip() == "":
                continue  # ignore TITLE and empty lines
            fields = line.split()
            if len(fields) <= 2:
                # should be the natoms line
                natoms = int(fields[0])
                extended = (fields[-1] == 'EXT')
                continue
            # process coordinates
            try:
                if extended:
                    coords_list.append(numpy.array(map(float, line[45:100].split()[0:3])))
                else:
                    coords_list.append(numpy.array(map(float, line[20:50].split()[0:3])))
            except:
                raise FormatError("Check CRD format at line %d: %s" % (linenum, line.rstrip()))

    self.numatoms = len(coords_list)
    self.numframes = 1
    self.fixed = 0  # parse wmain field for fixed atoms?
    self.skip = 1
    self.periodic = False
    self.delta = 0
    self.skip_timestep = 1
    self.ts = self._Timestep(numpy.array(coords_list))
    self.ts.frame = 1  # 1-based frame number
    # if self.convert_units:
    #     self.convert_pos_from_native(self.ts._pos)  # in-place !

    # sanity check
    if self.numatoms != natoms:
        raise FormatError("Found %d coordinates in %r but the header claims that there "
                          "should be %d coordinates." % (self.numatoms, self.filename, natoms))
def run(self, store=True, force=False, start_frame=1, end_frame=None, step_value=1):
    """Analyze trajectory and produce timeseries.

    Stores results in :attr:`ContactAnalysis1.timeseries` (if store=True)
    and writes them to a data file. The average q is written to a second
    data file.

    *start_frame*
        The value of the first frame number in the trajectory to be used
        (default: frame 1)
    *end_frame*
        The value of the last frame number in the trajectory to be used
        (default: None -- use all frames)
    *step_value*
        The number of frames to skip during trajectory iteration
        (default: use every frame)
    """
    if self.output_exists(force=force):
        import warnings

        warnings.warn("File %r already exists, loading it INSTEAD of trajectory %r. "
                      "Use force=True to overwrite the output file. " %
                      (self.output, self.universe.trajectory.filename))
        self.load(self.output)
        return None

    with openany(self.output, 'w') as out:
        out.write("# q1 analysis\n# nref = %d\n" % (self.nref))
        out.write("# frame q1 n1\n")
        records = []
        self.qavg *= 0  # average contact existence
        A, B = self.selections
        # determine the end_frame value to use:
        total_frames = self.universe.trajectory.numframes
        if not end_frame:
            # use the total number of frames in trajectory if no final value specified
            end_frame = total_frames
        for ts in self.universe.trajectory[start_frame:end_frame:step_value]:
            frame = ts.frame
            # use pre-allocated distance array to save a little bit of time
            MDAnalysis.core.distances.distance_array(A.coordinates(), B.coordinates(), result=self.d)
            self.qarray(self.d, out=self.q)
            n1, q1 = self.qN(self.q, out=self._qtmp)
            self.qavg += self.q
            if store:
                records.append((frame, q1, n1))
            out.write("%(frame)4d %(q1)8.6f %(n1)5d\n" % vars())
    if store:
        self.timeseries = numpy.array(records).T
    numframes = len(range(total_frames)[start_frame:end_frame:step_value])
    self.qavg /= numframes
    numpy.savetxt(self.outarray, self.qavg, fmt="%8.6f")
    return self.output
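# Usage sketch for run(): assumes `ca` is an already-configured ContactAnalysis1-like
# instance (its constructor is not shown here); keyword names follow the signature above.
ca.run(store=True, force=False, start_frame=1, end_frame=None, step_value=2)
# timeseries is numpy.array(records).T, so its rows are frame, q1, n1
frames, q1, n1 = ca.timeseries
print "analyzed %d frames, <q1> = %g" % (len(frames), q1.mean())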
def parse(self): """Parse CHARMM/NAMD/XPLOR PSF_ file *filename*. :Returns: MDAnalysis internal *structure* dict as defined here. """ # Open and check psf validity with openany(self.filename, 'r') as psffile: header = psffile.next() if header[:3] != "PSF": err = ("{} is not valid PSF file (header = {})" "".format(self.filename, header)) logger.error(err) raise ValueError(err) header_flags = header[3:].split() if "NAMD" in header_flags: self._format = "NAMD" # NAMD/VMD elif "EXT" in header_flags: self._format = "EXTENDED" # CHARMM else: self._format = "STANDARD" # CHARMM psffile.next() title = psffile.next().split() if not (title[1] == "!NTITLE"): err = "{} is not a valid PSF file".format(psffile.name) logger.error(err) raise ValueError(err) # psfremarks = [psffile.next() for i in range(int(title[0]))] for _ in range(int(title[0])): psffile.next() logger.debug("PSF file {}: format {}" "".format(psffile.name, self._format)) structure = {} sections = (("_atoms", ("NATOM", 1, 1, self._parseatoms)), ("_bonds", ("NBOND", 2, 4, self._parsesection)), ("_angles", ("NTHETA", 3, 3, self._parsesection)), ("_dihe", ("NPHI", 4, 2, self._parsesection)), ("_impr", ("NIMPHI", 4, 2, self._parsesection)), ("_donors", ("NDON", 2, 4, self._parsesection)), ("_acceptors", ("NACC", 2, 4, self._parsesection))) try: for attr, info in sections: psffile.next() structure[attr] = self._parse_sec(psffile, info) except StopIteration: # Reached the end of the file before we expected if "_atoms" not in structure: err = ("The PSF file didn't contain the required" " section of NATOM") logger.error(err) raise ValueError(err) # Who cares about the rest return structure
def parse(self): """Parse GRO file *filename* and return the dict `structure`. Only reads the list of atoms. :Returns: MDAnalysis internal *structure* dict .. SeeAlso:: The *structure* dict is defined in :func:`MDAnalysis.topology.base`. """ atom_iter = 0 atoms = [] with openany(self.filename, "r") as grofile: segid = "SYSTEM" for line in grofile: try: resid, resname, name = int( line[0:5]), line[5:10].strip(), line[10:15].strip() # guess based on atom name elem = guess_atom_element(name) atype = elem mass = get_atom_mass(elem) charge = guess_atom_charge(name) # segid = "SYSTEM" # ignore coords and velocities, they can be read by coordinates.GRO # Not currently doing anything with other lines except (ValueError, IndexError): pass # if linenum == 0: # Header comment #hdr_cmt = line #pass #elif linenum == 1: # Header: number of particles #hdr_np = int(line) # A bit dodgy; should find a better way # of locating the box_vectors line # pass #else: #ftr_box = line If the line can't # otherwise be read properly, then this # probably indicates a problem with the # gro line, and an error will be raised # pass except: raise IOError( "Couldn't read the following line of the .gro file:\n" "{}".format(line)) else: # Just use the atom_iter (counting from 0) rather than # the number in the .gro file (which wraps at 99999) atoms.append( Atom(atom_iter, name, atype, resname, resid, segid, mass, charge)) atom_iter += 1 structure = {'_atoms': atoms} return structure
def __init__(self, filename, convert_units=None, **kwargs):
    """Read coordinates from *filename*.

    *filename* can be a gzipped or bzip2ed compressed PDBQT file.
    """
    self.filename = filename
    if convert_units is None:
        convert_units = MDAnalysis.core.flags['convert_lengths']
    self.convert_units = convert_units  # convert length and time to base units

    # Ugly inner method: moved outside of for-loop below
    def _c(start, stop, typeclass=float):
        return self._col(line, start, stop, typeclass=typeclass)

    coords = []
    atoms = []
    unitcell = numpy.zeros(6, dtype=numpy.float32)
    with util.openany(filename, 'r') as pdbfile:
        for line in pdbfile:
            if line[:4] == 'END\n':
                # Only break at the 'END' of a model definition; checking for the
                # trailing newline prevents a premature exit on a torsion
                # terminator such as ENDBRANCH.
                break
            if line[:6] == 'CRYST1':
                A, B, C = _c(7, 15), _c(16, 24), _c(25, 33)
                alpha, beta, gamma = _c(34, 40), _c(41, 47), _c(48, 54)
                unitcell[:] = A, B, C, alpha, beta, gamma
            if line[:6] in ('ATOM  ', 'HETATM'):
                # directly use COLUMNS from PDB/PDBQT spec
                serial = _c(7, 11, int)
                name = _c(13, 16, str).strip()
                resName = _c(18, 21, str).strip()
                chainID = _c(22, 22, str)  # empty chainID is a single space ' '!
                resSeq = _c(23, 26, int)
                x, y, z = _c(31, 38), _c(39, 46), _c(47, 54)
                occupancy = _c(55, 60)
                tempFactor = _c(61, 66)
                partialCharge = _c(67, 76, str).strip()  # PDBQT partial charge
                atomtype = _c(77, 80, str).strip()       # PDBQT atom type
                coords.append((x, y, z))
                atoms.append((serial, name, resName, chainID, resSeq,
                              occupancy, tempFactor, partialCharge, atomtype))
    self.numatoms = len(coords)
    self.ts = self._Timestep(numpy.array(coords, dtype=numpy.float32))
    self.ts._unitcell[:] = unitcell
    self.ts.frame = 1  # 1-based frame number
    if self.convert_units:
        self.convert_pos_from_native(self.ts._pos)            # in-place !
        self.convert_pos_from_native(self.ts._unitcell[:3])   # in-place ! (only lengths)
    self.numframes = 1
    self.fixed = 0
    self.skip = 1
    self.periodic = False
    self.delta = 0
    self.skip_timestep = 1
    # hack for PDBQTParser:
    self._atoms = numpy.rec.fromrecords(
        atoms,
        names="serial,name,resName,chainID,resSeq,occupancy,tempFactor,"
              "partialCharge,type")
def writePDB(self, filename):
    """Export coordinates to a simple PDB file."""
    atom_format = "%6s%.5s %4s %4s %.4s %8.3f%8.3f%8.3f%6.2f%6.2f %2s \n"
    p = self.positions
    with openany(filename, 'w') as file:
        for i, atom in enumerate(self.sections["Atoms"]):
            line = ["ATOM ", str(i + 1), 'XXXX', 'TEMP', str(atom.type + 1),
                    p[i, 0], p[i, 1], p[i, 2], 0.0, 0.0, str(atom.type)]
            file.write(atom_format % tuple(line))
def parse(self): """Parse GRO file *filename* and return the dict `structure`. Only reads the list of atoms. :Returns: MDAnalysis internal *structure* dict .. SeeAlso:: The *structure* dict is defined in :func:`MDAnalysis.topology.base`. """ atom_iter = 0 atoms = [] with openany(self.filename, "r") as grofile: segid = "SYSTEM" for line in grofile: try: resid, resname, name = int(line[0:5]), line[5:10].strip(), line[10:15].strip() # guess based on atom name elem = guess_atom_element(name) atype = elem mass = get_atom_mass(elem) charge = guess_atom_charge(name) # segid = "SYSTEM" # ignore coords and velocities, they can be read by coordinates.GRO # Not currently doing anything with other lines except (ValueError, IndexError): pass # if linenum == 0: # Header comment #hdr_cmt = line #pass #elif linenum == 1: # Header: number of particles #hdr_np = int(line) # A bit dodgy; should find a better way # of locating the box_vectors line # pass #else: #ftr_box = line If the line can't # otherwise be read properly, then this # probably indicates a problem with the # gro line, and an error will be raised # pass except: raise IOError("Couldn't read the following line of the .gro file:\n" "{}".format(line)) else: # Just use the atom_iter (counting from 0) rather than # the number in the .gro file (which wraps at 99999) atoms.append(Atom(atom_iter, name, atype, resname, resid, segid, mass, charge)) atom_iter += 1 structure = {'_atoms':atoms} return structure
def load(self, filename):
    """Load the data file."""
    from MDAnalysis.core.util import openany

    records = []
    with openany(filename) as data:
        for line in data:
            if line.startswith('#'):
                continue
            records.append(map(float, line.split()))
    self.timeseries = numpy.array(records).T
def __init__(self, filename, convert_units=None, **kwargs):
    """Read coordinates from *filename*.

    *filename* can be a gzipped or bzip2ed compressed MOL2 file.

    If the file contains multiple @<TRIPOS>MOLECULE blocks then it is read
    as a trajectory where each molecule block corresponds to one frame.
    """
    self.filename = filename
    if convert_units is None:
        convert_units = core.flags['convert_lengths']
    self.convert_units = convert_units  # convert length and time to base units

    # = NOTE to clear up confusion over 0-based vs 1-based frame numbering =
    # self.frame is 1-based for this Reader, which matches the numbering of the
    # molecule blocks in a typical multi-molecule file.
    # self._read_frame assumes that it is passed a 0-based frame number, so
    # that it functions as expected when slicing is used.

    blocks = []

    with util.openany(filename) as f:
        for i, line in enumerate(f):
            # found new molecules
            if "@<TRIPOS>MOLECULE" in line:
                blocks.append({"start_line": i, "lines": []})
            blocks[-1]["lines"].append(line)

    block = blocks[0]

    sections, coords = self.parse_block(block)

    self.numatoms = len(coords)
    self.ts = self._Timestep(np.array(coords, dtype=np.float32))
    self.ts.frame = 1  # 1-based frame number as starting frame

    if self.convert_units:
        self.convert_pos_from_native(self.ts._pos)           # in-place !
        self.convert_pos_from_native(self.ts._unitcell[:3])  # in-place ! (only lengths)

    self.molecule = {}
    self.substructure = {}
    self.frames = blocks
    self.numframes = len(blocks)
    self.fixed = 0
    self.skip = 1
    self.periodic = False
    self.delta = 0
    self.skip_timestep = 1
def read_DATA_timestep(self, ts):
    """Read a DATA file and try and extract:
      - positions
      - velocities (optional)
      - box information

    Fills this into the Timestep object and returns it

    .. versionadded:: 0.9.0
    """
    read_atoms = False
    read_velocities = False

    with openany(self.filename, 'r') as psffile:
        nitems, ntypes, box = self._parse_header(psffile)

        # lammps box: xlo, xhi, ylo, yhi, zlo, zhi
        lx = box[1] - box[0]
        ly = box[3] - box[2]
        lz = box[5] - box[4]
        # mda unitcell: A alpha B beta gamma C
        ts._unitcell[[0, 1, 2]] = lx, ly, lz
        ts._unitcell[[3, 4, 5]] = 90.0

        while not (read_atoms & read_velocities):
            try:
                section = psffile.next().strip().split()[0]
            except IndexError:  # blank lines don't split
                section = ''
            except StopIteration:
                break

            if section == 'Atoms':
                self._parse_pos(psffile, ts._pos)
                read_atoms = True
            elif section == 'Velocities':
                ts._velocities = numpy.zeros((ts.numatoms, 3),
                                             dtype=numpy.float32, order='F')
                self._parse_vel(psffile, ts._velocities)
                read_velocities = True
            elif len(section) > 0:
                self._skip_section(psffile)
            else:
                continue

        if not read_atoms:  # Reaches here if StopIteration hit
            raise IOError("Position information not found")

    return ts
def writePSF(self, filename, names=None):
    """Export topology information to a simple PSF file."""
    # Naveen formatted -- works with MDAnalysis verison 52
    # psf_atom_format = " %5d %-4s %-4d %-4s %-4s %-4s %10.6f %7.4f %1d\n"
    # Liz formatted -- works with MDAnalysis verison 59
    # psf_atom_format = "%8d %4.4s %-4.4s %-4.4s %-4.4s %-4.4s %16.8e %1s %-7.4f %7.7s %s\n"
    # Oli formatted -- works with MDAnalysis verison 81
    psf_atom_format = "%8d %4s %-4s %4s %-4s% 4s %-14.6f%-14.6f%8s\n"
    with openany(filename, 'w') as file:
        file.write("PSF\n\n")
        file.write(string.rjust('0', 8) + ' !NTITLE\n\n')
        file.write(string.rjust(str(len(self.sections["Atoms"])), 8) + ' !NATOM\n')
        # print self.sections["Masses"]
        for i, atom in enumerate(self.sections["Atoms"]):
            if names is not None:
                resname, atomname = names[i]
            else:
                resname, atomname = 'TEMP', 'XXXX'
            for j, liz in enumerate(self.sections["Masses"]):
                liz = liz[0]
                # print j+1, atom.type, liz
                if j + 1 == atom.type:
                    line = [i + 1, 'TEMP', str(atom.chainid), resname, atomname,
                            str(atom.type + 1), atom.charge, float(liz), 0.]
                else:
                    continue
            # print line
            file.write(psf_atom_format % tuple(line))

        file.write("\n")
        num_bonds = len(self.sections["Bonds"])
        bond_list = self.sections["Bonds"]
        file.write(string.rjust(str(num_bonds), 8) + ' !NBOND\n')
        for index in range(0, num_bonds, 4):
            try:
                bonds = bond_list[index:index + 4]
            except IndexError:
                bonds = bond_list[index:-1]
            bond_line = map(lambda bond: string.rjust(str(bond[1]), 8) +
                                         string.rjust(str(bond[2]), 8), bonds)
            file.write(''.join(bond_line) + '\n')
def parse(self): """Parse CRD file *filename* and return the dict `structure`. Only reads the list of atoms. :Returns: MDAnalysis internal *structure* dict .. SeeAlso:: The *structure* dict is defined in `MDAnalysis.topology` """ extformat = FORTRANReader( '2I10,2X,A8,2X,A8,3F20.10,2X,A8,2X,A8,F20.10') stdformat = FORTRANReader('2I5,1X,A4,1X,A4,3F10.5,1X,A4,1X,A4,F10.5') atoms = [] atom_serial = 0 with openany(self.filename) as crd: for linenum, line in enumerate(crd): # reading header if line.split()[0] == '*': continue elif line.split()[-1] == 'EXT' and bool(int( line.split()[0])) is True: r = extformat continue elif line.split()[0] == line.split( )[-1] and line.split()[0] != '*': r = stdformat continue # anything else should be an atom try: serial, TotRes, resName, name, x, y, z, chainID, resSeq, tempFactor = r.read( line) except: raise ValueError("Check CRD format at line {}: {}".format( linenum, line.rstrip())) atomtype = guess_atom_type(name) mass = guess_atom_mass(name) charge = guess_atom_charge(name) atoms.append( Atom(atom_serial, name, atomtype, resName, TotRes, chainID, mass, charge)) atom_serial += 1 structure = {} structure["_atoms"] = atoms return structure
def load(self, filename):
    """Load the data file."""
    from MDAnalysis.core.util import openany

    records = []
    with openany(filename) as data:
        for line in data:
            if line.startswith('#'):
                continue
            records.append(map(float, line.split()))
    self.timeseries = numpy.array(records).T

    try:
        self.qavg = numpy.loadtxt(self.outarray)
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
def __init__(self, filename, convert_units=None, **kwargs):
    """Read coordinates from *filename*.

    *filename* can be a gzipped or bzip2ed compressed PQR_ file.

    .. _PQR: http://www.poissonboltzmann.org/file-formats/biomolecular-structurw/pqr
    """
    self.filename = filename
    if convert_units is None:
        convert_units = MDAnalysis.core.flags['convert_lengths']
    self.convert_units = convert_units  # convert length and time to base units

    coords = []
    atoms = []
    unitcell = numpy.zeros(6, dtype=numpy.float32)
    segID = ''  # use empty string (not in PQR); PQRParser sets it to SYSTEM
    with util.openany(filename, 'r') as pqrfile:
        for line in pqrfile:
            if line[:6] in ('ATOM  ', 'HETATM'):
                fields = line.split()
                try:
                    recordName, serial, name, resName, chainID, resSeq, x, y, z, charge, radius = fields
                except ValueError:
                    # files without the chainID
                    recordName, serial, name, resName, resSeq, x, y, z, charge, radius = fields
                    chainID = ''
                coords.append((float(x), float(y), float(z)))
                atoms.append((int(serial), name, resName, chainID, int(resSeq),
                              float(charge), float(radius), segID))
    self.numatoms = len(coords)
    self.ts = self._Timestep(numpy.array(coords, dtype=numpy.float32))
    self.ts._unitcell[:] = unitcell
    self.ts.frame = 1  # 1-based frame number
    if self.convert_units:
        self.convert_pos_from_native(self.ts._pos)           # in-place !
        self.convert_pos_from_native(self.ts._unitcell[:3])  # in-place ! (only lengths)
    self.numframes = 1
    self.fixed = 0
    self.skip = 1
    self.periodic = False
    self.delta = 0
    self.skip_timestep = 1
    # hack for PQRParser:
    self._atoms = numpy.rec.fromrecords(
        atoms, names="serial,name,resName,chainID,resSeq,charge,radius,segID")
def parse(self): """Parse CRD file *filename* and return the dict `structure`. Only reads the list of atoms. :Returns: MDAnalysis internal *structure* dict .. SeeAlso:: The *structure* dict is defined in `MDAnalysis.topology` """ extformat = FORTRANReader('2I10,2X,A8,2X,A8,3F20.10,2X,A8,2X,A8,F20.10') stdformat = FORTRANReader('2I5,1X,A4,1X,A4,3F10.5,1X,A4,1X,A4,F10.5') atoms = [] atom_serial = 0 with openany(self.filename) as crd: for linenum, line in enumerate(crd): # reading header if line.split()[0] == '*': continue elif line.split()[-1] == 'EXT' and bool(int(line.split()[0])) is True: r = extformat continue elif line.split()[0] == line.split()[-1] and line.split()[0] != '*': r = stdformat continue # anything else should be an atom try: serial, TotRes, resName, name, x, y, z, chainID, resSeq, tempFactor = r.read(line) except: raise ValueError("Check CRD format at line {}: {}".format( linenum, line.rstrip())) atomtype = guess_atom_type(name) mass = guess_atom_mass(name) charge = guess_atom_charge(name) atoms.append(Atom(atom_serial, name, atomtype, resName, TotRes, chainID, mass, charge)) atom_serial += 1 structure = {} structure["_atoms"] = atoms return structure
def _parsebonds(self, primitive_pdb_reader):
    if self.guess_bonds_mode:
        guessed_bonds = guess_bonds(self.structure["_atoms"],
                                    np.array(primitive_pdb_reader.ts))
        self.structure["_guessed_bonds"] = guessed_bonds

    # Mapping between the atom array indices a.number and atom ids
    # (serial) in the original PDB file
    mapping = dict((a.serial, a.number) for a in self.structure["_atoms"])

    bonds = set()
    with openany(self.filename, "r") as fname:
        lines = ((num, line[6:].split()) for num, line in enumerate(fname)
                 if line[:6] == "CONECT")
        for num, bond in lines:
            atom, atoms = int(bond[0]), map(int, bond[1:])
            for a in atoms:
                bond = tuple([mapping[atom], mapping[a]])
                bonds.add(bond)

    self.structure["_bonds"] = tuple(bonds)
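# Sketch of the serial -> index mapping used above, with made-up numbers: if the PDB
# lists atoms with serials 1, 2, 5 (MDAnalysis numbers them 0, 1, 2), a record
# "CONECT    1    5" becomes the bond tuple (0, 2).
mapping = {1: 0, 2: 1, 5: 2}          # a.serial -> a.number
conect_fields = ["1", "5"]            # line[6:].split() for "CONECT    1    5"
atom, partners = int(conect_fields[0]), map(int, conect_fields[1:])
bonds = set(tuple([mapping[atom], mapping[a]]) for a in partners)
print bonds   # set([(0, 2)])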
def write(self, selection, frame=None):
    """Write selection at current trajectory frame to file.

    :Arguments:
        *selection*
            MDAnalysis AtomGroup (selection or Universe.atoms) or also Universe
    :Keywords:
        *frame*
            optionally move to frame number *frame*
    """
    # write() method that complies with the Trajectory API
    u = selection.universe
    if frame is not None:
        u.trajectory[frame]  # advance to frame
    else:
        try:
            frame = u.trajectory.ts.frame
        except AttributeError:
            frame = 1  # should catch cases when we are analyzing a single frame(?)

    atoms = selection.atoms  # make sure to use atoms (Issue 46)
    coordinates = atoms.coordinates()  # can write from selection == Universe (Issue 49)
    if self.convert_units:
        self.convert_pos_to_native(coordinates)  # inplace because coordinates is already a copy

    with util.openany(self.filename, 'w') as pqrfile:
        # Header
        self._write_REMARK(pqrfile, self.remarks)
        self._write_REMARK(pqrfile,
                           "Input: frame {0} of {1}".format(frame, u.trajectory.filename), 5)
        self._write_REMARK(pqrfile,
                           "total charge: {0:+8.4f} e".format(atoms.totalCharge()), 6)
        # Atom descriptions and coords
        for atom_index, atom in enumerate(atoms):
            XYZ = coordinates[atom_index]
            self._write_ATOM(pqrfile, atom_index + 1, atom.name, atom.resname,
                             atom.segid, atom.resid, XYZ, atom.charge, atom.radius)
def parse(self): """Parse Amber PRMTOP topology file *filename*. :Returns: MDAnalysis internal *structure* dict. """ formatversion = 10 with openany(self.filename) as topfile: for line in topfile: if line.startswith("%FLAG ATOMIC_NUMBER"): formatversion = 12 break if formatversion == 12: sections = [ ("ATOM_NAME", 1, 20, self._parseatoms, "_name", 0), ("CHARGE", 1, 5, self._parsesection, "_charge", 0), ("ATOMIC_NUMBER", 1, 10, self._parsesectionint, "_skip", 0), ("MASS", 1, 5, self._parsesection, "_mass", 0), ("ATOM_TYPE_INDEX", 1, 10, self._parsesectionint, "_atom_type", 0), ("NUMBER_EXCLUDED_ATOMS", 1, 10, self._parseskip, "_skip", 8), ("NONBONDED_PARM_INDEX", 1, 10, self._parseskip, "_skip", 8), ("RESIDUE_LABEL", 1, 20, self._parseatoms, "_resname", 11), ("RESIDUE_POINTER", 2, 10, self._parsesectionint, "_respoint", 11), ] #("BOND_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("BOND_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("ANGLE_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("ANGLE_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PERIODICITY", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PHASE", 1, 5, self._parseskip,"_skip",8), #("SOLTY", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_ACOEF", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_BCOEF", 1, 5, self._parseskip,"_skip",8), #("BONDS_INC_HYDROGEN", 2, 4, self._parsebond, "_bonds",2), #("ANGLES_INC_HYDROGEN", 3, 3, self._parsesection, "_angles"), #("DIHEDRALS_INC_HYDROGEN", 4, 2, self._parsesection, "_dihe"), #("NIMPHI", 4, 2, self._parsesection, "_impr"), #("NDON", 2, 4, self._parsesection,"_donors"), #("NACC", 2, 4, self._parsesection,"_acceptors"), elif formatversion == 10: sections = [ ("ATOM_NAME", 1, 20, self._parseatoms, "_name", 0), ("CHARGE", 1, 5, self._parsesection, "_charge", 0), ("MASS", 1, 5, self._parsesection, "_mass", 0), ("ATOM_TYPE_INDEX", 1, 10, self._parsesectionint, "_atom_type", 0), ("NUMBER_EXCLUDED_ATOMS", 1, 10, self._parseskip, "_skip", 8), ("NONBONDED_PARM_INDEX", 1, 10, self._parseskip, "_skip", 8), ("RESIDUE_LABEL", 1, 20, self._parseatoms, "_resname", 11), ("RESIDUE_POINTER", 2, 10, self._parsesectionint, "_respoint", 11), ] #("BOND_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("BOND_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("ANGLE_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("ANGLE_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PERIODICITY", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PHASE", 1, 5, self._parseskip,"_skip",8), #("SOLTY", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_ACOEF", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_BCOEF", 1, 5, self._parseskip,"_skip",8), #("BONDS_INC_HYDROGEN", 2, 4, self._parsebond, "_bonds",2), #("ANGLES_INC_HYDROGEN", 3, 3, self._parsesection, "_angles"), #("DIHEDRALS_INC_HYDROGEN", 4, 2, self._parsesection, "_dihe")] #("NIMPHI", 4, 2, self._parsesection, "_impr"), #("NDON", 2, 4, self._parsesection,"_donors"), #("NACC", 2, 4, self._parsesection,"_acceptors")] # Open and check top validity # Reading header info POINTERS with openany(self.filename) as topfile: next_line = topfile.next header = next_line() if header[:3] != "%VE": raise ValueError( "{} is not a valid TOP file. %VE Missing in header".format( topfile)) title = next_line().split() if not (title[1] == "TITLE"): raise ValueError( "{} is not a valid TOP file. 'TITLE' missing in header". 
format(topfile)) while header[:14] != '%FLAG POINTERS': header = next_line() header = next_line() topremarks = [next_line().strip() for i in xrange(4)] sys_info = [int(k) for i in topremarks for k in i.split()] structure = {} final_structure = {} try: for info in sections: self._parse_sec(sys_info, info, next_line, structure, final_structure) except StopIteration: raise ValueError("The TOP file didn't contain the minimum" " required section of ATOM_NAME") # Completing info respoint to include all atoms in last resid structure["_respoint"].append(sys_info[0]) structure["_respoint"][-1] = structure["_respoint"][-1] + 1 atoms = [ None, ] * sys_info[0] j = 0 segid = "SYSTEM" for i in range(sys_info[0]): charge = convert(structure["_charge"][i], 'Amber', flags['charge_unit']) if structure["_respoint"][j] <= i + 1 < structure["_respoint"][j + 1]: resid = j + 1 resname = structure["_resname"][j] else: j += 1 resid = j + 1 resname = structure["_resname"][j] mass = structure["_mass"][i] atomtype = structure["_atom_type"][i] atomname = structure["_name"][i] #segid = 'SYSTEM' # does not exist in Amber atoms[i] = Atom(i, atomname, atomtype, resname, resid, segid, mass, charge) final_structure["_atoms"] = atoms final_structure["_numatoms"] = sys_info[0] return final_structure
def write(self, selection, frame=None):
    """Write selection at current trajectory frame to file.

    :Arguments:
        selection
            MDAnalysis AtomGroup (selection or Universe.atoms) or also Universe
    :Keywords:
        frame
            optionally move to frame number *frame*

    The GRO format only allows 5 digits for resid and atom number. If these
    numbers become larger than 99,999 then this routine will chop off the
    leading digits.

    .. versionchanged:: 0.7.6
       resName and atomName are truncated to a maximum of 5 characters
    """
    # write() method that complies with the Trajectory API
    u = selection.universe
    if frame is not None:
        u.trajectory[frame]  # advance to frame
    else:
        try:
            frame = u.trajectory.ts.frame
        except AttributeError:
            frame = 1  # should catch cases when we are analyzing a single GRO (?)

    atoms = selection.atoms  # make sure to use atoms (Issue 46)
    coordinates = atoms.coordinates()  # can write from selection == Universe (Issue 49)
    if self.convert_units:
        # Convert back to nm from Angstroms, inplace because coordinates is already a copy
        self.convert_pos_to_native(coordinates)
    # check if any coordinates are illegal (checks the coordinates in native nm!)
    if not self.has_valid_coordinates(self.gro_coor_limits, coordinates):
        raise ValueError("GRO files must have coordinate values between %.3f and %.3f nm: "
                         "No file was written." %
                         (self.gro_coor_limits["min"], self.gro_coor_limits["max"]))

    with util.openany(self.filename, 'w') as output_gro:
        # Header
        output_gro.write('Written by MDAnalysis\n')
        output_gro.write(self.fmt['numatoms'] % len(atoms))
        # Atom descriptions and coords
        for atom_index, atom in enumerate(atoms):
            c = coordinates[atom_index]
            output_line = self.fmt['xyz'] % (
                str(atom.resid)[-5:],       # truncate highest digits on overflow
                atom.resname.strip()[:5],
                atom.name.strip()[:5],
                str(atom.number + 1)[-5:],  # number (1-based), truncate highest digits on overflow
                c[0], c[1], c[2],           # coords - outputted with 3 d.p.
            )
            output_gro.write(output_line)

        # Footer: box dimensions
        box = self.convert_dimensions_to_unitcell(u.trajectory.ts)
        if numpy.all(u.trajectory.ts.dimensions[3:] == [90., 90., 90.]):
            # orthorhombic cell, only lengths along axes needed in gro
            output_gro.write(self.fmt['box_orthorhombic'] % (box[0, 0], box[1, 1], box[2, 2]))
        else:
            # full output
            output_gro.write(self.fmt['box_triclinic'] %
                             (box[0, 0], box[1, 1], box[2, 2],
                              box[0, 1], box[0, 2], box[1, 0],
                              box[1, 2], box[2, 0], box[2, 1]))
def __init__(self, grofilename, convert_units=None, **kwargs):
    self.grofilename = grofilename
    self.filename = self.grofilename
    if convert_units is None:
        convert_units = MDAnalysis.core.flags['convert_lengths']
    self.convert_units = convert_units  # convert length and time to base units

    coords_list = []
    velocities_list = []

    with util.openany(grofilename, 'r') as grofile:
        # Read first two lines to get number of atoms
        grofile.readline()
        total_atnums = int(grofile.readline())
        # and the third line to get the spacing between coords (cs)
        # (dependent upon the GRO file precision)
        cs = grofile.readline()[25:].find('.') + 1
        grofile.seek(0)
        for linenum, line in enumerate(grofile):
            # Should work with any precision
            if linenum not in (0, 1, total_atnums + 2):
                coords_list.append(numpy.array((
                    float(line[20:20 + cs]),
                    float(line[20 + cs:20 + (cs * 2)]),
                    float(line[20 + (cs * 2):20 + (cs * 3)]))))
                if line[20:].count('.') > 3:
                    # if there are enough decimals to indicate the presence of velocities
                    velocities_list.append(numpy.array((
                        float(line[20 + (cs * 3):20 + (cs * 4)]),
                        float(line[20 + (cs * 4):20 + (cs * 5)]),
                        float(line[20 + (cs * 5):20 + (cs * 6)]))))
            # Unit cell footer
            elif linenum == total_atnums + 2:
                unitcell = numpy.array(map(float, line.split()))

    self.numatoms = len(coords_list)
    coords_list = numpy.array(coords_list)
    self.ts = self._Timestep(coords_list)
    self.ts.frame = 1  # 1-based frame number
    if velocities_list:
        # perform this operation only if velocities are present in coord file
        # TODO: use a Timestep that knows about velocities such as TRR.Timestep or better, TRJ.Timestep
        self.ts._velocities = numpy.array(velocities_list, dtype=numpy.float32)
        self.convert_velocities_from_native(self.ts._velocities)  # converts nm/ps to A/ps units
    # ts._unitcell layout is format dependent; Timestep.dimensions does the conversion
    # behind the scene
    self.ts._unitcell = numpy.zeros(9, dtype=numpy.float32)  # GRO has 9 entries
    if len(unitcell) == 3:
        # special case: a b c --> (a 0 0) (b 0 0) (c 0 0)
        # see Timestep.dimensions() above for format (!)
        self.ts._unitcell[:3] = unitcell
    elif len(unitcell) == 9:
        self.ts._unitcell[:] = unitcell  # fill all
    else:
        # or maybe raise an error for wrong format??
        import warnings

        warnings.warn("GRO unitcell has neither 3 nor 9 entries --- might be wrong.")
        self.ts._unitcell[:len(unitcell)] = unitcell  # fill linearly ... not sure about this
    if self.convert_units:
        self.convert_pos_from_native(self.ts._pos)       # in-place !
        self.convert_pos_from_native(self.ts._unitcell)  # in-place ! (all are lengths)
    self.numframes = 1
    self.fixed = 0
    self.skip = 1
    self.periodic = False
    self.delta = 0
    self.skip_timestep = 1
def parse(self): """Parse Amber PRMTOP topology file *filename*. :Returns: MDAnalysis internal *structure* dict. """ formatversion = 10 with openany(self.filename) as topfile: for line in topfile: if line.startswith("%FLAG ATOMIC_NUMBER"): formatversion = 12 break if formatversion == 12: sections = [ ("ATOM_NAME", 1, 20, self._parseatoms, "_name", 0), ("CHARGE", 1, 5, self._parsesection, "_charge", 0), ("ATOMIC_NUMBER", 1, 10, self._parsesectionint, "_skip", 0), ("MASS", 1, 5, self._parsesection, "_mass", 0), ("ATOM_TYPE_INDEX", 1, 10, self._parsesectionint, "_atom_type", 0), ("NUMBER_EXCLUDED_ATOMS", 1, 10, self._parseskip, "_skip", 8), ("NONBONDED_PARM_INDEX", 1, 10, self._parseskip, "_skip", 8), ("RESIDUE_LABEL", 1, 20, self._parseatoms, "_resname", 11), ("RESIDUE_POINTER", 2, 10, self._parsesectionint, "_respoint", 11), ] #("BOND_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("BOND_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("ANGLE_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("ANGLE_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PERIODICITY", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PHASE", 1, 5, self._parseskip,"_skip",8), #("SOLTY", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_ACOEF", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_BCOEF", 1, 5, self._parseskip,"_skip",8), #("BONDS_INC_HYDROGEN", 2, 4, self._parsebond, "_bonds",2), #("ANGLES_INC_HYDROGEN", 3, 3, self._parsesection, "_angles"), #("DIHEDRALS_INC_HYDROGEN", 4, 2, self._parsesection, "_dihe"), #("NIMPHI", 4, 2, self._parsesection, "_impr"), #("NDON", 2, 4, self._parsesection,"_donors"), #("NACC", 2, 4, self._parsesection,"_acceptors"), elif formatversion == 10: sections = [ ("ATOM_NAME", 1, 20, self._parseatoms, "_name", 0), ("CHARGE", 1, 5, self._parsesection, "_charge", 0), ("MASS", 1, 5, self._parsesection, "_mass", 0), ("ATOM_TYPE_INDEX", 1, 10, self._parsesectionint, "_atom_type", 0), ("NUMBER_EXCLUDED_ATOMS", 1, 10, self._parseskip, "_skip", 8), ("NONBONDED_PARM_INDEX", 1, 10, self._parseskip, "_skip", 8), ("RESIDUE_LABEL", 1, 20, self._parseatoms, "_resname", 11), ("RESIDUE_POINTER", 2, 10, self._parsesectionint, "_respoint", 11), ] #("BOND_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("BOND_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("ANGLE_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("ANGLE_EQUIL_VALUE", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_FORCE_CONSTANT", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PERIODICITY", 1, 5, self._parseskip,"_skip",8), #("DIHEDRAL_PHASE", 1, 5, self._parseskip,"_skip",8), #("SOLTY", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_ACOEF", 1, 5, self._parseskip,"_skip",8), #("LENNARD_JONES_BCOEF", 1, 5, self._parseskip,"_skip",8), #("BONDS_INC_HYDROGEN", 2, 4, self._parsebond, "_bonds",2), #("ANGLES_INC_HYDROGEN", 3, 3, self._parsesection, "_angles"), #("DIHEDRALS_INC_HYDROGEN", 4, 2, self._parsesection, "_dihe")] #("NIMPHI", 4, 2, self._parsesection, "_impr"), #("NDON", 2, 4, self._parsesection,"_donors"), #("NACC", 2, 4, self._parsesection,"_acceptors")] # Open and check top validity # Reading header info POINTERS with openany(self.filename) as topfile: next_line = topfile.next header = next_line() if header[:3] != "%VE": raise ValueError("{} is not a valid TOP file. %VE Missing in header".format(topfile)) title = next_line().split() if not (title[1] == "TITLE"): raise ValueError("{} is not a valid TOP file. 
'TITLE' missing in header".format(topfile)) while header[:14] != '%FLAG POINTERS': header = next_line() header = next_line() topremarks = [next_line().strip() for i in xrange(4)] sys_info = [int(k) for i in topremarks for k in i.split()] structure = {} final_structure = {} try: for info in sections: self._parse_sec(sys_info, info, next_line, structure, final_structure) except StopIteration: raise ValueError("The TOP file didn't contain the minimum" " required section of ATOM_NAME") # Completing info respoint to include all atoms in last resid structure["_respoint"].append(sys_info[0]) structure["_respoint"][-1] = structure["_respoint"][-1] + 1 atoms = [None, ]*sys_info[0] j = 0 segid = "SYSTEM" for i in range(sys_info[0]): charge = convert(structure["_charge"][i], 'Amber', flags['charge_unit']) if structure["_respoint"][j] <= i+1 < structure["_respoint"][j+1]: resid = j + 1 resname = structure["_resname"][j] else: j += 1 resid = j + 1 resname = structure["_resname"][j] mass = structure["_mass"][i] atomtype = structure["_atom_type"][i] atomname = structure["_name"][i] #segid = 'SYSTEM' # does not exist in Amber atoms[i] = Atom(i, atomname, atomtype, resname, resid, segid, mass, charge) final_structure["_atoms"] = atoms final_structure["_numatoms"] = sys_info[0] return final_structure
def parse(self): """Parses a LAMMPS_ DATA file. The parser implements the `LAMMPS DATA file format`_ but only for the LAMMPS `atom_style`_ *full* (numeric ids 7, 10) and *molecular* (6, 9). :Returns: MDAnalysis internal *structure* dict. .. versionadded:: 0.9.0 .. _LAMMPS DATA file format: :http://lammps.sandia.gov/doc/2001/data_format.html .. _`atom_style`: http://lammps.sandia.gov/doc/atom_style.html """ # Can pass atom_style to help parsing atom_style = self.kwargs.get('atom_style', None) # Used this to do data format: # http://lammps.sandia.gov/doc/2001/data_format.html with openany(self.filename, 'r') as psffile: # Check format of file somehow structure = {} try: nitems, ntypes, box = self._parse_header(psffile) except: raise IOError("Failed to read DATA header") strkey = { 'Bonds': '_bonds', 'Angles': '_angles', 'Dihedrals': '_dihe', 'Impropers': '_impr'} nentries = { '_bonds': 2, '_angles': 3, '_dihe': 4, '_impr': 4} # Masses can appear after Atoms section. # If this happens, this blank dict will be used and all atoms # will have zero mass, can fix this later masses = {} read_masses = False read_coords = False # Now go through section by section while True: try: section = psffile.next().strip().split()[0] except IndexError: section = '' # blank lines don't split except StopIteration: break logger.info("Parsing section '{}'".format(section)) if section == 'Atoms': fix_masses = False if read_masses else True structure['_atoms'] = self._parse_atoms(psffile, nitems['_atoms'], masses, atom_style) read_coords = True elif section == 'Masses': read_masses = True try: masses = self._parse_masses(psffile, ntypes['_atoms']) except: raise IOError("Failed to read masses section") elif section in strkey: # for sections we use in MDAnalysis logger.debug("Doing strkey section for {}".format(section)) f = strkey[section] try: structure[f] = self._parse_section(psffile, nitems[f], nentries[f]) except: raise IOError("Failed to read section {}".format(section)) elif len(section) > 0: # for sections we don't use in MDAnalysis logger.debug("Skipping section, found: {}".format(section)) self._skip_section(psffile) else: # for blank lines continue if not read_coords: raise IOError("Failed to find coordinate data") if fix_masses: for a in structure['_atoms']: try: a.mass = masses[a.type] except KeyError: a.mass = 0.0 # a.mass = guess_atom_mass(a.name) return structure
def parse(self, filename=None):
    """Parse MOL2 file *filename* and return the dict `structure`.

    Only reads the list of atoms.

    :Returns: MDAnalysis internal *structure* dict

    .. SeeAlso:: The *structure* dict is defined in
                 :func:`MDAnalysis.topology.PSFParser.PSFParser`.
    """
    if not filename:
        filename = self.filename

    blocks = []

    with openany(filename) as f:
        for i, line in enumerate(f):
            # found new molecules
            if "@<TRIPOS>MOLECULE" in line:
                if len(blocks):
                    break
                blocks.append({"start_line": i, "lines": []})
            blocks[-1]["lines"].append(line)

    if not len(blocks):
        raise ValueError("The mol2 file '{}' needs to have at least one"
                         " @<TRIPOS>MOLECULE block".format(filename))
    block = blocks[0]

    sections = {}
    cursor = None
    for line in block["lines"]:
        if "@<TRIPOS>" in line:
            cursor = line.split("@<TRIPOS>")[1].strip().lower()
            sections[cursor] = []
            continue
        elif line.startswith("#") or line == "\n":
            continue
        sections[cursor].append(line)

    atom_lines, bond_lines = sections["atom"], sections["bond"]
    if not len(atom_lines):
        raise ValueError("The mol2 block ({}:{}) has no atoms".format(
            os.path.basename(filename), block["start_line"]))
    if not len(bond_lines):
        raise ValueError("The mol2 block ({}:{}) has no bonds".format(
            os.path.basename(filename), block["start_line"]))

    atoms = []
    for a in atom_lines:
        aid, name, x, y, z, atom_type, resid, resname, charge = a.split()
        aid = int(aid) - 1
        # x, y, z = float(x), float(y), float(z)
        resid = int(resid)
        charge = float(charge)
        element = guess_atom_type(name)
        mass = guess_atom_mass(element)
        # atom type is the SYBYL atom type
        atoms.append(Atom(aid, name, atom_type, resname, resid, "X", mass, charge))
        # guess_atom_type(a.split()[1]

    bonds = []
    bondorder = {}
    for b in bond_lines:
        # bond_type can be: 1, 2, am, ar
        bid, a0, a1, bond_type = b.split()
        a0, a1 = int(a0) - 1, int(a1) - 1
        bond = tuple(sorted([a0, a1]))
        bondorder[bond] = bond_type
        bonds.append(bond)
    structure = {"_atoms": atoms, "_bonds": bonds, "_bondorder": bondorder}
    return structure
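# Usage sketch for the MOL2 parser above. The class name, import path and constructor
# signature (MDAnalysis.topology.MOL2Parser.MOL2Parser taking a filename) are assumptions
# for illustration; the structure dict keys are taken from the code above.
from MDAnalysis.topology.MOL2Parser import MOL2Parser   # assumed import path

parser = MOL2Parser("ligand.mol2")    # placeholder file name
structure = parser.parse()
print len(structure["_atoms"]), "atoms,", len(structure["_bonds"]), "bonds"
# _bondorder maps a sorted 0-based atom-index pair to the SYBYL bond type (1, 2, am, ar, ...)
print structure["_bondorder"]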
def __init__(self, filename=None):
    self.names = {}
    self.headers = {}
    self.sections = {}
    if filename is None:
        self.title = "LAMMPS data file"
    else:
        # Open and check validity
        with openany(filename, 'r') as file:
            file_iter = file.xreadlines()
            self.title = file_iter.next()
            # Parse headers
            headers = self.headers
            for l in file_iter:
                line = l.strip()
                if len(line) == 0:
                    continue
                found = False
                for keyword in self.header_keywords:
                    if line.find(keyword) >= 0:
                        found = True
                        values = line.split()
                        if keyword in ("xlo xhi", "ylo yhi", "zlo zhi"):
                            headers[keyword] = (float(values[0]), float(values[1]))
                        else:
                            headers[keyword] = int(values[0])
                if found is False:
                    break

        # Parse sections
        # XXX This is a crappy way to do it
        with openany(filename, 'r') as file:
            file_iter = file.xreadlines()
            # Create coordinate array
            positions = numpy.zeros((headers["atoms"], 3), numpy.float64)
            sections = self.sections
            for l in file_iter:
                line = l.strip()
                if len(line) == 0:
                    continue
                if line in self.coeff:
                    h, numcoeff = self.coeff[line]
                    # skip line
                    file_iter.next()
                    data = []
                    for i in xrange(headers[h]):
                        fields = file_iter.next().strip().split()
                        data.append(tuple(map(conv_float, fields[1:])))
                    sections[line] = data
                elif line in self.connections:
                    h, numfields = self.connections[line]
                    # skip line
                    file_iter.next()
                    data = []
                    for i in range(headers[h]):
                        fields = file_iter.next().strip().split()
                        data.append(tuple(map(int, fields[1:])))
                    sections[line] = data
                elif line == "Atoms":
                    file_iter.next()
                    data = []
                    for i in xrange(headers["atoms"]):
                        fields = file_iter.next().strip().split()
                        index = int(fields[0]) - 1
                        a = LAMMPSAtom(index=index, name=fields[2], type=int(fields[2]),
                                       chain_id=int(fields[1]), charge=float(fields[3]))
                        a._positions = positions
                        data.append(a)
                        positions[index] = numpy.array([float(fields[4]),
                                                        float(fields[5]),
                                                        float(fields[6])])
                    sections[line] = data
                elif line == "Masses":
                    file_iter.next()
                    data = []
                    for i in xrange(headers["atom type"]):
                        fields = file_iter.next().strip().split()
                        print "help"  # XXX leftover debug output; the Masses entries are read but never stored

        self.positions = positions
def write(self, selection, frame=None):
    """Write selection at current trajectory frame to file.

    :Arguments:
      selection
          MDAnalysis AtomGroup (selection or Universe.atoms) or also a
          whole Universe
    :Keywords:
      frame
          optionally move to frame number *frame*

    The GRO format only allows 5 digits for resid and atom number. If these
    numbers become larger than 99,999 then this routine will chop off the
    leading digits.

    .. versionchanged:: 0.7.6
       resName and atomName are truncated to a maximum of 5 characters
    """
    # write() method that complies with the Trajectory API
    u = selection.universe
    if frame is not None:
        u.trajectory[frame]  # advance to frame
    else:
        try:
            frame = u.trajectory.ts.frame
        except AttributeError:
            frame = 1  # should catch cases when we are analyzing a single GRO (?)

    atoms = selection.atoms  # make sure to use atoms (Issue 46)
    coordinates = atoms.coordinates()  # can write from selection == Universe (Issue 49)
    if self.convert_units:
        # Convert back to nm from Angstroms, in place because coordinates is already a copy
        self.convert_pos_to_native(coordinates)
    # check if any coordinates are illegal (checks the coordinates in native nm!)
    if not self.has_valid_coordinates(self.gro_coor_limits, coordinates):
        raise ValueError("GRO files must have coordinate values between %.3f and %.3f nm: "
                         "No file was written." %
                         (self.gro_coor_limits["min"], self.gro_coor_limits["max"]))

    with util.openany(self.filename, 'w') as output_gro:
        # Header
        output_gro.write('Written by MDAnalysis\n')
        output_gro.write(self.fmt['numatoms'] % len(atoms))
        # Atom descriptions and coordinates
        for atom_index, atom in enumerate(atoms):
            c = coordinates[atom_index]
            output_line = self.fmt['xyz'] % (
                str(atom.resid)[-5:],         # truncate highest digits on overflow
                atom.resname.strip()[:5],
                atom.name.strip()[:5],
                str(atom.number + 1)[-5:],    # number (1-based), truncate highest digits on overflow
                c[0], c[1], c[2],             # coords, written with 3 decimal places
            )
            output_gro.write(output_line)

        # Footer: box dimensions
        box = self.convert_dimensions_to_unitcell(u.trajectory.ts)
        if numpy.all(u.trajectory.ts.dimensions[3:] == [90., 90., 90.]):
            # orthorhombic cell: only the lengths along the axes are needed in the GRO file
            output_gro.write(self.fmt['box_orthorhombic'] %
                             (box[0, 0], box[1, 1], box[2, 2]))
        else:
            # full output
            output_gro.write(self.fmt['box_triclinic'] %
                             (box[0, 0], box[1, 1], box[2, 2],
                              box[0, 1], box[0, 2],
                              box[1, 0], box[1, 2],
                              box[2, 0], box[2, 1]))
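# Illustrative sketch (not part of the original module): because GRO fields are
# fixed-width with room for only 5 digits, the writer above keeps the *last*
# five digits of an overflowing resid or atom number (numerically the value
# modulo 100000 for positive integers).  A minimal standalone version:

def _truncate_to_five_digits(number):
    """Return the last five digits of *number* as a string (e.g. 123456 -> '23456')."""
    return str(number)[-5:]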
def parse(self):
    """Parses a LAMMPS_ DATA file.

    The parser implements the `LAMMPS DATA file format`_ but only for the
    LAMMPS `atom_style`_ *full* (numeric ids 7, 10) and *molecular* (6, 9).

    :Returns: MDAnalysis internal *structure* dict.

    .. versionadded:: 0.9.0

    .. _LAMMPS DATA file format: http://lammps.sandia.gov/doc/2001/data_format.html
    .. _`atom_style`: http://lammps.sandia.gov/doc/atom_style.html
    """
    # Can pass atom_style to help parsing
    atom_style = self.kwargs.get("atom_style", None)

    # Reference for the data format:
    # http://lammps.sandia.gov/doc/2001/data_format.html
    with openany(self.filename, "r") as psffile:
        # Check format of file somehow
        structure = {}
        try:
            nitems, ntypes, box = self._parse_header(psffile)
        except:
            raise IOError("Failed to read DATA header")

        strkey = {"Bonds": "_bonds",
                  "Angles": "_angles",
                  "Dihedrals": "_dihe",
                  "Impropers": "_impr"}
        nentries = {"_bonds": 2, "_angles": 3, "_dihe": 4, "_impr": 4}

        # Masses can appear after the Atoms section.  If this happens, this
        # blank dict is used, all atoms get zero mass, and the masses are
        # fixed up after parsing (see fix_masses below).
        masses = {}
        read_masses = False
        read_coords = False

        # Now go through section by section
        while True:
            try:
                section = psffile.next().strip().split()[0]
            except IndexError:  # blank lines don't split
                section = ""
            except StopIteration:
                break

            logger.info("Parsing section '{}'".format(section))
            if section == "Atoms":
                fix_masses = False if read_masses else True
                structure["_atoms"] = self._parse_atoms(psffile,
                                                        nitems["_atoms"],
                                                        masses,
                                                        atom_style)
                read_coords = True
            elif section == "Masses":
                read_masses = True
                try:
                    masses = self._parse_masses(psffile, ntypes["_atoms"])
                except:
                    raise IOError("Failed to read masses section")
            elif section in strkey:  # sections we use in MDAnalysis
                logger.debug("Doing strkey section for {}".format(section))
                f = strkey[section]
                try:
                    structure[f] = self._parse_section(psffile,
                                                       nitems[f],
                                                       nentries[f])
                except:
                    raise IOError("Failed to read section {}".format(section))
            elif len(section) > 0:  # sections we don't use in MDAnalysis
                logger.debug("Skipping section, found: {}".format(section))
                self._skip_section(psffile)
            else:  # blank lines
                continue

    if not read_coords:
        raise IOError("Failed to find coordinate data")

    if fix_masses:
        for a in structure["_atoms"]:
            try:
                a.mass = masses[a.type]
            except KeyError:
                a.mass = 0.0
                # a.mass = guess_atom_mass(a.name)

    return structure
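# Illustrative sketch (not part of the original module): in a LAMMPS data file
# the "Masses" section maps a numeric atom type to a mass, one pair per line,
# for example
#
#   Masses
#
#   1 15.9994
#   2 1.008
#
# The hypothetical helper below turns such a block (given as a list of already
# read lines) into the kind of {type: mass} dictionary the parser above uses,
# assuming integer type ids.

def _parse_masses_block(lines):
    masses = {}
    for line in lines:
        fields = line.split()
        if not fields:
            continue  # skip blank lines inside the block
        masses[int(fields[0])] = float(fields[1])
    return masses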
def __init__(self, filename=None):
    self.names = {}
    self.headers = {}
    self.sections = {}
    if filename is None:
        self.title = "LAMMPS data file"
    else:
        # Open and check validity
        with openany(filename, 'r') as file:
            file_iter = file.xreadlines()
            self.title = file_iter.next()
            # Parse headers
            headers = self.headers
            for l in file_iter:
                line = l.strip()
                if len(line) == 0:
                    continue
                found = False
                for keyword in self.header_keywords:
                    if line.find(keyword) >= 0:
                        found = True
                        values = line.split()
                        if keyword in ("xlo xhi", "ylo yhi", "zlo zhi"):
                            headers[keyword] = (float(values[0]), float(values[1]))
                        else:
                            headers[keyword] = int(values[0])
                if found is False:
                    break

        # Parse sections
        # XXX This is a crappy way to do it
        with openany(filename, 'r') as file:
            file_iter = file.xreadlines()
            # Create coordinate array
            positions = numpy.zeros((headers['atoms'], 3), numpy.float64)
            sections = self.sections
            for l in file_iter:
                line = l.strip()
                if len(line) == 0:
                    continue
                if line in self.coeff:
                    h, numcoeff = self.coeff[line]
                    file_iter.next()  # skip line
                    data = []
                    for i in xrange(headers[h]):
                        fields = file_iter.next().strip().split()
                        data.append(tuple(map(conv_float, fields[1:])))
                    sections[line] = data
                elif line in self.connections:
                    h, numfields = self.connections[line]
                    file_iter.next()  # skip line
                    data = []
                    for i in range(headers[h]):
                        fields = file_iter.next().strip().split()
                        data.append(tuple(map(int, fields[1:])))
                    sections[line] = data
                elif line == "Atoms":
                    file_iter.next()
                    data = []
                    for i in xrange(headers["atoms"]):
                        fields = file_iter.next().strip().split()
                        index = int(fields[0]) - 1
                        a = LAMMPSAtom(index=index, name=fields[2],
                                       type=int(fields[2]),
                                       chain_id=int(fields[1]),
                                       charge=float(fields[3]))
                        a._positions = positions
                        data.append(a)
                        positions[index] = numpy.array([float(fields[4]),
                                                        float(fields[5]),
                                                        float(fields[6])])
                    sections[line] = data
                elif line == "Masses":
                    file_iter.next()
                    data = []
                    for i in xrange(headers["atom type"]):
                        fields = file_iter.next().strip().split()
                        # store (type, mass) pairs in the same layout as the other sections
                        data.append((int(fields[0]), float(fields[1])))
                    sections[line] = data
            self.positions = positions
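# Illustrative sketch (not part of the original module): the LAMMPS data header
# parsed above stores box bounds as pairs followed by a keyword, e.g.
#
#   0.0 52.0  xlo xhi
#   0.0 52.0  ylo yhi
#   0.0 52.0  zlo zhi
#
# while count lines such as "1200 atoms" carry a single integer.  The helper
# below (hypothetical name, with an assumed subset of header keywords) mirrors
# the keyword-matching dispatch used by the constructor above.

def _parse_header_line(line, headers):
    """Store one recognised LAMMPS header line into the *headers* dict; return True if matched."""
    for keyword in ("atoms", "bonds", "angles", "dihedrals", "impropers",
                    "atom types", "xlo xhi", "ylo yhi", "zlo zhi"):
        if keyword in line:
            values = line.split()
            if keyword in ("xlo xhi", "ylo yhi", "zlo zhi"):
                headers[keyword] = (float(values[0]), float(values[1]))  # (lo, hi) bounds
            else:
                headers[keyword] = int(values[0])  # plain count
            return True
    return False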
def parse(self, filename=None):
    """Parse MOL2 file *filename* and return the dict `structure`.

    Only reads the list of atoms.

    :Returns: MDAnalysis internal *structure* dict

    .. SeeAlso:: The *structure* dict is defined in
                 :func:`MDAnalysis.topology.PSFParser.PSFParser`.
    """
    if not filename:
        filename = self.filename

    blocks = []
    with openany(filename) as f:
        for i, line in enumerate(f):
            # found a new molecule; only the first block is parsed
            if "@<TRIPOS>MOLECULE" in line:
                if len(blocks):
                    break
                blocks.append({"start_line": i, "lines": []})
            blocks[-1]["lines"].append(line)

    if not len(blocks):
        raise ValueError("The mol2 file '{}' needs to have at least one"
                         " @<TRIPOS>MOLECULE block".format(filename))
    block = blocks[0]

    sections = {}
    cursor = None
    for line in block["lines"]:
        if "@<TRIPOS>" in line:
            cursor = line.split("@<TRIPOS>")[1].strip().lower()
            sections[cursor] = []
            continue
        elif line.startswith("#") or line == "\n":
            continue
        sections[cursor].append(line)

    atom_lines, bond_lines = sections["atom"], sections["bond"]
    if not len(atom_lines):
        raise ValueError("The mol2 block ({}:{}) has no atoms".format(
            os.path.basename(filename), block["start_line"]))
    if not len(bond_lines):
        raise ValueError("The mol2 block ({}:{}) has no bonds".format(
            os.path.basename(filename), block["start_line"]))

    atoms = []
    for a in atom_lines:
        aid, name, x, y, z, atom_type, resid, resname, charge = a.split()
        aid = int(aid) - 1
        resid = int(resid)
        charge = float(charge)
        element = guess_atom_type(name)
        mass = guess_atom_mass(element)
        # atom_type is the SYBYL atom type; coordinates are not needed for the topology
        atoms.append(Atom(aid, name, atom_type, resname, resid, "X", mass, charge))

    bonds = []
    bondorder = {}
    for b in bond_lines:
        # bond_type can be: 1, 2, am, ar
        bid, a0, a1, bond_type = b.split()
        a0, a1 = int(a0) - 1, int(a1) - 1
        bond = tuple(sorted([a0, a1]))
        bondorder[bond] = bond_type
        bonds.append(bond)

    structure = {"_atoms": atoms,
                 "_bonds": bonds,
                 "_bondorder": bondorder}
    return structure
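# Illustrative sketch (not part of the original module): the MOL2 parser above
# walks one molecule block line by line and groups the lines under the most
# recent @<TRIPOS> tag.  A minimal standalone version of that cursor-based
# grouping (hypothetical helper name), operating on the lines of a single
# @<TRIPOS>MOLECULE block:

def _group_tripos_sections(lines):
    """Return {'atom': [...], 'bond': [...], ...} for one molecule block."""
    sections = {}
    cursor = None
    for line in lines:
        if line.startswith("@<TRIPOS>"):
            # switch the cursor to the new section, e.g. 'atom', 'bond'
            cursor = line[len("@<TRIPOS>"):].strip().lower()
            sections[cursor] = []
        elif cursor is not None and line.strip() and not line.startswith("#"):
            sections[cursor].append(line)
    return sections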