def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file. These indices are zero-based. The returned topology is
        restricted to the same atoms.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The loaded frames, with coordinates and unit cell vectors converted
        from the file's distance unit to ``Trajectory._distance_unit``.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if atom_indices is not None:
            # The coordinates below are read for this subset only, so the
            # topology has to describe the same atoms (as read_as_traj does).
            topology = topology.subset(atom_indices)

        if frame is not None:
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(
            coordinates, f.distance_unit, Trajectory._distance_unit,
            inplace=True)
        unitcell_vectors = in_units_of(
            unitcell_vectors, f.distance_unit, Trajectory._distance_unit,
            inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from a gro file

    Parameters
    ----------
    n_frames : int, optional
        If positive, then read only the next `n_frames` frames. Otherwise
        read all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file. This may be slightly slower than the standard read because
        it requires an extra copy, but will save memory.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file. An
        empty (zero-frame) trajectory is returned when nothing was read.
    """
    from mdtraj.core.trajectory import Trajectory

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Forward n_frames only when the caller set it, so a default call reaches
    # read() with exactly the arguments it received before this fix.
    read_kwargs = {'stride': stride, 'atom_indices': atom_indices}
    if n_frames is not None:
        read_kwargs['n_frames'] = n_frames
    coordinates, time, unitcell_vectors = self.read(**read_kwargs)

    if len(coordinates) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    coordinates = in_units_of(
        coordinates, self.distance_unit, Trajectory._distance_unit,
        inplace=True)
    unitcell_vectors = in_units_of(
        unitcell_vectors, self.distance_unit, Trajectory._distance_unit,
        inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
    """Read frames from a lammpstrj file and wrap them in a Trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    n_frames : int, optional
        If positive, read only the next `n_frames` frames; otherwise read
        every remaining frame.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, read coordinates for this subset of atoms only; the
        topology is subset to match.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.

    See Also
    --------
    read : Returns the raw data from the file

    Notes
    -----
    If coordinates are specified in more than one style, the first complete
    trio of x/y/z coordinates will be read in according to the following
    order:
        1) x,y,z (unscaled coordinates)
        2) xs,ys,zs (scaled atom coordinates)
        3) xu,yu,zu (unwrapped atom coordinates)
        4) xsu,ysu,zsu (scaled unwrapped atom coordinates)

    E.g., if the file contains x, y, z, xs, ys, zs then x, y, z will be used.
          if the file contains x, y, xs, ys, zs then xs, ys, zs will be used.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Remember where reading starts so the time axis can be offset below.
    first_frame = int(self._frame_index)
    xyz, cell_lengths, cell_angles = self.read(
        n_frames=n_frames, stride=stride, atom_indices=atom_indices)

    if len(xyz) == 0:
        empty = np.zeros((0, topology.n_atoms, 3))
        return Trajectory(xyz=empty, topology=topology)

    # Both arrays are rescaled in place; the return values are not needed.
    for quantity in (xyz, cell_lengths):
        in_units_of(quantity, self.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    step = 1 if stride is None else stride
    time = (step * np.arange(len(xyz))) + first_frame

    traj = Trajectory(xyz=xyz, topology=topology, time=time)
    traj.unitcell_lengths = cell_lengths
    traj.unitcell_angles = cell_angles
    return traj
def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
    """Read frames from a mdcrd file and wrap them in a Trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    n_frames : int, optional
        If positive, read only the next `n_frames` frames; otherwise read
        every remaining frame.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, read coordinates for this subset of atoms only; the
        topology is subset to match.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Remember where reading starts so the time axis can be offset below.
    first_frame = int(self._frame_index)
    xyz, cell_lengths = self.read(
        n_frames=n_frames, stride=stride, atom_indices=atom_indices)

    if len(xyz) == 0:
        empty = np.zeros((0, topology.n_atoms, 3))
        return Trajectory(xyz=empty, topology=topology)

    # Both quantities are rescaled in place; the return values are not needed.
    for quantity in (xyz, cell_lengths):
        in_units_of(quantity, self.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    # The file stores no angles, so a rectilinear box is assumed.
    cell_angles = (None if cell_lengths is None
                   else 90.0 * np.ones_like(cell_lengths))

    step = 1 if stride is None else stride
    time = (step * np.arange(len(xyz))) + first_frame

    traj = Trajectory(xyz=xyz, topology=topology, time=time)
    traj.unitcell_lengths = cell_lengths
    traj.unitcell_angles = cell_angles
    return traj
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information
    that is not read by this method, including forces, energies, and
    velocities. Here, we just read the positions and the box vectors.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Unit cell
        vectors are set only when the file has a ``PeriodicBoxVectors``
        element.
    """
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree exposes
    # the same parse()/find() API.
    import xml.etree.ElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    root = etree.parse(filename).getroot()

    # One (x, y, z) tuple per child of <Positions>.
    positions = [
        (float(position.attrib['x']),
         float(position.attrib['y']),
         float(position.attrib['z']))
        for position in root.find('Positions')
    ]

    # Tolerate a missing box instead of failing with AttributeError on None.
    vectors = root.find('PeriodicBoxVectors')
    box = None
    if vectors is not None:
        box = []
        for name in ['A', 'B', 'C']:
            vector = vectors.find(name)
            box.append((float(vector.attrib['x']),
                        float(vector.attrib['y']),
                        float(vector.attrib['z'])))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    if box is not None:
        traj.unitcell_vectors = np.array(box).reshape(1, 3, 3)
    return traj
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from a gro file

    Parameters
    ----------
    n_frames : int, optional
        If positive, then read only the next `n_frames` frames. Otherwise
        read all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file. This may be slightly slower than the standard read because
        it requires an extra copy, but will save memory.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file. An
        empty (zero-frame) trajectory is returned when nothing was read.
    """
    from mdtraj.core.trajectory import Trajectory

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Forward n_frames only when the caller set it, so a default call reaches
    # read() with exactly the arguments it received before this fix.
    read_kwargs = {'stride': stride, 'atom_indices': atom_indices}
    if n_frames is not None:
        read_kwargs['n_frames'] = n_frames
    coordinates, time, unitcell_vectors = self.read(**read_kwargs)

    if len(coordinates) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    coordinates = in_units_of(
        coordinates, self.distance_unit, Trajectory._distance_unit,
        inplace=True)
    unitcell_vectors = in_units_of(
        unitcell_vectors, self.distance_unit, Trajectory._distance_unit,
        inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file. These indices are zero-based. The returned topology is
        restricted to the same atoms.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The loaded frames, with coordinates and unit cell vectors converted
        from the file's distance unit to ``Trajectory._distance_unit``.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if atom_indices is not None:
            # The coordinates below are read for this subset only, so the
            # topology has to describe the same atoms (as read_as_traj does).
            topology = topology.subset(atom_indices)

        if frame is not None:
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(
            coordinates, f.distance_unit, Trajectory._distance_unit,
            inplace=True)
        unitcell_vectors = in_units_of(
            unitcell_vectors, f.distance_unit, Trajectory._distance_unit,
            inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from the HDF5 file

    Parameters
    ----------
    n_frames : {int, None}
        The number of frames to read. If not supplied, all of the
        remaining frames will be read.
    stride : {int, None}
        By default all of the frames will be read, but you can pass this
        flag to read a subset of the data by grabbing only every
        `stride`-th frame from disk.
    atom_indices : {array_like, None}
        By default all of the atoms will be read, but you can pass this
        flag to read only a subset of the atoms. The returned topology is
        subset with the same indices.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file. An
        empty (zero-frame) trajectory is returned when no frames were read.
    """
    _check_mode(self.mode, ('r',))

    from mdtraj.core.trajectory import Trajectory

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    data = self.read(n_frames=n_frames, stride=stride,
                     atom_indices=atom_indices)

    # `data` exposes its arrays as attributes, so len(data) is not guaranteed
    # to be the frame count (for a namedtuple it is the number of fields).
    # Check the coordinate array itself; the old test is kept for safety.
    if len(data) == 0 or len(data.coordinates) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    # Rescaled in place; the return values are not needed.
    in_units_of(data.coordinates, self.distance_unit,
                Trajectory._distance_unit, inplace=True)
    in_units_of(data.cell_lengths, self.distance_unit,
                Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=data.coordinates, topology=topology,
                      time=data.time, unitcell_lengths=data.cell_lengths,
                      unitcell_angles=data.cell_angles)
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file.

    The file carries no topology, so one has to be supplied through `top`.

    Parameters
    ----------
    filename : str
        name of the AMBER restart file
    top : {str, Trajectory, Topology}
        Pass in either the path to a file containing topology information
        (e.g., a PDB, an AMBER prmtop, or certain types of Trajectory
        objects) to supply the topology that the restart file lacks.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file; the topology is subset to match.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    selection = cast_indices(atom_indices)
    if selection is not None:
        topology = topology.subset(selection)

    with AmberRestartFile(filename) as f:
        xyz, time, cell_lengths, cell_angles = f.read(atom_indices=selection)
        # Lengths are converted to the Trajectory's distance unit.
        xyz = in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                          inplace=True)
        cell_lengths = in_units_of(cell_lengths, f.distance_unit,
                                   Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
    """Read frames from a XYZ file and wrap them in a Trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    n_frames : int, optional
        If positive, read only the next `n_frames` frames; otherwise read
        every remaining frame.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, read coordinates for this subset of atoms only; the
        topology is subset to match.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Remember where reading starts so the time axis can be offset below.
    first_frame = int(self._frame_index)
    xyz = self.read(n_frames=n_frames, stride=stride,
                    atom_indices=atom_indices)

    if len(xyz) == 0:
        empty = np.zeros((0, topology.n_atoms, 3))
        return Trajectory(xyz=empty, topology=topology)

    # Rescaled in place; the return value is not needed.
    in_units_of(xyz, self.distance_unit, Trajectory._distance_unit,
                inplace=True)

    step = 1 if stride is None else stride
    time = (step * np.arange(len(xyz))) + first_frame

    return Trajectory(xyz=xyz, topology=topology, time=time)
def create_water_topology_on_disc(n):
    """Write a PDB file holding `n` three-atom (H, O, H) residues.

    All residues are placed in a single chain, each with two H-O bonds, and
    every coordinate is zero.

    Parameters
    ----------
    n : int
        Number of residues to create.

    Returns
    -------
    topfile : str
        Path of the temporary ``.pdb`` file that was written. The caller is
        responsible for deleting it.
    """
    # tempfile.mktemp only returns a name, which is race-prone and
    # deprecated. Create the file for real and keep just its path.
    # NOTE(review): relies on save_pdb overwriting an existing file.
    with tempfile.NamedTemporaryFile(suffix='.pdb', delete=False) as handle:
        topfile = handle.name

    top = Topology()
    chain = top.add_chain()
    for i in range(n):
        res = top.add_residue('r%i' % i, chain)
        h1 = top.add_atom('H', hydrogen, res)
        o = top.add_atom('O', oxygen, res)
        h2 = top.add_atom('H', hydrogen, res)
        top.add_bond(h1, o)
        top.add_bond(h2, o)

    xyz = np.zeros((n * 3, 3))
    Trajectory(xyz, top).save_pdb(topfile)
    return topfile
def frames_from_files(files, top, frames, chunksize=1000, stride=1,
                      verbose=False, copy_not_join=None):
    """Collect selected frames from trajectory files into one Trajectory.

    Parameters
    ----------
    files : str or list of str
        Trajectory file(s), passed to ``pyemma.coordinates.source``.
    top : object
        Topology information; normalized through ``_enforce_top``.
    frames : array_like, shape (n, 2)
        Rows of ``(trajectory index, frame index)``. The input is copied and
        never modified.
    chunksize : int, default=1000
        Chunk size handed to the reader's iterator.
    stride : int, default=1
        Frame indices are multiplied by this value before reading.
    verbose : bool, default=False
        If True and ``stride != 1``, log that the indices were rescaled.
    copy_not_join : optional
        Unused by this function.

    Returns
    -------
    traj : Trajectory
        The requested frames, in the order given by `frames`.

    Raises
    ------
    ValueError
        If a requested frame index exceeds the length of its trajectory.
    """
    from pyemma.coordinates import source

    # Enforce topology to be a md.Topology object
    top = _enforce_top(top)
    reader = source(files, top=top)
    stride = int(stride)

    # Work on a private copy so the caller's array is never rescaled in place.
    frames = np.array(frames)

    if stride != 1:
        frames[:, 1] *= stride
        if verbose:
            log.info('A stride value of = %u was parsed, '
                     'interpreting "indexes" accordingly.' % stride)

    # sort by file and frame index
    sort_inds = np.lexsort((frames[:, 1], frames[:, 0]))
    sorted_inds = frames[sort_inds]
    assert len(sorted_inds) == len(frames)

    for u in np.unique(sorted_inds[:, 0]):
        # Largest frame index requested from trajectory `u`: pick the rows
        # of that trajectory, then look at the frame column only.
        largest_ind_in_traj = np.max(sorted_inds[sorted_inds[:, 0] == u, 1])
        # NOTE(review): an index equal to the length would also be out of
        # range for zero-based frames; the comparison is left unchanged.
        if reader.trajectory_length(u) < largest_ind_in_traj:
            raise ValueError(
                "largest specified index (%i * stride=%i * %i=%i) "
                "is larger than trajectory length '%s' = %i" % (
                    largest_ind_in_traj / stride,
                    largest_ind_in_traj / stride,
                    stride,
                    largest_ind_in_traj,
                    reader.filenames[u],
                    reader.trajectory_length(u)))

    collected_frames = []
    with reader.iterator(chunk=chunksize, stride=sorted_inds,
                         return_trajindex=False) as it:
        for x in it:
            collected_frames.append(x)

    collected_frames = np.vstack(collected_frames)
    # Undo the sort so the frames come back in the caller's order.
    collected_frames = collected_frames[sort_inds.argsort()]
    collected_frames = collected_frames.reshape(-1, top.n_atoms, 3)

    return Trajectory(collected_frames, top)
def to_mdtraj_Trajectory(item, atom_indices='all', coordinates=None, box=None, check=True):
    """Build an mdtraj Trajectory from an mdtraj.Topology item.

    Parameters
    ----------
    item : mdtraj.Topology
        Topology to convert.
    atom_indices : 'all' or array_like, default='all'
        Atoms to keep; passed to ``extract`` to obtain the topology used for
        the trajectory.
    coordinates : optional
        Coordinates handed to the Trajectory constructor.
        NOTE(review): presumably these already match `atom_indices` --
        confirm against callers.
    box : optional
        Digested when `check` is True but not otherwise used here.
    check : bool, default=True
        If True, run the ``digest_*`` helpers on the arguments first.

    Returns
    -------
    tmp_item : mdtraj.Trajectory
        Trajectory built from `coordinates` and the extracted topology.
    """
    if check:
        digest_item(item, 'mdtraj.Topology')
        atom_indices = digest_atom_indices(atom_indices)
        coordinates = digest_coordinates(coordinates)
        box = digest_box(box)

    from mdtraj.core.trajectory import Trajectory
    from . import extract

    # Use the extracted topology. Previously this result was discarded and
    # the full `item` was used, so `atom_indices` had no effect.
    topology = extract(item, atom_indices=atom_indices, check=False)
    tmp_item = Trajectory(coordinates, topology)

    return tmp_item
def read_as_traj(self, topology, atom_indices=None):
    """Read an AMBER ASCII restart file as a trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    atom_indices : array_like, optional
        If not None, read coordinates for this subset of atoms only; the
        topology is subset to match.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object with 1 frame created from the file.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    xyz, time, cell_lengths, cell_angles = self.read(
        atom_indices=atom_indices)

    # Lengths are converted to the Trajectory's distance unit.
    xyz = in_units_of(xyz, self.distance_unit, Trajectory._distance_unit,
                      inplace=True)
    cell_lengths = in_units_of(cell_lengths, self.distance_unit,
                               Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
    return traj
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : str
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        A single-frame trajectory whose topology is built from the mol2
        atom and bond records.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.
    For sybyl types, the text before the first ``.`` is used when it is a
    capitalized element symbol (e.g. ``Cl``, ``C.3``); otherwise the first
    character of the type is used.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology, Single, Double, Triple, Aromatic, Amide
    from mdtraj.core.element import Element

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    def sybyl_element(atype):
        # Sybyl types look like "C.3" or "Cl": the element is the part
        # before the dot. Only capitalized, known symbols are accepted, so
        # lower-case GAFF-style types keep the first-character rule.
        head = atype.split(".")[0]
        if head[:1].isupper():
            try:
                Element.getBySymbol(head)
            except KeyError:
                pass
            else:
                return head
        return atype.strip(".")[0]

    # If this is a GAFF mol2, this lookup resolves every atom type.
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, the lookup leaves NaN (null) values behind.
    if atoms_mdtraj.element.isnull().any():
        atoms_mdtraj["element"] = atoms.atype.apply(sybyl_element)

    atoms_mdtraj["resSeq"] = np.ones(len(atoms), 'int')
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bond_type_map = {
        '1': Single,
        '2': Double,
        '3': Triple,
        'am': Amide,
        'ar': Aromatic
    }

    if bonds is not None:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset
        # Create the bond augment information
        n_bonds = bonds_mdtraj.shape[0]
        bond_augment = np.zeros([n_bonds, 2], dtype=float)
        # Add bond type information
        bond_augment[:, 0] = [float(bond_type_map[bond_value])
                              for bond_value in bonds["bond_type"].values]
        # Bond "order" is not known from Mol2 files, so it stays 0.0
        bond_augment[:, 1] = 0.0
        # Augment array, dtype is cast to minimal representation of float
        bonds_mdtraj = np.append(bonds_mdtraj, bond_augment, axis=-1)
    else:
        bonds_mdtraj = None

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently
        supported unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If `top` is None or `unit_set` is not supported.
    TypeError
        If `filename` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good
    # because this function is usually called by a dispatch from load(),
    # where top comes from **kwargs. So if its not supplied, we want to give
    # the user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        # %-interpolation matches the placeholder; .format() left a literal
        # "%s" in the message.
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assignment, not comparison: `==` here was a no-op.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))

        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(
                stride=stride, atom_indices=atom_indices)

        # Rescale in place. The box lengths must be converted together with
        # the coordinates, otherwise the two end up in different units.
        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
def load_gsd(filename, top=None, start=None, n_frames=None, stride=None,
             atom_indices=None, frame=None):
    """Load a GSD trajectory file.

    Parameters
    ----------
    filename : path-like
        Path of GSD trajectory file.
    top : {path-like, Trajectory, Topology}, None
        A pdb file, a trajectory, or a topology to supply topology
        information. If None, topology information will be parsed from the
        GSD file.
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If `filename` is neither a string nor an ``os.PathLike``.
    """
    from mdtraj.core.trajectory import Trajectory, _parse_topology
    import gsd.hoomd

    if not isinstance(filename, (string_types, os.PathLike)):
        # %-interpolation matches the placeholder; .format() left a literal
        # "%s" in the message.
        raise TypeError('filename must be of type path-like for load_gsd. '
                        'you supplied %s' % type(filename))

    if top is not None:
        topology = _parse_topology(top)
    else:
        topology = load_gsd_topology(filename)
    atom_indices = cast_indices(atom_indices)

    with gsd.hoomd.open(filename, 'rb') as f:
        if frame is None:
            return hoomdtraj_to_traj(f, topology, start=start,
                                     n_frames=n_frames, stride=stride,
                                     atom_indices=atom_indices)

        # read_snapshot receives the full topology together with the
        # selection.
        xyz, vectors, time = read_snapshot(frame, f[frame], topology,
                                           atom_indices=atom_indices)
        if atom_indices is not None:
            # NOTE(review): presumably read_snapshot returns coordinates for
            # the selected atoms only, so the topology must be subset too.
            topology = topology.subset(atom_indices)

        t = Trajectory(xyz=np.array(xyz), topology=topology,
                       time=np.array([time]))
        t.unitcell_vectors = np.reshape(vectors, (-1, 3, 3))
        return t
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file; the topology is subset to match.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print(traj)
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2)
    >>> print(traj2)
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is None:
            data = f.read(stride=stride, atom_indices=atom_indices)
        else:
            # Single-frame request: jump to it and ignore `stride`.
            f.seek(frame)
            data = f.read(n_frames=1, atom_indices=atom_indices)

        topology = f.topology

        # Both arrays are rescaled in place; return values are not needed.
        target_unit = Trajectory._distance_unit
        in_units_of(data.coordinates, f.distance_unit, target_unit,
                    inplace=True)
        in_units_of(data.cell_lengths, f.distance_unit, target_unit,
                    inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

        trajectory = Trajectory(
            xyz=data.coordinates,
            topology=topology,
            time=data.time,
            unitcell_lengths=data.cell_lengths,
            unitcell_angles=data.cell_angles,
        )

    return trajectory
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argument is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with
        corresponding Topology.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    topology = Topology()

    # NOTE(review): `cElementTree` comes from a module-level import that is
    # not visible here; xml.etree.cElementTree was removed in Python 3.9.
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # be generous for case of box attributes
    box.attrib = {key.lower(): val for key, val in box.attrib.items()}
    lx, ly, lz = (float(box.attrib[key]) for key in ('lx', 'ly', 'lz'))

    # Tilt factors are optional; if any is missing or unparsable, all three
    # fall back to zero.
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except (ValueError, KeyError):
        xy = xz = yz = 0.0

    unitcell_vectors = np.array([[[lx, xy * ly, xz * lz],
                                  [0.0, ly, yz * lz],
                                  [0.0, 0.0, lz]]])

    # The first line of each text node is skipped.
    positions = []
    for line in position.text.splitlines()[1:]:
        fields = line.split()
        positions.append(
            (float(fields[0]), float(fields[1]), float(fields[2])))

    types = {
        idx: str(atom_name.split()[0])
        for idx, atom_name in enumerate(atom_type.text.splitlines()[1:])
    }

    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type
    if hasattr(bond, 'text'):
        bonds = []
        for line in bond.text.splitlines()[1:]:
            fields = line.split()
            bonds.append((int(fields[1]), int(fields[2])))
        chains = _find_chains(bonds)
    else:
        chains = []
        bonds = []

    # Relate the first index in the bonded-group to mdtraj.Residue
    bonded_to_residue = {}
    for i in range(len(types)):
        bonded_group = _in_chain(chains, i)
        if bonded_group is None:
            # Atom outside every bonded group: its own chain and residue.
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
            topology.add_atom(types[i], virtual_site, t_residue)
        else:
            group_key = bonded_group[0]
            if group_key not in bonded_to_residue:
                t_chain = topology.add_chain()
                t_residue = topology.add_residue('A', t_chain)
                bonded_to_residue[group_key] = t_residue
            topology.add_atom(types[i], virtual_site,
                              bonded_to_residue[group_key])

    for first, second in bonds:
        topology.add_bond(topology.atom(first), topology.atom(second))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
    """Read frames from a lammpstrj file and wrap them in a Trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    n_frames : int, optional
        If positive, read only the next `n_frames` frames; otherwise read
        every remaining frame.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, read coordinates for this subset of atoms only; the
        topology is subset to match.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.

    See Also
    --------
    read : Returns the raw data from the file

    Notes
    -----
    If coordinates are specified in more than one style, the first complete
    trio of x/y/z coordinates will be read in according to the following
    order:
        1) x,y,z (unscaled coordinates)
        2) xs,ys,zs (scaled atom coordinates)
        3) xu,yu,zu (unwrapped atom coordinates)
        4) xsu,ysu,zsu (scaled unwrapped atom coordinates)

    E.g., if the file contains x, y, z, xs, ys, zs then x, y, z will be used.
          if the file contains x, y, xs, ys, zs then xs, ys, zs will be used.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Remember where reading starts so the time axis can be offset below.
    first_frame = int(self._frame_index)
    xyz, cell_lengths, cell_angles = self.read(
        n_frames=n_frames, stride=stride, atom_indices=atom_indices)

    if len(xyz) == 0:
        empty = np.zeros((0, topology.n_atoms, 3))
        return Trajectory(xyz=empty, topology=topology)

    # Both arrays are rescaled in place; the return values are not needed.
    for quantity in (xyz, cell_lengths):
        in_units_of(quantity, self.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    step = 1 if stride is None else stride
    time = (step * np.arange(len(xyz))) + first_frame

    traj = Trajectory(xyz=xyz, topology=topology, time=time)
    traj.unitcell_lengths = cell_lengths
    traj.unitcell_angles = cell_angles
    return traj
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Coordinates and
        unit cell lengths are converted to the Trajectory distance unit.

    Raises
    ------
    ValueError
        If ``top`` is not supplied or ``unit_set`` is not supported.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        # %-interpolation: the previous '%s'.format(...) left a literal "%s"
        # in the message and never showed the offending type.
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assignment, not comparison: the old `==` was a no-op expression.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError('Unsupported unit set specified: {0}.'.format(unit_set))

        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        # Box lengths are distances too; convert them like read_as_traj does so
        # the unit cell is expressed in the same unit as the coordinates.
        in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    # Frame numbers serve as the time axis.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : path-like
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        A single-frame trajectory built from the atoms (and bonds, if any)
        found in the file.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology, Single, Double, Triple, Aromatic, Amide

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # First attempt: look the atom types up in the GAFF element table.
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)

    # Any null left by the GAFF lookup means this is not a pure GAFF file
    # (e.g. a sybyl mol2), so fall back to parsing the type string itself.
    if atoms_mdtraj.element.isnull().any():
        def to_element(label):
            # The value may arrive wrapped in a one-element list/tuple.
            if isinstance(label, (list, tuple)):
                assert len(label) == 1
                label = label[0]

            # SYBYL types carry the hybridization after a dot, e.g. "C.3".
            if '.' in label:
                return label.split('.')[0]

            try:
                # Keep the whole string if it is a known element symbol,
                # otherwise fall back to its first letter.
                from mdtraj.core.element import Element
                Element.getBySymbol(label)
            except KeyError:
                return label[0]
            return label

        atoms_mdtraj["element"] = atoms.atype.apply(to_element)

        # Validate the symbols derived from atype; if any is unknown, retry
        # using the atom names instead.
        try:
            atoms_mdtraj['element'].apply(elem.get_by_symbol)
        except KeyError:
            try:
                atoms_mdtraj["element"] = atoms.name.apply(to_element)
                atoms_mdtraj['element'].apply(elem.get_by_symbol)
            except KeyError:
                raise KeyError('Invalid element passed to atoms DataFrame')

    atoms_mdtraj['resSeq'] = atoms['code']
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bond_type_map = {
        '1': Single,
        '2': Double,
        '3': Triple,
        'am': Amide,
        'ar': Aromatic
    }

    if bonds is None:
        bonds_mdtraj = None
    else:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        # Shift the atom ids so the smallest one becomes zero.
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset

        # Two extra columns per bond: bond type and bond order.
        n_bonds = bonds_mdtraj.shape[0]
        bond_augment = np.zeros([n_bonds, 2], dtype=float)
        bond_augment[:, 0] = [float(bond_type_map[str(raw_type)])
                              for raw_type in bonds["bond_type"].values]
        # Bond "order" is not known from Mol2 files, so it stays zero.
        bond_augment[:, 1] = [0.0 for _ in range(n_bonds)]
        # Augment array, dtype is cast to minimal representation of float
        bonds_mdtraj = np.append(bonds_mdtraj, bond_augment, axis=-1)

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    # One frame: wrap the (n_atoms, 3) coordinates in a leading frame axis.
    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    return Trajectory(xyzlist, top)
def next_chunk(self, lag=0):
    """Fetch the next chunk and map it through the featurizer.

    If lag > 0, a second, time-lagged iterator is opened (once per
    trajectory, the first time a positive lag is requested) and a lagged
    chunk of the same length is assembled from it, zero-padded where the
    lagged iterator has run out of data.

    Parameters
    ----------
    lag : int, default=0
        Lag time in frames.

    Returns
    -------
    X or (X, Y)
        The feature-mapped chunk X, or the pair (X, Y) with Y the
        feature-mapped lagged chunk if lag > 0.

    Raises
    ------
    StopIteration
        Propagated from the primary iterator when it is exhausted.
    """
    # Builtin next() instead of the Python-2-only ``iterator.next()`` method,
    # so this works with iterators/generators on both Python 2 and 3.
    chunk = next(self.mditer)

    if lag > 0:
        if self.curr_lag == 0:
            # lag time changed, so open lagged iterator
            self.curr_lag = lag
            self._open_time_lagged()
            try:
                self.last_advanced_chunk = next(self.mditer2)
            except StopIteration:
                log.debug(
                    "No more data in mditer2 during last_adv_chunk assignment. Padding with zeros"
                )
                lagged_xyz = np.zeros_like(chunk.xyz)
                self.last_advanced_chunk = Trajectory(
                    lagged_xyz, chunk.topology)
        try:
            adv_chunk = next(self.mditer2)
        except StopIteration:
            # no more data available in mditer2, so we have to take data from
            # current chunk and pad it with zeros!
            log.debug("No more data in mditer2. Padding with zeros."
                      " Data avail: %i" % chunk.xyz.shape[0])
            lagged_xyz = np.zeros_like(chunk.xyz)
            adv_chunk = Trajectory(lagged_xyz, chunk.topology)

        # build time lagged Trajectory by concatenating
        # last adv chunk and advance chunk
        i = lag - (self.chunksize * self.skip_n)
        padding_length = max(
            0,
            chunk.xyz.shape[0]
            - (self.last_advanced_chunk.xyz.shape[0] - i)
            - adv_chunk.xyz.shape[0])
        padding = np.zeros(
            (padding_length, chunk.xyz.shape[1], chunk.xyz.shape[2]))
        merged = Trajectory(
            np.concatenate(
                (self.last_advanced_chunk.xyz, adv_chunk.xyz, padding)),
            chunk.topology)
        # skip the first i frames and truncate to the length of the chunk
        chunk_lagged = merged[i:][:chunk.xyz.shape[0]]

        # remember last advanced chunk
        self.last_advanced_chunk = adv_chunk

    self.t += chunk.xyz.shape[0]

    # Move on to the next trajectory file once the current one cannot supply
    # any further (lagged) frames and there is another file left.
    if (self.t + lag >= self.trajectory_length(self.curr_itraj) and
            self.curr_itraj < len(self.trajfiles) - 1):
        log.debug('closing current trajectory "%s"'
                  % self.trajfiles[self.curr_itraj])
        self.mditer.close()
        self.t = 0
        self.curr_itraj += 1
        self.mditer = self._create_iter(self.trajfiles[self.curr_itraj])
        # we open self.mditer2 only if requested due lag parameter!
        self.curr_lag = 0

    # map data
    if lag == 0:
        return self.featurizer.map(chunk)
    else:
        X = self.featurizer.map(chunk)
        Y = self.featurizer.map(chunk_lagged)
        return X, Y
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file.

    The NetCDF format carries no topology, so one has to be supplied
    through ``top``.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is None:
            # Whole file, optionally strided.
            xyz, time, cell_lengths, cell_angles = f.read(
                stride=stride, atom_indices=atom_indices)
        else:
            # Single frame: jump to it and read exactly one.
            f.seek(frame)
            xyz, time, cell_lengths, cell_angles = f.read(
                n_frames=1, atom_indices=atom_indices)

        file_unit = f.distance_unit
        target_unit = Trajectory._distance_unit
        xyz = in_units_of(xyz, file_unit, target_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, file_unit, target_unit, inplace=True)

    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is not supplied.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little weird, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    # The file reader is given the atom count of the *full* topology, so the
    # topology is only reduced to atom_indices after reading. Use the public
    # n_atoms accessor rather than the private _numAtoms counter.
    with MDCRDTrajectoryFile(filename, n_atoms=topology.n_atoms) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        if cell_lengths is not None:
            in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Frame numbers serve as the time axis.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue. Atoms that take part in no
    bond each get a chain and residue of their own.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : optional
        Not used by this function.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with
        corresponding Topology. Atoms appear in the topology in the same
        order as in the file.

    Raises
    ------
    ValueError
        If the file lists a different number of types and positions.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # be generous for case of box attributes
    box.attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    lx = float(box.attrib['lx'])
    ly = float(box.attrib['ly'])
    lz = float(box.attrib['lz'])
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except (KeyError, ValueError):
        # Tilt factors missing or unparsable: fall back to an orthogonal box.
        xy = 0.0
        xz = 0.0
        yz = 0.0
    unitcell_vectors = np.array([[[lx,  xy*ly, xz*lz],
                                  [0.0, ly,    yz*lz],
                                  [0.0, 0.0,   lz]]])

    # The first line of each text node is skipped, as in the original parser.
    positions, types = [], {}
    for pos in position.text.splitlines()[1:]:
        fields = pos.split()
        positions.append((float(fields[0]), float(fields[1]), float(fields[2])))

    for idx, atom_name in enumerate(atom_type.text.splitlines()[1:]):
        types[idx] = str(atom_name.split()[0])
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    if bond is not None:
        # ignore the bond type (first column)
        bonds = [(int(b.split()[1]), int(b.split()[2]))
                 for b in bond.text.splitlines()[1:]]
        chains = _find_chains(bonds)
    else:
        # No <bond> element: every atom is unbonded.
        bonds = []
        chains = []

    # Which bonded group (by position in ``chains``) each atom belongs to.
    chain_of_atom = {}
    for chain_idx, chain in enumerate(chains):
        for atom in chain:
            chain_of_atom[atom] = chain_idx

    # Add atoms in *file order* so that topology.atom(i) is the atom described
    # by the i-th position row and by index i in the bond list. (Adding them
    # chain-by-chain would scramble that correspondence whenever the bonded
    # groups are not contiguous in the file.)
    residue_of_chain = {}
    for idx in range(len(types)):
        chain_idx = chain_of_atom.get(idx)
        if chain_idx is None:
            # Unbonded atom ("ion"): its own chain and residue.
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
        elif chain_idx in residue_of_chain:
            t_residue = residue_of_chain[chain_idx]
        else:
            # First atom seen from this bonded group: create its residue.
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
            residue_of_chain[chain_idx] = t_residue
        topology.add_atom(types[idx], 'U', t_residue)

    for atom1, atom2 in bonds:
        topology.add_bond(topology.atom(atom1), topology.atom(atom2))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    top : {str, Trajectory, Topology}, optional
        Topology source: the path to a pdb file, a trajectory, or a topology.
        When omitted, the topology exposed by the opened .arc file is used.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as f:
        xyz, abc, ang = f.read(stride=stride, atom_indices=atom_indices)

        # Coordinates and box edge lengths are distances; convert in place.
        for distances in (xyz, abc):
            in_units_of(distances, f.distance_unit, Trajectory._distance_unit,
                        inplace=True)

        # `top` is optional here: fall back to the file's own topology.
        topology = f.topology if top is None else _parse_topology(top)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    frame_times = np.arange(len(xyz))
    if stride is not None:
        # A strided load keeps the original frame numbering in the time field.
        frame_times *= stride

    return Trajectory(xyz=xyz, topology=topology, time=frame_times,
                      unitcell_lengths=abc, unitcell_angles=ang)
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is not supplied.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile :  Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # %-interpolation: the previous '%s'.format(...) left a literal "%s"
        # in the message and never showed the offending type.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)

    # Frame numbers serve as the time axis.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)

    return t
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.  If 0, load all.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame. ``None`` is treated as 1.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    skip : int, default=0
        Number of leading frames to discard before the first chunk.

    Raises
    ------
    ValueError
        For .dcd files, if ``stride`` does not divide ``chunk`` evenly.

    See Also
    --------
    load, load_frame

    Examples
    --------

    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):
    ...     print(chunk)

    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>

    """
    # NOTE: this is a generator. End-of-data is signalled with a plain
    # ``return``; raising StopIteration inside a generator body is turned
    # into RuntimeError by PEP 479.
    stride = kwargs.get('stride', 1)
    if stride is None:
        # An explicitly passed stride=None means "every frame".
        stride = 1
    atom_indices = cast_indices(kwargs.get('atom_indices', None))
    if chunk % stride != 0 and filename.endswith('.dcd'):
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))

    if chunk == 0:
        # If chunk was 0 then we want to avoid filetype-specific code in case
        # of undefined behavior in various file parsers.
        yield load(filename, **kwargs)
        return

    skip = kwargs.get('skip', 0)

    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if skip > 0:
                xyz, _, _, _ = f.read(skip, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if data == []:
                    return
                in_units_of(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    elif filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            ptr = 0
            if skip > 0:
                xyz, _, _, _ = f.read(skip, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                # This format yields no times; synthesize frame numbers.
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        if atom_indices is not None:
            # Keep the topology in step with the atom subset that is read,
            # as the .h5/.lh5 branches do.
            topology = topology.subset(atom_indices)
        with XTCTrajectoryFile(filename) as f:
            if skip > 0:
                xyz, _, _, _ = f.read(skip)
                if len(xyz) == 0:
                    return
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        if atom_indices is not None:
            # Keep the topology in step with the atom subset that is read.
            topology = topology.subset(atom_indices)
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            if skip > 0:
                xyz, _, _ = f.read(skip, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
            while True:
                # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                # together work like this method, but HDF5/XTC do not.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        # No chunked reader for this format: load everything, then slice.
        log.critical("loading complete traj into mem! This might no be desired.")
        t = load(filename, **kwargs)
        for i in range(skip, len(t), chunk):
            yield t[i:i+chunk]