def load_restrt(filename, top=None, atom_indices=None):
    """Read an AMBER ASCII restart/inpcrd file into a trajectory.

    The restart file itself carries no topology information, so a topology
    has to be supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology that the restart file lacks: a path to a file
        containing topology information (e.g., a PDB or an AMBER prmtop), or
        certain types of Trajectory objects.  It is handed to
        ``_parse_topology``.
    atom_indices : array_like, optional
        If not None, only the coordinates of this subset of atoms are read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as produced by
        ``AmberRestartFile.read_as_traj``.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with AmberRestartFile(filename) as restart:
        traj = restart.read_as_traj(parsed_top, atom_indices=selection)
    return traj
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        Path of the LH5 trajectory file.
    top : {str, Trajectory, Topology}, optional
        Not referenced by this function.
        NOTE(review): presumably accepted so a dispatching caller can pass
        ``top`` uniformly to every loader -- confirm.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame is
        requested.  If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory
        Whatever ``LH5TrajectoryFile.read_as_traj`` returns.

    See Also
    --------
    mdtraj.LH5TrajectoryFile : Low level interface to LH5 files
    """
    selection = cast_indices(atom_indices)

    with LH5TrajectoryFile(filename) as lh5:
        frame_limit = None
        if frame is not None:
            lh5.seek(frame)
            frame_limit = 1
        return lh5.read_as_traj(n_frames=frame_limit, stride=stride,
                                atom_indices=selection)
def read(self, n_frames=None, stride=None, atom_indices=None):
    """Read frames from a trajectory in the GROMACS GRO format.

    Frames are pulled one at a time from ``self._read_frame()`` until either
    ``n_frames`` have been read or ``StopIteration`` signals the end of the
    file.  ``stride`` is applied afterwards, by slicing the collected arrays.

    Parameters
    ----------
    n_frames : int, optional
        If not None, read at most the next n_frames frames.  Otherwise keep
        reading until the file is exhausted.
    stride : int, optional
        If not None, keep only every stride-th of the frames that were read.
    atom_indices : np.ndarray, dtype=int, optional
        Indices of the atoms to retrieve.  If not supplied, all atoms are
        retrieved.

    Returns
    -------
    coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms, in units of nanometers.
    time : np.ndarray, None
        The time corresponding to each frame, in units of picoseconds, or
        None if any frame that was read carried no time information.
    unitcell_vectors : np.ndarray, shape=(n_frames, 3, 3)
        The box vectors in each frame, in units of nanometers.

    Raises
    ------
    ValueError
        If the file is closed or was not opened in mode ``'r'``.
    """
    if not self._open:
        raise ValueError('I/O operation on closed file')
    if self._mode != 'r':
        raise ValueError('file not opened for reading')

    atom_indices = cast_indices(atom_indices)
    selection = slice(None) if atom_indices is None else atom_indices
    # an unbounded counter means "read until _read_frame runs dry"
    frame_numbers = itertools.count() if n_frames is None else range(n_frames)

    xyz_frames, box_frames, frame_times = [], [], []
    has_time = True
    for _ in frame_numbers:
        try:
            xyz, box, stamp = self._read_frame()
            has_time = has_time and (stamp is not None)
            xyz_frames.append(xyz[selection])
            box_frames.append(box)
            frame_times.append(stamp)
        except StopIteration:
            break

    coordinates = np.array(xyz_frames)
    unitcell_vectors = np.array(box_frames)
    time = np.array(frame_times)

    # a single frame without a timestamp disables the time axis entirely
    time = time[::stride] if has_time else None
    return coordinates[::stride], unitcell_vectors[::stride], time
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms; the
        topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame; ``stride``
        is not used in that case.  If None, the default, the whole file is
        read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, with lengths converted from the file's
        distance unit to ``Trajectory._distance_unit``.

    Examples
    --------
    >>> import mdtraj as md                                 # doctest: +SKIP
    >>> traj = md.load_hdf5('output.h5')                    # doctest: +SKIP
    >>> print(traj)                                         # doctest: +SKIP
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>
    >>> traj2 = md.load_hdf5('output.h5', stride=2)         # doctest: +SKIP
    >>> print(traj2)                                        # doctest: +SKIP
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    See Also
    --------
    mdtraj.HDF5TrajectoryFile : Low level interface to HDF5 files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as h5:
        if frame is None:
            frames = h5.read(stride=stride, atom_indices=atom_indices)
        else:
            h5.seek(frame)
            frames = h5.read(n_frames=1, atom_indices=atom_indices)

        top = h5.topology

        # both length-valued arrays are converted in place
        for lengths in (frames.coordinates, frames.cell_lengths):
            convert(lengths, h5.distance_unit, Trajectory._distance_unit,
                    inplace=True)

        if atom_indices is not None:
            top = h5.topology.subset(atom_indices)

    return Trajectory(xyz=frames.coordinates, topology=top, time=frames.time,
                      unitcell_lengths=frames.cell_lengths,
                      unitcell_angles=frames.cell_angles)
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms; the
        topology stored in the file is subset accordingly.
    frame : int, optional
        If given, the file is positioned at this frame and one frame is read
        (``stride`` is not used).  If None, the default, the entire
        trajectory is loaded.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Examples
    --------
    >>> import mdtraj as md                                 # doctest: +SKIP
    >>> traj = md.load_hdf5('output.h5')                    # doctest: +SKIP
    >>> print(traj)                                         # doctest: +SKIP
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>
    >>> traj2 = md.load_hdf5('output.h5', stride=2)         # doctest: +SKIP
    >>> print(traj2)                                        # doctest: +SKIP
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    See Also
    --------
    mdtraj.HDF5TrajectoryFile : Low level interface to HDF5 files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    atom_indices = cast_indices(atom_indices)
    single_frame = frame is not None

    with HDF5TrajectoryFile(filename) as handle:
        if single_frame:
            handle.seek(frame)
            payload = handle.read(n_frames=1, atom_indices=atom_indices)
        else:
            payload = handle.read(stride=stride, atom_indices=atom_indices)

        file_topology = handle.topology

        # lengths are rescaled in place to the Trajectory's distance unit
        in_units_of(payload.coordinates, handle.distance_unit,
                    Trajectory._distance_unit, inplace=True)
        in_units_of(payload.cell_lengths, handle.distance_unit,
                    Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            file_topology = handle.topology.subset(atom_indices)

    result = Trajectory(
        xyz=payload.coordinates,
        topology=file_topology,
        time=payload.time,
        unitcell_lengths=payload.cell_lengths,
        unitcell_angles=payload.cell_angles,
    )
    return result
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : path-like
        Path of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd file is opened with the atom count of this topology, so it
        is mandatory even though the signature gives it a default.  Pass in
        either the path to a pdb file, a trajectory, or a topology.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame is
        requested.  If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is neither a string nor an ``os.PathLike``.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile : Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # ``top`` is optional in the signature but mandatory in practice: this
    # loader is usually reached through load(), where ``top`` arrives via
    # **kwargs, and a missing topology should yield a readable error.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    path_types = (string_types, os.PathLike)
    if not isinstance(filename, path_types):
        raise TypeError('filename must be of type path-like for load_mdcrd. '
                        'you supplied %s' % type(filename))

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, parsed_top.n_atoms) as mdcrd:
        frame_limit = None
        if frame is not None:
            mdcrd.seek(frame)
            frame_limit = 1
        return mdcrd.read_as_traj(parsed_top, n_frames=frame_limit,
                                  stride=stride, atom_indices=selection)
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        Required in practice: the file is opened with the atom count of this
        topology.  Pass in either the path to a pdb file, a trajectory, or a
        topology.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms.
    frame : int, optional
        If given, seek to this frame and request a single frame.  If None,
        the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile : Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # The signature keeps ``top`` optional because load() forwards it through
    # **kwargs; failing here gives the user an informative message instead.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    filename_ok = isinstance(filename, string_types)
    if not filename_ok:
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, topology.n_atoms) as reader:
        if frame is None:
            count = None
        else:
            reader.seek(frame)
            count = 1
        return reader.read_as_traj(topology, n_frames=count, stride=stride,
                                   atom_indices=atom_indices)
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file.

    The NetCDF format does not contain the information needed to build a
    topology, so one must be supplied through ``top``.

    Parameters
    ----------
    filename : str
        Filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Path to a pdb file, a trajectory, or a topology that supplies the
        topology information.  Mandatory despite the default in the
        signature.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms.
    frame : int, optional
        If given, seek to this frame and request a single frame.  If None,
        the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile : Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    if top is None:
        raise ValueError('"top" argument is required for load_netcdf')

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with NetCDFTrajectoryFile(filename) as nc:
        frame_limit = None
        if frame is not None:
            nc.seek(frame)
            frame_limit = 1
        return nc.read_as_traj(parsed_top, n_frames=frame_limit,
                               atom_indices=selection, stride=stride)
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file.

    The NetCDF format does not contain the information needed to build a
    topology, so one must be supplied through ``top``.

    Parameters
    ----------
    filename : str
        Filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Path to a pdb file, a trajectory, or a topology that supplies the
        topology information.  It is handed to ``_parse_topology``.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms; the
        topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame; ``stride``
        is not used in that case.  If None, the default, the entire
        trajectory is loaded.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile : Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as nc:
        if frame is None:
            frames = nc.read(stride=stride, atom_indices=atom_indices)
        else:
            nc.seek(frame)
            frames = nc.read(n_frames=1, atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = frames

        target_unit = Trajectory._distance_unit
        xyz = in_units_of(xyz, nc.distance_unit, target_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, nc.distance_unit, target_unit,
                                   inplace=True)

    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame is
        requested.  If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as returned by
        ``HDF5TrajectoryFile.read_as_traj``.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md                                 # doctest: +SKIP
    >>> traj = md.load_hdf5('output.h5')                    # doctest: +SKIP
    >>> print(traj)                                         # doctest: +SKIP
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>
    >>> traj2 = md.load_hdf5('output.h5', stride=2)         # doctest: +SKIP
    >>> print(traj2)                                        # doctest: +SKIP
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    See Also
    --------
    mdtraj.HDF5TrajectoryFile : Low level interface to HDF5 files
    """
    if not isinstance(filename, string_types):
        # the message used to name load_lh5 (copy/paste slip)
        raise TypeError('filename must be of type string for load_hdf5. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    The topology is taken from the LH5 file itself.  Frame times are
    synthesized from frame numbers, because ``read`` only yields coordinates
    here.

    Parameters
    ----------
    filename : str
        Path of the LH5 trajectory file.
    top : {str, Trajectory, Topology}, optional
        Not referenced by this function.
        NOTE(review): presumably accepted so a dispatching caller can pass
        ``top`` uniformly to every loader -- confirm.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms; the
        file's topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame; ``stride``
        is not used in that case.  If None, the default, the entire
        trajectory is loaded.

    Returns
    -------
    trajectory : md.Trajectory
        Trajectory whose ``time`` is the frame index: offset by ``frame`` for
        a single-frame load, otherwise scaled by ``stride`` when given.

    See Also
    --------
    mdtraj.LH5TrajectoryFile : Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)

    with LH5TrajectoryFile(filename) as lh5:
        if frame is None:
            xyz = lh5.read(stride=stride, atom_indices=atom_indices)
        else:
            lh5.seek(frame)
            xyz = lh5.read(n_frames=1, atom_indices=atom_indices)

        top_from_file = lh5.topology
        # rescale coordinates in place to the Trajectory's distance unit
        in_units_of(xyz, lh5.distance_unit, Trajectory._distance_unit,
                    inplace=True)

        if atom_indices is not None:
            top_from_file = lh5.topology.subset(atom_indices)

    # rebuild a time axis from frame numbers
    frame_numbers = np.arange(len(xyz))
    if frame is not None:
        frame_numbers += frame
    elif stride is not None:
        frame_numbers *= stride

    return Trajectory(xyz=xyz, topology=top_from_file, time=frame_numbers)
def load_restrt(filename, top=None, atom_indices=None):
    """Read an AMBER ASCII restart/inpcrd file into a trajectory.

    The restart file itself carries no topology information, so a topology
    has to be supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology that the restart file lacks: a path to a file
        containing topology information (e.g., a PDB or an AMBER prmtop), or
        certain types of Trajectory objects.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms; the
        topology is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, with lengths converted from the file's
        distance unit to ``Trajectory._distance_unit``.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    parsed_top = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        parsed_top = parsed_top.subset(atom_indices)

    with AmberRestartFile(filename) as restart:
        contents = restart.read(atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = contents

        source_unit = restart.distance_unit
        xyz = in_units_of(xyz, source_unit, Trajectory._distance_unit,
                          inplace=True)
        cell_lengths = in_units_of(cell_lengths, source_unit,
                                   Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=xyz, topology=parsed_top, time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    The topology comes from the LH5 file itself, and the time axis is
    reconstructed from frame numbers.

    Parameters
    ----------
    filename : str
        Path of the LH5 trajectory file.
    top : {str, Trajectory, Topology}, optional
        Not referenced by this function.
        NOTE(review): presumably accepted so a dispatching caller can pass
        ``top`` uniformly to every loader -- confirm.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms; the
        file's topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame; ``stride``
        is not used in that case.  If None, the default, the entire
        trajectory is loaded.

    Returns
    -------
    trajectory : md.Trajectory
        Trajectory whose ``time`` is the frame index: offset by ``frame`` for
        a single-frame load, otherwise scaled by ``stride`` when given.

    See Also
    --------
    mdtraj.LH5TrajectoryFile : Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    single_frame = frame is not None

    with LH5TrajectoryFile(filename) as handle:
        if single_frame:
            handle.seek(frame)
            positions = handle.read(n_frames=1, atom_indices=atom_indices)
        else:
            positions = handle.read(stride=stride, atom_indices=atom_indices)

        topology = handle.topology
        # in-place unit conversion of the coordinates
        convert(positions, handle.distance_unit, Trajectory._distance_unit,
                inplace=True)

        if atom_indices is not None:
            topology = handle.topology.subset(atom_indices)

    stamps = np.arange(len(positions))
    if single_frame:
        stamps += frame
    elif stride is not None:
        stamps *= stride

    return Trajectory(xyz=positions, topology=topology, time=stamps)
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file.

    Since this file does not contain the information needed to specify the
    topology, a topology must be supplied.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Either the path to a file containing topology information (e.g., a
        PDB, an AMBER prmtop) or certain types of Trajectory objects; handed
        to ``_parse_topology``.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms and
        subset the topology to match.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    wants_subset = atom_indices is not None
    if wants_subset:
        topology = topology.subset(atom_indices)

    with AmberRestartFile(filename) as rst:
        xyz, time, box_lengths, box_angles = rst.read(atom_indices=atom_indices)

        # coordinates and box lengths share the file's distance unit
        xyz = in_units_of(xyz, rst.distance_unit,
                          Trajectory._distance_unit, inplace=True)
        box_lengths = in_units_of(box_lengths, rst.distance_unit,
                                  Trajectory._distance_unit, inplace=True)

    loaded = Trajectory(
        xyz=xyz,
        topology=topology,
        time=time,
        unitcell_lengths=box_lengths,
        unitcell_angles=box_angles,
    )
    return loaded
def load_arc(filename, stride=None, atom_indices=None, frame=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame is
        requested.  If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as returned by
        ``ArcTrajectoryFile.read_as_traj``.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile : Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology

    filename_ok = isinstance(filename, string_types)
    if not filename_ok:
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    selection = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as arc:
        frame_limit = None
        if frame is not None:
            arc.seek(frame)
            frame_limit = 1
        return arc.read_as_traj(n_frames=frame_limit, stride=stride,
                                atom_indices=selection)
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once.  The ``.h5``, ``.lh5``, ``.xtc`` and ``.dcd`` formats are streamed
    from disk; any other format is loaded completely with ``load`` and then
    sliced into pieces of ``chunk`` frames.

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. For ``.h5`` and ``.lh5`` files the topology
        stored in the file is used and a warning is issued if ``top`` is
        passed.
    stride : int, default=1
        Only read every stride-th frame.  ``None`` is treated as 1.  Must be
        a divisor of ``chunk``.
    atom_indices : array_like, optional
        If not None, read only the coordinates of this subset of atoms.

    Yields
    ------
    trajectory : md.Trajectory
        Successive fragments of the trajectory.

    Raises
    ------
    ValueError
        If ``stride`` does not divide ``chunk`` evenly.  Because this is a
        generator, the error surfaces on the first iteration.

    See Also
    --------
    load, load_frame

    Examples
    --------
    >>> import mdtraj as md                                        # doctest: +SKIP
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'): # doctest: +SKIP
    ...     print(chunk)
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    """
    stride = kwargs.get('stride', 1)
    if stride is None:
        # an explicit stride=None means "every frame"; without this the
        # modulo check below fails with a TypeError
        stride = 1
    atom_indices = cast_indices(kwargs.get('atom_indices', None))
    if chunk % stride != 0:
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))

    # NOTE: exhaustion is signalled with a plain ``return``.  Raising
    # StopIteration inside a generator is converted to RuntimeError under
    # PEP 479 (default behaviour from Python 3.7).

    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                # NOTE(review): relies on read() returning [] at end of
                # file -- confirm against HDF5TrajectoryFile.read
                if data == []:
                    return
                convert(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    elif filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            # ptr tracks the frame number of the next chunk's first frame so
            # that the synthesized time axis is continuous across chunks
            ptr = 0
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        with XTCTrajectoryFile(filename) as f:
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            while True:
                # for reasons that have not been investigated, the DCD reader
                # takes ``chunk`` (not chunk*stride) together with stride,
                # unlike the HDF5/XTC readers above.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time,
                                 unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        # no streaming reader for this format: load everything, then slice
        t = load(filename, **kwargs)
        for i in range(0, len(t), chunk):
            yield t[i:i+chunk]
def load_pdb(filename, stride=None, atom_indices=None, frame=None, no_boxchk=False):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, default=None
        If not None, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, default=None
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored and the frame's time is set
        to ``frame``.
    no_boxchk : bool, default=False
        By default, a heuristic check based on the particle density will be
        performed to determine if the unit cell dimensions are absurd. If the
        particle density is >1000 atoms per nm^3, the unit cell will be
        discarded. This is done because all PDB files from RCSB contain a
        CRYST1 record, even if there are no periodic boundaries, and dummy
        values are filled in instead. This check will filter out those false
        unit cells and avoid potential errors in geometry calculations. Set
        this variable to ``True`` in order to skip this heuristic check.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory

    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # Select the frame and the atoms in two separate steps.  Passing
            # [frame] and an index array in one subscript makes NumPy
            # broadcast them against each other and collapse the frame axis,
            # which used to trip the shape assertion below.
            coords = f.positions[[frame]][:, atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # the file has a single unit cell; replicate it for every frame
        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # offset, not scale: multiplying the lone 0 by ``frame`` always gave 0
        time += frame
    elif stride is not None:
        time *= stride

    traj = Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)

    if not no_boxchk and traj.unitcell_lengths is not None:
        # Only one CRYST1 record is allowed, so only do this check for the first
        # frame. Some RCSB PDB files do not *really* have a unit cell, but still
        # have a CRYST1 record with a dummy definition. These boxes are usually
        # tiny (e.g., 1 A^3), so check that the particle density in the unit
        # cell is not absurdly high. Standard water density is ~55 M, which
        # yields a particle density ~100 atoms per cubic nm. It should be safe
        # to say that no particle density should exceed 10x that.
        particle_density = traj.top.n_atoms / traj.unitcell_volumes[0]
        if particle_density > 1000:
            warnings.warn('Unlikely unit cell vectors detected in PDB file likely '
                          'resulting from a dummy CRYST1 record. Discarding unit '
                          'cell vectors.')
            traj._unitcell_lengths = traj._unitcell_angles = None

    return traj
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    top : {str, Trajectory, Topology}, optional
        Source of topology information (path to a pdb file, a trajectory, or
        a topology). When None, the topology exposed by the opened .arc file
        object is used instead.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile : Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    selection = cast_indices(atom_indices)
    with ArcTrajectoryFile(filename) as f:
        coords, box_lengths, box_angles = f.read(stride=stride,
                                                 atom_indices=selection)
        target_unit = Trajectory._distance_unit
        in_units_of(coords, f.distance_unit, target_unit, inplace=True)
        in_units_of(box_lengths, f.distance_unit, target_unit, inplace=True)

        # `top` is optional here: fall back on the file's own topology.
        topology = f.topology if top is None else _parse_topology(top)
        if selection is not None:
            topology = topology.subset(selection)

    frame_times = np.arange(len(coords))
    if stride is not None:
        # Strided reads should report the original frame indices as times.
        frame_times *= stride

    return Trajectory(xyz=coords, topology=topology, time=frame_times,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
def load_gsd(filename, top=None, start=None, n_frames=None, stride=None,
             atom_indices=None, frame=None):
    """Load a GSD trajectory file.

    Parameters
    ----------
    filename : path-like
        Path of GSD trajectory file.
    top : {path-like, Trajectory, Topology}, None
        A pdb file, a trajectory, or a topology to supply topology information
        If None, topology information will be parsed from the GSD file
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is neither a string nor an ``os.PathLike``.
    """
    from mdtraj.core.trajectory import Trajectory, _parse_topology
    import gsd.hoomd

    if not isinstance(filename, (string_types, os.PathLike)):
        # %-interpolation: the template uses %s, so str.format() would leave
        # the placeholder untouched and hide the offending type.
        raise TypeError('filename must be of type path-like for load_gsd. '
                        'you supplied %s' % type(filename))

    if top is not None:
        topology = _parse_topology(top)
    else:
        topology = load_gsd_topology(filename)
    atom_indices = cast_indices(atom_indices)

    with gsd.hoomd.open(filename, 'rb') as f:
        if frame is not None:
            xyz, vectors, time = read_snapshot(frame, f[frame], topology,
                                               atom_indices=atom_indices)
            # NOTE(review): the topology is not subset here even when
            # atom_indices is given -- confirm read_snapshot returns
            # coordinates compatible with the full topology.
            t = Trajectory(xyz=np.array(xyz), topology=topology,
                           time=np.array([time]))
            t.unitcell_vectors = np.reshape(vectors, (-1, 3, 3))
            return t
        else:
            return hoomdtraj_to_traj(f, topology, start=start,
                                     n_frames=n_frames, stride=stride,
                                     atom_indices=atom_indices)
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        Source of the topology information (path to a pdb file, a trajectory,
        or a topology). Although it defaults to None in the signature, it
        must be supplied.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile : Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # `top` is optional in the signature because this function is usually
    # reached through load(), where it arrives via **kwargs; checking it here
    # lets us give the user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    selection = cast_indices(atom_indices)
    single_frame = frame is not None

    # The reader is given the atom count of the full (un-subset) topology.
    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as f:
        if single_frame:
            f.seek(frame)
            read_kwargs = {'n_frames': 1}
        else:
            read_kwargs = {'stride': stride}
        coords, box_lengths = f.read(atom_indices=selection, **read_kwargs)

        target_unit = Trajectory._distance_unit
        in_units_of(coords, f.distance_unit, target_unit, inplace=True)
        has_box = box_lengths is not None
        if has_box:
            in_units_of(box_lengths, f.distance_unit, target_unit,
                        inplace=True)
            # Assume that its a rectilinear box
            box_angles = 90.0 * np.ones_like(box_lengths)

    if selection is not None:
        topology = topology.subset(selection)

    frame_times = np.arange(len(coords))
    if single_frame:
        frame_times += frame
    elif stride is not None:
        frame_times *= stride

    t = Trajectory(xyz=coords, topology=topology, time=frame_times)
    if has_box:
        t.unitcell_lengths = box_lengths
        t.unitcell_angles = box_angles
    return t
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None or ``unit_set`` is not supported.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile : Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        # %-interpolation: the template uses %s, so str.format() would leave
        # the placeholder untouched and hide the offending type.
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assignment, not comparison: record the unit implied by the
            # 'real' unit set so the conversion below uses it.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(n_frames=1,
                                                    atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(stride=stride,
                                                    atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
def load_hdf5(filename, stride=None, atom_indices=None, frame=None,
              root_uep='/'):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    root_uep : str, default='/'
        The root User Entry Point. This is a group in the HDF5 hierarchy
        which will be taken as the starting point to create the object tree.
        It can be whatever existing group in the file, named by its HDF5 path.
        If it does not exist, an HDF5ExtError is issued. Use this if you do not
        want to build the entire object tree, but rather only a subtree of it.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print(traj)
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2)
    >>> print(traj2)
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    See Also
    --------
    mdtraj.HDF5TrajectoryFile : Low level interface to HDF5 files
    """
    if not isinstance(filename, string_types):
        # The message must name this function (it previously said load_lh5).
        raise TypeError('filename must be of type string for load_hdf5. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename, root_uep=root_uep) as f:
        if frame is not None:
            # Position the reader on the requested frame and read just one.
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file.

    The NetCDF format doesn't contain information to specify the topology, so
    a topology has to be supplied through ``top``.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Source of the topology information: the path to a pdb file, a
        trajectory, or a topology.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile : Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    selection = cast_indices(atom_indices)
    if selection is not None:
        topology = topology.subset(selection)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is None:
            frames = f.read(stride=stride, atom_indices=selection)
        else:
            # Single-frame request: jump to it and read exactly one frame.
            f.seek(frame)
            frames = f.read(n_frames=1, atom_indices=selection)
        coords, frame_times, box_lengths, box_angles = frames

        target_unit = Trajectory._distance_unit
        coords = in_units_of(coords, f.distance_unit, target_unit,
                             inplace=True)
        box_lengths = in_units_of(box_lengths, f.distance_unit, target_unit,
                                  inplace=True)

    return Trajectory(xyz=coords, topology=topology, time=frame_times,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # Index with a list so the frame axis is kept (result stays 3-D).
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            # The file carries a single cell definition; repeat it per frame.
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit,
                    Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # A single-frame load yields time == [0]; offset it by the frame
        # index. (Multiplying, as was done previously, always produced 0.)
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
def read(self, n_frames=None, stride=None, atom_indices=None):
    """Read data from a molecular dynamics trajectory in the GROMACS GRO
    format.

    Parameters
    ----------
    n_frames : int, optional
        If n_frames is not None, the next n_frames of data from the file
        will be read. Otherwise, all of the frames in the file will be read.
    stride : int, optional
        If stride is not None, read only every stride-th frame from disk.
    atom_indices : np.ndarray, dtype=int, optional
        The specific indices of the atoms you'd like to retrieve. If not
        supplied, all of the atoms will be retrieved.

    Returns
    -------
    coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms, in units of nanometers.
    time : np.ndarray, None
        The time corresponding to each frame, in units of picoseconds, or
        None if no time information is present in the trajectory.
    unitcell_vectors : np.ndarray, shape=(n_frames, 3, 3)
        The box vectors in each frame, in units of nanometers

    Raises
    ------
    ValueError
        If the file is closed or was not opened in read mode.
    """
    if not self._open:
        raise ValueError('I/O operation on closed file')
    if self._mode != 'r':
        raise ValueError('file not opened for reading')

    selection = cast_indices(atom_indices)
    if selection is None:
        selection = slice(None)

    # Unbounded counter when no frame limit is given; the loop then ends
    # when the frame reader signals exhaustion.
    frame_counter = itertools.count() if n_frames is None else range(n_frames)

    xyz_frames, box_frames, time_stamps = [], [], []
    every_frame_timed = True
    for _ in frame_counter:
        try:
            frame_xyz, frame_box, frame_time = self._read_frame()
        except StopIteration:
            break
        if frame_time is None:
            every_frame_timed = False
        xyz_frames.append(frame_xyz[selection])
        box_frames.append(frame_box)
        time_stamps.append(frame_time)

    coordinates = np.array(xyz_frames)
    unitcell_vectors = np.array(box_frames)
    # Time is reported only when every frame that was read carried one.
    time = np.array(time_stamps)[::stride] if every_frame_timed else None

    return coordinates[::stride], time, unitcell_vectors[::stride]
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile : Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # %-interpolation: the template uses %s, so str.format() would leave
        # the placeholder untouched and hide the offending type.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    return t
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile : Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # %-interpolation: the template uses %s, so str.format() would leave
        # the placeholder untouched and hide the offending type.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            # Position the reader on the requested frame and read just one.
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(topology, n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile : Low level interface to MDCRD files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little weird, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    # The reader must be told the atom count of the *full* topology, so the
    # topology is only subset after the file has been read. Subsetting first
    # would pass the reduced atom count as n_atoms.
    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride,
                                       atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        if cell_lengths is not None:
            in_units_of(cell_lengths, f.distance_unit,
                        Trajectory._distance_unit, inplace=True)

            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None or ``unit_set`` is not supported.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile : Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        # %-interpolation: the template uses %s, so str.format() would leave
        # the placeholder untouched and hide the offending type.
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assignment, not comparison: record the unit implied by the
            # 'real' unit set on the file object before reading.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError('Unsupported unit set specified: {0}.'.format(unit_set))
        if frame is not None:
            # Position the reader on the requested frame and read just one.
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(topology, n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)