Пример #1
0
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file. Since this file doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        name of the AMBER restart file
    top : {str, Trajectory, Topology}
        Pass in either the path to a file containing topology information (e.g.,
        a PDB, an AMBER prmtop, or certain types of Trajectory objects) to
        supply the necessary topology information that is not present in these
        files
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with AmberRestartFile(filename) as f:
        return f.read_as_traj(topology, atom_indices=atom_indices)
Пример #2
0
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    See Also
    --------
    mdtraj.LH5TrajectoryFile :  Low level interface to LH5 files
    """
    atom_indices = cast_indices(atom_indices)
    with LH5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames, stride=stride, atom_indices=atom_indices)
Пример #3
0
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file. Since this file doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        name of the AMBER restart file
    top : {str, Trajectory, Topology}
        Pass in either the path to a file containing topology information (e.g.,
        a PDB, an AMBER prmtop, or certain types of Trajectory objects) to
        supply the necessary topology information that is not present in these
        files
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with AmberRestartFile(filename) as f:
        return f.read_as_traj(topology, atom_indices=atom_indices)
Пример #4
0
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    See Also
    --------
    mdtraj.LH5TrajectoryFile :  Low level interface to LH5 files
    """
    atom_indices = cast_indices(atom_indices)
    with LH5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames, stride=stride, atom_indices=atom_indices)
Пример #5
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read data from a molecular dynamics trajectory in the GROMACS GRO
        format.

        Parameters
        ----------
        n_frames : int, optional
            If n_frames is not None, the next n_frames of data from the file
            will be read. Otherwise, all of the frames in the file will be read.
        stride : int, optional
            If stride is not None, read only every stride-th frame from disk.
        atom_indices : np.ndarray, dtype=int, optional
            The specific indices of the atoms you'd like to retrieve. If not
            supplied, all of the atoms will be retrieved.

        Returns
        -------
        coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms, in units of nanometers.
        time : np.ndarray, None
            The time corresponding to each frame, in units of picoseconds, or
            None if no time information is present in the trajectory.
        unitcell_vectors : np.ndarray, shape=(n_frames, 3, 3)
            The box vectors in each frame, in units of nanometers
        """
        if not self._open:
            raise ValueError('I/O operation on closed file')
        if not self._mode == 'r':
            raise ValueError('file not opened for reading')

        coordinates = []
        unitcell_vectors = []
        time = []
        contains_time = True

        atom_indices = cast_indices(atom_indices)
        atom_slice = slice(None) if atom_indices is None else atom_indices

        if n_frames is None:
            frameiter = itertools.count()
        else:
            frameiter = range(n_frames)

        for i in frameiter:
            try:
                frame_xyz, frame_box, frame_time = self._read_frame()
                contains_time = contains_time and (frame_time is not None)
                coordinates.append(frame_xyz[atom_slice])
                unitcell_vectors.append(frame_box)
                time.append(frame_time)
            except StopIteration:
                break

        coordinates, unitcell_vectors, time = map(np.array, (coordinates, unitcell_vectors, time))

        if not contains_time:
            time = None
        else:
            time = time[::stride]
        return coordinates[::stride], unitcell_vectors[::stride], time
Пример #6
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Examples
    --------
    >>> import mdtraj as md                                        # doctest: +SKIP
    >>> traj = md.load_hdf5('output.h5')                           # doctest: +SKIP
    >>> print traj                                                 # doctest: +SKIP
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP

    >>> traj2 = md.load_hdf5('output.h5', stride=2, top='topology.pdb')   # doctest: +SKIP
    >>> print traj2                                                       # doctest: +SKIP
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>         # doctest: +SKIP

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory
    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            data = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            data = f.read(stride=stride, atom_indices=atom_indices)

        topology = f.topology
        convert(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
        convert(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    trajectory = Trajectory(xyz=data.coordinates, topology=topology,
                            time=data.time, unitcell_lengths=data.cell_lengths,
                            unitcell_angles=data.cell_angles)
    return trajectory
Пример #7
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print traj
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2, top='topology.pdb')
    >>> print traj2
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory
    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            data = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            data = f.read(stride=stride, atom_indices=atom_indices)

        topology = f.topology
        in_units_of(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    trajectory = Trajectory(xyz=data.coordinates, topology=topology,
                            time=data.time, unitcell_lengths=data.cell_lengths,
                            unitcell_angles=data.cell_angles)
    return trajectory
Пример #8
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : path-like
        Path of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The BINPOS format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little wierd, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, (string_types, os.PathLike)):
        raise TypeError('filename must be of type path-like for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, topology.n_atoms) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(topology,
                              n_frames=n_frames,
                              stride=stride,
                              atom_indices=atom_indices)
Пример #9
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The BINPOS format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little wierd, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
            'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, topology.n_atoms) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(topology, n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
Пример #10
0
def load_netcdf(filename,
                top=None,
                stride=None,
                atom_indices=None,
                frame=None):
    """Load an AMBER NetCDF file. Since the NetCDF format doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory
    if top is None:
        raise ValueError('"top" argument is required for load_netcdf')

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None

        return f.read_as_traj(topology,
                              n_frames=n_frames,
                              atom_indices=atom_indices,
                              stride=stride)
Пример #11
0
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file. Since the NetCDF format doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz, time, cell_lengths, cell_angles = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, time, cell_lengths, cell_angles = f.read(stride=stride, atom_indices=atom_indices)

        xyz = in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    trajectory = Trajectory(xyz=xyz, topology=topology, time=time,
                            unitcell_lengths=cell_lengths,
                            unitcell_angles=cell_angles)
    return trajectory
Пример #12
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print traj
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2, top='topology.pdb')
    >>> print traj2
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_lh5. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames,
                              stride=stride,
                              atom_indices=atom_indices)
Пример #13
0
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    See Also
    --------
    mdtraj.LH5TrajectoryFile :  Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    with LH5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)

        topology = f.topology
        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
Пример #14
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print traj
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2, top='topology.pdb')
    >>> print traj2
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_lh5. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames, stride=stride, atom_indices=atom_indices)
Пример #15
0
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file. Since this file doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        name of the AMBER restart file
    top : {str, Trajectory, Topology}
        Pass in either the path to a file containing topology information (e.g.,
        a PDB, an AMBER prmtop, or certain types of Trajectory objects) to
        supply the necessary topology information that is not present in these
        files
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with AmberRestartFile(filename) as f:
        xyz, time, cell_lengths, cell_angles = f.read(
            atom_indices=atom_indices)
        xyz = in_units_of(xyz,
                          f.distance_unit,
                          Trajectory._distance_unit,
                          inplace=True)
        cell_lengths = in_units_of(cell_lengths,
                                   f.distance_unit,
                                   Trajectory._distance_unit,
                                   inplace=True)

    trajectory = Trajectory(xyz=xyz,
                            topology=topology,
                            time=time,
                            unitcell_lengths=cell_lengths,
                            unitcell_angles=cell_angles)
    return trajectory
Пример #16
0
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    See Also
    --------
    mdtraj.LH5TrajectoryFile :  Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    with LH5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)

        topology = f.topology
        convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
Пример #17
0
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file. Since the NetCDF format doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory
    if top is None:
        raise ValueError('"top" argument is required for load_netcdf')

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None

        return f.read_as_traj(topology, n_frames=n_frames, atom_indices=atom_indices, stride=stride)
Пример #18
0
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file. Since this file doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        name of the AMBER restart file
    top : {str, Trajectory, Topology}
        Pass in either the path to a file containing topology information (e.g.,
        a PDB, an AMBER prmtop, or certain types of Trajectory objects) to
        supply the necessary topology information that is not present in these
        files
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with AmberRestartFile(filename) as f:
        xyz, time, cell_lengths, cell_angles = f.read(atom_indices=atom_indices)
        xyz = in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                          inplace=True)
        cell_lengths = in_units_of(cell_lengths, f.distance_unit,
                                   Trajectory._distance_unit, inplace=True)

    trajectory = Trajectory(xyz=xyz, topology=topology, time=time,
                            unitcell_lengths=cell_lengths,
                            unitcell_angles=cell_angles)
    return trajectory
Пример #19
0
def load_arc(filename, stride=None, atom_indices=None, frame=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames,
                              stride=stride,
                              atom_indices=atom_indices)
Пример #20
0
def load_arc(filename, stride=None, atom_indices=None, frame=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
Пример #21
0
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.

    See Also
    --------
    load, load_frame
        
    Examples
    --------
    >>> import mdtraj as md                                        # doctest: +SKIP
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb') # doctest: +SKIP
    ...    print chunk
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    """
    stride = kwargs.get('stride', 1)
    atom_indices = cast_indices(kwargs.get('atom_indices', None))
    if chunk % stride != 0:
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))

    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if data == []:
                    raise StopIteration()
                convert(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    if filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            ptr = 0
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    raise StopIteration()
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        with XTCTrajectoryFile(filename) as f:
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    raise StopIteration()
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            while True:
                # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                # together work like this method, but HDF5/XTC do not.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    raise StopIteration()
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        t = load(filename, **kwargs)
        for i in range(0, len(t), chunk):
            yield t[i:i+chunk]
Пример #22
0
def load_pdb(filename, stride=None, atom_indices=None, frame=None,
             no_boxchk=False):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, default=None
        If not None, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, default=None
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    no_boxchk : bool, default=False
        By default, a heuristic check based on the particle density will be
        performed to determine if the unit cell dimensions are absurd. If the
        particle density is >1000 atoms per nm^3, the unit cell will be
        discarded. This is done because all PDB files from RCSB contain a CRYST1
        record, even if there are no periodic boundaries, and dummy values are
        filled in instead. This check will filter out those false unit cells and
        avoid potential errors in geometry calculations. Set this variable to
        ``True`` in order to skip this heuristic check.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
        
    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)
    
    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        time *= frame
    elif stride is not None:
        time *= stride

    traj = Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)

    if not no_boxchk and traj.unitcell_lengths is not None:
        # Only one CRYST1 record is allowed, so only do this check for the first
        # frame. Some RCSB PDB files do not *really* have a unit cell, but still
        # have a CRYST1 record with a dummy definition. These boxes are usually
        # tiny (e.g., 1 A^3), so check that the particle density in the unit
        # cell is not absurdly high. Standard water density is ~55 M, which
        # yields a particle density ~100 atoms per cubic nm. It should be safe
        # to say that no particle density should exceed 10x that.
        particle_density = traj.top.n_atoms / traj.unitcell_volumes[0]
        if particle_density > 1000:
            warnings.warn('Unlikely unit cell vectors detected in PDB file likely '
                          'resulting from a dummy CRYST1 record. Discarding unit '
                          'cell vectors.')
            traj._unitcell_lengths = traj._unitcell_angles = None

    return traj
Пример #23
0
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    top : {str, Trajectory, Topology}
        The .arc format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little weird, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    #   if top is None:
    #       raise ValueError('"top" argument is required for load_arc')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as f:
        xyz, abc, ang = f.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(abc,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        if top is None:
            topology = f.topology
        else:
            topology = _parse_topology(top)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if stride is not None:
        # if we loaded with a stride, the Trajectories's time field should
        # respect that
        time *= stride

    t = Trajectory(xyz=xyz,
                   topology=topology,
                   time=time,
                   unitcell_lengths=abc,
                   unitcell_angles=ang)
    return t
Пример #24
0
def load_gsd(filename,
             top=None,
             start=None,
             n_frames=None,
             stride=None,
             atom_indices=None,
             frame=None):
    """Load a GSD trajectory file.

    Parameters
    -----------
    filename : path-like
        Path of GSD trajectory file.
    top : {path-like, Trajectory, Topology}, None
        A pdb file, a trajectory, or a topology to supply topology information
        If None, topology information will be parsed from the GSD file
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.   
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    """
    from mdtraj.core.trajectory import Trajectory, _parse_topology
    import gsd.hoomd

    if not isinstance(filename, (string_types, os.PathLike)):
        raise TypeError('filename must be of type path-like for load_gsd. '
                        'you supplied %s'.format(type(filename)))

    if top is not None:
        topology = _parse_topology(top)
    else:
        topology = load_gsd_topology(filename)
    atom_indices = cast_indices(atom_indices)

    with gsd.hoomd.open(filename, 'rb') as f:
        if frame is not None:
            xyz, vectors, time = read_snapshot(frame,
                                               f[frame],
                                               topology,
                                               atom_indices=atom_indices)
            t = Trajectory(xyz=np.array(xyz),
                           topology=topology,
                           time=np.array([time]))
            t.unitcell_vectors = np.reshape(vectors, (-1, 3, 3))
            return t

        else:
            return hoomdtraj_to_traj(f,
                                     topology,
                                     start=start,
                                     n_frames=n_frames,
                                     stride=stride,
                                     atom_indices=atom_indices)
Пример #25
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The BINPOS format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little wierd, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride,
                                       atom_indices=atom_indices)

        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        if cell_lengths is not None:
            in_units_of(cell_lengths,
                        f.distance_unit,
                        Trajectory._distance_unit,
                        inplace=True)

            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t
Пример #26
0
def load_lammpstrj(filename,
                   top=None,
                   stride=None,
                   atom_indices=None,
                   frame=None,
                   unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s'.format(type(filename)))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            f.distance_unit == 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(n_frames=1,
                                                    atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(stride=stride,
                                                    atom_indices=atom_indices)

        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
Пример #27
0
def load_hdf5(filename,
              stride=None,
              atom_indices=None,
              frame=None,
              root_uep='/'):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    root_uep : str, default='/'
        The root User Entry Point. This is a group in the HDF5 hierarchy which will be taken as the starting point to
        create the object tree. It can be whatever existing group in the file, named by its HDF5 path. If it does not
        exist, an HDF5ExtError is issued. Use this if you do not want to build the entire object tree, but rather only
        a subtree of it.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print traj
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2, top='topology.pdb')
    >>> print traj2
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_lh5. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename, root_uep=root_uep) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(n_frames=n_frames,
                              stride=stride,
                              atom_indices=atom_indices)
Пример #28
0
def load_netcdf(filename,
                top=None,
                stride=None,
                atom_indices=None,
                frame=None):
    """Load an AMBER NetCDF file. Since the NetCDF format doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz, time, cell_lengths, cell_angles = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            xyz, time, cell_lengths, cell_angles = f.read(
                stride=stride, atom_indices=atom_indices)

        xyz = in_units_of(xyz,
                          f.distance_unit,
                          Trajectory._distance_unit,
                          inplace=True)
        cell_lengths = in_units_of(cell_lengths,
                                   f.distance_unit,
                                   Trajectory._distance_unit,
                                   inplace=True)

    trajectory = Trajectory(xyz=xyz,
                            topology=topology,
                            time=time,
                            unitcell_lengths=cell_lengths,
                            unitcell_angles=cell_angles)
    return trajectory
Пример #29
0
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
        
    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print pdb
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        time *= frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords,
                      time=time,
                      topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
Пример #30
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read data from a molecular dynamics trajectory in the GROMACS GRO
        format.

        Parameters
        ----------
        n_frames : int, optional
            If n_frames is not None, the next n_frames of data from the file
            will be read. Otherwise, all of the frames in the file will be read.
        stride : int, optional
            If stride is not None, read only every stride-th frame from disk.
        atom_indices : np.ndarray, dtype=int, optional
            The specific indices of the atoms you'd like to retrieve. If not
            supplied, all of the atoms will be retrieved.

        Returns
        -------
        coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms, in units of nanometers.
        time : np.ndarray, None
            The time corresponding to each frame, in units of picoseconds, or
            None if no time information is present in the trajectory.
        unitcell_vectors : np.ndarray, shape=(n_frames, 3, 3)
            The box vectors in each frame, in units of nanometers
        """
        if not self._open:
            raise ValueError('I/O operation on closed file')
        if not self._mode == 'r':
            raise ValueError('file not opened for reading')

        coordinates = []
        unitcell_vectors = []
        time = []
        contains_time = True

        atom_indices = cast_indices(atom_indices)
        atom_slice = slice(None) if atom_indices is None else atom_indices

        if n_frames is None:
            frameiter = itertools.count()
        else:
            frameiter = range(n_frames)

        for i in frameiter:
            try:
                frame_xyz, frame_box, frame_time = self._read_frame()
                contains_time = contains_time and (frame_time is not None)
                coordinates.append(frame_xyz[atom_slice])
                unitcell_vectors.append(frame_box)
                time.append(frame_time)
            except StopIteration:
                break

        coordinates, unitcell_vectors, time = map(np.array, (coordinates, unitcell_vectors, time))

        if not contains_time:
            time = None
        else:
            time = time[::stride]

        return coordinates[::stride], time, unitcell_vectors[::stride]
Пример #31
0
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.XYZTrajectoryFile :  Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s'.format(type(filename)))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    return t
Пример #32
0
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.XYZTrajectoryFile :  Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s'.format(type(filename)))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(topology, n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
Пример #33
0
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
        
    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print pdb
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)
    
    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        time *= frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
Пример #34
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The BINPOS format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little wierd, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
            'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        if cell_lengths is not None:
            in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t
Пример #35
0
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s'.format(type(filename)))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            f.distance_unit == 'angstroms'
        else:
            raise ValueError('Unsupported unit set specified: {0}.'.format(unit_set))
        if frame is not None:
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None

        return f.read_as_traj(topology, n_frames=n_frames, stride=stride, atom_indices=atom_indices)
Пример #36
0
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    top : {str, Trajectory, Topology}
        The .arc format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little weird, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
#   if top is None:
#       raise ValueError('"top" argument is required for load_arc')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as f:
        xyz, abc, ang = f.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(abc, f.distance_unit, Trajectory._distance_unit, inplace=True)
        if top is None:
            topology = f.topology
        else:
            topology = _parse_topology(top)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if stride is not None:
        # if we loaded with a stride, the Trajectories's time field should
        # respect that
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time,
                   unitcell_lengths=abc,
                   unitcell_angles=ang)
    return t