Пример #1
0
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, with coordinates and unit cell vectors
        converted to the Trajectory distance unit.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if atom_indices is not None:
            # The coordinates read below are restricted to atom_indices, so the
            # topology has to be restricted the same way or the two disagree.
            topology = topology.subset(atom_indices)

        if frame is not None:
            # Single-frame mode: jump to the frame and read exactly one.
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, f.distance_unit, Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Пример #2
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a gro file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file. If
            no frames were read, a zero-frame trajectory is returned.
        """
        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Forward every reading option, including n_frames, to the raw reader.
        coordinates, time, unitcell_vectors = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)
        if len(coordinates) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

        coordinates = in_units_of(coordinates, self.distance_unit, Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, self.distance_unit, Trajectory._distance_unit, inplace=True)

        traj = Trajectory(xyz=coordinates, topology=topology, time=time)
        traj.unitcell_vectors = unitcell_vectors
        return traj
Пример #3
0
    def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
        """Read frames from a lammpstrj file and wrap them in a Trajectory.

        Parameters
        ----------
        topology : Topology
            The system topology
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. The topology is reduced to the same subset.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file. A
            zero-frame trajectory is returned when nothing was read.

        See Also
        --------
        read : Returns the raw data from the file

        Notes
        -----
        If coordinates are specified in more than one style, the first complete
        trio of x/y/z coordinates will be read in according to the following
        order:
            1) x,y,z (unscaled coordinates)
            2) xs,ys,zs (scaled atom coordinates)
            3) xu,yu,zu (unwrapped atom coordinates)
            4) xsu,ysu,zsu (scaled unwrapped atom coordinates)

        E.g., if the file contains x, y, z, xs, ys, zs then x, y, z will be used.
              if the file contains x, y, xs, ys, zs then xs, ys, zs will be used.
        """
        from mdtraj.core.trajectory import Trajectory

        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Capture the file position before reading; it anchors the time axis.
        first_frame = int(self._frame_index)
        xyz, cell_lengths, cell_angles = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)

        if len(xyz) == 0:
            no_frames = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=no_frames, topology=topology)

        # Both distance-valued arrays are rescaled in place.
        for distances in (xyz, cell_lengths):
            in_units_of(distances, self.distance_unit, Trajectory._distance_unit, inplace=True)

        step = 1 if stride is None else stride
        time = (step * np.arange(len(xyz))) + first_frame

        traj = Trajectory(xyz=xyz, topology=topology, time=time)
        traj.unitcell_lengths = cell_lengths
        traj.unitcell_angles = cell_angles
        return traj
Пример #4
0
    def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
        """Read frames from an mdcrd file and wrap them in a Trajectory.

        Parameters
        ----------
        topology : Topology
            The system topology
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. The topology is reduced to the same subset.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file. A
            zero-frame trajectory is returned when nothing was read.
        """
        from mdtraj.core.trajectory import Trajectory

        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Capture the file position before reading; it anchors the time axis.
        first_frame = int(self._frame_index)
        xyz, cell_lengths = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)

        if len(xyz) == 0:
            no_frames = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=no_frames, topology=topology)

        for distances in (xyz, cell_lengths):
            in_units_of(distances, self.distance_unit, Trajectory._distance_unit, inplace=True)

        # When box lengths are present, the box is assumed to be rectilinear.
        cell_angles = None if cell_lengths is None else 90.0 * np.ones_like(cell_lengths)

        step = 1 if stride is None else stride
        time = (step * np.arange(len(xyz))) + first_frame

        traj = Trajectory(xyz=xyz, topology=topology, time=time)
        traj.unitcell_lengths = cell_lengths
        traj.unitcell_angles = cell_angles
        return traj
Пример #5
0
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information that
    is not read by this method, including forces, energies, and velocities.
    Here, we just read the positions and the box vectors.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either the
        path to a pdb file, a trajectory, or a topology to supply this information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Unit cell vectors
        are set only when the file has a ``PeriodicBoxVectors`` element.
    """
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the
    # C accelerator automatically when it is available.
    import xml.etree.ElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)

    root = etree.parse(filename).getroot()

    # One (x, y, z) tuple per child of <Positions>, later turned into an array.
    positions = [(float(position.attrib['x']),
                  float(position.attrib['y']),
                  float(position.attrib['z']))
                 for position in root.find('Positions')]

    traj = Trajectory(xyz=np.array(positions), topology=topology)

    vectors = root.find('PeriodicBoxVectors')
    if vectors is not None:
        box = []
        for name in ['A', 'B', 'C']:
            vector = vectors.find(name)
            box.append((float(vector.attrib['x']),
                        float(vector.attrib['y']),
                        float(vector.attrib['z'])))
        # A single frame, hence the leading dimension of 1.
        traj.unitcell_vectors = np.array(box).reshape(1, 3, 3)

    return traj
Пример #6
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a gro file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file. If
            no frames were read, a zero-frame trajectory is returned.
        """
        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Forward every reading option, including n_frames, to the raw reader.
        coordinates, time, unitcell_vectors = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)
        if len(coordinates) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                              topology=topology)

        coordinates = in_units_of(coordinates,
                                  self.distance_unit,
                                  Trajectory._distance_unit,
                                  inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors,
                                       self.distance_unit,
                                       Trajectory._distance_unit,
                                       inplace=True)

        traj = Trajectory(xyz=coordinates, topology=topology, time=time)
        traj.unitcell_vectors = unitcell_vectors
        return traj
Пример #7
0
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, with coordinates and unit cell vectors
        converted to the Trajectory distance unit.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if atom_indices is not None:
            # The coordinates read below are restricted to atom_indices, so the
            # topology has to be restricted the same way or the two disagree.
            topology = topology.subset(atom_indices)

        if frame is not None:
            # Single-frame mode: jump to the frame and read exactly one.
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates,
                                  f.distance_unit,
                                  Trajectory._distance_unit,
                                  inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors,
                                       f.distance_unit,
                                       Trajectory._distance_unit,
                                       inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Пример #8
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from the HDF5 file

        Parameters
        ----------
        n_frames : {int, None}
            The number of frames to read. If not supplied, all of the
            remaining frames will be read.
        stride : {int, None}
            By default all of the frames will be read, but you can pass this
            flag to read a subset of of the data by grabbing only every
            `stride`-th frame from disk.
        atom_indices : {array_like, None}
            By default all of the atoms will be read, but you can pass this
            flag to read only a subset of the atoms. The returned trajectory's
            topology is reduced to the same subset.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file. If
            no frames were read, a zero-frame trajectory is returned.
        """
        _check_mode(self.mode, ('r',))

        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        data = self.read(n_frames=n_frames, stride=stride, atom_indices=atom_indices)
        # Test emptiness on the coordinates array. `data` is a record accessed
        # by field below, and the length of such a record need not equal the
        # number of frames that were read.
        if len(data.coordinates) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

        # Rescale the distance-valued fields in place.
        in_units_of(data.coordinates, self.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(data.cell_lengths, self.distance_unit, Trajectory._distance_unit, inplace=True)

        return Trajectory(xyz=data.coordinates, topology=topology, time=data.time,
                          unitcell_lengths=data.cell_lengths,
                          unitcell_angles=data.cell_angles)
Пример #9
0
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file as a trajectory.

    The file carries no topology, so one has to be supplied through ``top``.

    Parameters
    ----------
    filename : str
        name of the AMBER restart file
    top : {str, Trajectory, Topology}
        Pass in either the path to a file containing topology information (e.g.,
        a PDB, an AMBER prmtop, or certain types of Trajectory objects) to
        supply the necessary topology information that is not present in these
        files
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file. The topology is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    selection = cast_indices(atom_indices)
    if selection is not None:
        topology = topology.subset(selection)

    with AmberRestartFile(filename) as restart:
        xyz, time, cell_lengths, cell_angles = restart.read(atom_indices=selection)
        # Coordinates and box lengths are both distances and share one conversion.
        xyz = in_units_of(
            xyz, restart.distance_unit, Trajectory._distance_unit, inplace=True)
        cell_lengths = in_units_of(
            cell_lengths, restart.distance_unit, Trajectory._distance_unit, inplace=True)

    return Trajectory(
        xyz=xyz,
        topology=topology,
        time=time,
        unitcell_lengths=cell_lengths,
        unitcell_angles=cell_angles,
    )
Пример #10
0
    def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
        """Read frames from an XYZ file and wrap them in a Trajectory.

        Parameters
        ----------
        topology : Topology
            The system topology
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. The topology is reduced to the same subset.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file. A
            zero-frame trajectory is returned when nothing was read.
        """
        from mdtraj.core.trajectory import Trajectory

        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Capture the file position before reading; it anchors the time axis.
        first_frame = int(self._frame_index)
        xyz = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)

        if len(xyz) == 0:
            no_frames = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=no_frames, topology=topology)

        in_units_of(xyz, self.distance_unit, Trajectory._distance_unit, inplace=True)

        step = 1 if stride is None else stride
        time = (step * np.arange(len(xyz))) + first_frame
        return Trajectory(xyz=xyz, topology=topology, time=time)
Пример #11
0
def create_water_topology_on_disc(n):
    """Write a PDB file holding ``n`` three-atom (H, O, H) residues.

    All residues are placed in a single chain and every coordinate is zero.

    Parameters
    ----------
    n : int
        Number of residues to create.

    Returns
    -------
    topfile : str
        Path of the PDB file that was written. The file is not removed here;
        cleaning it up is left to the caller.
    """
    # Create the file for real instead of only picking a name: tempfile.mktemp
    # is deprecated because another process can claim the name in between.
    # NOTE(review): assumes save_pdb overwrites an existing file -- confirm.
    with tempfile.NamedTemporaryFile(suffix='.pdb', delete=False) as handle:
        topfile = handle.name

    top = Topology()
    chain = top.add_chain()

    for i in range(n):
        res = top.add_residue('r%i' % i, chain)
        h1 = top.add_atom('H', hydrogen, res)
        o = top.add_atom('O', oxygen, res)
        h2 = top.add_atom('H', hydrogen, res)
        top.add_bond(h1, o)
        top.add_bond(h2, o)

    xyz = np.zeros((n * 3, 3))
    Trajectory(xyz, top).save_pdb(topfile)
    return topfile
Пример #12
0
def frames_from_files(files,
                      top,
                      frames,
                      chunksize=1000,
                      stride=1,
                      verbose=False,
                      copy_not_join=None):
    """Collect selected frames from several trajectory files into one Trajectory.

    Parameters
    ----------
    files : passed unchanged to ``pyemma.coordinates.source``
    top : topology-like
        Converted with ``_enforce_top`` before use.
    frames : array_like of int, shape (n, 2)
        Column 0 is the trajectory (file) index and column 1 the frame index
        within that trajectory. The caller's array is not modified.
    chunksize : int, default=1000
        Chunk size handed to the reader's iterator.
    stride : int, default=1
        If not 1, the frame indices in ``frames`` are multiplied by this value
        before the frames are looked up.
    verbose : bool, default=False
        Log a message when a stride other than 1 is applied.
    copy_not_join : optional
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    Trajectory
        The requested frames, in the same order as the rows of ``frames``.

    Raises
    ------
    ValueError
        If a requested frame index lies beyond the end of its trajectory.
    """
    from pyemma.coordinates import source
    # Enforce topology to be a md.Topology object
    top = _enforce_top(top)
    reader = source(files, top=top)
    stride = int(stride)

    # Work on a private copy so the stride scaling below never leaks into the
    # array owned by the caller.
    frames = np.array(frames)

    if stride != 1:
        frames[:, 1] *= stride
        if verbose:
            log.info('A stride value of = %u was parsed, '
                     'interpreting "indexes" accordingly.' % stride)

    # sort by file and frame index
    sort_inds = np.lexsort((frames[:, 1], frames[:, 0]))
    sorted_inds = frames[sort_inds]
    assert len(sorted_inds) == len(frames)

    for u in np.unique(sorted_inds[:, 0]):
        # Largest frame index requested from trajectory u: pick the rows of
        # that trajectory, then look only at the frame column.
        largest_ind_in_traj = np.max(sorted_inds[sorted_inds[:, 0] == u, 1])
        traj_length = reader.trajectory_length(u)
        # Frame indices are zero-based, so an index equal to the length is
        # already past the last frame.
        if largest_ind_in_traj >= traj_length:
            raise ValueError(
                "largest specified index (%i * stride=%i * %i=%i) "
                "is larger than trajectory length '%s' = %i" % (
                    largest_ind_in_traj // stride, largest_ind_in_traj // stride,
                    stride, largest_ind_in_traj, reader.filenames[u],
                    traj_length))

    collected_frames = []
    with reader.iterator(chunk=chunksize,
                         stride=sorted_inds,
                         return_trajindex=False) as it:
        for x in it:
            collected_frames.append(x)

    collected_frames = np.vstack(collected_frames)
    # Undo the sort so the output follows the order of the requested rows.
    collected_frames = collected_frames[sort_inds.argsort()]
    collected_frames = collected_frames.reshape(-1, top.n_atoms, 3)

    return Trajectory(collected_frames, top)
Пример #13
0
def to_mdtraj_Trajectory(item,
                         atom_indices='all',
                         coordinates=None,
                         box=None,
                         check=True):
    """Build an mdtraj Trajectory from ``item`` and ``coordinates``.

    When ``check`` is true the arguments are first passed through the
    ``digest_*`` helpers. ``box`` is digested but not otherwise used here.
    """
    if check:
        digest_item(item, 'mdtraj.Topology')
        atom_indices = digest_atom_indices(atom_indices)
        coordinates = digest_coordinates(coordinates)
        box = digest_box(box)

    from mdtraj.core.trajectory import Trajectory
    from . import extract

    # NOTE(review): the extracted item is discarded and the Trajectory is built
    # from the full `item`, so atom_indices has no effect on the result.
    # Confirm whether the extracted topology was meant to be used instead.
    extract(item, atom_indices=atom_indices, check=False)

    return Trajectory(coordinates, item)
Пример #14
0
    def read_as_traj(self, topology, atom_indices=None):
        """Read an AMBER ASCII restart file and wrap it in a Trajectory.

        Parameters
        ----------
        topology : Topology
            The system topology
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. The topology is reduced to the same subset.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object with 1 frame created from the file.
        """
        from mdtraj.core.trajectory import Trajectory

        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        xyz, time, cell_lengths, cell_angles = self.read(atom_indices=atom_indices)

        # Coordinates and box lengths are both distances and share one conversion.
        xyz = in_units_of(
            xyz, self.distance_unit, Trajectory._distance_unit, inplace=True)
        cell_lengths = in_units_of(
            cell_lengths, self.distance_unit, Trajectory._distance_unit, inplace=True)

        traj = Trajectory(
            xyz=xyz,
            topology=topology,
            time=time,
            unitcell_lengths=cell_lengths,
            unitcell_angles=cell_angles,
        )
        return traj
Пример #15
0
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : str
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        The resulting single-frame trajectory, as an md.Trajectory object.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology, Single, Double, Triple, Aromatic, Amide

    def _sybyl_element(atype):
        # Dotted sybyl types carry the element before the dot ("C.3" -> "C",
        # "Co.oh" -> "Co"). Undotted types fall back to the first letter.
        if "." in atype:
            return atype.split(".")[0]
        return atype[0]

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # Figure out element names

    # IF this is a GAFF mol2, this line should work without issues
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, there should be NAN (null) values
    if atoms_mdtraj.element.isnull().any():
        atoms_mdtraj["element"] = atoms.atype.apply(_sybyl_element)

    # Everything is placed in a single residue and a single chain.
    atoms_mdtraj["resSeq"] = np.ones(len(atoms), 'int')
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bond_type_map = {
        '1': Single,
        '2': Double,
        '3': Triple,
        'am': Amide,
        'ar': Aromatic
    }
    if bonds is not None:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset
        # Create the bond augment information
        n_bonds = bonds_mdtraj.shape[0]
        bond_augment = np.zeros([n_bonds, 2], dtype=float)
        # Add bond type information. The column may have been parsed as
        # integers when every bond is numeric, so normalise the key to str.
        bond_augment[:, 0] = [float(bond_type_map[str(bond_value)])
                              for bond_value in bonds["bond_type"].values]
        # Bond "order" is not known from Mol2 files; the column stays at the
        # zeros it was initialised with.
        # Augment array, dtype is cast to minimal representation of float
        bonds_mdtraj = np.append(bonds_mdtraj, bond_augment, axis=-1)
    else:
        bonds_mdtraj = None

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    # Wrap in an outer list to get a leading frame axis of length 1.
    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
Пример #16
0
def load_lammpstrj(filename,
                   top=None,
                   stride=None,
                   atom_indices=None,
                   frame=None,
                   unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is not supplied or ``unit_set`` is not supported.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assign the unit; a bare comparison here would have no effect.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(n_frames=1,
                                                    atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(stride=stride,
                                                    atom_indices=atom_indices)

        # Box lengths are distances too and must end up in the same unit as
        # the coordinates.
        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(cell_lengths,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

    # The file provides no times here; frame numbers are used instead.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
Пример #17
0
def load_gsd(filename,
             top=None,
             start=None,
             n_frames=None,
             stride=None,
             atom_indices=None,
             frame=None):
    """Load a GSD trajectory file.

    Parameters
    ----------
    filename : path-like
        Path of GSD trajectory file.
    top : {path-like, Trajectory, Topology}, None
        A pdb file, a trajectory, or a topology to supply topology information
        If None, topology information will be parsed from the GSD file
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``start``, ``n_frames`` and ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string or path-like object.
    """
    from mdtraj.core.trajectory import Trajectory, _parse_topology
    import gsd.hoomd

    if not isinstance(filename, (string_types, os.PathLike)):
        raise TypeError('filename must be of type path-like for load_gsd. '
                        'you supplied %s' % type(filename))

    if top is not None:
        topology = _parse_topology(top)
    else:
        topology = load_gsd_topology(filename)
    atom_indices = cast_indices(atom_indices)

    with gsd.hoomd.open(filename, 'rb') as f:
        if frame is not None:
            # Single-frame mode: convert just the requested snapshot.
            xyz, vectors, time = read_snapshot(frame,
                                               f[frame],
                                               topology,
                                               atom_indices=atom_indices)
            t = Trajectory(xyz=np.array(xyz),
                           topology=topology,
                           time=np.array([time]))
            t.unitcell_vectors = np.reshape(vectors, (-1, 3, 3))
            return t

        else:
            return hoomdtraj_to_traj(f,
                                     topology,
                                     start=start,
                                     n_frames=n_frames,
                                     stride=stride,
                                     atom_indices=atom_indices)
Пример #18
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print(traj)
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2)
    >>> print(traj2)
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as h5:
        if frame is None:
            # Whole-file read, optionally strided.
            data = h5.read(stride=stride, atom_indices=atom_indices)
        else:
            # Single-frame read: position the file first, stride is unused.
            h5.seek(frame)
            data = h5.read(n_frames=1, atom_indices=atom_indices)

        topology = h5.topology

        # Rescale positions and box lengths in place to the Trajectory unit.
        for lengths in (data.coordinates, data.cell_lengths):
            in_units_of(lengths,
                        h5.distance_unit,
                        Trajectory._distance_unit,
                        inplace=True)

        # Keep the topology consistent with the atoms that were actually read.
        if atom_indices is not None:
            topology = h5.topology.subset(atom_indices)

    return Trajectory(xyz=data.coordinates,
                      topology=topology,
                      time=data.time,
                      unitcell_lengths=data.cell_lengths,
                      unitcell_angles=data.cell_angles)
Пример #19
0
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argument is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with corresponding
        Topology.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    topology = Topology()
    config = cElementTree.parse(filename).getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # be generous for case of box attributes
    box.attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    lx, ly, lz = [float(box.attrib[edge]) for edge in ('lx', 'ly', 'lz')]
    try:
        xy, xz, yz = [float(box.attrib[tilt]) for tilt in ('xy', 'xz', 'yz')]
    except (ValueError, KeyError):
        # Any missing or unparsable tilt factor resets all three to zero.
        xy = xz = yz = 0.0
    unitcell_vectors = np.array([[
        [lx, xy * ly, xz * lz],
        [0.0, ly, yz * lz],
        [0.0, 0.0, lz],
    ]])

    # The first line of each text node is skipped; every remaining line is
    # one atom.
    positions = []
    for line in position.text.splitlines()[1:]:
        fields = line.split()
        positions.append(
            (float(fields[0]), float(fields[1]), float(fields[2])))

    types = {}
    for idx, line in enumerate(atom_type.text.splitlines()[1:]):
        types[idx] = str(line.split()[0])

    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type (first column); keep only the two atom indices
    if hasattr(bond, 'text'):
        bonds = []
        for line in bond.text.splitlines()[1:]:
            fields = line.split()
            bonds.append((int(fields[1]), int(fields[2])))
        chains = _find_chains(bonds)
    else:
        bonds, chains = [], []

    # Atoms are added in file order. Each bonded group shares one residue,
    # keyed by the first index of the group; unbonded atoms get their own.
    residue_by_group = {}
    for idx in range(len(types)):
        group = _in_chain(chains, idx)
        if group is None:
            residue = topology.add_residue('A', topology.add_chain())
        else:
            if group[0] not in residue_by_group:
                new_chain = topology.add_chain()
                residue_by_group[group[0]] = topology.add_residue('A',
                                                                  new_chain)
            residue = residue_by_group[group[0]]
        topology.add_atom(types[idx], virtual_site, residue)

    for pair in bonds:
        topology.add_bond(topology.atom(pair[0]), topology.atom(pair[1]))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors
    return traj
Пример #20
0
    def read_as_traj(self,
                     topology,
                     n_frames=None,
                     stride=None,
                     atom_indices=None):
        """Read a trajectory from a lammpstrj file

        Parameters
        ----------
        topology : Topology
            The system topology
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.

        See Also
        --------
        read : Returns the raw data from the file

        Notes
        -----
        If coordinates are specified in more than one style, the first complete
        trio of x/y/z coordinates will be read in according to the following
        order:
            1) x,y,z (unscaled coordinates)
            2) xs,ys,zs (scaled atom coordinates)
            3) xu,yu,zu (unwrapped atom coordinates)
            4) xsu,ysu,zsu (scaled unwrapped atom coordinates)

        E.g., if the file contains x, y, z, xs, ys, zs then x, y, z will be used.
              if the file contains x, y, xs, ys, zs then xs, ys, zs will be used.
        """
        from mdtraj.core.trajectory import Trajectory

        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Captured before the read so the time axis is offset by the frame
        # index held on entry.
        first_frame = int(self._frame_index)
        xyz, cell_lengths, cell_angles = self.read(n_frames=n_frames,
                                                   stride=stride,
                                                   atom_indices=atom_indices)

        # Nothing was read: hand back an empty trajectory with the right
        # atom count.
        if len(xyz) == 0:
            empty_xyz = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=empty_xyz, topology=topology)

        # Convert coordinates and box lengths in place.
        for lengths in (xyz, cell_lengths):
            in_units_of(lengths,
                        self.distance_unit,
                        Trajectory._distance_unit,
                        inplace=True)

        step = 1 if stride is None else stride
        time = (step * np.arange(len(xyz))) + first_frame

        traj = Trajectory(xyz=xyz, topology=topology, time=time)
        traj.unitcell_lengths = cell_lengths
        traj.unitcell_angles = cell_angles
        return traj
Пример #21
0
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None or ``unit_set`` is not a supported unit set.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        # %-interpolation: the message uses a printf-style placeholder, so
        # str.format() would leave the literal "%s" in the text.
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assignment, not comparison: the 'real' unit set stores
            # distances in angstroms.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError('Unsupported unit set specified: {0}.'.format(unit_set))

        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        # The box lengths are distances too and must be rescaled together
        # with the coordinates, as read_as_traj does.
        if cell_lengths is not None:
            in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    # The file carries no time information here; use frame indices instead.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
Пример #22
0
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : path-like
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    KeyError
        If no valid element can be inferred for some atom, neither from its
        atom type nor from its atom name.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology, Single, Double, Triple, Aromatic, Amide
    from mdtraj.core.element import Element

    # Defined unconditionally: it is needed both for the sybyl fallback and
    # for the atom-name fallback further down, whichever branch runs.
    def to_element(x):
        # Argument x may be passed as a list with only one element.
        if isinstance(x, (list, tuple)):
            assert len(x) == 1
            x = x[0]

        if '.' in x:  # orbital-hybridizations in SYBL
            return x.split('.')[0]
        try:
            # check if we can convert the whole str to an Element,
            # if not, we only pass the first letter.
            Element.getBySymbol(x)
        except KeyError:
            return x[0]
        return x

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # Figure out 1 letter element names

    # IF this is a GAFF mol2, this line should work without issues
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, there should be NAN (null) values
    if atoms_mdtraj.element.isnull().any():
        # If this is a sybyl mol2, I think this works generally.
        atoms_mdtraj["element"] = atoms.atype.apply(to_element)

    # Check if elements inferred from atoms.atype are valid
    # If not, try to infer elements from atoms.name
    try:
        atoms_mdtraj['element'].apply(elem.get_by_symbol)
    except KeyError:
        try:
            atoms_mdtraj["element"] = atoms.name.apply(to_element)
            atoms_mdtraj['element'].apply(elem.get_by_symbol)
        except KeyError:
            raise KeyError('Invalid element passed to atoms DataFrame')

    atoms_mdtraj['resSeq'] = atoms['code']
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bond_type_map = {
        '1': Single,
        '2': Double,
        '3': Triple,
        'am': Amide,
        'ar': Aromatic
    }
    if bonds is not None:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset
        # Create the bond augment information
        n_bonds = bonds_mdtraj.shape[0]
        bond_augment = np.zeros([n_bonds, 2], dtype=float)
        # Add bond type information
        bond_augment[:, 0] = [
            float(bond_type_map[str(bond_value)])
            for bond_value in bonds["bond_type"].values
        ]
        # Add Bond "order" information, this is not known from Mol2 files
        bond_augment[:, 1] = 0.0
        # Augment array, dtype is cast to minimal representation of float
        bonds_mdtraj = np.append(bonds_mdtraj, bond_augment, axis=-1)
    else:
        bonds_mdtraj = None

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
Пример #23
0
    def next_chunk(self, lag=0):
        """
        Get the next chunk from the current trajectory iterator and map it
        through the featurizer.

        If lag > 0, a second (time-lagged) iterator ``self.mditer2`` is opened
        via ``self._open_time_lagged()`` the first time this method is called
        with a positive lag while ``self.curr_lag`` is 0. When the lagged
        iterator is exhausted, zero-filled frames are substituted.

        A ``StopIteration`` raised by ``self.mditer.next()`` is not caught
        here and propagates to the caller.

        :param lag: lag time in frames; 0 disables the time-lagged output
        :return: a feature mapped vector X, or (X, Y) if lag > 0
        """
        chunk = self.mditer.next()

        if lag > 0:
            if self.curr_lag == 0:
                # lag time changed, so open lagged iterator
                self.curr_lag = lag
                self._open_time_lagged()
                # prime last_advanced_chunk with the first lagged chunk
                try:
                    self.last_advanced_chunk = self.mditer2.next()
                except StopIteration:
                    log.debug(
                        "No more data in mditer2 during last_adv_chunk assignment. Padding with zeros"
                    )
                    lagged_xyz = np.zeros_like(chunk.xyz)
                    self.last_advanced_chunk = Trajectory(
                        lagged_xyz, chunk.topology)
            try:
                adv_chunk = self.mditer2.next()
            except StopIteration:
                # no more data available in mditer2, so we substitute a
                # zero-filled chunk with the shape of the current chunk
                log.debug("No more data in mditer2. Padding with zeros."
                          " Data avail: %i" % chunk.xyz.shape[0])
                lagged_xyz = np.zeros_like(chunk.xyz)
                adv_chunk = Trajectory(lagged_xyz, chunk.topology)

            # build time lagged Trajectory by concatenating
            # last adv chunk and advance chunk
            # i is the number of leading frames of the merged data to drop
            # NOTE(review): presumably skip_n is the number of whole chunks the
            # lagged iterator was advanced by -- confirm in _open_time_lagged
            i = lag - (self.chunksize * self.skip_n)
            # zero rows needed so that at least chunk.xyz.shape[0] frames
            # remain after dropping the first i frames
            padding_length = max(
                0, chunk.xyz.shape[0] -
                (self.last_advanced_chunk.xyz.shape[0] - i) -
                adv_chunk.xyz.shape[0])
            padding = np.zeros(
                (padding_length, chunk.xyz.shape[1], chunk.xyz.shape[2]))
            merged = Trajectory(
                np.concatenate(
                    (self.last_advanced_chunk.xyz, adv_chunk.xyz, padding)),
                chunk.topology)
            # assert merged.xyz.shape[0] >= chunk.xyz.shape[0]
            # drop the first i frames and truncate to the current chunk length
            chunk_lagged = merged[i:][:chunk.xyz.shape[0]]

            # remember last advanced chunk
            self.last_advanced_chunk = adv_chunk

        # count the frames consumed from the current trajectory
        self.t += chunk.xyz.shape[0]

        # switch to the next trajectory file once position + lag reaches the
        # length of the current one, unless this is already the last file
        if (self.t + lag >= self.trajectory_length(self.curr_itraj)
                and self.curr_itraj < len(self.trajfiles) - 1):
            log.debug('closing current trajectory "%s"' %
                      self.trajfiles[self.curr_itraj])
            self.mditer.close()
            self.t = 0
            self.curr_itraj += 1
            self.mditer = self._create_iter(self.trajfiles[self.curr_itraj])
            # we open self.mditer2 only if requested due lag parameter!
            # resetting curr_lag makes the next lagged call reopen it
            self.curr_lag = 0

        # map data
        if lag == 0:
            return self.featurizer.map(chunk)
        else:
            X = self.featurizer.map(chunk)
            Y = self.featurizer.map(chunk_lagged)
            return X, Y
Пример #24
0
def load_netcdf(filename,
                top=None,
                stride=None,
                atom_indices=None,
                frame=None):
    """Load an AMBER NetCDF file.

    The NetCDF format doesn't contain information to specify the topology,
    so a topology has to be supplied through ``top``.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        # Restrict the topology to the atoms that will be read.
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as nc:
        if frame is None:
            read_kwargs = {'stride': stride}
        else:
            # Single-frame mode: jump to the frame and read exactly one.
            nc.seek(frame)
            read_kwargs = {'n_frames': 1}
        xyz, time, cell_lengths, cell_angles = nc.read(
            atom_indices=atom_indices, **read_kwargs)

        target_unit = Trajectory._distance_unit
        xyz = in_units_of(xyz, nc.distance_unit, target_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths,
                                   nc.distance_unit,
                                   target_unit,
                                   inplace=True)

    return Trajectory(xyz=xyz,
                      topology=topology,
                      time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
Пример #25
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # `top` is optional in the signature but mandatory in practice. This
    # function is usually reached through the load() dispatcher, where top
    # arrives via **kwargs, so a missing value gets an informative error here.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    # The reader is given the atom count of the full topology.
    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as mdcrd:
        if frame is None:
            xyz, cell_lengths = mdcrd.read(stride=stride,
                                           atom_indices=atom_indices)
        else:
            mdcrd.seek(frame)
            xyz, cell_lengths = mdcrd.read(n_frames=1,
                                           atom_indices=atom_indices)

        target_unit = Trajectory._distance_unit
        in_units_of(xyz, mdcrd.distance_unit, target_unit, inplace=True)
        has_box = cell_lengths is not None
        if has_box:
            in_units_of(cell_lengths, mdcrd.distance_unit, target_unit,
                        inplace=True)
            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Time axis is the frame index, shifted or scaled to match what was read.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    traj = Trajectory(xyz=xyz, topology=topology, time=time)
    if has_box:
        traj.unitcell_lengths = cell_lengths
        traj.unitcell_angles = cell_angles
    return traj
Пример #26
0
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argument is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with corresponding
        Topology. Atoms appear in the topology in the same order as in the
        file, so atom indices match the rows of ``xyz`` and the bond indices.

    Raises
    ------
    ValueError
        If the number of atom types differs from the number of positions.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology
    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    box.attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    # be generous for case of box attributes
    lx = float(box.attrib['lx'])
    ly = float(box.attrib['ly'])
    lz = float(box.attrib['lz'])
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except (ValueError, KeyError):
        # Tilt factors are optional; only a missing or malformed value falls
        # back to an orthorhombic box. Other errors are no longer swallowed.
        xy = 0.0
        xz = 0.0
        yz = 0.0
    unitcell_vectors = np.array([[[lx,  xy*ly, xz*lz],
                                  [0.0, ly,    yz*lz],
                                  [0.0, 0.0,   lz   ]]])

    positions, types = [], {}
    for pos in position.text.splitlines()[1:]:
        fields = pos.split()
        positions.append((float(fields[0]),
                          float(fields[1]),
                          float(fields[2])))

    for idx, atom_name in enumerate(atom_type.text.splitlines()[1:]):
        types[idx] = str(atom_name.split()[0])
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type; a file without a <bond> node has no bonds at all
    if bond is not None and bond.text is not None:
        bonds = []
        for b in bond.text.splitlines()[1:]:
            fields = b.split()
            bonds.append((int(fields[1]), int(fields[2])))
        chains = _find_chains(bonds)
    else:
        bonds = []
        chains = []

    # Map every bonded atom to the position of its chain in `chains`.
    chain_of_atom = {}
    for chain_idx, chain in enumerate(chains):
        for atom in chain:
            chain_of_atom.setdefault(atom, chain_idx)

    # Add atoms in FILE order so that topology atom indices agree with the
    # rows of `positions` and with the indices used by the bond list.
    # Each bonded chain is one residue; every unbonded atom (ion) gets its
    # own chain and residue.
    residue_of_chain = {}
    for idx in range(len(types)):
        chain_idx = chain_of_atom.get(idx)
        if chain_idx is None:
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
        elif chain_idx in residue_of_chain:
            t_residue = residue_of_chain[chain_idx]
        else:
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
            residue_of_chain[chain_idx] = t_residue
        topology.add_atom(types[idx], 'U', t_residue)

    for atom1, atom2 in bonds:
        topology.add_bond(topology.atom(atom1), topology.atom(atom2))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Пример #27
0
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file from disk.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    top : {str, Trajectory, Topology}, optional
        Topology to use for the trajectory: the path to a pdb file, a
        trajectory, or a topology. If None, the topology exposed by the
        opened .arc file is used instead.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # Unlike the other loaders, `top` really is optional here: when it is
    # missing, the topology is taken from the file itself.
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as arc:
        xyz, abc, ang = arc.read(stride=stride, atom_indices=atom_indices)

        # Rescale coordinates and box edge lengths in place.
        for lengths in (xyz, abc):
            in_units_of(lengths,
                        arc.distance_unit,
                        Trajectory._distance_unit,
                        inplace=True)

        topology = arc.topology if top is None else _parse_topology(top)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if stride is not None:
        # if we loaded with a stride, the Trajectory's time field should
        # respect that
        time *= stride

    return Trajectory(xyz=xyz,
                      topology=topology,
                      time=time,
                      unitcell_lengths=abc,
                      unitcell_angles=ang)
Пример #28
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # The signature lets `top` default to None, yet it is required. Callers
    # normally arrive via the load() dispatcher, which forwards top through
    # **kwargs, so the check lives here to produce a clear error message.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as handle:
        if frame is None:
            read_kwargs = {'stride': stride}
        else:
            # Single-frame request: position the reader, then read one frame.
            handle.seek(frame)
            read_kwargs = {'n_frames': 1}
        xyz, cell_lengths = handle.read(atom_indices=atom_indices,
                                        **read_kwargs)

        in_units_of(xyz,
                    handle.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        box_present = cell_lengths is not None
        if box_present:
            in_units_of(cell_lengths,
                        handle.distance_unit,
                        Trajectory._distance_unit,
                        inplace=True)

            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Frame indices stand in for time: offset for a single frame, scaled
    # when a stride was applied.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    result = Trajectory(xyz=xyz, topology=topology, time=time)
    if box_present:
        result.unitcell_lengths = cell_lengths
        result.unitcell_angles = cell_angles
    return result
Пример #29
0
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its ``time``
        array holds frame indices into the file (offset by ``frame`` or
        scaled by ``stride``), since none is read from the file here.

    Raises
    ------
    ValueError
        If ``top`` is not supplied.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile :  Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, it's good
    # because this function is usually called by a dispatch from load(), where
    # top comes from **kwargs. So if it's not supplied, we want to give the
    # user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # %-interpolation: the template uses a printf-style placeholder, so
        # str.format() would leave a literal "%s" in the message.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        # keep the topology in sync with the subset of coordinates read below
        topology = topology.subset(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            # single-frame mode: jump to the frame and read exactly one
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)
        # convert in place from the file's unit to the Trajectory unit
        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

    # Synthesize a time axis from the frame indices that were actually read.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    return t
Пример #30
0
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.  If 0, load all.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame. ``None`` is treated as 1.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    skip : int, default=0
        Number of leading frames to read and discard before the first chunk
        is yielded. For formats without a dedicated reader, this is the index
        of the first frame taken from the fully loaded trajectory.

    Yields
    ------
    trajectory : md.Trajectory
        Successive fragments of the trajectory.

    Raises
    ------
    ValueError
        If the file is a .dcd file and ``stride`` does not evenly divide
        ``chunk``.

    See Also
    --------
    load, load_frame

    Examples
    --------

    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):
    ...     print(chunk)

    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>

    """
    stride = kwargs.get('stride', 1)
    if stride is None:
        # ``stride=None`` is the documented default of the loaders; without
        # this, the modulo below would raise a TypeError.
        stride = 1
    atom_indices = cast_indices(kwargs.get('atom_indices', None))
    if chunk % stride != 0 and filename.endswith('.dcd'):
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))

    if chunk == 0:
        # If chunk was 0 then we want to avoid filetype-specific code in case
        # of undefined behavior in various file parsers.
        yield load(filename, **kwargs)
        return

    # NOTE: generators must finish with ``return``. Raising StopIteration
    # inside a generator body is turned into RuntimeError by PEP 479.
    skip = kwargs.get('skip', 0)

    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')

        with HDF5TrajectoryFile(filename) as f:
            if skip > 0:
                # Read and discard the leading frames. The result is handled
                # the same way as in the main loop below: ``[]`` signals that
                # nothing was left to read.
                skipped = f.read(skip, atom_indices=atom_indices)
                if skipped == [] or len(skipped.coordinates) == 0:
                    return
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if data == []:
                    return
                in_units_of(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    elif filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            ptr = 0
            if skip > 0:
                # The main loop below treats the result of read() as a single
                # coordinate array, so the skip read does the same.
                skipped = f.read(skip, atom_indices=atom_indices)
                if len(skipped) == 0:
                    return
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                # no time is taken from the file here; use frame offsets
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        if atom_indices is not None and topology is not None:
            # keep the topology in sync with the subset of atoms being read
            topology = topology.subset(atom_indices)
        with XTCTrajectoryFile(filename) as f:
            if skip > 0:
                xyz, _, _, _ = f.read(skip)
                if len(xyz) == 0:
                    return
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        if atom_indices is not None and topology is not None:
            # keep the topology in sync with the subset of atoms being read
            topology = topology.subset(atom_indices)
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            if skip > 0:
                xyz, _, _ = f.read(skip, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
            while True:
                # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                # together work like this method, but HDF5/XTC do not.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        # No chunked reader for this format: load everything, then slice.
        log.critical("loading complete traj into mem! This might no be desired.")
        t = load(filename, **kwargs)
        for i in range(skip, len(t), chunk):
            yield t[i:i+chunk]