Пример #1
0
def load_restrt(filename, top=None, atom_indices=None):
    """Read an AMBER ASCII restart/inpcrd file into a trajectory.

    The restart file carries no topology of its own, so one has to be
    supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology information that the restart file lacks:
        the path to a file containing a topology (e.g. a PDB or an AMBER
        prmtop), or an object that already carries one. It is handed to
        ``_parse_topology`` as-is.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file.

    Returns
    -------
    trajectory : md.Trajectory
        The trajectory produced by ``AmberRestartFile.read_as_traj``.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    # Imported at call time rather than at module import time.
    from mdtraj.core.trajectory import _parse_topology

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with AmberRestartFile(filename) as restart:
        traj = restart.read_as_traj(parsed_top, atom_indices=selection)
    return traj
Пример #2
0
def load_restrt(filename, top=None, atom_indices=None):
    """Read an AMBER ASCII restart/inpcrd file into a trajectory.

    The restart file carries no topology of its own, so one has to be
    supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology information that the restart file lacks:
        the path to a file containing a topology (e.g. a PDB or an AMBER
        prmtop), or an object that already carries one. It is handed to
        ``_parse_topology`` as-is.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file.

    Returns
    -------
    trajectory : md.Trajectory
        The trajectory produced by ``AmberRestartFile.read_as_traj``.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    # Imported at call time rather than at module import time.
    from mdtraj.core.trajectory import _parse_topology

    resolved_topology = _parse_topology(top)
    wanted_atoms = cast_indices(atom_indices)

    with AmberRestartFile(filename) as handle:
        result = handle.read_as_traj(resolved_topology,
                                     atom_indices=wanted_atoms)
    return result
Пример #3
0
    def __init__(self,
                 path,
                 mode='r',
                 topology=None,
                 stride=1,
                 atom_indices=None,
                 verbose=False):
        """Set up a read-only dataset over one or more trajectory files.

        Parameters
        ----------
        path : str or list of str
            Either a glob pattern or an explicit list of file names. A
            leading ``~`` is expanded in both cases. Glob matches are
            sorted with ``_keynat``; an explicit list keeps its order.
        mode : str
            Must be ``'r'``; any other value raises ``ValueError``.
        topology : optional
            Topology source handed to ``_parse_topology``. When it is a
            path, a leading ``~`` is expanded first. ``None`` leaves
            ``self._topology`` unset (None).
        stride : int
            Stored on the instance; not used in this method.
        atom_indices : optional
            Stored on the instance; not used in this method.
        verbose : bool
            Stored on the instance; not used in this method.

        Raises
        ------
        ValueError
            If ``mode`` is not ``'r'``.
        """
        if mode != 'r':
            raise ValueError('mode must be "r"')
        self.path = path
        self.topology = topology
        self.stride = stride
        self.atom_indices = atom_indices
        self.verbose = verbose

        if isinstance(path, list):
            self.glob_matches = [expanduser(fn) for fn in path]
        else:
            self.glob_matches = sorted(glob.glob(expanduser(path)),
                                       key=_keynat)

        if topology is None:
            self._topology = None
        else:
            # expanduser only makes sense for paths. It raises TypeError
            # (Python 3) or AttributeError (Python 2) for any other object,
            # which used to abort construction before _parse_topology could
            # look at the value. Fall back to passing the object through.
            # NOTE(review): assumes _parse_topology accepts non-path
            # topology objects -- confirm against its implementation.
            try:
                topology_source = os.path.expanduser(topology)
            except (TypeError, AttributeError):
                topology_source = topology
            self._topology = _parse_topology(topology_source)
Пример #4
0
def load(filename, chunks=10, **kwargs):
    """Load a trajectory lazily, mimicking ``mdtraj.Trajectory.load``.

    The result is a dasktraj ``Trajectory`` built from the chunked data
    returned by ``load_chunks``.

    Parameters
    ----------
    filename : str
        File to load.
    chunks : int
        Number of frames per chunk.
    **kwargs
        ``top`` is popped and used as the topology source; everything else
        is forwarded to ``load_chunks``.

    Returns
    -------
    trajectory : Trajectory
        Trajectory constructed from the chunked data.
    """

    top = kwargs.pop('top', None)
    extension = _get_extension(filename)
    if extension not in _TOPOLOGY_EXTS:
        topology = _parse_topology(top)
    else:
        # Previously ``topology`` was left unbound on this path, which
        # raised NameError further down. Pass the caller's value through.
        # NOTE(review): for these extensions the topology could also be
        # read from ``filename`` itself -- confirm the intended behavior.
        topology = top

    # ``open`` must be a trajectory-file opener whose result supports
    # len(); close it as soon as the frame count is known.
    with open(filename) as traj_file:
        length = len(traj_file)

    # Ceiling division: a partial trailing chunk still counts as a chunk.
    n_chunks, frames_left = divmod(length, chunks)
    if frames_left != 0:
        n_chunks += 1
    data = load_chunks(filename, extension, chunks, range(n_chunks), **kwargs)

    # TODO: use this to construct unitcells
    # unitcell_vectors is removed before the remaining entries are
    # splatted into the constructor, then assigned afterwards.
    uv = data.pop('unitcell_vectors')
    traj = Trajectory(topology=topology, delayed_objects=data, **data)
    if uv is not None:
        traj.unitcell_vectors = uv
    return traj
Пример #5
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read an AMBER mdcrd trajectory file.

    Parameters
    ----------
    filename : path-like
        Path of the AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to
        supply it. Although it defaults to None, it is required.
    stride : int, default=None
        Only read every stride-th frame. Forwarded to ``read_as_traj``.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame
        is requested. If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The trajectory produced by ``MDCRDTrajectoryFile.read_as_traj``.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is neither a string nor an ``os.PathLike``.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # ``top`` is optional in the signature because this function is usually
    # reached through load(), which passes it via **kwargs; checking here
    # gives the user a clear message when it is missing.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    is_path_like = isinstance(filename, (string_types, os.PathLike))
    if not is_path_like:
        raise TypeError('filename must be of type path-like for load_mdcrd. '
                        'you supplied %s' % type(filename))

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, parsed_top.n_atoms) as handle:
        frame_limit = None
        if frame is not None:
            handle.seek(frame)
            frame_limit = 1
        return handle.read_as_traj(parsed_top,
                                   n_frames=frame_limit,
                                   stride=stride,
                                   atom_indices=selection)
Пример #6
0
def load_netcdf(filename,
                top=None,
                stride=None,
                atom_indices=None,
                frame=None):
    """Read an AMBER NetCDF trajectory file.

    The NetCDF format holds no topology, so one has to be supplied
    through ``top``.

    Parameters
    ----------
    filename : str
        Filename of the AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Path to a pdb file, a trajectory, or a topology that supplies the
        missing topology information. Although it defaults to None, it is
        required.
    stride : int, default=None
        Only read every stride-th frame. Forwarded to ``read_as_traj``.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame
        is requested. If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The trajectory produced by ``NetCDFTrajectoryFile.read_as_traj``.

    Raises
    ------
    ValueError
        If ``top`` is None.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    if top is None:
        raise ValueError('"top" argument is required for load_netcdf')

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with NetCDFTrajectoryFile(filename) as handle:
        frame_limit = None
        if frame is not None:
            # Jump to the requested frame and read just that one.
            handle.seek(frame)
            frame_limit = 1

        return handle.read_as_traj(parsed_top,
                                   n_frames=frame_limit,
                                   atom_indices=selection,
                                   stride=stride)
Пример #7
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read an AMBER mdcrd trajectory file.

    Parameters
    ----------
    filename : str
        String filename of the AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to
        supply it. Although it defaults to None, it is required.
    stride : int, default=None
        Only read every stride-th frame. Forwarded to ``read_as_traj``.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame
        is requested. If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The trajectory produced by ``MDCRDTrajectoryFile.read_as_traj``.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # ``top`` is optional in the signature because this function is usually
    # reached through load(), which passes it via **kwargs; checking here
    # gives the user a clear message when it is missing.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    filename_is_string = isinstance(filename, string_types)
    if not filename_is_string:
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, parsed_top.n_atoms) as handle:
        frame_limit = None
        if frame is not None:
            handle.seek(frame)
            frame_limit = 1
        return handle.read_as_traj(parsed_top,
                                   n_frames=frame_limit,
                                   stride=stride,
                                   atom_indices=selection)
Пример #8
0
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read an AMBER NetCDF trajectory file.

    The NetCDF format holds no topology, so one has to be supplied
    through ``top``.

    Parameters
    ----------
    filename : str
        Filename of the AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Path to a pdb file, a trajectory, or a topology that supplies the
        missing topology information.
    stride : int, default=None
        Only read every stride-th frame. Used only when ``frame`` is None.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file, and the topology is reduced to the same subset.
    frame : int, optional
        If given, the file is positioned at this frame and exactly one
        frame is read; ``stride`` is then not passed to the reader.

    Returns
    -------
    trajectory : md.Trajectory
        Trajectory built from the coordinates, times and unit cell read
        from the file, with lengths converted to
        ``Trajectory._distance_unit``.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)
    if selection is not None:
        parsed_top = parsed_top.subset(selection)

    with NetCDFTrajectoryFile(filename) as handle:
        # A single frame and a strided read are mutually exclusive.
        if frame is None:
            read_options = dict(stride=stride)
        else:
            handle.seek(frame)
            read_options = dict(n_frames=1)
        coords, times, box_lengths, box_angles = handle.read(
            atom_indices=selection, **read_options)

        coords = in_units_of(coords, handle.distance_unit,
                             Trajectory._distance_unit, inplace=True)
        box_lengths = in_units_of(box_lengths, handle.distance_unit,
                                  Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=coords, topology=parsed_top, time=times,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
Пример #9
0
def load_restrt(filename, top=None, atom_indices=None):
    """Read an AMBER ASCII restart/inpcrd file into a trajectory.

    The restart file carries no topology of its own, so one has to be
    supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology information that the restart file lacks:
        the path to a file containing a topology (e.g. a PDB or an AMBER
        prmtop), or an object that already carries one.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file, and the topology is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        Trajectory built from the coordinates, times and unit cell read
        from the file, with lengths converted to
        ``Trajectory._distance_unit``.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)
    if selection is not None:
        parsed_top = parsed_top.subset(selection)

    with AmberRestartFile(filename) as handle:
        coords, times, box_lengths, box_angles = handle.read(
            atom_indices=selection)
        # Convert coordinates, then box lengths, from the file's unit to
        # the trajectory's unit.
        coords = in_units_of(coords, handle.distance_unit,
                             Trajectory._distance_unit, inplace=True)
        box_lengths = in_units_of(box_lengths, handle.distance_unit,
                                  Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=coords,
                      topology=parsed_top,
                      time=times,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
Пример #10
0
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read an AMBER NetCDF trajectory file.

    The NetCDF format holds no topology, so one has to be supplied
    through ``top``.

    Parameters
    ----------
    filename : str
        Filename of the AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Path to a pdb file, a trajectory, or a topology that supplies the
        missing topology information. Although it defaults to None, it is
        required.
    stride : int, default=None
        Only read every stride-th frame. Forwarded to ``read_as_traj``.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file.
    frame : int, optional
        If given, the file is positioned at this frame and a single frame
        is requested. If None, the default, no frame limit is passed.

    Returns
    -------
    trajectory : md.Trajectory
        The trajectory produced by ``NetCDFTrajectoryFile.read_as_traj``.

    Raises
    ------
    ValueError
        If ``top`` is None.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    if top is None:
        raise ValueError('"top" argument is required for load_netcdf')

    resolved_topology = _parse_topology(top)
    wanted_atoms = cast_indices(atom_indices)

    with NetCDFTrajectoryFile(filename) as nc_file:
        single_frame = frame is not None
        if single_frame:
            # Jump to the requested frame before reading.
            nc_file.seek(frame)
        n_frames = 1 if single_frame else None

        return nc_file.read_as_traj(resolved_topology,
                                    n_frames=n_frames,
                                    atom_indices=wanted_atoms,
                                    stride=stride)
Пример #11
0
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information
    that is not read by this method, including forces, energies, and
    velocities. Only the positions and, when present, the periodic box
    vectors are read.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its unit cell
        is set only if the file has a ``PeriodicBoxVectors`` element.
    """
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree is the
    # supported module.
    import xml.etree.ElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)

    root = etree.parse(filename).getroot()

    # One (x, y, z) tuple per child of <Positions>, in document order.
    positions = [(float(position.attrib['x']),
                  float(position.attrib['y']),
                  float(position.attrib['z']))
                 for position in root.find('Positions')]

    # Rows A, B, C of the box matrix; skipped entirely when the element is
    # absent instead of failing on None.
    box = None
    vectors = root.find('PeriodicBoxVectors')
    if vectors is not None:
        box = []
        for name in ('A', 'B', 'C'):
            vector = vectors.find(name)
            box.append((float(vector.attrib['x']),
                        float(vector.attrib['y']),
                        float(vector.attrib['z'])))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    if box is not None:
        traj.unitcell_vectors = np.array(box).reshape(1, 3, 3)

    return traj
Пример #12
0
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information
    that is not read by this method, including forces, energies, and
    velocities. Only the positions and, when present, the periodic box
    vectors are read.

    Parameters
    ----------
    filename : path-like
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its unit cell
        is set only if the file has a ``PeriodicBoxVectors`` element.
    """
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree is the
    # supported module.
    import xml.etree.ElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)

    root = etree.parse(filename).getroot()

    # One (x, y, z) tuple per child of <Positions>, in document order.
    positions = [(float(position.attrib['x']),
                  float(position.attrib['y']),
                  float(position.attrib['z']))
                 for position in root.find('Positions')]

    # Rows A, B, C of the box matrix; skipped entirely when the element is
    # absent instead of failing on None.
    box = None
    vectors = root.find('PeriodicBoxVectors')
    if vectors is not None:
        box = []
        for name in ('A', 'B', 'C'):
            vector = vectors.find(name)
            box.append((float(vector.attrib['x']),
                        float(vector.attrib['y']),
                        float(vector.attrib['z'])))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    if box is not None:
        traj.unitcell_vectors = np.array(box).reshape(1, 3, 3)

    return traj
Пример #13
0
def load_restrt(filename, top=None, atom_indices=None):
    """Read an AMBER ASCII restart/inpcrd file into a trajectory.

    The restart file carries no topology of its own, so one has to be
    supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology information that the restart file lacks:
        the path to a file containing a topology (e.g. a PDB or an AMBER
        prmtop), or an object that already carries one.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file, and the topology is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        Trajectory built from the coordinates, times and unit cell read
        from the file, with lengths converted to
        ``Trajectory._distance_unit``.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    resolved_topology = _parse_topology(top)
    wanted_atoms = cast_indices(atom_indices)
    if wanted_atoms is not None:
        resolved_topology = resolved_topology.subset(wanted_atoms)

    with AmberRestartFile(filename) as restart:
        raw = restart.read(atom_indices=wanted_atoms)
        positions, timestamps, lengths, angles = raw
        # Convert coordinates, then box lengths, from the file's unit to
        # the trajectory's unit.
        positions = in_units_of(positions, restart.distance_unit,
                                Trajectory._distance_unit, inplace=True)
        lengths = in_units_of(lengths, restart.distance_unit,
                              Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=positions, topology=resolved_topology,
                      time=timestamps, unitcell_lengths=lengths,
                      unitcell_angles=angles)
Пример #14
0
    def __init__(self, path, mode='r', topology=None, stride=1,
                 atom_indices=None, verbose=False):
        """Set up a read-only dataset over one or more trajectory files.

        Parameters
        ----------
        path : str or list of str
            Either a glob pattern or an explicit list of file names. A
            leading ``~`` is expanded in both cases. Glob matches are
            sorted with ``_keynat``; an explicit list keeps its order.
        mode : str
            Must be ``'r'``; any other value raises ``ValueError``.
        topology : optional
            Topology source handed to ``_parse_topology``. When it is a
            path, a leading ``~`` is expanded first. ``None`` leaves
            ``self._topology`` unset (None).
        stride : int
            Stored on the instance; not used in this method.
        atom_indices : optional
            Stored on the instance; not used in this method.
        verbose : bool
            Stored on the instance; not used in this method.

        Raises
        ------
        ValueError
            If ``mode`` is not ``'r'``.
        """
        if mode != 'r':
            raise ValueError('mode must be "r"')
        self.path = path
        self.topology = topology
        self.stride = stride
        self.atom_indices = atom_indices
        self.verbose = verbose

        if isinstance(path, list):
            self.glob_matches = [expanduser(fn) for fn in path]
        else:
            self.glob_matches = sorted(glob.glob(expanduser(path)), key=_keynat)

        if topology is None:
            self._topology = None
        else:
            # expanduser only makes sense for paths. It raises TypeError
            # (Python 3) or AttributeError (Python 2) for any other object,
            # which used to abort construction before _parse_topology could
            # look at the value. Fall back to passing the object through.
            # NOTE(review): assumes _parse_topology accepts non-path
            # topology objects -- confirm against its implementation.
            try:
                topology_source = os.path.expanduser(topology)
            except (TypeError, AttributeError):
                topology_source = topology
            self._topology = _parse_topology(topology_source)
Пример #15
0
def load(filename, chunks=10, **kwargs):
    """
    A loader that will mimic :py:func:`mdtraj.Trajectory.load()`, but
    construct a :py:class:`dasktraj.Trajectory` with a :py:class:`dask.array`
    as xyz, time, and unitcell properties.

    Parameters
    ----------
    filename : string
        Filename of the file to load. It is converted to an absolute path
        before being opened or handed to ``load_chunks``.
    chunks : int
        Number of frames per chunk.
    **kwargs
        ``top`` is popped and used as the topology source; everything else
        is forwarded to ``load_chunks``.

    Returns
    -------
    trajectory
        A :py:class:`dasktraj.Trajectory`
    """

    top = kwargs.pop("top", None)
    extension = _get_extension(filename)
    if extension not in _TOPOLOGY_EXTS:
        topology = _parse_topology(top)
    else:
        # Previously ``topology`` was left unbound on this path, which
        # raised NameError further down. Pass the caller's value through.
        # NOTE(review): for these extensions the topology could also be
        # read from ``filename`` itself -- confirm the intended behavior.
        topology = top

    filename = os.path.abspath(filename)
    # ``open`` must be a trajectory-file opener whose result supports
    # len(); close it as soon as the frame count is known.
    with open(filename) as traj_file:
        length = len(traj_file)

    # Ceiling division: a partial trailing chunk still counts as a chunk.
    n_chunks, frames_left = divmod(length, chunks)
    if frames_left != 0:
        n_chunks += 1
    data = load_chunks(filename, extension, chunks, range(n_chunks), **kwargs)

    # TODO: use this to construct unitcells
    # unitcell_vectors is removed before the remaining entries are
    # splatted into the constructor, then assigned afterwards.
    uv = data.pop("unitcell_vectors")
    traj = Trajectory(topology=topology, delayed_objects=data, **data)
    if uv is not None:
        traj.unitcell_vectors = uv
    return traj
Пример #16
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read an AMBER mdcrd trajectory file.

    Parameters
    ----------
    filename : str
        String filename of the AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to
        supply it. Although it defaults to None, it is required.
    stride : int, default=None
        Only read every stride-th frame. Used only when ``frame`` is None.
    atom_indices : array_like, optional
        If not None, only this subset of the atom coordinates is read from
        the file, and the topology is reduced to the same subset.
    frame : int, optional
        If given, the file is positioned at this frame and exactly one
        frame is read; ``stride`` is then not passed to the reader.

    Returns
    -------
    trajectory : md.Trajectory
        Trajectory whose ``time`` is the frame index of each frame read
        (offset by ``frame``, or scaled by ``stride``). If the file yields
        box lengths, the unit cell is set with all angles equal to 90.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # ``top`` is optional in the signature because this function is usually
    # reached through load(), which passes it via **kwargs; checking here
    # gives the user a clear message when it is missing.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    filename_is_string = isinstance(filename, string_types)
    if not filename_is_string:
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    parsed_top = _parse_topology(top)
    selection = cast_indices(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=parsed_top._numAtoms) as handle:
        # A single frame and a strided read are mutually exclusive.
        if frame is None:
            read_options = dict(stride=stride)
        else:
            handle.seek(frame)
            read_options = dict(n_frames=1)
        coords, box_lengths = handle.read(atom_indices=selection,
                                          **read_options)

        # Unit conversions happen in place; the return values are unused.
        in_units_of(coords, handle.distance_unit,
                    Trajectory._distance_unit, inplace=True)
        has_box = box_lengths is not None
        if has_box:
            in_units_of(box_lengths, handle.distance_unit,
                        Trajectory._distance_unit, inplace=True)

            # Assume that it is a rectilinear box
            box_angles = 90.0 * np.ones_like(box_lengths)

    # The full topology was needed for the atom count above; only now is it
    # narrowed to the requested atoms.
    if selection is not None:
        parsed_top = parsed_top.subset(selection)

    frame_times = np.arange(len(coords))
    if frame is not None:
        frame_times += frame
    elif stride is not None:
        frame_times *= stride

    result = Trajectory(xyz=coords, topology=parsed_top, time=frame_times)
    if has_box:
        result.unitcell_lengths = box_lengths
        result.unitcell_angles = box_angles
    return result
Пример #17
0
def main(args, verbose=True):
    """Run the main script.

    Reads every input file chunk by chunk, optionally selects frames,
    converts the data to the output format's units and fields, and writes
    it to a single output file.

    Parameters
    ----------
    args : argparse.Namespace
        The collected command line arguments. The attributes read here are
        ``atom_indices``, ``output``, ``input``, ``topology``, ``index``,
        ``chunk``, ``stride`` and ``force``.
    verbose : bool
        If True, write a running progress line to stdout.

    Raises
    ------
    RuntimeError
        If ``args.index`` is set and a data field is neither a numpy array
        nor an ``md.Topology``.
    """
    if args.atom_indices is not None:
        atom_indices = np.loadtxt(args.atom_indices, int)
    else:
        atom_indices = None

    out_x = ext(args.output)
    out_units = units[out_x]
    out_fields = fields[out_x]
    OutFileFormat = formats[out_x]

    in_x = ext(args.input[0])
    InFileFormat = formats[in_x]

    if args.topology is not None:
        topology = _parse_topology(args.topology)
    else:
        topology = None

    if topology is not None and atom_indices is not None:
        topology = topology.subset(atom_indices)

    n_total = 0
    if args.index is not None:
        assert len(args.input) == 1, \
            'frame indexing requires exactly one input file'
        # when chunk is None, we load up ALL of the frames. this isn't
        # strictly necessary, and it costs more memory, but it's ALOT
        # harder to get the code correct when we need to use data[start:end]
        # notation when all of the data isn't loaded up at once. it's easy
        # for hdf5 and netcdf, but for the others...
        assert args.chunk is None, \
            'frame indexing cannot be combined with chunked reading'

    # this is the normal invocation pattern, but for PDBTrajectoryFile it's
    # different
    outfile_factory = functools.partial(OutFileFormat, args.output, 'w',
                                        force_overwrite=args.force)

    with outfile_factory() as outfile:
        for fn in args.input:
            # All inputs must share the extension of the first one, since a
            # single reader class was chosen above.
            assert in_x == ext(fn)
            with InFileFormat(fn, 'r') as infile:

                while True:
                    data, in_units, n_frames = read(infile, args.chunk,
                                                    stride=args.stride,
                                                    atom_indices=atom_indices)
                    if n_frames == 0:
                        break

                    if topology is not None:
                        # if the user supplied a topology, we should probably
                        # do some simple checks
                        n_atoms_read = data['xyz'].shape[1]
                        if n_atoms_read != topology._numAtoms:
                            warnings.warn(
                                'The number of atoms in the trajectory data '
                                '(%d) does not match the number of atoms in '
                                'the supplied topology (%d).'
                                % (n_atoms_read, topology._numAtoms))
                        data['topology'] = topology

                    # if they want a specific set of frames, get those
                    # with slice notation
                    if args.index is not None:
                        _data = {}
                        for k, v in iteritems(data):
                            if isinstance(v, np.ndarray):
                                # we don't want the dimensionality to go deficient
                                if isinstance(args.index, int):
                                    _data[k] = v[np.newaxis, args.index]
                                else:
                                    _data[k] = v[args.index]
                            elif isinstance(v, md.Topology):
                                _data[k] = v
                            else:
                                # Was ``RuntineError`` (a typo), which
                                # surfaced as a NameError instead.
                                raise RuntimeError(
                                    'cannot select frames from field %r of '
                                    'unsupported type %s' % (k, type(v)))
                        data = _data
                        if verbose:
                            print(list(data.keys()))
                        n_frames = len(data['xyz'])

                    convert(data, in_units, out_units, out_fields)
                    write(outfile, data)
                    n_total += n_frames

                    if verbose:
                        # '\r' rewrites the same terminal line each chunk.
                        sys.stdout.write('\rconverted %d frames, %d atoms' % (n_total, data['xyz'].shape[1]))
                        sys.stdout.flush()

    if verbose:
        print(' ')
0
def load_lammpstrj(filename,
                   top=None,
                   stride=None,
                   atom_indices=None,
                   frame=None,
                   unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information. Although it defaults to None, it is required.
    stride : int, default=None
        Only read every stride-th frame. Used only when ``frame`` is None.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file, and reduce the topology to the same subset.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its ``time``
        is the frame index of each frame read (offset by ``frame``, or
        scaled by ``stride``).

    Raises
    ------
    ValueError
        If ``top`` is None, or ``unit_set`` is not supported.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        # The original mixed a '%s' placeholder with str.format(), so the
        # offending type never appeared in the message.
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assignment, not comparison: '==' here was a no-op.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(n_frames=1,
                                                    atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(stride=stride,
                                                    atom_indices=atom_indices)

        # Conversions happen in place; the return values are unused.
        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        # Box lengths come from the same file and must be converted with
        # the coordinates, otherwise the unit cell is in the wrong unit.
        if cell_lengths is not None:
            in_units_of(cell_lengths,
                        f.distance_unit,
                        Trajectory._distance_unit,
                        inplace=True)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
Пример #19
0
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.  If 0, load all.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int or np.ndarray, default=1
        Only read every stride-th frame. If an array is passed, its entries
        are used as frame positions: consecutive runs are located with
        relative seeks and read as groups.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    skip : int, default=0
        Number of leading frames to skip before the first yielded chunk.

    Yields
    ------
    trajectory : md.Trajectory
        Successive fragments of the trajectory.

    See Also
    --------
    load, load_frame

    Examples
    --------

    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):  # doctest: +SKIP
    ...     print(chunk)  # doctest: +SKIP

    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>

    """
    stride = kwargs.pop('stride', 1)
    atom_indices = cast_indices(kwargs.pop('atom_indices', None))
    top = kwargs.pop('top', None)
    skip = kwargs.pop('skip', 0)

    extension = _get_extension(filename)
    if extension not in _TOPOLOGY_EXTS:
        topology = _parse_topology(top)
    else:
        topology = top

    if chunk == 0:
        # If chunk was 0 then we want to avoid filetype-specific code
        # in case of undefined behavior in various file parsers.
        # TODO: this will first apply stride, then skip!
        # NOTE(review): `stride` and `atom_indices` were popped from kwargs
        # above and are not forwarded to load() here -- confirm that this is
        # intended.
        if extension not in _TOPOLOGY_EXTS:
            kwargs['top'] = top
        yield load(filename, **kwargs)[skip:]
    elif extension in ('.pdb', '.pdb.gz'):
        # the PDBTrajectortFile class doesn't follow the standard API. Fixing it
        # to support iterload could be worthwhile, but requires a deep refactor.
        t = load(filename, stride=stride, atom_indices=atom_indices)
        for i in range(0, len(t), chunk):
            yield t[i:i + chunk]

    elif isinstance(stride, np.ndarray):
        # Only the .crd/.mdcrd openers are given the atom count.
        open_kwargs = ({'n_atoms': topology.n_atoms}
                       if extension in ('.crd', '.mdcrd') else {})
        with open(filename, **open_kwargs) as f:
            x_prev = 0
            curr_size = 0
            traj = []
            leftovers = []
            # Group the requested frame positions into consecutive runs: within
            # a run, (position in array - frame index) is constant.
            for k, g in groupby(enumerate(stride), lambda a: a[0] - a[1]):
                grouped_stride = list(map(itemgetter(1), g))
                seek_offset = (1 if x_prev != 0 else 0)
                seek_to = grouped_stride[0] - x_prev - seek_offset
                f.seek(seek_to, whence=1)
                x_prev = grouped_stride[-1]
                group_size = len(grouped_stride)
                if curr_size + group_size > chunk:
                    # The run does not fit in the current chunk; it is read
                    # piecewise by the `while leftovers` loop below.
                    leftovers = grouped_stride
                else:
                    local_traj = _get_local_traj_object(
                        atom_indices, extension, f, group_size, topology,
                        **kwargs)
                    traj.append(local_traj)
                    curr_size += len(grouped_stride)
                if curr_size == chunk:
                    yield _efficient_traj_join(traj)
                    curr_size = 0
                    traj = []
                while leftovers:
                    local_chunk = leftovers[:min(chunk, len(leftovers))]
                    local_traj = _get_local_traj_object(
                        atom_indices, extension, f, len(local_chunk), topology,
                        **kwargs)
                    traj.append(local_traj)
                    leftovers = leftovers[min(chunk, len(leftovers)):]
                    curr_size += len(local_chunk)
                    if curr_size == chunk:
                        yield _efficient_traj_join(traj)
                        curr_size = 0
                        traj = []
            if traj:
                yield _efficient_traj_join(traj)
            # PEP 479: a generator must finish with `return`; raising
            # StopIteration inside it becomes a RuntimeError on Python 3.7+.
            return

    else:
        # Only the .crd/.mdcrd openers are given the atom count.
        open_kwargs = ({'n_atoms': topology.n_atoms}
                       if extension in ('.crd', '.mdcrd') else {})
        with open(filename, **open_kwargs) as f:
            if skip > 0:
                f.seek(skip)
            while True:
                if extension not in _TOPOLOGY_EXTS:
                    traj = f.read_as_traj(topology,
                                          n_frames=chunk * stride,
                                          stride=stride,
                                          atom_indices=atom_indices,
                                          **kwargs)
                else:
                    traj = f.read_as_traj(n_frames=chunk * stride,
                                          stride=stride,
                                          atom_indices=atom_indices,
                                          **kwargs)

                if len(traj) == 0:
                    # End of file. Use `return` rather than raising
                    # StopIteration (PEP 479).
                    return

                yield traj
Пример #20
0
def load_netcdf(filename,
                top=None,
                stride=None,
                atom_indices=None,
                frame=None):
    """Read an AMBER NetCDF trajectory from disk into a Trajectory.

    The topology is taken from ``top``, which is passed through
    ``_parse_topology``.

    Parameters
    ----------
    filename : str
        Path of the AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Source of the topology: the path to a pdb file, a trajectory, or a
        topology.
    stride : int, default=None
        Only read every stride-th frame. Not used when ``frame`` is given.
    atom_indices : array_like, optional
        If not None, only the coordinates of these atoms are read and the
        topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame. If None
        (the default), the file is read using ``stride``.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is None:
            frames = f.read(stride=stride, atom_indices=atom_indices)
        else:
            # Single-frame mode: position the file first, then read one frame.
            f.seek(frame)
            frames = f.read(n_frames=1, atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = frames

        # Convert lengths from the file's unit to the Trajectory unit while
        # the file (and its distance_unit attribute) is still open.
        source_unit = f.distance_unit
        target_unit = Trajectory._distance_unit
        xyz = in_units_of(xyz, source_unit, target_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, source_unit, target_unit,
                                   inplace=True)

    return Trajectory(xyz=xyz,
                      topology=topology,
                      time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
Пример #21
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read an AMBER mdcrd trajectory from disk into a Trajectory.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        Source of the topology: the path to a pdb file, a trajectory, or a
        topology. Although it defaults to None in the signature, it is
        required; a ValueError is raised when it is missing.
    stride : int, default=None
        Only read every stride-th frame. Not used when ``frame`` is given.
    atom_indices : array_like, optional
        If not None, only the coordinates of these atoms are read and the
        topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame. If None
        (the default), the file is read using ``stride``.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # `top` is optional in the signature because this function is usually
    # reached through load(), which forwards it from **kwargs; checking here
    # lets us give the user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    single_frame = frame is not None

    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as f:
        if single_frame:
            f.seek(frame)
            read_args = {'n_frames': 1}
        else:
            read_args = {'stride': stride}
        xyz, cell_lengths = f.read(atom_indices=atom_indices, **read_args)
        has_box = cell_lengths is not None

        source_unit = f.distance_unit
        target_unit = Trajectory._distance_unit
        in_units_of(xyz, source_unit, target_unit, inplace=True)
        if has_box:
            in_units_of(cell_lengths, source_unit, target_unit, inplace=True)

            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Frame numbers serve as the time axis: shifted by `frame` in
    # single-frame mode, scaled by `stride` otherwise.
    time = np.arange(len(xyz))
    if single_frame:
        time += frame
    elif stride is not None:
        time *= stride

    result = Trajectory(xyz=xyz, topology=topology, time=time)
    if has_box:
        result.unitcell_lengths = cell_lengths
        result.unitcell_angles = cell_angles
    return result
Пример #22
0
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.  If 0, load all.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int, default=1
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    skip : int, default=0
        Number of leading frames to skip before the first yielded chunk.

    Yields
    ------
    trajectory : md.Trajectory
        Successive fragments of the trajectory.

    Raises
    ------
    ValueError
        If the file is a .dcd file and ``stride`` does not evenly divide
        ``chunk``.

    See Also
    --------
    load, load_frame

    Examples
    --------

    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):  # doctest: +SKIP
    ...     print(chunk)  # doctest: +SKIP

    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>

    """
    # NOTE: every end-of-data exit below uses `return`. Raising StopIteration
    # inside a generator is converted to RuntimeError on Python 3.7+ (PEP 479).
    stride = kwargs.get('stride', 1)
    atom_indices = cast_indices(kwargs.get('atom_indices', None))
    if chunk % stride != 0 and filename.endswith('.dcd'):
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))
    if chunk == 0:
        # If chunk was 0 then we want to avoid filetype-specific code in case
        # of undefined behavior in various file parsers.
        yield load(filename, **kwargs)
    else:
        skip = kwargs.get('skip', 0)
        if filename.endswith('.h5'):
            if 'top' in kwargs:
                warnings.warn('top= kwarg ignored since file contains topology information')

            with HDF5TrajectoryFile(filename) as f:
                if skip > 0:
                    # NOTE(review): this unpacks four values, while the loop
                    # below accesses the result by attribute -- confirm the
                    # shape of HDF5TrajectoryFile.read()'s return value.
                    xyz, _, _, _ = f.read(skip, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        return
                if atom_indices is None:
                    topology = f.topology
                else:
                    topology = f.topology.subset(atom_indices)

                while True:
                    data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                    if data == []:
                        return
                    in_units_of(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    in_units_of(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    yield Trajectory(xyz=data.coordinates, topology=topology,
                                     time=data.time, unitcell_lengths=data.cell_lengths,
                                     unitcell_angles=data.cell_angles)

        elif filename.endswith('.lh5'):
            if 'top' in kwargs:
                warnings.warn('top= kwarg ignored since file contains topology information')
            with LH5TrajectoryFile(filename) as f:
                if atom_indices is None:
                    topology = f.topology
                else:
                    topology = f.topology.subset(atom_indices)

                ptr = 0
                if skip > 0:
                    # read() yields a single coordinate array for this format
                    # (see the loop below), so it must not be unpacked into
                    # several names.
                    xyz = f.read(skip, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        return
                while True:
                    xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        return
                    in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    # The format carries no time data here; synthesize it from
                    # a running frame counter.
                    time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                    ptr += len(xyz)*stride
                    yield Trajectory(xyz=xyz, topology=topology, time=time)

        elif filename.endswith('.xtc'):
            topology = _parse_topology(kwargs.get('top', None))
            with XTCTrajectoryFile(filename) as f:
                if skip > 0:
                    xyz, _, _, _ = f.read(skip)
                    if len(xyz) == 0:
                        return
                while True:
                    xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        return
                    in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    in_units_of(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                    trajectory.unitcell_vectors = box
                    yield trajectory

        elif filename.endswith('.dcd'):
            topology = _parse_topology(kwargs.get('top', None))
            with DCDTrajectoryFile(filename) as f:
                ptr = 0
                if skip > 0:
                    xyz, _, _ = f.read(skip, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        return
                while True:
                    # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                    # together work like this method, but HDF5/XTC do not.
                    xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        return
                    in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    in_units_of(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                    ptr += len(xyz)*stride
                    yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                     unitcell_angles=box_angle)

        else:
            # No streaming reader for this format: load everything, then slice.
            log.critical("loading complete traj into mem! This might no be desired.")
            t = load(filename, **kwargs)
            for i in range(skip, len(t), chunk):
                yield t[i:i+chunk]
Пример #23
0
    def __init__(self, filename, chunk=100, **kwargs):
        """Set up chunked iteration over a trajectory file on disk.

        Parameters
        ----------
        filename : str
            Path to the trajectory file on disk
        chunk : int
            Number of frames to load at once from disk per iteration.

        Other Parameters
        ----------------
        top : {str, Trajectory, Topology}
            Topology source. It is parsed with ``_parse_topology`` unless the
            file extension is listed in ``_TOPOLOGY_EXTS``, in which case the
            value is stored unchanged.
        stride : int or np.ndarray, default=1
            Only read every stride-th frame. Passing an array selects the
            'random_access' mode.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.
        skip : int, default=0
            In 'traj' mode, the opened file is advanced with ``seek(skip)``.
        offsets : optional
            In 'traj' mode, assigned to the opened file's ``offsets``
            attribute when the file has one and the value is not None.

        Any remaining keyword arguments are kept in ``self._kwargs``.

        See Also
        --------
        load, load_frame
        """
        self._filename = filename
        self._stride = kwargs.pop('stride', 1)
        self._atom_indices = cast_indices(kwargs.pop('atom_indices', None))
        self._top = kwargs.pop('top', None)
        self._skip = kwargs.pop('skip', 0)
        self._kwargs = kwargs
        self._chunksize = chunk
        self._extension = _get_extension(self._filename)
        self._closed = False

        needs_parsing = self._extension not in _TOPOLOGY_EXTS
        self._topology = _parse_topology(self._top) if needs_parsing else self._top

        def _open_trajectory():
            # Only the .crd/.mdcrd openers are given the atom count.
            if self._extension in ('.crd', '.mdcrd'):
                return open(self._filename, n_atoms=self._topology.n_atoms)
            return open(self._filename)

        self._mode = None
        if self._chunksize > 0 and self._extension in ('.pdb', '.pdb.gz'):
            # PDB files are loaded completely up front; _i starts at 0.
            self._mode = 'pdb'
            self._t = load(self._filename,
                           stride=self._stride,
                           atom_indices=self._atom_indices)
            self._i = 0
        elif isinstance(self._stride, np.ndarray):
            self._mode = 'random_access'
            self._f = _open_trajectory()
            self._ra_it = self._random_access_generator(self._f)
        else:
            self._mode = 'traj'
            self._f = _open_trajectory()

            # offset array handling (self._kwargs is the same dict as kwargs,
            # so 'offsets' is removed from it as well)
            offsets = self._kwargs.pop('offsets', None)
            if hasattr(self._f, 'offsets') and offsets is not None:
                self._f.offsets = offsets

            if self._skip > 0:
                self._f.seek(self._skip)
Пример #24
0
 def load_topology(self, fn):
     """Store ``fn``, parse it into ``self.topology``, then run ``check_topology``."""
     self.fn = fn
     parsed = _parse_topology(fn)
     self.topology = parsed
     self.check_topology()
Пример #25
0
def main(args, verbose=True):
    """Run the main script.

    Reads each input file chunk by chunk, optionally attaches a topology and
    selects frames by index, and passes each chunk to ``convert`` and
    ``write`` for the output file.

    Parameters
    ----------
    args : argparse.Namespace
        The collected command line arguments
    verbose : bool, default=True
        If True, print a running count of converted frames to stdout.

    Raises
    ------
    RuntimeError
        If frame selection by ``args.index`` meets a data field that is
        neither a numpy array nor a topology.
    """
    if args.atom_indices is not None:
        atom_indices = np.loadtxt(args.atom_indices, int)
    else:
        atom_indices = None

    out_x = ext(args.output)
    out_units = units[out_x]
    out_fields = fields[out_x]
    OutFileFormat = formats[out_x]

    in_x = ext(args.input[0])
    InFileFormat = formats[in_x]

    if args.topology is not None:
        topology = _parse_topology(args.topology)
    else:
        topology = None

    if topology is not None and atom_indices is not None:
        topology = topology.subset(atom_indices)

    n_total = 0
    if args.index is not None:
        assert len(args.input) == 1
        # when chunk is None, we load up ALL of the frames. this isn't
        # strictly necessary, and it costs more memory, but it's ALOT
        # harder to get the code correct when we need to use data[start:end]
        # notation when all of the data isn't loaded up at once. it's easy
        # for hdf5 and netcdf, but for the others...
        assert args.chunk is None

    # this is the normal invocation pattern, but for PDBTrajectoryFile it's
    # different
    outfile_factory = functools.partial(OutFileFormat,
                                        args.output,
                                        'w',
                                        force_overwrite=args.force)

    with outfile_factory() as outfile:
        for fn in args.input:
            # all input files must share the extension of the first one
            assert in_x == ext(fn)
            with InFileFormat(fn, 'r') as infile:

                while True:
                    data, in_units, n_frames = read(infile,
                                                    args.chunk,
                                                    stride=args.stride,
                                                    atom_indices=atom_indices)
                    if n_frames == 0:
                        break

                    if topology is not None:
                        # if the user supplied a topology, we should probably
                        # do some simple checks
                        n_atoms_read = data['xyz'].shape[1]
                        if n_atoms_read != topology._numAtoms:
                            warnings.warn(
                                'The number of atoms in the trajectory (%d) '
                                'does not match the number of atoms in the '
                                'supplied topology (%d)'
                                % (n_atoms_read, topology._numAtoms))
                        data['topology'] = topology

                    # if they want a specific set of frames, get those
                    # with slice notation
                    if args.index is not None:
                        _data = {}
                        for k, v in iteritems(data):
                            if isinstance(v, np.ndarray):
                                # we don't want the dimensionality to go deficient
                                if isinstance(args.index, int):
                                    _data[k] = v[np.newaxis, args.index]
                                else:
                                    _data[k] = v[args.index]
                            elif isinstance(v, md.Topology):
                                _data[k] = v
                            else:
                                # Previously spelled `RuntineError`, which
                                # raised NameError instead.
                                raise RuntimeError(
                                    'Cannot index field %r of unsupported '
                                    'type %s' % (k, type(v)))
                        data = _data
                        # NOTE(review): looks like leftover debug output --
                        # confirm before removing.
                        print(list(data.keys()))
                        n_frames = len(data['xyz'])

                    convert(data, in_units, out_units, out_fields)
                    write(outfile, data)
                    n_total += n_frames

                    if verbose:
                        # '\r' rewrites the same terminal line for each chunk
                        sys.stdout.write('\rconverted %d frames, %d atoms' %
                                         (n_total, data['xyz'].shape[1]))
                        sys.stdout.flush()

    if verbose:
        print(' ')
Пример #26
0
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information. Although it defaults to None in the signature, it
        is required; a ValueError is raised when it is missing.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile :  Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if its not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # Use %-interpolation: the previous '%s'.format(...) call never
        # substituted the type into the message.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            # Single-frame mode: position the file, then read one frame.
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None
        return f.read_as_traj(topology, n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
Пример #27
0
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Read a TINKER .arc trajectory from disk into a Trajectory.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    top : {str, Trajectory, Topology}, optional
        Topology source: the path to a pdb file, a trajectory, or a topology.
        If None, the ``topology`` attribute of the opened ArcTrajectoryFile
        is used instead.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, only the coordinates of these atoms are read and the
        topology is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # `top` is genuinely optional here: when it is omitted, the topology
    # exposed by the .arc file object is used.
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as f:
        xyz, abc, ang = f.read(stride=stride, atom_indices=atom_indices)
        # Coordinates and box lengths share the file's length unit; both are
        # converted in place to the Trajectory unit.
        for lengths in (xyz, abc):
            in_units_of(lengths, f.distance_unit, Trajectory._distance_unit,
                        inplace=True)
        topology = f.topology if top is None else _parse_topology(top)

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Frame numbers serve as the time axis; scale them so that a strided
    # read keeps the original frame spacing.
    time = np.arange(len(xyz))
    if stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=abc,
                      unitcell_angles=ang)
Пример #28
0
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file from disk and return it as a Trajectory.

    Parameters
    ----------
    filename : str
        String filename of TINKER .arc file.
    top : {str, Trajectory, Topology}, optional
        Where to get the topology from: the path to a pdb file, a trajectory,
        or a topology. When left as None, the topology attribute of the
        opened ArcTrajectoryFile is used.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, read only this subset of atoms; the topology is reduced
        to match.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # No check on `top`: a missing topology is taken from the file object.
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    with ArcTrajectoryFile(filename) as f:
        coords, box_lengths, box_angles = f.read(stride=stride,
                                                 atom_indices=atom_indices)
        file_unit = f.distance_unit
        traj_unit = Trajectory._distance_unit
        # Both length-valued arrays are converted in place.
        in_units_of(coords, file_unit, traj_unit, inplace=True)
        in_units_of(box_lengths, file_unit, traj_unit, inplace=True)
        if top is not None:
            topology = _parse_topology(top)
        else:
            topology = f.topology

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    frame_times = np.arange(len(coords))
    if stride is not None:
        # if we loaded with a stride, the Trajectory's time field should
        # respect that
        frame_times *= stride

    return Trajectory(xyz=coords,
                      topology=topology,
                      time=frame_times,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
Пример #29
0
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile :  Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, it's good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if it's not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # Use %-interpolation: the template contains a printf-style "%s"
        # placeholder, which str.format() would leave untouched.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        # Keep the topology consistent with the subset of atoms being read.
        topology = topology.subset(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            # Single-frame mode: jump to the requested frame and read only it;
            # `stride` is deliberately not used on this path.
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)
        # Convert coordinates from the file's unit to the Trajectory unit.
        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)

    # The time axis is expressed in frame indices of the file on disk.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    return t
Пример #30
0
def load_gsd(filename,
             top=None,
             start=None,
             n_frames=None,
             stride=None,
             atom_indices=None,
             frame=None):
    """Load a GSD trajectory file.

    Parameters
    ----------
    filename : path-like
        Path of GSD trajectory file.
    top : {path-like, Trajectory, Topology}, None
        A pdb file, a trajectory, or a topology to supply topology information
        If None, topology information will be parsed from the GSD file
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is neither a string nor an ``os.PathLike`` object.

    """
    from mdtraj.core.trajectory import Trajectory, _parse_topology
    import gsd.hoomd

    if not isinstance(filename, (string_types, os.PathLike)):
        # Use %-interpolation: the template contains a printf-style "%s"
        # placeholder, which str.format() would leave untouched.
        raise TypeError('filename must be of type path-like for load_gsd. '
                        'you supplied %s' % type(filename))

    # An explicitly supplied topology wins; otherwise derive it from the file.
    if top is not None:
        topology = _parse_topology(top)
    else:
        topology = load_gsd_topology(filename)
    atom_indices = cast_indices(atom_indices)

    with gsd.hoomd.open(filename, 'rb') as f:
        if frame is not None:
            # Single-frame path: `start`, `n_frames` and `stride` are not used.
            # NOTE(review): `topology` is passed on without being subset by
            # `atom_indices` here -- presumably read_snapshot accounts for
            # that; verify against its implementation.
            xyz, vectors, time = read_snapshot(frame,
                                               f[frame],
                                               topology,
                                               atom_indices=atom_indices)
            t = Trajectory(xyz=np.array(xyz),
                           topology=topology,
                           time=np.array([time]))
            # Reshape the box vectors to (n, 3, 3) before attaching them.
            t.unitcell_vectors = np.reshape(vectors, (-1, 3, 3))
            return t

        else:
            return hoomdtraj_to_traj(f,
                                     topology,
                                     start=start,
                                     n_frames=n_frames,
                                     stride=stride,
                                     atom_indices=atom_indices)
Пример #31
0
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None, or if ``unit_set`` is not a supported unit set.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    from mdtraj.core.trajectory import _parse_topology

    # We make `top` required. Although this is a little weird, it's good because
    # this function is usually called by a dispatch from load(), where top comes
    # from **kwargs. So if it's not supplied, we want to give the user an
    # informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')
    if not isinstance(filename, string_types):
        # Use %-interpolation: the template contains a printf-style "%s"
        # placeholder, which str.format() would leave untouched.
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assign (not compare) the distance unit implied by the unit set;
            # the previous `==` expression had no effect.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError('Unsupported unit set specified: {0}.'.format(unit_set))
        if frame is not None:
            # Single-frame mode: position the reader and read exactly one frame.
            f.seek(frame)
            n_frames = 1
        else:
            n_frames = None

        return f.read_as_traj(topology, n_frames=n_frames, stride=stride, atom_indices=atom_indices)
Пример #32
0
 def load_topology(self, fn):
     """Parse the topology given by ``fn`` and store it on this instance.

     Stores ``fn`` as ``self.fn`` and the result of ``_parse_topology(fn)``
     as ``self.topology``, then calls ``self.check_topology()``.
     """
     self.fn = fn
     self.topology = _parse_topology(fn)
     # NOTE(review): check_topology is defined outside this view; presumably
     # it validates the freshly parsed topology -- confirm.
     self.check_topology()