示例#1
0
def test_constraints():
    """Constraints assigned on a write handle are read back unchanged."""
    constraint_dtype = np.dtype([
        ('atom1', np.int32),
        ('atom2', np.int32),
        ('distance', np.float32),
    ])
    expected = np.array([(1, 2, 3.5)], dtype=constraint_dtype)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.constraints = expected

    with HDF5TrajectoryFile(temp) as reader:
        assert eq(reader.constraints, expected)
示例#2
0
def test_write_coordinates_reshape():
    """A 2-D coordinate array is stored with a leading axis of length one."""
    n_atoms = 10
    single_frame = np.random.randn(n_atoms, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(single_frame)

    with HDF5TrajectoryFile(temp) as reader:
        stored = reader.root.coordinates[:]
        assert eq(stored, single_frame.reshape(1, n_atoms, 3))

        stored_units = str(reader.root.coordinates.attrs['units'])
        assert eq(stored_units, 'nanometers')
示例#3
0
def test_topology(get_fn):
    """A topology stored in the file compares equal after a round trip."""
    reference_top = md.load_pdb(get_fn('native.pdb')).topology

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.topology = reference_top

    with HDF5TrajectoryFile(temp) as reader:
        assert reader.topology == reference_top
示例#4
0
def test_write_multiple():
    """Two consecutive writes are stored back to back along the frame axis."""
    frames = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        for _ in range(2):
            writer.write(frames)

    with HDF5TrajectoryFile(temp) as reader:
        expected = np.vstack((frames, frames))
        assert eq(reader.root.coordinates[:], expected)
示例#5
0
def test_write_coordinates():
    """Written coordinates are stored verbatim and tagged with nanometer units.

    The checks are plain ``assert`` statements rather than nose-style
    ``yield``-ed callables: a generator test function is not executed by
    current pytest, so yielded checks would never run.
    """
    coordinates = np.random.randn(4, 10, 3)
    with HDF5TrajectoryFile(temp, 'w') as f:
        f.write(coordinates)

    with HDF5TrajectoryFile(temp) as f:
        assert eq(f.root.coordinates[:], coordinates)
        assert eq(str(f.root.coordinates.attrs['units']), 'nanometers')
示例#6
0
def test_read_slice_3():
    """Reading with a stride and an atom selection slices every stored field."""
    frames = np.random.randn(4, 10, 3)
    selection = [0, 1]

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(frames, alchemicalLambda=np.arange(4))

    with HDF5TrajectoryFile(temp) as reader:
        result = reader.read(stride=2, atom_indices=np.array(selection))
        assert eq(result.coordinates, frames[::2, selection, :])
        assert eq(result.alchemicalLambda, np.arange(4)[::2])
示例#7
0
def test_read_slice_2():
    """Reading with ``atom_indices`` returns only the selected atoms.

    The per-frame ``alchemicalLambda`` values must come back unaffected by
    the atom selection. Checks are direct ``assert`` statements; yielded
    callables are not executed by current pytest.
    """
    coordinates = np.random.randn(4, 10, 3)
    with HDF5TrajectoryFile(temp, 'w') as f:
        f.write(coordinates, alchemicalLambda=np.arange(4))

    with HDF5TrajectoryFile(temp) as f:
        got = f.read(atom_indices=np.array([0, 1]))
        assert eq(got.coordinates, coordinates[:, [0, 1], :])
        assert eq(got.alchemicalLambda, np.arange(4))
示例#8
0
def test_read_slice_0():
    """``read(n_frames=2)`` returns only the first two frames of each field."""
    frames = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(frames, alchemicalLambda=np.array([1, 2, 3, 4]))

    with HDF5TrajectoryFile(temp) as reader:
        head = reader.read(n_frames=2)
        assert eq(head.coordinates, frames[:2])
        # velocities were never written, so they read back as None
        assert eq(head.velocities, None)
        assert eq(head.alchemicalLambda, np.array([1, 2]))
示例#9
0
def test_append():
    """Frames written in append mode follow the frames already in the file."""
    first_batch = np.random.randn(10, 5, 3)
    second_batch = np.random.randn(8, 5, 3)

    # write the first batch to a fresh file, then reopen it in append mode
    for mode, batch in (('w', first_batch), ('a', second_batch)):
        with HDF5TrajectoryFile(temp, mode) as writer:
            writer.write(batch)

    with HDF5TrajectoryFile(temp) as reader:
        combined = np.concatenate((first_batch, second_batch))
        eq(reader.root.coordinates[:], combined)
示例#10
0
def test_read_0():
    """A plain ``read()`` returns everything that was written.

    Velocities were not written, so they are expected to come back as
    ``None``. Checks are direct ``assert`` statements; yielded callables
    are not executed by current pytest.
    """
    coordinates = np.random.randn(4, 10, 3)
    with HDF5TrajectoryFile(temp, 'w') as f:
        f.write(coordinates, alchemicalLambda=np.array([1, 2, 3, 4]))

    with HDF5TrajectoryFile(temp) as f:
        got = f.read()
        assert eq(got.coordinates, coordinates)
        assert eq(got.velocities, None)
        assert eq(got.alchemicalLambda, np.array([1, 2, 3, 4]))
示例#11
0
def test_read_1():
    """Unit-carrying input is read back in nanometers and nanometers/picosecond."""
    shape = (4, 10, 3)
    coordinates = units.Quantity(np.random.randn(*shape), units.angstroms)
    velocities = units.Quantity(np.random.randn(*shape), units.angstroms/units.years)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(coordinates, velocities=velocities)

    expected_xyz = coordinates.value_in_unit(units.nanometers)
    expected_vel = velocities.value_in_unit(units.nanometers/units.picoseconds)

    with HDF5TrajectoryFile(temp) as reader:
        result = reader.read()
        assert eq(result.coordinates, expected_xyz)
        assert eq(result.velocities, expected_vel)
示例#12
0
def test_read_slice_1():
    """Successive ``read(n_frames=2)`` calls walk through the file in order."""
    frames = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(frames)

    with HDF5TrajectoryFile(temp) as reader:
        # the second read continues where the first one stopped
        for expected_chunk in (frames[:2], frames[2:]):
            chunk = reader.read(n_frames=2)
            assert eq(chunk.coordinates, expected_chunk)
            assert eq(chunk.velocities, None)
示例#13
0
def test_write_units():
    """Quantities with units are converted to MD units before being stored."""
    # simtk.units are automatically converted into MD units for storage on disk
    shape = (4, 10, 3)
    coordinates = units.Quantity(np.random.randn(*shape), units.angstroms)
    velocities = units.Quantity(np.random.randn(*shape), units.angstroms/units.year)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(coordinates, velocities=velocities)

    with HDF5TrajectoryFile(temp) as reader:
        expected_xyz = coordinates.value_in_unit(units.nanometers)
        assert eq(reader.root.coordinates[:], expected_xyz)
        assert eq(str(reader.root.coordinates.attrs['units']), 'nanometers')

        expected_vel = velocities.value_in_unit(units.nanometers/units.picosecond)
        assert eq(reader.root.velocities[:], expected_vel)
        assert eq(str(reader.root.velocities.attrs['units']), 'nanometers/picosecond')
示例#14
0
def test_write_units2():
    """Same as the simtk-units write test, using mdtraj's bundled ``unit`` package."""
    from mdtraj.utils import unit

    shape = (4, 10, 3)
    coordinates = unit.quantity.Quantity(
        np.random.randn(*shape),
        unit.unit_definitions.angstroms)
    velocities = unit.quantity.Quantity(
        np.random.randn(*shape),
        unit.unit_definitions.angstroms/unit.unit_definitions.year)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(coordinates, velocities=velocities)

    with HDF5TrajectoryFile(temp) as reader:
        expected_xyz = coordinates.value_in_unit(unit.unit_definitions.nanometers)
        assert eq(reader.root.coordinates[:], expected_xyz)
        assert eq(str(reader.root.coordinates.attrs['units']), 'nanometers')

        expected_vel = velocities.value_in_unit(
            unit.unit_definitions.nanometers/unit.unit_definitions.picosecond)
        assert eq(reader.root.velocities[:], expected_vel)
        assert eq(str(reader.root.velocities.attrs['units']), 'nanometers/picosecond')
示例#15
0
def test_write_inconsistent_2():
    """Dropping a field that earlier frames contained raises ``ValueError``."""
    frames = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(frames, velocities=frames)

        def write_without_velocities():
            # we're saving a deficient set of data, since before we wrote
            # more information.
            writer.write(frames)

        assert_raises(ValueError, write_without_velocities)
示例#16
0
def test_write_inconsistent():
    """Adding a field that earlier frames lacked raises ``ValueError``."""
    frames = np.random.randn(4, 10, 3)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        writer.write(frames)

        # since the first frames we saved didn't contain velocities, we
        # can't save more velocities
        with pytest.raises(ValueError):
            writer.write(frames, velocities=frames)
示例#17
0
def test_attributes():
    """Metadata attributes set on a write handle are readable after reopening."""
    constraint_dtype = [('atom1', np.int32), ('atom2', np.int32), ('distance', np.float32)]
    constraints = np.zeros(10, dtype=constraint_dtype)

    # (attribute name, value) pairs, assigned and checked in this order
    metadata = [
        ('title', 'mytitle'),
        ('reference', 'myreference'),
        ('forcefield', 'amber99'),
        ('randomState', 'sdf'),
        ('application', 'openmm'),
        ('constraints', constraints),
    ]

    with HDF5TrajectoryFile(temp, 'w') as writer:
        for attr_name, value in metadata:
            setattr(writer, attr_name, value)

    with HDF5TrajectoryFile(temp) as reader:
        for attr_name, value in metadata:
            eq(getattr(reader, attr_name), value)
示例#18
0
def test_write_units_mismatch():
    """Passing a velocity-dimensioned quantity as coordinates raises ``TypeError``."""
    wrong_dimension = units.Quantity(
        np.random.randn(4, 10, 3),
        units.angstroms/units.picosecond)

    with HDF5TrajectoryFile(temp, 'w') as writer:
        # if you try to write coordinates that are unitted and not
        # in the correct units, we find that
        with pytest.raises(TypeError):
            writer.write(coordinates=wrong_dimension)
示例#19
0
def to_mdtraj_HDF5TrajectoryFile(item,
                                 atom_indices='all',
                                 structure_indices='all',
                                 check=True):
    """Open an ``.h5`` file as an ``mdtraj`` ``HDF5TrajectoryFile`` and extract a selection.

    Parameters
    ----------
    item
        Passed to ``digest_item(item, 'file:h5')`` when `check` is true and
        then to the ``HDF5TrajectoryFile`` constructor.
    atom_indices
        Atom selection forwarded to the extractor (default ``'all'``).
    structure_indices
        Structure selection forwarded to the extractor (default ``'all'``).
    check : bool
        When true, the arguments are digested before use.

    Returns
    -------
    The object returned by the ``mdtraj_HDF5TrajectoryFile`` ``extract``
    function for the opened file.
    """
    if check:
        digest_item(item, 'file:h5')
        atom_indices = digest_atom_indices(atom_indices)
        # NOTE(review): structure_indices goes through the *atom* indices
        # digester -- confirm this is intended.
        structure_indices = digest_atom_indices(structure_indices)

    from mdtraj.formats import HDF5TrajectoryFile
    from ..mdtraj_HDF5TrajectoryFile import extract as extract_mdtraj_HDF5TrajectoryFile

    opened = HDF5TrajectoryFile(item)

    # inputs were already digested above (or the caller opted out)
    return extract_mdtraj_HDF5TrajectoryFile(
        opened,
        atom_indices=atom_indices,
        structure_indices=structure_indices,
        copy_if_all=False,
        check=False,
    )
示例#20
0
def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw):
    r"""
    Create a (possibly featurized) file reader for a set of input files.

    The reader class is chosen from the file extension of the first input
    file; all files must share that extension and must exist.

    Parameters
    ----------
    input_files : str, or list/tuple of str, or list/tuple containing lists
        A single input file or a list of input files. If any element is
        itself a list or tuple, the input is treated as a fragmented
        trajectory.
    topology :
        A topology file. If given, the featurizer argument can be None.
    featurizer :
        A featurizer. If given, the topology file can be None.
    chunksize :
        The chunk size with which the corresponding reader gets initialized.
    **kw :
        Extra keyword arguments, forwarded only to ``H5Reader`` and
        ``PyCSVReader``.

    Returns
    -------
    The reader.

    Raises
    ------
    ValueError
        If `input_files` is neither a string nor a list of strings, is an
        empty list, mixes file types, names files that do not exist, or
        names MD files while neither `featurizer` nor `topology` is given.
    """
    from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
    from pyemma.coordinates.data.py_csv_reader import PyCSVReader
    from pyemma.coordinates.data import FeatureReader
    from pyemma.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader

    is_sequence = isinstance(input_files, (list, tuple))

    # fragmented trajectories
    if (is_sequence and len(input_files) > 0 and
            any(isinstance(item, (list, tuple)) for item in input_files)):
        return FragmentedTrajectoryReader(input_files, topology, chunksize, featurizer)

    # normal trajectories: normalize the input to a list of file names
    if isinstance(input_files, string_types):
        # single string -> one-element list
        input_list = [input_files]
    else:
        if not is_sequence or (len(input_files) > 0 and
                               not any(isinstance(item, string_types) for item in input_files)):
            # wrap in a 1-tuple so that a tuple argument is formatted as a
            # whole instead of being unpacked by the % operator
            raise ValueError("Input \"%s\" was no string or list of strings." % (input_files,))
        if len(input_files) == 0:
            raise ValueError("The passed input list should not be empty.")
        if not all(isinstance(item, string_types) for item in input_files):
            raise ValueError("The passed list did not exclusively contain strings or was a list of lists "
                             "(fragmented trajectory).")
        input_list = input_files

    # TODO: this does not handle suffixes like .xyz.gz (rare)
    _, suffix = os.path.splitext(input_list[0])

    # check: do all files have the same file type? If not: raise ValueError.
    if not all(item.endswith(suffix) for item in input_list):
        raise ValueError("Not all elements in the input list were of the type %s!" % suffix)

    # do all the files exist? If not: Raise value error
    missing = [item for item in input_list if not os.path.isfile(item)]
    if missing:
        err_msg = "\n".join("File %s did not exist or was no file" % item for item in missing)
        raise ValueError("Some of the given input files were directories"
                         " or did not exist:\n%s" % err_msg)

    from mdtraj.formats.registry import FormatRegistry
    # we need to check for h5 first, because of mdtraj custom HDF5 traj format (which is deprecated).
    if suffix in ('.h5', '.hdf5'):
        # TODO: inspect if it is a mdtraj h5 file, eg. has the given attributes
        try:
            from mdtraj.formats import HDF5TrajectoryFile
            # probe only: open the first file and close the handle right away
            with HDF5TrajectoryFile(input_list[0]):
                pass
            reader = FeatureReader(input_list, featurizer=featurizer, topologyfile=topology,
                                   chunksize=chunksize)
        except Exception:
            # any failure above falls back to the generic HDF5 reader;
            # KeyboardInterrupt/SystemExit are deliberately not swallowed
            from pyemma.coordinates.data.h5_reader import H5Reader
            reader = H5Reader(filenames=input_files, chunk_size=chunksize, **kw)
    # CASE 1.1: file types are MD files
    elif suffix in FormatRegistry.loaders:
        # check: do we either have a featurizer or a topology file name? If not: raise ValueError.
        if not featurizer and not topology:
            raise ValueError("The input files were MD files which makes it mandatory to have either a "
                             "featurizer or a topology file.")
        # create a MD reader with file names and topology
        reader = FeatureReader(input_list, featurizer=featurizer, topologyfile=topology,
                               chunksize=chunksize)
    elif suffix in ('.npy', '.npz'):
        reader = NumPyFileReader(input_list, chunksize=chunksize)
    else:
        # otherwise we assume that given files are ascii tabulated data
        reader = PyCSVReader(input_list, chunksize=chunksize, **kw)

    return reader
示例#21
0
def test_reporter_subset():
    """Cross-check the HDF5, NetCDF and DCD reporters when only an atom subset is saved."""
    workdir = os.path.join(dir, 'test2')
    os.makedirs(workdir)

    pdb = PDBFile(get_fn('native2.pdb'))
    pdb.topology.setUnitCellDimensions([2, 2, 2])
    forcefield = ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    system = forcefield.createSystem(
        pdb.topology,
        nonbondedMethod=CutoffPeriodic,
        nonbondedCutoff=1 * nanometers,
        constraints=HBonds,
        rigidWater=True,
    )
    integrator = LangevinIntegrator(300 * kelvin, 1.0 / picoseconds, 2.0 * femtoseconds)
    integrator.setConstraintTolerance(0.00001)

    platform = Platform.getPlatformByName('Reference')
    simulation = Simulation(pdb.topology, system, integrator, platform)
    simulation.context.setPositions(pdb.positions)
    simulation.context.setVelocitiesToTemperature(300 * kelvin)

    hdf5file, ncfile, dcdfile = [
        os.path.join(workdir, fname)
        for fname in ('traj.h5', 'traj.nc', 'traj.dcd')
    ]

    atomSubset = [0, 1, 2, 4, 5]

    # one reporter per output format, each firing every 2 steps
    h5_reporter = HDF5Reporter(
        hdf5file, 2,
        coordinates=True, time=True, cell=True,
        potentialEnergy=True, kineticEnergy=True, temperature=True,
        velocities=True, atomSubset=atomSubset)
    nc_reporter = NetCDFReporter(
        ncfile, 2,
        coordinates=True, time=True, cell=True,
        atomSubset=atomSubset)
    dcd_reporter = DCDReporter(dcdfile, 2, atomSubset=atomSubset)
    all_reporters = (h5_reporter, nc_reporter, dcd_reporter)

    for rep in all_reporters:
        simulation.reporters.append(rep)
    simulation.step(100)
    for rep in all_reporters:
        rep.close()

    reference = md.load(get_fn('native.pdb'))
    reference.restrict_atoms(atomSubset)

    # 100 steps reported every 2 steps -> 50 stored frames
    n_frames = 50
    n_selected = len(atomSubset)

    with HDF5TrajectoryFile(hdf5file) as f:
        got = f.read()
        eq(got.temperature.shape, (n_frames, ))
        eq(got.potentialEnergy.shape, (n_frames, ))
        eq(got.kineticEnergy.shape, (n_frames, ))
        eq(got.coordinates.shape, (n_frames, n_selected, 3))
        eq(got.velocities.shape, (n_frames, n_selected, 3))
        eq(got.cell_lengths, 2 * np.ones((n_frames, 3)))
        eq(got.cell_angles, 90 * np.ones((n_frames, 3)))
        eq(got.time, 0.002 * 2 * (1 + np.arange(n_frames)))
        subset_top = md.load(get_fn('native.pdb'), atom_indices=atomSubset).topology
        assert f.topology == subset_top

    with NetCDFTrajectoryFile(ncfile) as f:
        xyz, time, cell_lengths, cell_angles = f.read()
        # the raw NetCDF file is expected to report lengths of 20 where HDF5 reports 2
        eq(cell_lengths, 20 * np.ones((n_frames, 3)))
        eq(cell_angles, 90 * np.ones((n_frames, 3)))
        eq(time, 0.002 * 2 * (1 + np.arange(n_frames)))
        eq(xyz.shape, (n_frames, n_selected, 3))

    hdf5_traj = md.load(hdf5file)
    dcd_traj = md.load(dcdfile, top=hdf5_traj)
    netcdf_traj = md.load(ncfile, top=hdf5_traj)

    # we don't have to convert units here, because md.load already handles
    # that
    for field in ('xyz', 'unitcell_vectors', 'time'):
        eq(getattr(hdf5_traj, field), getattr(netcdf_traj, field))

    for field in ('xyz', 'unitcell_vectors'):
        eq(getattr(dcd_traj, field), getattr(hdf5_traj, field))
示例#22
0
def test_dont_overwrite():
    """Opening an existing path with ``force_overwrite=False`` raises ``IOError``."""
    # make sure something already exists at the target path
    with open(temp, 'w') as existing:
        existing.write('a')

    with pytest.raises(IOError):
        with HDF5TrajectoryFile(temp, 'w', force_overwrite=False) as writer:
            writer.write(np.random.randn(10, 5, 3))
示例#23
0
def test_do_overwrite():
    """Opening an existing path with ``force_overwrite=True`` allows writing."""
    # make sure something already exists at the target path
    with open(temp, 'w') as existing:
        existing.write('a')

    with HDF5TrajectoryFile(temp, 'w', force_overwrite=True) as writer:
        writer.write(np.random.randn(10, 5, 3))
示例#24
0
def test_reporter():
    """Run a short non-periodic simulation and cross-check the three reporters.

    An HDF5, a NetCDF and a DCD reporter are attached to the same
    simulation; after 100 steps reported every 2 steps, the files are read
    back and compared against each other.

    All checks run directly in the test body. Nose-style ``yield``-ed
    callables are not executed by current pytest, and the yielded topology
    comparison discarded its result, so it never checked anything.
    """
    tempdir = os.path.join(dir, 'test1')
    os.makedirs(tempdir)

    pdb = PDBFile(get_fn('native.pdb'))
    forcefield = ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    # no periodic boundary conditions
    system = forcefield.createSystem(pdb.topology,
                                     nonbondedMethod=CutoffNonPeriodic,
                                     nonbondedCutoff=1.0 * nanometers,
                                     constraints=HBonds,
                                     rigidWater=True)
    integrator = LangevinIntegrator(300 * kelvin, 1.0 / picoseconds,
                                    2.0 * femtoseconds)
    integrator.setConstraintTolerance(0.00001)

    platform = Platform.getPlatformByName('Reference')
    simulation = Simulation(pdb.topology, system, integrator, platform)
    simulation.context.setPositions(pdb.positions)

    simulation.context.setVelocitiesToTemperature(300 * kelvin)

    hdf5file = os.path.join(tempdir, 'traj.h5')
    ncfile = os.path.join(tempdir, 'traj.nc')
    dcdfile = os.path.join(tempdir, 'traj.dcd')

    reporter = HDF5Reporter(hdf5file,
                            2,
                            coordinates=True,
                            time=True,
                            cell=True,
                            potentialEnergy=True,
                            kineticEnergy=True,
                            temperature=True,
                            velocities=True)
    reporter2 = NetCDFReporter(ncfile,
                               2,
                               coordinates=True,
                               time=True,
                               cell=True)
    reporter3 = DCDReporter(dcdfile, 2)

    simulation.reporters.append(reporter)
    simulation.reporters.append(reporter2)
    simulation.reporters.append(reporter3)
    simulation.step(100)

    reporter.close()
    reporter2.close()
    # release the DCD handle as well before the file is read back below
    reporter3.close()

    with HDF5TrajectoryFile(hdf5file) as f:
        got = f.read()
        eq(got.temperature.shape, (50, ))
        eq(got.potentialEnergy.shape, (50, ))
        eq(got.kineticEnergy.shape, (50, ))
        eq(got.coordinates.shape, (50, 22, 3))
        eq(got.velocities.shape, (50, 22, 3))
        # the system is non-periodic, so no cell information is expected
        eq(got.cell_lengths, None)
        eq(got.cell_angles, None)
        eq(got.time, 0.002 * 2 * (1 + np.arange(50)))
        assert f.topology == md.load(get_fn('native.pdb')).top

    with NetCDFTrajectoryFile(ncfile) as f:
        xyz, time, cell_lengths, cell_angles = f.read()
        eq(cell_lengths, None)
        eq(cell_angles, None)
        eq(time, 0.002 * 2 * (1 + np.arange(50)))

    hdf5_traj = md.load(hdf5file)
    dcd_traj = md.load(dcdfile, top=get_fn('native.pdb'))
    netcdf_traj = md.load(ncfile, top=get_fn('native.pdb'))

    # we don't have to convert units here, because md.load already
    # handles that
    assert hdf5_traj.unitcell_vectors is None
    eq(hdf5_traj.xyz, netcdf_traj.xyz)
    eq(hdf5_traj.unitcell_vectors, netcdf_traj.unitcell_vectors)
    eq(hdf5_traj.time, netcdf_traj.time)

    eq(dcd_traj.xyz, hdf5_traj.xyz)