Example #1
def test_HDF5DatasetLoader_1():
    from mdtraj import io

    assert HDF5DatasetLoader.short_name == 'hdf5'

    cwd = os.path.abspath(os.curdir)
    dirname = tempfile.mkdtemp()
    try:
        os.chdir(dirname)

        # one file
        io.saveh('f1.h5', **{'test': np.zeros((10, 3))})
        loader = HDF5DatasetLoader('f1.h5', concat=False)
        X, y = loader.load()
        assert np.all(X == np.zeros((10, 3)))
        assert y is None

        # two files
        io.saveh('f2.h5', **{'test': np.ones((10, 3))})
        loader = HDF5DatasetLoader('f*.h5', concat=False)
        X, y = loader.load()
        assert isinstance(X, list)
        assert np.all(X[0] == np.zeros((10, 3)))
        assert np.all(X[1] == np.ones((10, 3)))
        assert y is None

        # concat and stride and y_col
        loader = HDF5DatasetLoader('f*.h5', y_col=2, stride=2, concat=True)
        X, y = loader.load()
        assert X.shape[0] == 10 and X.shape[1] == 2
        assert y.shape[0] == 10

    finally:
        os.chdir(cwd)
        shutil.rmtree(dirname)
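
For context: io.saveh writes each keyword argument as a named dataset in a single HDF5 file, and io.loadh fetches one back by key, which is what the assertions above rely on. A minimal round trip (hypothetical file name):

import numpy as np
from mdtraj import io

x = np.arange(12).reshape(4, 3)
io.saveh('example.h5', coords=x)      # each kwarg becomes a named dataset
y = io.loadh('example.h5', 'coords')  # fetch a single dataset by key
assert np.array_equal(x, y)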
Example #2
    def save(self, filename):
        """
        Save the results and everything needed to use this object again.

        Parameters
        ----------
        filename : str
            Filename to save the data to. Uses mdtraj.io.saveh.

        Returns
        -------
        filename : str
            the same filename in case you want it back.
        """

        kwargs = {}
        kwargs['regularizer'] = np.array([pickle.dumps(self.regularizer)])
        kwargs['eta'] = np.array([self.eta])

        print('has_solution?', self._has_solution)
        if self._has_solution:
            kwargs['sol'] = self.v

        io.saveh(filename, **kwargs)

        return filename
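
The matching loader is not shown here. A hedged sketch of what it might look like, assuming the constructor accepts regularizer and eta and that io.loadh returns the keys written above ('regularizer', 'eta', and optionally 'sol'):

    @classmethod
    def load(cls, filename):
        # Hypothetical counterpart to save(): rebuild the object from the
        # datasets written by io.saveh.
        f = io.loadh(filename, deferred=False)  # plain dict of arrays
        obj = cls(regularizer=pickle.loads(f['regularizer'][0]),
                  eta=float(f['eta'][0]))
        if 'sol' in f:
            obj.v = f['sol']
            obj._has_solution = True
        return obj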
Example #3
def test_HDF5DatasetLoader_1():
    from mdtraj import io

    assert HDF5DatasetLoader.short_name == 'hdf5'

    cwd = os.path.abspath(os.curdir)
    dirname = tempfile.mkdtemp()
    try:
        os.chdir(dirname)

        # one file
        io.saveh('f1.h5', **{'test': np.zeros((10, 3))})
        loader = HDF5DatasetLoader('f1.h5', concat=False)
        X, y = loader.load()
        assert np.all(X == np.zeros((10, 3)))
        assert y is None

        # two files
        io.saveh('f2.h5', **{'test': np.ones((10, 3))})
        loader = HDF5DatasetLoader('f*.h5', concat=False)
        X, y = loader.load()
        assert isinstance(X, list)
        assert np.all(X[0] == np.zeros((10, 3)))
        assert np.all(X[1] == np.ones((10, 3)))
        assert y is None

        # concat and stride and y_col
        loader = HDF5DatasetLoader('f*.h5', y_col=2, stride=2, concat=True)
        X, y = loader.load()
        assert X.shape[0] == 10 and X.shape[1] == 2
        assert y.shape[0] == 10

    finally:
        os.chdir(cwd)
        shutil.rmtree(dirname)
Example #4
    def files_to_shotset(cls, list_of_cbf_files, shotset_filename=None,
                         autocenter=True):
        """
        Convert a batch of CBF files to a single ODIN shotset instance. If the
        shotset is written straight to disk, this is done in a "lazy" way so
        as to preserve memory.
        
        Parameters
        ----------
        list_of_cbf_files : list of str
            A list of paths to CBF files to convert.
        
        Optional Parameters
        -------------------
        shotset_filename : str
            The filename of the shotset to write to disk.
            
        autocenter : bool
            Whether or not to automatically determine the center of the detector.
            
        Returns
        -------
        ss : odin.xray.Shotset
            If `shotset_filename` is None, then returns the shotset object
        """
        
        # convert one CBF, and use it to get the detector, etc info
        seed_shot = cls(list_of_cbf_files[0], autocenter=autocenter).as_shotset()
        
        if shotset_filename:
            logger.info('writing CBF files straight to disk at: %s' % shotset_filename)
            
            seed_shot.save(shotset_filename)
            
            # now open a handle to that h5 file and add to it
            for i, fn in enumerate(list_of_cbf_files[1:]):

                # i+1 b/c we already saved one shot
                d = {('shot%d' % (i + 1,)): cls(fn, autocenter=False).intensities.flatten()}
                io.saveh(shotset_filename, **d)

            io.saveh(shotset_filename, num_shots=np.array([len(list_of_cbf_files)]))
            logger.info('Combined CBF data into: %s' % shotset_filename)
            return

        else:
            shot_i = np.zeros((len(list_of_cbf_files), seed_shot.intensities.shape[1]))
            shot_i[0, :] = seed_shot.intensities.flatten()

            for i, fn in enumerate(list_of_cbf_files[1:]):
                x = cls(fn, autocenter=False).intensities.flatten()
                if not len(x) == shot_i.shape[1]:
                    raise ValueError('Variable number of pixels in shots!')
                shot_i[i + 1, :] = x

            ss = xray.Shotset(shot_i, seed_shot.detector, seed_shot.mask)

            return ss
Example #5
    def test_RaggedArray_load_specific_h5_arrays(self):

        src = np.array(range(55))
        a = ra.RaggedArray(array=src, lengths=[25, 30])

        with tempfile.NamedTemporaryFile(suffix='.h5') as f:
            io.saveh(f.name, key0=a[0], key1=a[1])
            b = ra.load(f.name, keys=['key1'])

        assert_array_equal(a[1], b[0])
Example #6
def project(n_parms, tica_evs, tica_lag):
    ref1 = np.loadtxt('selected_frames/%s.txt' % parms[0])
    d = np.zeros((len(ref1), n_parms))
    for p in range(n_parms):
        data = np.loadtxt('selected_frames/%s.txt' % (parms[p]))
        d[:, p] = data
    proj = np.dot(d, tica_evs.T)
    io.saveh('selected_frames/selected_frames_on_tica_l%d.h5' % (tica_lag),
             proj)
    return proj
Example #7
    def save(self, output_fn):
        """
        save results to a .h5 file
        """
    
        kernel_str = pickle.dumps(self.kernel)

        io.saveh(output_fn, vals=self.vals,
            betas=self.betas, K=self.K, 
            Ku=self.Ku, eta=np.array([self.eta]),
            Xtrain=self._Xtrain, dt=np.array([self.dt]),
            kernel_str=np.array([kernel_str]))
Example #8
def save_maps(rs_map, savepath):
    """
    Save maps as separate keys in .h5 file format so that downstream loading
    isn't problematic.
    """

    for i, item in enumerate(rs_map):
        name = 'n%s' % i
        data = {name : item}
        io.saveh(savepath + ".h5", **data)

    return
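
The loop relies on io.saveh appending to an existing file: each call adds the new key rather than truncating what is already there. Reading the maps back is symmetric (sketch, same hypothetical savepath):

maps = io.loadh(savepath + ".h5", deferred=False)     # dict of every saved key
rs_map = [maps['n%s' % i] for i in range(len(maps))]  # restore original order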
Example #9
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    SASA = run(project, atom_indices, args.traj_fn)
    io.saveh(args.output, SASA)
Example #10
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    SASA = run(project, atom_indices, args.traj_fn)
    io.saveh(args.output, SASA)
Example #11
    def save(self, output_fn):
        """
        save results to a .h5 file
        """
    
        kernel_str = pickle.dumps(self.kernel)

        io.saveh(output_fn, vals=self.vals,
            vecs=self.vecs, K=self.K, 
            K_uncentered=self.K_uncentered, 
            reg_factor=np.array([self.reg_factor]),
            M=self.M, a=self.a, a_mean=self.a_mean,
            a_stdev=self.a_stdev, 
            kernel_str=np.array([kernel_str]))
Example #12
def test_overwrite_2():
    fid, fn = tempfile.mkstemp()
    try:
        a = np.arange(10)
        b = a + 1
        io.saveh(fn, a=a)
        io.saveh(fn, a=b)
        eq(io.loadh(fn, 'a'), b)
    finally:
        if os.path.exists(fn):
            os.close(fid)
            os.unlink(fn)
Example #13
    def save(self, output_fn):
        """
        save results to a .h5 file
        """
    
        kernel_str = pickle.dumps(self.kernel)

        io.saveh(output_fn, vals=self.vals,
            vecs=self.vecs, K=self.K, 
            K_uncentered=self.K_uncentered, 
            reg_factor=np.array([self.reg_factor]),
            traj=self._Xall, dt=np.array([self.dt]),
            normalized=np.array([self._normalized]), 
            kernel_str=np.array([kernel_str]))
Example #14
 def save(self, output):
     """
     save the results to file
     
     Parameters
     ----------
     output : str
         output filename (.h5)
     """
     
     metric_string = cPickle.dumps(self.prep_metric)  # Serialize metric used to calculate tICA input.
     
     io.saveh(output, timelag_corr_mat=self.timelag_corr_mat,
         cov_mat=self.cov_mat, lag=np.array([self.lag]), vals=self.vals,
         vecs=self.vecs, metric_string=np.array([metric_string]))
Example #15
def project(start_traj, end_traj, n_parms, tica, tica_lag):
    dataset = []
    if not os.path.exists('analysis/tica_projections'):
        os.makedirs('analysis/tica_projections')
    for i in range(start_traj, end_traj + 1):
        ref1 = np.loadtxt('analysis/%d/analysis/parameters/%s.txt' % (i, parms[0]))
        d = np.zeros((len(ref1), n_parms))
        for p in range(n_parms):
            data = np.loadtxt('analysis/%d/analysis/parameters/%s.txt' % (i, parms[p]))
            d[:, p] = data
        proj = np.dot(d, tica['components'].T)
        io.saveh('analysis/tica_projections/traj%d_on_tica_l%d.h5' % (i, tica_lag), proj)
        print("\tsaved projected trajectory %d at folder 'analysis/tica_projections'" % i)
        dataset.append(proj)
    return dataset
Example #16
def entry_point():
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance')
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(
        k, d, args.hierarchical_clustering_zmatrix, args.stride, project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #17
def entry_point():
    args, metric = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = md.load(args.pdb)
    if args.traj_fn.lower() == 'all':
        traj_fn = None
    else:
        traj_fn = args.traj_fn

    distances = run(project, pdb, metric, traj_fn)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
Example #18
def entry_point():
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance'
        )
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(k, d, args.hierarchical_clustering_zmatrix, args.stride,
                       project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #19
    def save_to_disk(self, filename):
        """Save this clusterer to disk.

        This is useful because computing the Z-matrix
        (done in __init__) is the most expensive part, and assigning is cheap

        Parameters
        ----------
        filename : str
            location to save to

        Raises
        ------
        Exception if something already exists at `filename`
        """
        io.saveh(filename, z_matrix=self.Z, traj_lengths=self.traj_lengths)
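
The inverse, Hierarchical.load_from_disk, is used in other examples in this collection. A hedged sketch consistent with the two keys saved here, assuming the attributes can simply be reattached:

    @classmethod
    def load_from_disk(cls, filename):
        # Hypothetical sketch: restore the expensive Z-matrix without
        # recomputing it, matching the keys written by save_to_disk().
        f = io.loadh(filename, deferred=False)
        instance = cls.__new__(cls)  # skip the costly __init__
        instance.Z = f['z_matrix']
        instance.traj_lengths = f['traj_lengths']
        return instance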
Example #20
def entry_point():
    args, metric = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = md.load(args.pdb)
    if args.traj_fn.lower() == 'all':
        traj_fn = None
    else:
        traj_fn = args.traj_fn

    distances = run(project, pdb, metric, traj_fn)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
Example #21
def entry_point():
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
        distances = io.loadh(args.distances, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
        distances = io.loadh(args.distances, 'Data')

    trimmed = run(assignments, distances, args.rmsd_cutoff)

    io.saveh(args.output, trimmed)
    logger.info('Saved output to %s', args.output)
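
The try/except covers two on-disk conventions: arrays passed to io.saveh positionally are stored under 'arr_0', while legacy MSMBuilder files used the key 'Data'. A small helper capturing that pattern (hypothetical name):

def load_arr0_or_data(path):
    # Prefer the modern positional key, fall back to the legacy one.
    try:
        return io.loadh(path, 'arr_0')
    except KeyError:
        return io.loadh(path, 'Data')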
Example #22
    def save(self, output):
        """
        save the results to file

        Parameters
        ----------
        output : str
            output filename (.h5)
        """

        # Serialize metric used to calculate tICA input.
        metric_string = cPickle.dumps(self.prep_metric)

        io.saveh(output, timelag_corr_mat=self.timelag_corr_mat,
                 cov_mat=self.cov_mat, lag=np.array([self.lag]), vals=self.vals,
                 vecs=self.vecs, metric_string=np.array([metric_string]))
Example #23
    def _save_masks(self, rs_mask):
        """ 
        Save mask in h5 format, with each key corresponding to a separate image. Currently
        stored in a temporary directory since arrays for the same image from different batches
        must still be compiled.
        """

        output_dir = self.system['map_path'] + "temp/"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        for i, item in enumerate(rs_mask):
            name = 'arr_%s' % i
            data = {name : item}
            io.saveh(output_dir + "masks_b%s.h5" %self.nbatch, **data)

        return
Example #24
 def setup(self):
     
     self.q_values = np.array([1.0, 2.0])
     self.num_phi  = 360
     self.l = 50.0
     self.d = xray.Detector.generic(spacing=0.4, l=self.l)
     self.t = trajectory.load(ref_file('ala2.pdb'))
     
     self.num_shots = 2
     intensities = np.abs(np.random.randn(self.num_shots, self.d.num_pixels))
     io.saveh('tmp_tables.h5', data=intensities)
     
     self.tables_file = tables.File('tmp_tables.h5')
     self.i = self.tables_file.root.data
     
     self.shot = xray.Shotset(self.i, self.d)
     
     return
Example #25
 def setup(self):
     
     self.q_values = np.array([1.0, 2.0])
     self.num_phi  = 360
     self.l = 50.0
     self.d = xray.Detector.generic(spacing=0.4, l=self.l)
     self.t = Trajectory.load(ref_file('ala2.pdb'))
     
     self.num_shots = 2
     intensities = np.abs(np.random.randn(self.num_shots, self.d.num_pixels))
     io.saveh('tmp_tables.h5', data=intensities)
     
     self.tables_file = tables.File('tmp_tables.h5')
     self.i = self.tables_file.root.data
     
     self.shot = xray.Shotset(self.i, self.d)
     
     return
Example #26
    def start(self):
        import pickle
        from mdtraj import io
        from glob import glob
        import numpy as np

        featurizer = np.load(self.args.featurizer)
        topology = featurizer.reference_traj
        filenames = [fn for t in self.args.trajectories for fn in glob(t)]

        X, indices, fns = featurize_all(filenames, featurizer, topology)
        y = self.model.fit_transform(X)

        io.saveh(
            self.args.out, X=y, indices=indices, fns=fns,
            labels=np.array(self.labels),
            featurizer=np.array([pickle.dumps(featurizer)]))
        print('Projection saved: %s' % self.args.out)
Example #27
 def save(self, output_name, txt=False, txt_fmt='%d %d %d %f'):
     if txt:
         x1_coords_flat = self.x1_coords.flatten()
         x2_coords_flat = self.x2_coords.flatten()
         values_flat = self.values.flatten()
         states = np.arange(len(x1_coords_flat))
         output_data = np.array(
             list(zip(states, x1_coords_flat, x2_coords_flat, values_flat)))
         np.savetxt(output_name,
                    output_data,
                    fmt=txt_fmt,
                    header='state x1 x2 energy')
     else:
         output_dict = {
             'x1_coords': self.x1_coords,
             'x2_coords': self.x2_coords,
             'landscape': self.values
         }
         io.saveh(output_name, **output_dict)
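
Reading the HDF5 branch back is straightforward; a sketch assuming the three keys written above:

landscape = io.loadh(output_name, deferred=False)
x1, x2 = landscape['x1_coords'], landscape['x2_coords']
values = landscape['landscape']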
Example #28
    def test_RaggedArray_load_h5_arrays(self):
        src = np.array(range(55))
        a = ra.RaggedArray(array=src, lengths=[25, 30])

        with tempfile.NamedTemporaryFile(suffix='.h5') as f:
            io.saveh(f.name, key0=a[0], key1=a[1])

            b = ra.load(f.name, keys=['key0', 'key1'])

        assert_ra_equal(a, b)

        src = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                        [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]]).T

        a = ra.RaggedArray(array=src, lengths=[4, 6])

        with tempfile.NamedTemporaryFile(suffix='.h5') as f:
            io.saveh(f.name, key0=a[0], key1=a[1])
            b = ra.load(f.name, keys=['key0', 'key1'])

        assert_ra_equal(a, b)
Example #29
    def start(self):
        import pickle
        import mdtraj as md
        from mdtraj import io
        from glob import glob
        import numpy as np

        featurizer = np.load(self.args.featurizer)
        topology = md.load(self.args.top)
        filenames = [fn for t in self.args.trajectories for fn in glob(t)]

        X, indices, fns = featurize_all(filenames, featurizer, topology)
        y = self.model.fit_transform([X])
        fns = np.array([fn.encode('utf-8') for fn in fns])

        io.saveh(
            self.args.out, X=y[0], indices=indices, fns=fns,
            labels=np.array(self.labels),
            topology=np.array([pickle.dumps(topology)]),
            featurizer=np.array([pickle.dumps(featurizer)]))
        print('Projection saved: %s' % self.args.out)
Example #30
def save(output_name, ragged_array):
    """Save a RaggedArray or numpy ndarray to disk as an HDF5 file.

    Parameters
    ----------
    output_name : str
        Path of file to write out.
    ragged_array : np.ndarray, RaggedArray
        Array to write to disk.

    See Also
    --------
    mdtraj.io.saveh
    """

    try:
        io.saveh(output_name,
                 array=ragged_array._data,
                 lengths=ragged_array.lengths)
    except AttributeError:
        # Accessing ._data raises AttributeError when the input is a plain ndarray
        io.saveh(output_name, ragged_array)
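
Usage sketch for round-tripping a RaggedArray through save(), assuming the same ra module used in the RaggedArray tests above:

a = ra.RaggedArray(array=np.arange(55), lengths=[25, 30])
save('ragged.h5', a)                       # writes 'array' and 'lengths' keys
f = io.loadh('ragged.h5', deferred=False)
b = ra.RaggedArray(array=f['array'], lengths=f['lengths'])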
Example #31
def _save_old_style(output_name, ragged_array):
    """Depricated en bloc RaggedArray saving routine.

    Parameters
    ----------
    output_name : str
        Path of file to write out.
    ragged_array : np.ndarray, RaggedArray
        Array to write to disk.

    See Also
    --------
    mdtraj.io.saveh
    """

    try:
        io.saveh(output_name,
                 array=ragged_array._data,
                 lengths=ragged_array.lengths)
    except AttributeError:
        # Accessing ._data raises AttributeError when the input is a plain ndarray
        io.saveh(output_name, ragged_array)
Example #32
def compile_masks(system):
    """
    Compile per-batch Bragg masks into a composite file.
    """

    #n_images = system['batch_size'] * system['n_batch']
    n_images = len(system['img2batch'])
    dtc_size = system['shape'][0] * system['shape'][1]
    comb_mask = np.zeros((n_images, dtc_size), dtype=np.uint8)

    # combine all temp files 
    for batch in range(int(system['n_batch'])):
        print "on batch %i" %batch
        for img in range(int(n_images)):
            comb_mask[img] += io.loadh(system['map_path'] + "temp/masks_b%s.h5" %batch, "arr_%i" %img)

    print "saving combined mask"
    for i, item in enumerate(comb_mask):
        name = 'arr_%s' % i
        data = {name : item}
        io.saveh(system['map_path'] + "combined_braggmasks.h5", **data)

    return 
Example #33
    def start(self):
        import pickle
        import mdtraj as md
        from mdtraj import io
        from glob import glob
        import numpy as np

        featurizer = np.load(self.args.featurizer)
        topology = md.load(self.args.top)
        filenames = [fn for t in self.args.trajectories for fn in glob(t)]

        X, indices, fns = featurize_all(filenames, featurizer, topology)
        y = self.model.fit_transform([X])
        fns = np.array([fn.encode('utf-8') for fn in fns])

        io.saveh(self.args.out,
                 X=y[0],
                 indices=indices,
                 fns=fns,
                 labels=np.array(self.labels),
                 topology=np.array([pickle.dumps(topology)]),
                 featurizer=np.array([pickle.dumps(featurizer)]))
        print('Projection saved: %s' % self.args.out)
Example #34
def save(outdir, traj_lengths, stride, n_real_atoms,
         centers, assignments, distances, scores, times):

    assignments = reshape_for_output(assignments, int, traj_lengths, stride)
    distances = reshape_for_output(distances, float, traj_lengths, stride)

    centers = centers.swapaxes(1,2)
    centers = centers[:, 0:n_real_atoms, :]

    os.makedirs(outdir)
    log('saving results to %s/' % outdir)
    io.saveh(os.path.join(outdir, 'centers.h5'), XYZList=centers)
    io.saveh(os.path.join(outdir, 'Assignments.h5'), assignments)
    io.saveh(os.path.join(outdir, 'Assignments.h5.distances'), distances)
    if len(scores) > 0 and len(times) > 0:
        io.saveh(os.path.join(outdir, 'convergence.h5'), scores=scores, times=times)
Example #35
 def save_container(filename, dtype):
     io.saveh(
         filename,
         arr_0=-1 * np.ones(
             (project.n_trajs, np.max(project.traj_lengths)), dtype=dtype),
         completed_trajs=np.zeros(project.n_trajs, dtype=bool))
Example #36
##############################################################################
# Code
##############################################################################

def main(k, d, zmatrix_fn, stride, project):
    hierarchical = Hierarchical.load_from_disk(zmatrix_fn)
    assignments = hierarchical.get_assignments(k=k, cutoff_distance=d)

    new_assignments = np.ones(
        (project.n_trajs, project.traj_lengths.max()), dtype=int) * -1
    new_assignments[:, ::stride] = assignments

    return new_assignments

if __name__ == "__main__":
    args = parser.parse_args()
    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)
    if k is None and d is None:
        logger.error(
            'You need to supply either a number of states or a cutoff distance')
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(
        k, d, args.hierarchical_clustering_zmatrix, args.stride, project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
Example #37
def main(args, metric):

    if args.alg == 'sclarans' and args.stride != 1:
        logger.error(
            """You don't want to use a stride with sclarans. The whole point of
sclarans is to use a shrink multiple to accomplish the same purpose, but in parallel with
stochastic subsampling. If you cant fit all your frames into  memory at the same time, maybe you
could stride a little at the begining, but its not recommended.""")
        sys.exit(1)

    # if we have a metric that explicitly operates on a subset of indices,
    # then we provide the option to only load those indices into memory
    # WARNING: I also do something a bit dirty, and inject `None` for the
    # RMSD.atomindices to get the metric to not splice
    if isinstance(metric, metrics.RMSD):
        atom_indices = metric.atomindices
        metric.atomindices = None  # probably bad...
        logger.info('RMSD metric - loading only the atom indices required')
    else:
        atom_indices = None

    # In case the clustering / algorithm needs extra arguments, use
    # this dictionary
    extra_kwargs = {}

    # Check to be sure we won't overwrite any data
    if args.alg == 'hierarchical':
        zmatrix_fn = os.path.join(args.output_dir, 'ZMatrix.h5')
        die_if_path_exists(zmatrix_fn)
        extra_kwargs['zmatrix_fn'] = zmatrix_fn
    else:
        generators_fn = os.path.join(args.output_dir, 'Gens.h5')
        die_if_path_exists(generators_fn)
        if args.stride == 1:
            assignments_fn = os.path.join(args.output_dir, 'Assignments.h5')
            distances_fn = os.path.join(args.output_dir,
                                        'Assignments.h5.distances')
            die_if_path_exists([assignments_fn, distances_fn])

    project = Project.load_from(args.project)

    if isinstance(metric,
                  metrics.Vectorized) and not args.alg == 'hierarchical':
        # if the metric is vectorized then
        # we can load prepared trajectories
        # which may allow for better memory
        # efficiency
        ptrajs, which = load_prep_trajectories(project, args.stride,
                                               atom_indices, metric)
        trajectories = None
        n_trajs = len(ptrajs)

        num_frames = np.sum([len(p) for p in ptrajs])
        if num_frames != len(which):
            raise Exception("something went wrong in loading step (%d v %d)" %
                            (num_frames, len(which)))
    else:
        trajectories = load_trajectories(project, args.stride, atom_indices)
        ptrajs = None
        which = None
        n_trajs = len(trajectories)

    logger.info('Loaded %d trajs', n_trajs)

    clusterer = cluster(metric, trajectories, ptrajs, args, **extra_kwargs)

    if not isinstance(clusterer, clustering.Hierarchical):

        if isinstance(metric, metrics.Vectorized):
            gen_inds = clusterer.get_generator_indices()
            generators = project.load_frame(which[gen_inds, 0],
                                            which[gen_inds, 1])
        else:
            generators = clusterer.get_generators_as_traj()

        logger.info('Saving %s', generators_fn)
        generators.save(generators_fn)

        if args.stride == 1:
            assignments = clusterer.get_assignments()
            distances = clusterer.get_distances()

            logger.info('Since stride=1, Saving %s', assignments_fn)
            logger.info('Since stride=1, Saving %s', distances_fn)
            io.saveh(assignments_fn, assignments)
            io.saveh(distances_fn, distances)
Example #38
def compute_crysol(trajectory, save_to=None):
    """
    Compute crysol for all the snapshots in an msmbuilder trajectory.
   
    Parameters
    ----------
    trajectory : msmbuilder.Trajectory.Trajectory
        The trajectory to compute SAXS for

    save_to : str or None
        If this is a string, will save to an h5 file of that name.    

    Returns
    -------
    q_values : np.ndarray
        The q-values at which the scattering was computed.

    scattering_pred : np.ndarray
        The estimated integrated intensity for each `q_value`
    """

    setup_tmp_dir()

    if isinstance(trajectory, str):
        trajectory = Trajectory.load_trajectory_file(trajectory)

    os.chdir(TEMPDIR)
    scattering_pred = None

    for i in range(len(trajectory)):

        frame = trajectory[i]

        pdbfn = '%s/tmp4crysol.pdb' % TEMPDIR
        frame.save_to_pdb(pdbfn)

        # run crysol comand line
        args = ['/%s %s' % kv for kv in crysol_params.items()]
        cmd = ['crysol', pdbfn] + args
        print(cmd)
        subprocess.check_call(' '.join(cmd), shell=True, stdout=DEVNULL, stderr=DEVNULL)

        # parse the output
        intensities_output = 'tmp4crysol00.int'
        if not os.path.exists(intensities_output):
            raise IOError('crysol output not found')

        d = np.genfromtxt(intensities_output, skip_header=1)
        q_values = d[:,0]

        # initialize output space
        if scattering_pred is None:
            scattering_pred = np.zeros((len(trajectory), d.shape[0]))

        scattering_pred[i,:] = d[:,3]

        os.remove(pdbfn)
        os.remove(intensities_output)
        os.remove('tmp4crysol00.alm')
        os.remove('tmp4crysol00.log')


    if save_to:
        io.saveh(save_to, q_values=q_values, saxs=scattering_pred)
        print "Saved: %s" % save_to
        return
    else:
        return q_values, scattering_pred
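
Reading the saved profile back mirrors the save call; a sketch assuming the two keys written above:

q_values = io.loadh(save_to, 'q_values')
saxs = io.loadh(save_to, 'saxs')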
Example #39
def ConvertDihedralsToArray(phi, psi):
    HCarray = np.zeros(phi.shape)
    for i in range(len(phi)):
        for j in range(len(phi[i])):
            if is_helical_peptide(phi[i, j], psi[i, j]):
                HCarray[i][j] = 1
            else:
                HCarray[i][j] = 0
    return HCarray


def count_n_helices(HCarray):

    return HCarray.sum(1)


project = Project.load_from(args.Project)
assignments = -1 * np.ones((project.n_trajs, max(project.traj_lengths)))

for trajid in range(project.n_trajs):
    print "Working on: %s" % project.traj_filename(trajid)
    traj = project.load_traj(trajid)
    phi = mdtraj.compute_phi(traj)[1] * 360 / (2 * np.pi)
    psi = mdtraj.compute_psi(traj)[1] * 360 / (2 * np.pi)
    HCarray = ConvertDihedralsToArray(phi, psi)
    assignments[trajid][:traj.n_frames] = count_n_helices(HCarray)

assignments = assignments.astype(int)

io.saveh(args.Output, assignments)
Example #40
    for i in range(k):
        transmat[i, :] = countsmat[i, :] / np.sum(countsmat[i, :])

    eigs = np.sort(np.real(np.linalg.eigvals(transmat)))
    timescales = -lag_time / np.log(eigs)
    return timescales

trajectories = []
for i in range(10):
    fn = 'trajectory-%d.h5' % i
    if os.path.exists(fn):
        print('loading %s' % fn)
        trajectories.append(io.loadh(fn)['arr_0'])
    else:
        x = propagate(5e5)
        io.saveh(fn, x)
        print('saving %s' % fn)
        trajectories.append(x)

def msm_timescale(trajectories, lag_times, n_states=2, discretization='grid'):
    all_timescales = np.zeros((len(trajectories), len(lag_times)))
    for i, x in enumerate(trajectories):
        all_timescales[i] = [msm_solution(x, n_states, lag_time, discretization=discretization)[-2] for lag_time in lag_times]
    return  np.mean(all_timescales, axis=0), np.std(all_timescales, axis=0) / np.sqrt(len(trajectories))

def ghmm_timescale(trajectories, lag_times, n_states=2):
    all_timescales = np.zeros((len(trajectories), len(lag_times)))
    for i, x in enumerate(trajectories):
        all_timescales[i] = [GaussianFusionHMM(n_states, n_features=1, fusion_prior=0).fit([x[::l].reshape(-1,1)]).timescales_()[-1]*l for l in lag_times]
    return  np.mean(all_timescales, axis=0), np.std(all_timescales, axis=0) / np.sqrt(len(trajectories))
Example #41
parser.add_argument('rmsd_cutoff', help="""distance value at which to trim.
    Data further than this value from its generator will be
    discarded. Note: this is measured with whatever distance metric you used to cluster""", type=float)
parser.add_argument('output', default='Data/Assignments.Trimmed.h5')


def run(assignments, distances, cutoff):
    number = np.count_nonzero(distances > cutoff)
    logger.info('Discarding %d assignments', number)

    assignments[np.where(distances > cutoff)] = -1
    return assignments


if __name__ == "__main__":
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
        distances = io.loadh(args.distances, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
        distances = io.loadh(args.distances, 'Data')

    trimmed = run(assignments, distances, args.rmsd_cutoff)

    io.saveh(args.output, trimmed)
    logger.info('Saved output to %s', args.output)
Example #42
            traj_asa = []
            logger.info("Working on Trajectory %d", traj_ind)
            traj_fn = project.traj_filename(traj_ind)
            chunk_ind = 0
            for traj_chunk in md.iterload(traj_fn, atom_indices=atom_indices, chunk=1000):
                traj_asa.extend(md.shrake_rupley(traj_chunk))
                chunk_ind += 1
            SASA[traj_ind, 0:project.traj_lengths[traj_ind]] = traj_asa

    else:
        traj_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, AtomIndices=atom_indices):
            traj_asa.extend(asa.calculate_asa(traj_chunk))

        SASA = np.array(traj_asa)

    return SASA

if __name__ == '__main__':
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)
    SASA = run(project, atom_indices, args.traj_fn)
    io.saveh(args.output, SASA)
Example #43
pairs = []
for i in range(len(listt)):
    for j in range(i + 1, len(listt)):
        pairs.append([listt[i], listt[j]])
pairs = np.array(pairs)
print "len(pairs):", len(pairs)

# stage 1
for i in range(4):
    traj = md.load('../md_files/stage1_xtc/protein_%d.xtc' % i, top=ref)
    print "stage1: traj, xyz.shape:", i, traj.xyz.shape
    d = md.compute_contacts(traj,
                            contacts=pairs,
                            scheme='closest-heavy',
                            ignore_nonprotein=True)
    io.saveh('s1_%d.h5' % i, distances=d[0], residue_pairs=d[1])

# stage 2
for i in range(20):
    traj = md.load('../md_files/stage2_xtc/protein_%d.xtc' % i, top=ref)
    print "stage2: traj, xyz.shape:", i, traj.xyz.shape
    d = md.compute_contacts(traj,
                            contacts=pairs,
                            scheme='closest-heavy',
                            ignore_nonprotein=True)
    io.saveh('s2_%d.h5' % i, distances=d[0], residue_pairs=d[1])

# stage 3
for i in range(20):
Example #44
            cc_aniso[counter] = model_utils.mweighted_cc(pred_aniso.copy(), exp_map.copy(), mult=mult)

            print "gamma: %.2f, sigma: %.2f, CC: %.4f" %(gamma, sigma, cc_aniso[counter])
            counter += 1

    cc_aniso = cc_aniso.reshape(len(gamma_range), len(sigma_range))
    return cc_aniso

if __name__ == '__main__':

    start = time.time()
    #sigma_range, gamma_range = np.arange(0.05, 1.55, 0.05), np.arange(3.0, 93.0, 3.0)
    sigma_range, gamma_range = np.arange(0.5, 0.61, 0.01), np.arange(12.0, 21.0)
    
    # load system and generate symmetry information
    system = pickle.load(open(sys.argv[1], "rb"))
    symm_ops = pickle.load(open("reference/symm_ops.pickle", "rb"))[sys.argv[2]]
    symm_idx, grid, mult = model_utils.generate_symmates(symm_ops, system, laue=False)

    # load molecular transform and experimental maps
    transform = np.load(sys.argv[3])
    experimental = np.load(sys.argv[4])

    # scan across sigma and gamma ranges; save mesh and cc_aniso to same .h5 file
    cc_aniso = scan(system, transform, experimental, sigma_range, gamma_range, sys.argv[5], mult)
    io.saveh(sys.argv[6] + "/%s.h5" % sys.argv[5],
             sigmas=sigma_range, gammas=gamma_range, cc=cc_aniso)

    print "elapsed time is %.3f" %((time.time() - start)/60.0)
Example #45
import os, sys
from msmbuilder import Project
import mdtraj as md
from mdtraj import io
import numpy as np

project = Project.load_from("ProjectInfo-RRR.yaml")
Rgs = -1 * np.ones((project.n_trajs, max(project.traj_lengths)))

for i in range(project.n_trajs):
    t = project.load_traj(i)
    rg = md.compute_rg(t)
    Rgs[i][:len(rg)] = rg

io.saveh('Rgs-RRR.h5', Rgs)
Example #46
    def files_to_shotset(cls,
                         list_of_cbf_files,
                         shotset_filename=None,
                         autocenter=True):
        """
        Convert a batch of CBF files to a single ODIN shotset instance. If the
        shotset is written straight to disk, this is done in a "lazy" way so
        as to preserve memory.
        
        Parameters
        ----------
        list_of_cbf_files : list of str
            A list of paths to CBF files to convert.
        
        Optional Parameters
        -------------------
        shotset_filename : str
            The filename of the shotset to write to disk.
            
        autocenter : bool
            Whether or not to automatically determine the center of the detector.
            
        Returns
        -------
        ss : odin.xray.Shotset
            If `shotset_filename` is None, then returns the shotset object
        """

        # convert one CBF, and use it to get the detector, etc info
        seed_shot = cls(list_of_cbf_files[0],
                        autocenter=autocenter).as_shotset()

        if shotset_filename:
            logger.info('writing CBF files straight to disk at: %s' %
                        shotset_filename)

            seed_shot.save(shotset_filename)

            # now open a handle to that h5 file and add to it
            for i, fn in enumerate(list_of_cbf_files[1:]):

                # i+1 b/c we already saved one shot
                d = {
                    ('shot%d' % (i + 1, )):
                    cls(fn, autocenter=False).intensities.flatten()
                }
                io.saveh(shotset_filename, **d)

            io.saveh(shotset_filename,
                     num_shots=np.array([len(list_of_cbf_files)]))
            logger.info('Combined CBF data into: %s' % shotset_filename)
            return

        else:
            shot_i = np.zeros(
                (len(list_of_cbf_files), seed_shot.intensities.shape[1]))
            shot_i[0, :] = seed_shot.intensities.flatten()

            for i, fn in enumerate(list_of_cbf_files[1:]):
                x = cls(fn, autocenter=False).intensities.flatten()
                if not len(x) == shot_i.shape[1]:
                    raise ValueError('Variable number of pixels in shots!')
                shot_i[i + 1, :] = x

            ss = xray.Shotset(shot_i, seed_shot.detector, seed_shot.mask)

            return ss
Example #47
def main(args, metric):
    
    if args.alg == 'sclarans' and args.stride != 1:
        logger.error("""You don't want to use a stride with sclarans. The whole point of
sclarans is to use a shrink multiple to accomplish the same purpose, but in parallel with
stochastic subsampling. If you cant fit all your frames into  memory at the same time, maybe you
could stride a little at the begining, but its not recommended.""")
        sys.exit(1)
        
    # if we have a metric that explicitly operates on a subset of indices,
    # then we provide the option to only load those indices into memory
    # WARNING: I also do something a bit dirty, and inject `None` for the
    # RMSD.atomindices to get the metric to not splice
    if isinstance(metric, metrics.RMSD):
        atom_indices = metric.atomindices
        metric.atomindices = None # probably bad...
        logger.info('RMSD metric - loading only the atom indices required')
    else:
        atom_indices = None

    # In case the clustering / algorithm needs extra arguments, use
    # this dictionary
    extra_kwargs = {}

    # Check to be sure we won't overwrite any data 
    if args.alg == 'hierarchical':
        zmatrix_fn = os.path.join(args.output_dir, 'ZMatrix.h5')
        die_if_path_exists(zmatrix_fn)
        extra_kwargs['zmatrix_fn'] = zmatrix_fn
    else:
        generators_fn = os.path.join(args.output_dir, 'Gens.h5') 
        die_if_path_exists(generators_fn)
        if args.stride == 1:
            assignments_fn = os.path.join(args.output_dir, 'Assignments.h5') 
            distances_fn = os.path.join(args.output_dir, 'Assignments.h5.distances')
            die_if_path_exists([assignments_fn, distances_fn])
        
    project = Project.load_from(args.project)

    if isinstance(metric, metrics.Vectorized) and not args.alg == 'hierarchical': 
        # if the metric is vectorized then
        # we can load prepared trajectories 
        # which may allow for better memory
        # efficiency
        ptrajs, which = load_prep_trajectories(project, args.stride, atom_indices, metric)
        trajectories = None
        n_trajs = len(ptrajs)

        num_frames = np.sum([len(p) for p in ptrajs])
        if num_frames != len(which):
            raise Exception("something went wrong in loading step (%d v %d)" % (num_frames, len(which)))
    else:
        trajectories = load_trajectories(project, args.stride, atom_indices)       
        ptrajs = None
        which = None
        n_trajs = len(trajectories)

    logger.info('Loaded %d trajs', n_trajs)

    clusterer = cluster(metric, trajectories, ptrajs, args, **extra_kwargs)

    if not isinstance(clusterer, clustering.Hierarchical):

        if isinstance(metric, metrics.Vectorized):
            gen_inds = clusterer.get_generator_indices()
            generators = project.load_frame(which[gen_inds, 0], which[gen_inds, 1])
        else:
            generators = clusterer.get_generators_as_traj()
        
        logger.info('Saving %s', generators_fn)
        generators.save(generators_fn)

        if args.stride == 1:
            assignments = clusterer.get_assignments()
            distances = clusterer.get_distances()
            
            logger.info('Since stride=1, Saving %s', assignments_fn)
            logger.info('Since stride=1, Saving %s', distances_fn)
            io.saveh(assignments_fn, assignments)
            io.saveh(distances_fn, distances)
Example #48
 def test_save(self):
     """Save HDF5 to disk and load it back up"""
     io.saveh(self.filename2, self.data)
     TestData = io.loadh(self.filename2, 'arr_0')
     eq(TestData, self.data)
Example #49
 def save_container(filename, dtype):
     io.saveh(filename, arr_0=-1 * np.ones((project.n_trajs, np.max(project.traj_lengths)), dtype=dtype),
              completed_trajs=np.zeros(project.n_trajs, dtype=bool))
Example #50
n_parms = len(parms)
n_trajs = end_traj - start_traj + 1
print "there are %d parameters" % n_parms
print "there are %d trajectories in the 'analysis/parameters' folder" % n_trajs

# getting tICA
print "Obtaining tICA object..."
tica = ti.tICA(n_components=None, lag_time=tica_lag)
dataset1 = train(start_traj, end_traj, n_parms)
tica.fit(dataset1)
print "first 5 tICA eigenvalues:", tica.eigenvalues_[0:5]
tica.save('analysis/tica_l%d.h5' % tica_lag)
print "saved tICA object: 'tica_l%d.h5'  in folder 'analysis' " % tica_lag

# projecting and ploting tICA
tica = io.loadh('analysis/tica_l%d.h5' % tica_lag)
dataset = project(start_traj, end_traj, n_parms, tica, tica_lag)
ev0, ev1 = [], []
for i in range(n_trajs):
    ev0.extend(dataset[i][:, 0])
    ev1.extend(dataset[i][:, 1])
ev0, ev1 = np.array(ev0), np.array(ev1)
io.saveh('analysis/tica_projections/ev0.h5', ev0)
io.saveh('analysis/tica_projections/ev1.h5', ev1)
print "saved all projected frames 'ev0.h5 & ev1.h5' at 'analysis/tica_projections' "
plt.figure(figsize=(12, 8))
plt.hist2d(ev0, ev1, bins=200, norm=LogNorm(), cmap=plt.cm.jet)
plt.savefig('analysis/tica_l%d.pdf' % tica_lag)
print "saved tica landscape for lag time %d at 'analysis/tica_l%d.pdf' " % (
    tica_lag, tica_lag)
Example #51
    start = time.time()
    system = pickle.load(open(sys.argv[2], "rb"))

    if sys.argv[1] == 'compile':

        # generating temp dir for resolution shell data
        output_dir = system['map_path'] + "temp/"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # processing resolution shell
        I_dir, num_shells, n_shell = sys.argv[3], int(sys.argv[4]), int(sys.argv[5])
        shell_maps, shell_grids = compile_shell(system, I_dir, num_shells, n_shell)

        # save resolution shell in grid and dictionary formats
        with open(system['map_path'] + "temp/dict_rshell%i_t%i.pickle" % (n_shell, num_shells), "wb") as handle:
            pickle.dump(shell_maps, handle)

        io.saveh(system['map_path'] + "temp/grid_rshell%i_t%i.h5" % (n_shell, num_shells), **shell_grids)
            
    if sys.argv[1] == "reduce":

        # combine resolution shells
        combined_maps, shell_stats = reduce_shells(system, int(sys.argv[3]))

        io.saveh(system['map_path'] + "final_maps.h5", **combined_maps)
        io.saveh(system['map_path'] + "shell_statistics.h5", **shell_stats)
                
    print "elapsed time is %f" %((time.time() - start)/60.0)