def test_order_1():
    """Check that ``keys()`` reports 0..19 in order for 'hdf5' and 'dir-npy'.

    Twenty random arrays are stored under the integer keys 0..19 in a
    dataset of each format, and the key listing is compared against
    ``range(20)`` while the dataset is still open.
    """
    expected_keys = list(range(20))
    with tempdir():
        for target, fmt in (('ds1.h5', 'hdf5'), ('ds1/', 'dir-npy')):
            with dataset(target, 'w', fmt) as store:
                for key in expected_keys:
                    store[key] = np.random.randn(10)
                assert list(store.keys()) == expected_keys
def test_mdtraj_1():
    """Smoke-test opening ``*.pdb`` files with ``fmt='mdtraj'``.

    The glob under ``get_fn('')`` is opened twice -- once with no extra
    options and once with ``atom_indices=[1, 2]`` -- and the keys, the
    first item and the provenance are printed each time.  Nothing is
    asserted; the test only fails if one of the calls raises.
    """
    for extra_kwargs in ({}, {'atom_indices': [1, 2]}):
        ds = dataset(get_fn('') + '*.pdb', fmt='mdtraj', verbose=True,
                     **extra_kwargs)
        print(ds.keys())
        print(ds.get(0))
        print(ds.provenance)
def test_union_3():
    """A union of datasets with different item counts must raise ValueError.

    The first dataset receives two items and the second only one, so
    opening the pair as a list is expected to fail.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1/', 'w', 'dir-npy') as left:
            with dataset('ds2/', 'w', 'dir-npy') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(10)
                # Only one item on this side: uneven length!
                right[0] = np.random.randn(10, 4)

        with assert_raises(ValueError):
            dataset(['ds1', 'ds2'])
def test_hdf5_3():
    """Check that an HDF5 dataset gives the same sum when read in parallel.

    A two-item dataset is written and reduced with ``_sum_helper`` to get
    a reference value.  Five freshly opened handles on the same file are
    then pushed through ``Parallel(n_jobs=2)`` and every result must equal
    the reference.
    """
    n_readers = 5
    with tempdir():
        with dataset('ds.h5', 'w', 'hdf5') as ds:
            for key in (0, 1):
                ds[key] = np.random.randn(10)
            expected = _sum_helper(ds)

        # One read handle per task, opened lazily as tasks are consumed.
        readers = (dataset('ds.h5') for _ in range(n_readers))
        tasks = (delayed(_sum_helper)(reader) for reader in readers)
        results = Parallel(n_jobs=2)(tasks)

        assert all(result == expected for result in results)
def test_union_3():
    """A union of datasets with different item counts must raise ValueError.

    The first dataset receives two items and the second only one, so
    opening the pair as a list is expected to fail.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1/', 'w', 'dir-npy') as left:
            with dataset('ds2/', 'w', 'dir-npy') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(10)
                # Only one item on this side: uneven length!
                right[0] = np.random.randn(10, 4)

        with assert_raises(ValueError):
            dataset(['ds1', 'ds2'])
def test_union_2():
    """Derive a 'dir-npy' dataset from a union of two 'dir-npy' datasets.

    Both source datasets get two items each; the union is then opened from
    the list of paths and ``create_derived`` is called on it.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1/', 'w', 'dir-npy') as left:
            with dataset('ds2/', 'w', 'dir-npy') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(10)
                right[0] = np.random.randn(10, 4)
                right[1] = np.random.randn(10, 4)

        union = dataset(['ds1', 'ds2'])
        derived = union.create_derived('derived', fmt='dir-npy')
        provenance_lines = derived.provenance.split('\n')
        # NOTE(review): ``str.split`` always returns at least one element, so
        # this only checks that ``provenance`` is a string -- confirm intent.
        assert len(provenance_lines) > 0
def test_uneven_n():
    """``FeatureUnion.fit`` must reject datasets with different item counts.

    The first dataset holds two items, the second only one; fitting a
    non-normalizing ``FeatureUnion`` on the pair is expected to raise
    ``ValueError``.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1/', 'w', 'dir-npy') as left:
            with dataset('ds2/', 'w', 'dir-npy') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(5, 2)
                # Only one item on this side: uneven number of trajs!
                right[0] = np.random.randn(10, 4)

                union = FeatureUnion(normalize=False)
                with assert_raises(ValueError):
                    union.fit((left, right))
def test_hdf5_3():
    """Check that an HDF5 dataset gives the same sum when read in parallel.

    A two-item dataset is written and reduced with ``_sum_helper`` to get
    a reference value.  Five freshly opened handles on the same file are
    then pushed through ``Parallel(n_jobs=2)`` and every result must equal
    the reference.
    """
    n_readers = 5
    with tempdir():
        with dataset('ds.h5', 'w', 'hdf5') as ds:
            for key in (0, 1):
                ds[key] = np.random.randn(10)
            expected = _sum_helper(ds)

        # One read handle per task, opened lazily as tasks are consumed.
        readers = (dataset('ds.h5') for _ in range(n_readers))
        tasks = (delayed(_sum_helper)(reader) for reader in readers)
        results = Parallel(n_jobs=2)(tasks)

        assert all(result == expected for result in results)
def test_union_2():
    """Derive a 'dir-npy' dataset from a union of two 'dir-npy' datasets.

    Both source datasets get two items each; the union is then opened from
    the list of paths and ``create_derived`` is called on it.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1/', 'w', 'dir-npy') as left:
            with dataset('ds2/', 'w', 'dir-npy') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(10)
                right[0] = np.random.randn(10, 4)
                right[1] = np.random.randn(10, 4)

        union = dataset(['ds1', 'ds2'])
        derived = union.create_derived('derived', fmt='dir-npy')
        provenance_lines = derived.provenance.split('\n')
        # NOTE(review): ``str.split`` always returns at least one element, so
        # this only checks that ``provenance`` is a string -- confirm intent.
        assert len(provenance_lines) > 0
def test_assign(self):
    """Run ``msmb SolventShellsAssigner`` and compare its output to a table.

    The command is run with 3 shells of width 1 in chunks of 2, writing to
    ``self.outfn``; item 0 of the resulting dataset must equal the
    hard-coded ``should_be`` array exactly.
    """
    command = [
        'msmb', 'SolventShellsAssigner',
        '--trjs', self.traj_fn,
        '--solute_indices', self.ute_fn,
        '--solvent_indices', self.vent_fn,
        '--n_shells', '3',
        '--shell_width', '1',
        '--out', self.outfn,
        '--chunk', '2'
    ]
    # The handle becomes the child's stdout/stderr, so it has to be opened
    # for writing; a read-only handle makes the child's writes fail.
    with open(os.devnull, 'w') as dn:
        subprocess.call(command, stdout=dn, stderr=dn)

    data = dataset(self.outfn)[0]

    should_be = np.array([
        [0, 0, 0, 0],
        [0, 1, 0, 0],
        [1, 0, 0, 1],
        [1, 1, 0, 1],
        [2, 0, 0, 2],
        [2, 1, 0, 2],
        # 3
        # 4
        [5, 1, 0, 0],
        [6, 1, 0, 1],
        [7, 1, 0, 2],
        # 8
    ])

    np.testing.assert_array_equal(data, should_be)
def test_assign(self):
    """Run ``msmb SolventShellsAssigner`` and compare its output to a table.

    The command is run with 3 shells of width 1 in chunks of 2, writing to
    ``self.outfn``; item 0 of the resulting dataset must equal the
    hard-coded ``should_be`` array exactly.
    """
    command = [
        'msmb', 'SolventShellsAssigner',
        '--trjs', self.traj_fn,
        '--solute_indices', self.ute_fn,
        '--solvent_indices', self.vent_fn,
        '--n_shells', '3',
        '--shell_width', '1',
        '--out', self.outfn,
        '--chunk', '2'
    ]
    # The handle becomes the child's stdout/stderr, so it has to be opened
    # for writing; a read-only handle makes the child's writes fail.
    with open(os.devnull, 'w') as dn:
        subprocess.call(command, stdout=dn, stderr=dn)

    data = dataset(self.outfn)[0]

    should_be = np.array([
        [0, 0, 0, 0],
        [0, 1, 0, 0],
        [1, 0, 0, 1],
        [1, 1, 0, 1],
        [2, 0, 0, 2],
        [2, 1, 0, 2],
        # 3
        # 4
        [5, 1, 0, 0],
        [6, 1, 0, 1],
        [7, 1, 0, 2],
        # 8
    ])

    np.testing.assert_array_equal(data, should_be)
def test_1():
    """Exercise assignment, lookup, length and iteration on a 'dir-npy' dataset.

    Checks the files created by the first write, that reading a missing key
    raises ``IndexError``, and that three stored arrays round-trip both by
    key and by iteration.
    """
    # Reserve a unique path, then remove it so that nothing exists there
    # when the dataset is opened in 'w' mode.
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        X = np.random.randn(10, 2)
        ds = dataset(path, 'w', 'dir-npy')
        ds[0] = X
        assert set(os.listdir(path)) == set(('PROVENANCE.txt', '00000000.npy'))
        np.testing.assert_array_equal(ds[0], X)
        assert_raises(IndexError, lambda: ds[1])
        assert len(ds) == 1

        Y = np.zeros((10, 1))
        Z = np.ones((2, 2))
        ds[1] = Y
        ds[2] = Z
        np.testing.assert_array_equal(ds[1], Y)
        np.testing.assert_array_equal(ds[2], Z)
        assert len(ds) == 3

        for i, item in enumerate(ds):
            np.testing.assert_array_equal(item, [X, Y, Z][i])
    finally:
        # Best-effort cleanup: if the directory was never created, a failing
        # rmtree must not hide the error raised in the body above.
        shutil.rmtree(path, ignore_errors=True)
def test_2():
    """Check ``create_derived`` on a 'dir-npy' dataset.

    The parent and the derived dataset must hold independent items, and the
    derived dataset's provenance must contain exactly two lines starting
    with ``' Command'``.
    """
    # Reserve two unique paths, then remove them so that nothing exists
    # there when the datasets are created.
    path1 = tempfile.mkdtemp()
    path2 = tempfile.mkdtemp()
    shutil.rmtree(path1)
    shutil.rmtree(path2)
    try:
        X = np.random.randn(10, 2)
        Y = np.random.randn(10, 2)
        ds1 = dataset(path1, 'w', 'dir-npy')
        ds1[0] = X

        ds2 = ds1.create_derived(path2)
        ds2[0] = Y

        np.testing.assert_array_equal(ds1[0], X)
        np.testing.assert_array_equal(ds2[0], Y)
        assert len(ds1) == 1
        assert len(ds2) == 1

        prov2 = ds2.provenance
        print(prov2)
        assert 2 == sum([s.startswith(' Command') for s in prov2.splitlines()])
    finally:
        # Best-effort cleanup: a missing directory must neither hide the
        # error raised above nor prevent the second path from being removed.
        shutil.rmtree(path1, ignore_errors=True)
        shutil.rmtree(path2, ignore_errors=True)
def test_2():
    """Check ``create_derived`` on a 'dir-npy' dataset.

    The parent and the derived dataset must hold independent items, and the
    derived dataset's provenance must contain exactly two lines starting
    with ``' Command'``.
    """
    # Reserve two unique paths, then remove them so that nothing exists
    # there when the datasets are created.
    path1 = tempfile.mkdtemp()
    path2 = tempfile.mkdtemp()
    shutil.rmtree(path1)
    shutil.rmtree(path2)
    try:
        X = np.random.randn(10, 2)
        Y = np.random.randn(10, 2)
        ds1 = dataset(path1, 'w', 'dir-npy')
        ds1[0] = X

        ds2 = ds1.create_derived(path2)
        ds2[0] = Y

        np.testing.assert_array_equal(ds1[0], X)
        np.testing.assert_array_equal(ds2[0], Y)
        assert len(ds1) == 1
        assert len(ds2) == 1

        prov2 = ds2.provenance
        print(prov2)
        assert 2 == sum(
            [s.startswith(' Command') for s in prov2.splitlines()])
    finally:
        # Best-effort cleanup: a missing directory must neither hide the
        # error raised above nor prevent the second path from being removed.
        shutil.rmtree(path1, ignore_errors=True)
        shutil.rmtree(path2, ignore_errors=True)
def test_partial_transform(self):
    """Run ``msmb SolventShellsFeaturizer`` and compare against known counts.

    The command is run with 3 shells of width 1, writing to ``self.outfn``;
    item 0 of the resulting dataset must equal the hard-coded count table
    divided column-wise by ``4 * pi * r ** 2`` for r in 0.5, 1.5, 2.5.
    """
    command = [
        'msmb', 'SolventShellsFeaturizer',
        '--trjs', self.traj_fn,
        '--solute_indices', self.ute_fn,
        '--solvent_indices', self.vent_fn,
        '--n_shells', '3',
        '--shell_width', '1',
        '--out', self.outfn
    ]
    # The handle becomes the child's stdout/stderr, so it has to be opened
    # for writing; a read-only handle makes the child's writes fail.
    with open(os.devnull, 'w') as dn:
        subprocess.call(command, stdout=dn, stderr=dn)

    data = dataset(self.outfn)[0]

    norm = np.asarray([4 * np.pi * r ** 2 for r in [0.5, 1.5, 2.5]])
    should_be = np.array([
        [2, 0, 0],
        [0, 2, 0],
        [0, 0, 2],
        [0, 0, 0],
        [0, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 0, 0]
    ]) / norm

    np.testing.assert_array_equal(data, should_be)
def start(self):
    """Transform (trajectory, assignment) pairs into a new 'dir-npy' dataset.

    Reports an error through ``self.error`` if ``self.out`` already exists.
    Trajectories are opened with ``MDTrajDataset`` (using ``self.top`` as
    the topology only when that path exists), paired key-by-key with the
    assignments dataset, and each pair is passed to
    ``self.instance.partial_transform``.  Results are stored under the
    assignment key in a dataset derived from the assignments.
    """
    if os.path.exists(self.out):
        self.error('File exists: %s' % self.out)

    print(self.instance)

    # Fall back to no topology when the expanded path is not on disk.
    expanded_top = os.path.expanduser(self.top)
    top = expanded_top if os.path.exists(expanded_top) else None

    traj_dataset = MDTrajDataset(self.trjs, topology=top,
                                 stride=self.instance.stride, verbose=False)

    with dataset(self.assignments, mode='r') as assn_dataset:
        out_dataset = assn_dataset.create_derived(self.out, fmt='dir-npy')
        progress = ProgressBar(widgets=[Percentage(), Bar(), ETA()],
                               maxval=len(assn_dataset)).start()
        key_pairs = zip(traj_dataset.keys(), assn_dataset.keys())
        for tr_key, as_key in progress(key_pairs):
            pair = (traj_dataset[tr_key], assn_dataset[as_key])
            out_dataset[as_key] = self.instance.partial_transform(pair)
        out_dataset.close()

    closing_messages = (
        "\nSaving transformed dataset to '%s'" % self.out,
        "To load this dataset interactive inside an IPython",
        "shell or notebook, run\n",
        " $ ipython",
        " >>> from msmbuilder.dataset import dataset",
        " >>> ds = dataset('%s')\n" % self.out,
    )
    for message in closing_messages:
        print(message)
def save_dataset(data, path):
    """Write ``data`` to a fresh 'dir-npy' dataset at ``path``.

    Anything that already exists at ``path`` is deleted first.  Item ``i``
    of ``data`` is stored under key ``i`` for ``i`` in
    ``0 .. len(data) - 1``.

    Parameters
    ----------
    data : indexable with ``len``
        Items to store.
    path : str
        Destination of the dataset.
    """
    import shutil  # local import: the file-level imports are not in view

    if os.path.exists(path):
        # Remove through the filesystem API rather than a shell-interpolated
        # ``rm -rf``, which breaks on paths with spaces or metacharacters.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)

    ds = dataset(path, 'w', 'dir-npy')
    try:
        for i in range(len(data)):
            ds[i] = data[i]
    finally:
        # Close the dataset even when a write fails part-way through.
        ds.close()
def featurizing_the_conformations(featurizer, xtc_traj_folder, traj_list_array, pdb_name):
    """Featurize each trajectory file and collect the first transformed item.

    Dihedral, RMSD or pairwise-distance featurizers are the usual choice
    for MSM construction.

    For every name in ``traj_list_array`` a dataset is opened with
    ``pdb_name`` as topology, ``featurizer.fit_transform`` is applied, and
    element 0 of the result is appended to the returned list.

    NOTE(review): ``xtc_traj_folder`` is never read; paths are built from
    ``trajectory_dir``, which is not defined in this function -- confirm
    which one is intended.
    """
    output_features = []
    for trajfile in traj_list_array:
        # Coordinates for the frames of this trajectory.
        xyz = dataset(trajectory_dir + trajfile,
                      topology=trajectory_dir + pdb_name)
        # Turn the coordinates into features.
        transformed = featurizer.fit_transform(xyz)
        first_item = transformed[0]
        print("loaded %s into (%d,%d) dimensional file"
              % (trajfile, len(first_item), len(first_item[0])))
        output_features.append(first_item)
    return output_features
def test_transform_command_1():
    """Cluster with ``msmb KCenters`` and re-apply with ``TransformDataset``.

    The labels stored on the pickled model for trajectory 0 must equal
    item 0 of the transformed dataset.
    """
    cluster_cmd = ("msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
                   "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
                   "--metric rmsd")
    transform_cmd = ("msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
                     "-m model.pkl -t transformed.h5 --top "
                     "{data_home}/alanine_dipeptide/ala2.pdb")
    with tempdir():
        shell(cluster_cmd.format(data_home=get_data_home()))
        shell(transform_cmd.format(data_home=get_data_home()))

        eq(dataset('transformed.h5')[0], load('model.pkl').labels_[0])
def test_append_dirnpy():
    """Check that mode 'a' adds to and overwrites items in a 'dir-npy' dataset.

    Key 0 is written in 'w' mode, keys 1 and 2 are added in separate 'a'
    sessions, key 2 is then overwritten, and slicing the whole dataset must
    agree with per-key access.
    """
    # Reserve a unique path, then remove it so that nothing exists there
    # when the dataset is first opened.
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        with dataset(path, 'w', 'dir-npy') as ds:
            ds[0] = np.random.randn(10, 2)

        for new_key in (1, 2):
            with dataset(path, 'a', 'dir-npy') as ds:
                ds[new_key] = np.random.randn(10, 2)

        with dataset(path, 'a', 'dir-npy') as ds:
            # Overwrite
            ds[2] = np.random.randn(10, 2)

            for key in range(3):
                np.testing.assert_array_equal(ds[:][key], ds[key])
    finally:
        shutil.rmtree(path)
def test_hdf5_1():
    """Exercise basic reads and writes on an 'hdf5' dataset.

    Covers ``provenance``, ``get``, ``keys``, ``len``, overwriting key 0,
    full slicing, and reopening the file after ``close``.
    """
    with tempdir():
        store = dataset('ds.h5', 'w', 'hdf5')
        print(store.provenance)

        store[0] = np.zeros(10)
        np.testing.assert_array_equal(store.get(0), np.zeros(10))
        assert list(store.keys()) == [0]
        assert len(store) == 1

        # Key 0 is overwritten here; item ``k`` gets ``k + 1`` columns.
        for key in range(3):
            store[key] = np.random.randn(10, key + 1)
        for key in range(3):
            np.testing.assert_array_equal(store[:][key], store[key])

        store.close()

        with dataset('ds.h5') as reopened:
            assert reopened[0].shape == (10, 1)
def test_atom_pairs_featurizer():
    """Run ``msmb AtomPairsFeaturizer`` on pair indices from ``AtomIndices``.

    The output dataset must contain 10 items, and the number of feature
    columns must equal the number of rows in ``all.txt``.
    """
    with tempdir():
        # Plain call: the original trailing comma only wrapped the result in
        # a throwaway tuple.
        shell('msmb AtomIndices -o all.txt --all -d -p %s/alanine_dipeptide/ala2.pdb' % get_data_home())
        shell("msmb AtomPairsFeaturizer --trjs '{data_home}/alanine_dipeptide/*.dcd'"
              " --out pairs --pair_indices all.txt"
              " --top {data_home}/alanine_dipeptide/ala2.pdb".format(
                  data_home=get_data_home()))
        ds = dataset('pairs')
        assert len(ds) == 10
        # The original squared the loaded array element-wise before taking
        # ``len``, which cannot change the length; compare directly.
        assert ds[0].shape[1] == len(np.loadtxt('all.txt'))
        print(ds.provenance)
def test_union():
    """Opening a list of HDF5 datasets must concatenate their columns.

    Two datasets with two items each are written; the union opened from
    both paths must have two items of shapes (10, 6) and (10, 5) whose row
    sums match the sums of the parts.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1.h5', 'w', 'hdf5') as left:
            with dataset('ds2.h5', 'w', 'hdf5') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(10)
                right[0] = np.random.randn(10, 4)
                right[1] = np.random.randn(10, 4)

                # Reference row sums, computed while both files are open.
                rs1 = np.sum(left[0], axis=1) + np.sum(right[0], axis=1)
                rs2 = left[1] + np.sum(right[1], axis=1)

        union = dataset(['ds1.h5', 'ds2.h5'])
        assert len(union) == 2
        assert union[0].shape == (10, 6)
        assert union[1].shape == (10, 5)
        for key, expected in ((0, rs1), (1, rs2)):
            np.testing.assert_array_almost_equal(
                np.sum(union[key], axis=1), expected)
def test_4():
    """``get(..., mmap=True)`` on a 'dir-npy' dataset must return a memmap.

    The memory-mapped value must also compare equal to regular item access.
    """
    # Reserve a unique path, then remove it so that nothing exists there
    # when the dataset is opened.
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        store = dataset(path, 'w', 'dir-npy')
        store[0] = np.random.randn(10, 2)

        mapped = store.get(0, mmap=True)
        assert isinstance(mapped, np.memmap)
        np.testing.assert_array_equal(store[0], mapped)
        del mapped  # close the underlying file
    finally:
        shutil.rmtree(path)
def test_union():
    """Opening a list of HDF5 datasets must concatenate their columns.

    Two datasets with two items each are written; the union opened from
    both paths must have two items of shapes (10, 6) and (10, 5) whose row
    sums match the sums of the parts.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1.h5', 'w', 'hdf5') as left:
            with dataset('ds2.h5', 'w', 'hdf5') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(10)
                right[0] = np.random.randn(10, 4)
                right[1] = np.random.randn(10, 4)

                # Reference row sums, computed while both files are open.
                rs1 = np.sum(left[0], axis=1) + np.sum(right[0], axis=1)
                rs2 = left[1] + np.sum(right[1], axis=1)

        union = dataset(['ds1.h5', 'ds2.h5'])
        assert len(union) == 2
        assert union[0].shape == (10, 6)
        assert union[1].shape == (10, 5)
        for key, expected in ((0, rs1), (1, rs2)):
            np.testing.assert_array_almost_equal(
                np.sum(union[key], axis=1), expected)
def Get_rawposition_features_villin():
    """Featurize the ``*.xtc`` trajectories with ``RawPositionsFeaturizer``.

    Changes the working directory to the hard-coded trajectory folder,
    deletes an existing ``./rawpositions`` directory, loads the
    trajectories with stride 5 and returns the result of
    ``fit_transform_with`` written to ``rawpositions/`` as 'dir-npy'.
    """
    import os
    import shutil

    os.chdir('/homes/anuginueni/traj_villin')
    stale_output = './rawpositions'
    if os.path.isdir(stale_output):
        shutil.rmtree(stale_output)

    from msmbuilder.dataset import dataset
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology='/homes/anuginueni/traj_villin/filtered.pdb',
                  stride=5)

    from msmbuilder.featurizer import RawPositionsFeaturizer
    raw_featurizer = RawPositionsFeaturizer()
    return xyz.fit_transform_with(raw_featurizer, 'rawpositions/',
                                  fmt='dir-npy')
def test_superpose_featurizer():
    """Run ``msmb SuperposeFeaturizer`` on indices from ``AtomIndices``.

    The output dataset must contain 10 items, and the number of feature
    columns must equal the number of entries in ``all.txt``.
    """
    indices_cmd = 'msmb AtomIndices -o all.txt --all -a -p %s/alanine_dipeptide/ala2.pdb'
    featurize_cmd = ("msmb SuperposeFeaturizer --trjs '{data_home}/alanine_dipeptide/*.dcd'"
                     " --out distances --atom_indices all.txt"
                     " --reference_traj {data_home}/alanine_dipeptide/ala2.pdb"
                     " --top {data_home}/alanine_dipeptide/ala2.pdb")
    with tempdir():
        shell(indices_cmd % get_data_home())
        shell(featurize_cmd.format(data_home=get_data_home()))

        distances = dataset('distances')
        assert len(distances) == 10
        n_indices = len(np.loadtxt('all.txt'))
        assert distances[0].shape[1] == n_indices
        print(distances.provenance)
def test_atom_pairs_featurizer():
    """Run ``msmb AtomPairsFeaturizer`` on pair indices from ``AtomIndices``.

    The output dataset must contain 10 items, and the number of feature
    columns must equal the number of rows in ``all.txt``.
    """
    with tempdir():
        # Plain call: the original trailing comma only wrapped the result in
        # a throwaway tuple.
        shell('msmb AtomIndices -o all.txt --all -d -p '
              '%s/alanine_dipeptide/ala2.pdb' % get_data_home())
        shell("msmb AtomPairsFeaturizer "
              "--trjs '{data_home}/alanine_dipeptide/*.dcd'"
              " --transformed pairs --pair_indices all.txt"
              " --top {data_home}/alanine_dipeptide/ala2.pdb"
              .format(data_home=get_data_home()))
        ds = dataset('pairs')
        assert len(ds) == 10
        # The original squared the loaded array element-wise before taking
        # ``len``, which cannot change the length; compare directly.
        assert ds[0].shape[1] == len(np.loadtxt('all.txt'))
        print(ds.provenance)
def test_dataset():
    """``FeatureUnion.fit_transform`` must column-stack two HDF5 datasets.

    Two datasets with matching item lengths (10 and 5 rows) are combined by
    a non-normalizing ``FeatureUnion``; the result must have two items of
    shapes (10, 6) and (5, 6) whose row sums match the sums of the parts.
    """
    with tempdir():
        # Nested form of ``with a, b:``.
        with dataset('ds1.h5', 'w', 'hdf5') as left:
            with dataset('ds2.h5', 'w', 'hdf5') as right:
                left[0] = np.random.randn(10, 2)
                left[1] = np.random.randn(5, 2)
                right[0] = np.random.randn(10, 4)
                right[1] = np.random.randn(5, 4)

                # Reference row sums of the separate parts.
                rs1 = np.sum(left[0], axis=1) + np.sum(right[0], axis=1)
                rs2 = np.sum(left[1], axis=1) + np.sum(right[1], axis=1)

                union = FeatureUnion(normalize=False)
                combined = union.fit_transform((left, right))

                assert len(combined) == 2
                assert combined[0].shape == (10, 6)
                assert combined[1].shape == (5, 6)
                for key, expected in ((0, rs1), (1, rs2)):
                    np.testing.assert_array_almost_equal(
                        np.sum(combined[key], axis=1), expected)
def start(self):
    """Export a one-item dataset to text and write a matching Tcl script.

    Item 0 of the dataset at ``self.dataset`` is saved to
    ``<out_prefix>.txt`` with five decimals, and ``<out_prefix>.tcl`` is
    written from the ``VMDSCRIPT`` template filled with the trajectory,
    stride, topology and data-file names.

    Raises
    ------
    AssertionError
        If the dataset does not contain exactly one item.
    """
    dat_fn = "{}.txt".format(self.out_prefix)
    tcl_fn = "{}.tcl".format(self.out_prefix)

    # Use the dataset as a context manager so the handle is closed; the
    # original rebound ``ds`` to the first item and never closed it.
    with dataset(self.dataset, mode='r') as ds:
        assert len(ds) == 1, "Only support one at a time for now"
        np.savetxt(dat_fn, ds[0], fmt="%.5f")

    with open(tcl_fn, 'w') as f:
        f.write(VMDSCRIPT.format(
            traj_fn=self.traj,
            step=self.stride,
            top_fn=self.top,
            dat_fn=dat_fn
        ))
def test_superpose_featurizer():
    """Run ``msmb SuperposeFeaturizer`` on indices from ``AtomIndices``.

    The output dataset must contain 10 items, and the number of feature
    columns must equal the number of entries in ``all.txt``.
    """
    indices_cmd = ('msmb AtomIndices -o all.txt --all -a -p '
                   '%s/alanine_dipeptide/ala2.pdb')
    featurize_cmd = ("msmb SuperposeFeaturizer "
                     "--trjs '{data_home}/alanine_dipeptide/*.dcd'"
                     " --transformed distances --atom_indices all.txt"
                     " --reference_traj {data_home}/alanine_dipeptide/ala2.pdb"
                     " --top {data_home}/alanine_dipeptide/ala2.pdb")
    with tempdir():
        shell(indices_cmd % get_data_home())
        shell(featurize_cmd.format(data_home=get_data_home()))

        distances = dataset('distances')
        assert len(distances) == 10
        n_indices = len(np.loadtxt('all.txt'))
        assert distances[0].shape[1] == n_indices
        print(distances.provenance)
def main():
    """Count, per frame, how many of the target's CA-CA contacts are formed.

    Command-line arguments: ``pdbpath`` (trajectories, opened with
    ``dataset``) and ``target`` (a structure loaded with ``mdtraj``).
    Contacts of the target with a CA distance of at most 0.75 are taken as
    native.  For every frame the number of those pairs within the same
    cutoff is written to
    ``<target>.<pdbpath>.number_native_contact.dat`` as
    ``total formed fraction``.
    """
    import argparse
    import textwrap

    cutoff = 0.75  # distance threshold applied to target and frames alike

    parser = argparse.ArgumentParser(
        usage=textwrap.dedent(
            '''Use "python %(prog)s -h" for more information.'''),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        'pdbpath',
        help=textwrap.dedent('''[required] Path to pdb trajectories.'''))
    parser.add_argument(
        'target',
        help=textwrap.dedent('''[required] Path to target pdb. Note: The target pdb should have the same number of atoms in structure with that in pdb trajectories. '''))
    args = parser.parse_args()

    from msmbuilder.dataset import dataset
    coords = dataset(args.pdbpath)
    # print() with one pre-formatted argument behaves the same on Python 2
    # and 3; the original print statements are a SyntaxError on Python 3.
    print('%i trajectories found. ' % len(coords))

    ## featurize
    # NOTE(review): the result was never used in the original either; the
    # call is kept in case it has side effects -- confirm it can be dropped.
    featurize_trajectories(coords, 'ContactFeaturizer')

    import mdtraj as md
    target = md.load(args.target)
    native_contact_dists, native_contact_pairs = md.compute_contacts(
        target, scheme='ca')
    native_contact_pairs = native_contact_pairs[np.where(
        native_contact_dists[0] <= cutoff)]
    n_native_contact = len(native_contact_pairs)
    print("Target structure has %i pairs of CA-CA contact in total. \n"
          % n_native_contact)

    from msmbuilder.featurizer import ContactFeaturizer
    native_contact_to_target = np.concatenate(
        ContactFeaturizer(
            contacts=native_contact_pairs,
            scheme='ca').fit_transform(coords))  # (n_samples, n_pairs)
    # 1 where the pair is within the cutoff, 0 otherwise.
    native_contact_to_target = np.select(
        [native_contact_to_target <= cutoff,
         native_contact_to_target > cutoff],
        [1, 0])
    native_contact_to_target = np.sum(native_contact_to_target, axis=1)

    out_name = '%s.%s.number_native_contact.dat' % (
        get_basename_no_ext(args.target), get_basename_no_ext(args.pdbpath))
    with open(out_name, 'w') as f:
        for e in native_contact_to_target:
            # Same output as ``print >> f, ...``: the text plus a newline.
            f.write('%i %i %.3f\n'
                    % (n_native_contact, e, e * 1. / n_native_contact))
def test_3():
    """Full slicing of a 'dir-npy' dataset must agree with per-key access."""
    # Reserve a unique path, then remove it so that nothing exists there
    # when the dataset is opened.
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        store = dataset(path, 'w', 'dir-npy')
        for key in range(3):
            store[key] = np.random.randn(10, 2)

        for key in range(3):
            np.testing.assert_array_equal(store[:][key], store[key])
    finally:
        shutil.rmtree(path)
def test_transform_command_1():
    """Cluster with ``msmb KCenters`` and re-apply with ``TransformDataset``.

    The labels stored on the pickled model for trajectory 0 must equal
    item 0 of the transformed dataset.  A second temporary directory then
    runs ``KCenters`` on a single trajectory file.
    """
    cluster_all_cmd = ("msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
                       "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
                       "--metric rmsd")
    transform_cmd = ("msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
                     "-m model.pkl -t transformed.h5 --top "
                     "{data_home}/alanine_dipeptide/ala2.pdb")
    cluster_one_cmd = ("msmb KCenters -i {data_home}/alanine_dipeptide/trajectory-0.dcd "
                       "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
                       "--metric rmsd")

    with tempdir():
        shell(cluster_all_cmd.format(data_home=get_data_home()))
        shell(transform_cmd.format(data_home=get_data_home()))

        eq(dataset('transformed.h5')[0], load('model.pkl').labels_[0])

    with tempdir():
        shell(cluster_one_cmd.format(data_home=get_data_home()))
def Get_dihedral_features_villin():
    """Featurize the ``*.xtc`` trajectories with phi/psi dihedrals.

    Changes the working directory to the hard-coded trajectory folder,
    deletes an existing ``./diheds`` directory, loads the trajectories with
    stride 5 and writes the features to ``diheds/`` as 'dir-npy'.  The
    ``'resids'`` entry of every feature description (computed on one
    reference trajectory) is printed, and the feature dataset is returned.
    """
    import os
    import shutil
    import mdtraj as md

    os.chdir('/homes/anuginueni/traj_villin')
    stale_output = './diheds'
    if os.path.isdir(stale_output):
        shutil.rmtree(stale_output)

    from msmbuilder.dataset import dataset
    # Single trajectory used only for describing the features below.
    reference_traj = md.load(
        "/homes/anuginueni/traj_villin/trajectory-331.xtc",
        top='/homes/anuginueni/traj_villin/filtered.pdb',
        stride=5)
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology='/homes/anuginueni/traj_villin/filtered.pdb',
                  stride=5)

    from msmbuilder.featurizer import DihedralFeaturizer
    featurizer = DihedralFeaturizer(types=['phi', 'psi'])
    diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')

    descriptions = featurizer.describe_features(reference_traj)
    resids = [entry['resids'] for entry in descriptions]
    print(str(resids))
    return diheds
def Get_contacts_features_villin():
    """Featurize the ``*.xtc`` trajectories with CA contacts.

    Changes the working directory to the hard-coded trajectory folder,
    deletes an existing ``./contacts`` directory and loads the trajectories
    with stride 5.  The ``'resids'`` entry of every feature description
    (computed on one reference trajectory) is printed before the features
    are written to ``contacts/`` as 'dir-npy' and returned.
    """
    import os
    import shutil
    import mdtraj as md

    os.chdir('/homes/anuginueni/traj_villin')
    stale_output = './contacts'
    if os.path.isdir(stale_output):
        shutil.rmtree(stale_output)

    from msmbuilder.dataset import dataset
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology='/homes/anuginueni/traj_villin/filtered.pdb',
                  stride=5)
    # Single trajectory used only for describing the features below.
    reference_traj = md.load(
        "/homes/anuginueni/traj_villin/trajectory-331.xtc",
        top='/homes/anuginueni/traj_villin/filtered.pdb',
        stride=5)

    from msmbuilder.featurizer import ContactFeaturizer
    featurizer = ContactFeaturizer(scheme='ca')

    descriptions = featurizer.describe_features(reference_traj)
    resids = [entry['resids'] for entry in descriptions]
    print(str(resids))

    return xyz.fit_transform_with(featurizer, 'contacts/', fmt='dir-npy')
def test_items():
    """``items()`` on an HDF5 dataset must yield (key, value) pairs in order.

    The dataset is written with the non-contiguous keys 0, 1 and 5.
    """
    with tempdir():
        store = dataset('ds.h5', 'w', 'hdf5')
        store[0] = np.random.randn(10, 1)
        store[1] = np.random.randn(10, 2)
        store[5] = np.random.randn(10, 3)

        # NOTE!
        # store[:] does not work for non-contiguous keys.
        expected_keys = [0, 1, 5]

        for position, (key, value) in enumerate(store.items()):
            assert key == expected_keys[position]
            np.testing.assert_array_equal(store[key], value)

        store.close()
def test_MSMBuilderDatasetLoader_1():
    """``MSMBuilderDatasetLoader.load`` must return the stored data and no labels.

    One array is written to a 'dir-npy' dataset; the loader pointed at the
    same path must return ``(X, None)`` with ``X[0]`` equal to that array.
    """
    from msmbuilder.dataset import dataset

    # Reserve a unique path, then remove it so that nothing exists there
    # when the dataset is opened.
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        stored = np.random.randn(10, 2)
        store = dataset(path, 'w', 'dir-npy')
        store[0] = stored

        loader = MSMBuilderDatasetLoader(path, fmt='dir-npy')
        X, y = loader.load()

        assert np.all(X[0] == stored)
        assert y is None
    finally:
        shutil.rmtree(path)
def Get_combined_features_villin():
    """Featurize the ``*.xtc`` trajectories with dihedrals and contacts together.

    A ``FeatureSelector`` is built from a default ``DihedralFeaturizer``
    and a default ``ContactFeaturizer``.  After changing into the
    hard-coded trajectory folder and deleting an existing ``combined``
    directory, the trajectories are loaded with stride 5 and the features
    are written to that directory as 'dir-npy' and returned.
    """
    from msmbuilder.featurizer import DihedralFeaturizer
    from msmbuilder.featurizer import ContactFeaturizer

    dihedral_featurizer = DihedralFeaturizer()
    contact_featurizer = ContactFeaturizer()
    named_featurizers = [("di_villin", dihedral_featurizer),
                         ("con_villin", contact_featurizer)]

    import os
    import shutil

    os.chdir('/homes/anuginueni/traj_villin')
    stale_output = '/homes/anuginueni/traj_villin/combined'
    if os.path.isdir(stale_output):
        shutil.rmtree(stale_output)

    from msmbuilder.dataset import dataset
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology='/homes/anuginueni/traj_villin/filtered.pdb',
                  stride=5)

    from msmbuilder.feature_selection import FeatureSelector
    selector = FeatureSelector(named_featurizers)
    return xyz.fit_transform_with(
        selector, '/homes/anuginueni/traj_villin/combined/', fmt='dir-npy')
def test_partial_transform(self):
    """Run ``msmb SolventShellsFeaturizer`` and compare against known counts.

    The command is run with 3 shells of width 1, writing to ``self.outfn``;
    item 0 of the resulting dataset must equal the hard-coded count table
    divided column-wise by ``4 * pi * r**2`` for r in 0.5, 1.5, 2.5.
    """
    command = [
        'msmb', 'SolventShellsFeaturizer',
        '--trjs', self.traj_fn,
        '--solute_indices', self.ute_fn,
        '--solvent_indices', self.vent_fn,
        '--n_shells', '3',
        '--shell_width', '1',
        '--out', self.outfn
    ]
    # The handle becomes the child's stdout/stderr, so it has to be opened
    # for writing; a read-only handle makes the child's writes fail.
    with open(os.devnull, 'w') as dn:
        subprocess.call(command, stdout=dn, stderr=dn)

    data = dataset(self.outfn)[0]

    norm = np.asarray([4 * np.pi * r**2 for r in [0.5, 1.5, 2.5]])
    should_be = np.array([[2, 0, 0],
                          [0, 2, 0],
                          [0, 0, 2],
                          [0, 0, 0],
                          [0, 0, 0],
                          [1, 0, 0],
                          [0, 1, 0],
                          [0, 0, 1],
                          [0, 0, 0]]) / norm

    np.testing.assert_array_equal(data, should_be)
def test_items():
    """Check ``items()`` and full slicing on an HDF5 dataset with keys 0, 1, 5.

    ``items()`` must yield the keys in that order with matching values, and
    positions 0, 1, 2 of ``ds[:]`` must correspond to keys 0, 1, 5.
    """
    with tempdir():
        store = dataset('ds.h5', 'w', 'hdf5')
        store[0] = np.random.randn(10, 1)
        store[1] = np.random.randn(10, 2)
        store[5] = np.random.randn(10, 3)

        expected_keys = [0, 1, 5]

        for position, (key, value) in enumerate(store.items()):
            assert key == expected_keys[position]
            np.testing.assert_array_equal(store[key], value)

        # Positional slices line up with the sorted, non-contiguous keys.
        for position, key in ((0, 0), (1, 1), (2, 5)):
            np.testing.assert_array_equal(store[:][position], store[key])

        store.close()
import pandas as pd
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline
import mdtraj as md

# Lists the dihedral features with the five smallest and five largest entries
# in column 0 of the tICA model's ``eigenvectors_``.
# NOTE(review): ``np`` and ``argsort`` are not imported in this snippet;
# presumably it runs in a pylab-style interactive namespace -- confirm.

tica_lagtime = 1600

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
t0 = trajectories[0][0]  # first frame of the first trajectory
dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d.h5" % tica_lagtime)
Xf = np.concatenate(X)

tica_model = utils.load("./tica/tica%d.pkl" % tica_lagtime)
dih_model = utils.load("./dihedrals/model.pkl")

d = dih_model.describe_features(t0)
d = pd.DataFrame(d)

# ``DataFrame.ix`` was removed in pandas 1.0.  ``argsort`` yields row
# positions, so positional ``.iloc`` selects the same rows.
d.iloc[argsort(tica_model.eigenvectors_[:, 0])[0:5]]
d.iloc[argsort(tica_model.eigenvectors_[:, 0])[-5:]]
def test_hdf5_2():
    """``create_derived`` on an open HDF5 dataset must give a usable dataset.

    The derived dataset's provenance is printed and it is closed again
    while the parent is still open.
    """
    with tempdir():
        with dataset('ds.h5', 'w', 'hdf5') as parent:
            derived = parent.create_derived('ds2.h5')
            print(derived.provenance)
            derived.close()
def load(self):
    """Open the dataset at ``self.path`` read-only and return ``(dataset, None)``.

    The dataset's provenance is printed before returning.  The second
    element of the returned pair is always ``None``.
    """
    from msmbuilder.dataset import dataset

    opened = dataset(self.path, mode='r', fmt=self.fmt,
                     verbose=self.verbose)
    print('Dataset provenance:\n')
    print(opened.provenance)
    return opened, None
import mdtraj as md
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

# Projects the first 50 frames of one trajectory through the saved
# dihedral + tICA models and overlays them, numbered, on a hexbin of the
# first two columns of the stored tICA data.
# NOTE(review): ``np``, ``hexbin``, ``plot``, ``annotate`` and ``arange`` are
# not imported in this snippet; presumably it runs in a pylab-style
# interactive namespace -- confirm.

trj0 = md.load("traj-refine_implicit_md.xtc", top="topol-renumbered-implicit.pdb")
trj0 = trj0[0:50]

X = dataset.dataset("./tica.h5")
Xf = np.concatenate(X)

dih_model = utils.load("./dihedrals/model.pkl")
tica_model = utils.load("./tica.pkl")

pipeline = make_pipeline(dih_model, tica_model)
x0 = pipeline.transform([trj0])[0]

hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plot(x0[:, 0], x0[:, 1], 'kx')
# Explicit loop: ``map`` is lazy on Python 3, so using it purely for side
# effects would never draw the annotations there.
for k in arange(len(x0)):
    annotate(k, xy=x0[k, 0:2], fontsize=14)
def test_union_no_longer_exists():
    """Requesting ``fmt='hdf5-union'`` must raise a ValueError naming FeatureUnion."""
    expected_message = r".*[Uu]se msmbuilder\.featurizer\.FeatureUnion.*"
    with assert_raises_regexp(ValueError, expected_message):
        dataset(['ds1.h5', 'ds2.h5'], fmt='hdf5-union')
import matplotlib
# Select the Agg backend before pyplot is imported.
matplotlib.use('Agg')
from msmbuilder.dataset import dataset
import matplotlib.pyplot as plt
import numpy as np

# Load the file and flatten its list of trajectories into one array.
trajs = np.concatenate(dataset('tica_trajs.h5'))

# Log-scaled hexbin of the first two columns; bins with no points are hidden.
first_column = trajs[:, 0]
second_column = trajs[:, 1]
plt.hexbin(first_column, second_column, bins='log', mincnt=1)
plt.savefig('msm_fig.png')