def build_metadata(self, meta):
    """Return msmbuilder metadata, gathering or loading it when necessary.

    Parameters
    ----------
    meta : pandas.DataFrame, None, or anything accepted by ``load_meta``
        * ``None`` -- build a ``GenericParser`` for the files under
          ``self.data_folder`` and gather the metadata automatically.
        * ``pandas.DataFrame`` -- returned unchanged.
        * anything else -- handed to ``load_meta`` (presumably the path of
          a pickled metadata file; confirm against ``load_meta``).

    Returns
    -------
    The metadata object, or ``None`` if automatic gathering failed.

    Notes
    -----
    When ``meta`` is ``None`` this method assigns ``self.parser`` as a side
    effect.
    """
    if meta is None:
        try:
            # Raw string: ``\d`` is a regex class, not a string escape.
            self.parser = GenericParser(
                fn_re=r'{}/(e\d+s\d+)_.*/Production.nc'.format(
                    self.data_folder),
                group_names=['sim'],
                group_transforms=[lambda x: x],
                top_fn='',
                step_ps=self.timestep)
            # Use the parser that was just built. The previous code passed
            # an undefined local ``parser``, which raised NameError and was
            # then hidden by the blanket ``except``.
            return gather_metadata(
                '{}/e*/*nc'.format(self.data_folder), self.parser)
        except Exception:
            # Deliberately best-effort: report the failure and let the
            # caller deal with a missing metadata object.
            logger.warning("Could not automatically build metadata")
            return None

    if not isinstance(meta, pd.DataFrame):
        meta = load_meta(meta)
    return meta
def sample_clusters():
    """Save 100 randomly sampled frames per cluster as a DCD trajectory.

    For every cluster index ``i`` in ``range(int(num_clusters))`` the rows
    of ``df`` whose ``'Trajectory'`` column equals ``i`` are sampled
    (100 rows, without replacement), the matching frames are loaded and
    joined, superposed onto ``topology.pdb`` and written to
    ``clusters/rmsd_cluster-<i>.dcd``.

    Relies on the module-level names ``df``, ``num_clusters`` and ``backup``
    which are not defined inside this function.

    Raises
    ------
    ValueError
        Propagated from ``DataFrame.sample`` when a cluster has fewer than
        100 rows.
    """
    meta = load_meta()
    # Parse each topology once; keys are the ``top_fn`` values in ``meta``.
    tops = preload_tops(meta)
    print('Sampling trajectories')
    ref = md.load('topology.pdb')

    for i in range(int(num_clusters)):
        print(i)
        # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` accepts the
        # same boolean-mask / column-label selection.
        df_smp = df.loc[df['Trajectory'] == i, ['Key', 'Time_ps']].sample(100)
        # NOTE(review): 'Time_ps' is used as a frame index below -- confirm
        # that the column really holds frame numbers.
        inds = zip(df_smp['Key'], df_smp['Time_ps'])

        # Use loc because sample_dimension is nice
        traj = md.join(
            md.load_frame(meta.loc[traj_i]['traj_fn'],
                          index=frame_i,
                          # Reuse the preloaded topology instead of
                          # re-reading the file for every single frame.
                          top=tops[meta.loc[traj_i]['top_fn']])
            for traj_i, frame_i in inds
        )

        # Original trajectories include both BT1 and BT2 so need to superpose
        traj.superpose(reference=ref)

        # Save
        traj_fn = "clusters/rmsd_cluster-{}.dcd".format(i)
        backup(traj_fn)
        traj.save(traj_fn)
"""Featurize trajectories as Gaussian-kernel RMSD similarities to centroids

{{header}}

Meta
----
depends:
 - meta.pandas.pickl
 - trajs
 - top.pdb
"""
import mdtraj as md
import numpy as np
from msmbuilder.featurizer import RMSDFeaturizer
from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top

## Load
meta = load_meta()
# NOTE(review): "centroids.xtc" is read here but is not listed under
# ``depends`` in the docstring -- confirm whether it should be.
centroids = md.load("centroids.xtc", top=preload_top(meta))

## Kernel
SIGMA = 0.3  # nm

featurizer = RMSDFeaturizer(centroids)

# Map every trajectory key to exp(-rmsd**2 / (2 * SIGMA**2)), i.e. the
# featurizer output passed through a Gaussian kernel of width SIGMA.
lfeats = {}
for i, traj in itertrajs(meta):
    lfeat = featurizer.partial_transform(traj)
    lfeats[i] = np.exp(-lfeat**2 / (2 * (SIGMA**2)))

save_trajs(lfeats, 'ftrajs', meta)
from msmbuilder.io import load_meta, preload_tops
from mdtraj import load
from hbonds import HBondFeaturizer
# from msmbuilder.feature_selection import FeatureSelector
from featureselector import FeatureSelector

meta = load_meta('meta.pandas.pickl')
tops = preload_tops(meta)


def _load_strided(row):
    """Load the trajectory described by one metadata row, every 10th frame."""
    return load(row['traj_fn'], top=tops[row['top_fn']], stride=10)


# One strided trajectory per metadata row, in metadata order.
trajs = [_load_strided(meta_row) for _, meta_row in meta.iterrows()]


def traj_summary(ftrajs):
    """Print how many feature trajectories there are and each one's shape."""
    count = len(ftrajs)
    print('Length of ftraj {}'.format(count))
    for ftraj in ftrajs:
        print('\t Shape: {}'.format(ftraj.shape))


def test_HBondFeaturizer(traj_list):
    """Fit an ``HBondFeaturizer(freq=0.0)`` on ``traj_list`` and summarise
    the transformed output."""
    hbond_feat = HBondFeaturizer(freq=0.0)
    hbond_feat.fit(traj_list)
    transformed = hbond_feat.transform(traj_list)
    traj_summary(transformed)


def test_HBondsFeatExtr(traj_list):
    """Fit a ``FeatureSelector`` wrapping an ``HBondFeaturizer`` on
    ``traj_list`` and run the transform (the result is not inspected)."""
    named_features = [('hbonds', HBondFeaturizer())]
    selector = FeatureSelector(named_features, which_feat=['hbonds'])
    selector.fit(traj_list)
    ftrajs = selector.transform(traj_list)
"""Check for abnormally high rmsd values to a reference structure

{{header}}

Meta
----
depends:
 - meta.pandas.pickl
 - trajs
 - top.pdb
"""
import mdtraj as md
from msmbuilder.io import load_meta, itertrajs, save_trajs


def _rmsd_per_traj(metadata, reference):
    """Return ``{key: md.rmsd(traj, reference)}`` for every trajectory that
    ``itertrajs(metadata)`` yields."""
    per_traj = {}
    for key, traj in itertrajs(metadata):
        per_traj[key] = md.rmsd(traj, reference)
    return per_traj


## Load reference structure
ref = md.load("top.pdb")
meta = load_meta()

## Do calculation and save
rmsds = _rmsd_per_traj(meta, ref)
save_trajs(rmsds, 'rmsds', meta)
#!/usr/bin/env python
"""Copy every item of an msmbuilder dataset into a trajectory folder.

Usage: dataset_h5_to_npy_dir.py DATASET META [TRAJS]

DATASET is opened with ``msmbuilder.dataset.dataset``, META is read with
``load_meta`` and all (key, value) items of the dataset are written with
``save_trajs`` into TRAJS (default: ``trajs``).
"""
import argparse

from msmbuilder.dataset import dataset
from msmbuilder.io import save_trajs, load_meta

parser = argparse.ArgumentParser(
    prog='dataset_h5_to_npy_dir.py',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='''version1''')
parser.add_argument("dataset", help="""An HDF5 dataset""", type=str)
parser.add_argument("meta", help="A metadata pickl file", type=str)
# nargs='?' makes this positional optional. Without it argparse still
# requires the argument and the declared default is never used.
parser.add_argument("trajs", help="The folder in which to store the trajs",
                    type=str, nargs='?', default='trajs')

if __name__ == '__main__':
    args = parser.parse_args()
    meta = load_meta(args.meta)
    ds = dataset(args.dataset)
    # Materialise the dataset as a plain dict keyed like the dataset itself.
    trajs = dict(ds.items())
    save_trajs(trajs, args.trajs, meta)
#!/bin/env python from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser import numpy as np import mdtraj as md from msmbuilder.featurizer import DihedralFeaturizer, AtomPairsFeaturizer from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic from multiprocessing import Pool import contextlib meta = load_meta("meta.pandas.pickl") tops = preload_tops(meta) alpha_carbon_number = np.array([ 9, 26, 40, 59, 80, 92, 104, 111, 118, 139, 151, 170, 187, 194, 215, 234, 253, 270, 289, 306, 320, 346, 358, 374, 386, 403, 419, 434, 453, 462, 474, 490, 502, 516, 527, 538, 548, 567, 586, 605, 616, 628, 647, 669, 686, 708, 720, 736, 748, 767, 783, 804, 814, 825, 840, 850, 870, 889, 910, 927, 941, 948, 969, 980, 994, 1004, 1019, 1035, 1054, 1061, 1085, 1099, 1109, 1133, 1153, 1172, 1189, 1202, 1214, 1226, 1233, 1250, 1266, 1290, 1302, 1324, 1335, 1349, 1373, 1395, 1416, 1432, 1444, 1455, 1469, 1483, 1502, 1516, 1530, 1547, 1571, 1593, 1603, 1622, 1634, 1658, 1672, 1682, 1697, 1713, 1730, 1746, 1761, 1777, 1793, 1807, 1827, 1849, 1871, 1885, 1892, 1909, 1933, 1953, 1969, 1983, 2003, 2022, 2036, 2053, 2074, 2086, 2102, 2126, 2138, 2153, 2167, 2174, 2189, 2210, 2234, 2255, 2266, 2283, 2290, 2310, 2327, 2338 ]) num = len(alpha_carbon_number) atompair = [] for i in range(num):