def load_metadata(traj_dir, top):
    """Load per-trajectory metadata into a dataframe and save it to disk.

    :param traj_dir: directory containing the trajectory files
    :param top: topology file name passed through to the parser
    :return: metadata data frame (also persisted via ``save_meta``)
    """
    # Raw string: the original used a plain string, so '\w' was an invalid
    # escape sequence (SyntaxWarning today, an error in future Python).
    re_pattern = r'(\w+)-([0-9]{3})k-([0-9])atm-prod([0-9]+\.[0-9]+).*BT([0-9]+)*'
    captured_group_names = ['PDB', 'Temp', 'Pressure', 'Prod_Round', 'Act_Site']
    # `identity` keeps the captured text as-is; presumably defined at module
    # level alongside this function — TODO confirm.
    captured_group_transforms = [identity, float, float, identity, int]
    time_step = 1  # in picoseconds
    file_type = 'dcd'

    parser = GenericParser(re_pattern,
                           group_names=captured_group_names,
                           group_transforms=captured_group_transforms,
                           top_fn=top,
                           step_ps=time_step)
    # Glob every .dcd file in traj_dir and extract metadata from each name.
    meta = gather_metadata(os.path.join(traj_dir, "*.{}".format(file_type)),
                           parser)
    save_meta(meta)
    return meta
"""Find trajectories and associated metadata msmbuilder autogenerated template version 2 created 2017-05-30T15:16:59.066163 please cite msmbuilder in any publications """ from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser ## Construct and save the dataframe parser = NumberedRunsParser( traj_fmt="trajectory-{run}.xtc", top_fn="top.pdb", step_ps=50, ) meta = gather_metadata("trajs/*.xtc", parser) save_meta(meta)
from msmbuilder.io import NumberedRunsParser, gather_metadata from msmbuilder.featurizer import DihedralFeaturizer from msmbuilder.preprocessing import MinMaxScaler from msmbuilder.decomposition import tICA from msmbuilder.cluster import MiniBatchKMeans from msmbuilder.msm import MarkovStateModel from sklearn.pipeline import Pipeline import os from ..adaptive import create_folder logging.disable(logging.CRITICAL) parser = NumberedRunsParser(traj_fmt='run-{run}.nc', top_fn='data_app/runs/structure.prmtop', step_ps=200) meta = gather_metadata('/'.join(['data_app/runs/', '*nc']), parser) model = Pipeline([('feat', DihedralFeaturizer()), ('scaler', MinMaxScaler()), ('tICA', tICA(lag_time=1, n_components=4)), ('clusterer', MiniBatchKMeans(n_clusters=5)), ('msm', MarkovStateModel(lag_time=1, n_timescales=4))]) spawns = [ (0, 1), ] epoch = 1 class TestAppBase: def __init__(self): self.app = App(generator_folder='data_app/generators',
"""Find trajectories and associated metadata {{header}} Meta ---- depends: - trajs - top.pdb """ from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser ## Construct and save the dataframe parser = NumberedRunsParser( traj_fmt="trajectory-{run}.xtc", top_fn="top.pdb", step_ps=50, ) meta = gather_metadata("trajs/*.xtc", parser) save_meta(meta)
from msmbuilder.io import GenericParser, save_meta, gather_metadata
from os.path import join

#
# Helper functions
#
def identity(x):
    """Pass-through transform for regex groups kept as plain strings."""
    return x

#
# File name parsing and metadata
#
# Raw string: the original used a plain string, so '\w' was an invalid
# escape sequence (SyntaxWarning today, an error in future Python).
re_pattern = r'(\w+)-([0-9]+)-as([0-9]+)*'
captured_group_names = ['PDB', 'Traj_Num', 'Act_Site']
captured_group_transforms = [identity, int, int]
time_step = 10  # 10 ps
file_type = 'nc'

#
# Gather and save the metadata
#
parser = GenericParser(re_pattern,
                       group_names=captured_group_names,
                       group_transforms=captured_group_transforms,
                       top_fn='proc_traj/2agy-as1.prmtop',
                       step_ps=time_step)
meta = gather_metadata(join('proc_traj', "*.{}".format(file_type)), parser)
save_meta(meta)

from msmbuilder.feature_extraction import FunctionFeaturizer
# Helper functions
#
def identity(x):
    """Pass-through transform for regex groups kept as plain strings."""
    return x

#
# File name parsing and metadata
#
# Raw string: the original used a plain string, so '\w' was an invalid
# escape sequence (SyntaxWarning today, an error in future Python).
re_pattern = r'(\w+)-([0-9]{3})k-([0-9])atm-prod([0-9]+\.[0-9]+).*BT([0-9]+)*'
captured_group_names = ['PDB', 'Temp', 'Pressure', 'Prod_Round', 'Act_Site']
captured_group_transforms = [identity, float, float, identity, int]
time_step = 1  # in picoseconds
file_type = 'dcd'

#
# Gather and save the metadata
#
parser = GenericParser(re_pattern,
                       group_names=captured_group_names,
                       group_transforms=captured_group_transforms,
                       top_fn='topology.pdb',
                       step_ps=time_step)
meta = gather_metadata(os.path.join('traj', "*.{}".format(file_type)), parser)
save_meta(meta)
"""Find trajectories and associated metadata msmbuilder autogenerated template version 2 created 2017-05-30T15:16:59.066163 please cite msmbuilder in any publications """ from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser ## Construct and save the dataframe parser = NumberedRunsParser( traj_fmt="trajectory-{run}.xtc", top_fn="../Data/top.pdb", step_ps=50, ) meta = gather_metadata("../Data/trajs/*.xtc", parser) save_meta(meta)
alpha = 0.5 rmsd_target = '/scratch/jap12009/msm/fast/try1/monomer2_4us_ZN.pdb' spawn = 10 f = open('round.txt') lines = f.read() f.close round_num = int(lines) ## Construct and save the dataframe parser = NumberedRunsParser( traj_fmt="trj-{run}.xtc", top_fn="/scratch/jap12009/msm/fast/try1/frame0nw_startingAPO.pdb", step_ps=240, ) meta = gather_metadata("/scratch/jap12009/msm/fast/try1/trj/trj-*.xtc", parser) save_meta(meta) ## Set up parameters for clustering kcen = KCenters( n_clusters=num_clusters, metric='rmsd', ) ## Try to limit RAM usage def guestimate_stride(): total_data = meta['nframes'].sum() want = kcen.n_clusters * 20 stride = max(1, total_data // want) print("Since we have", total_data, "frames, we're going to stride by", stride, "during fitting, because this is probably adequate for",