def test_sample_1():
    """Smoke-test sample drawing on a two-cluster synthetic dataset.

    Builds two sequences from a gaussian chunk and a copy of it offset by
    25 units, fits a KMeans(2) -> MarkovStateModel pipeline on them, calls
    ``msm.draw_samples(..., 2000)`` and checks (to one decimal) that the
    mean of the mapped samples is ~0 for the first state and ~25 for the
    second.  The same check is then repeated with the data wrapped in
    single-atom ``md.Trajectory`` objects.
    """
    expected_means = np.array([[0., 0., 0.0], [25., 25., 25.]])

    # Ergodic dataset: each sequence visits both gaussian centers, in
    # opposite order.
    blob = np.random.normal(size=(20000, 3))
    data = [
        np.vstack((blob, blob + 25)),
        np.vstack((blob + 25, blob)),
    ]

    kmeans = cluster.KMeans(n_clusters=2)
    msm = MarkovStateModel()
    steps = [("clusterer", kmeans), ("msm", msm)]
    pipeline = sklearn.pipeline.Pipeline(steps)
    pipeline.fit(data)
    assignments = pipeline.transform(data)

    # Cluster labels are arbitrary; flip 0 <-> 1 in place when needed so
    # that the very first frame is always labelled 0.
    if assignments[0][0] == 1:
        for labels in assignments:
            labels *= -1
            labels += 1

    pairs = msm.draw_samples(assignments, 2000)

    # Sampling from plain arrays.
    array_samples = map_drawn_samples(pairs, data)
    array_means = np.mean(array_samples, axis=1)
    eq(array_means, expected_means, decimal=1)

    # Sampling from Trajectory objects: build a fake one-atom topology so
    # that the trajectories match the input dataset.
    atom_table = pd.DataFrame({
        "serial": [0],
        "name": ["HN"],
        "element": ["H"],
        "resSeq": [1],
        "resName": "RES",
        "chainID": [0]
    })
    no_bonds = np.zeros(shape=(0, 2), dtype='int')
    top = md.Topology.from_dataframe(atom_table, bonds=no_bonds)

    # x[:, np.newaxis] turns (40000, 3) into (40000 frames, 1 atom, 3 xyz).
    trajectories = []
    for x in data:
        trajectories.append(md.Trajectory(x[:, np.newaxis], top))

    trj_samples = map_drawn_samples(pairs, trajectories)
    trj_means = np.array([t.xyz.mean(0)[0] for t in trj_samples])
    eq(trj_means, expected_means, decimal=1)
def test_sample_1():
    """Check that drawn samples land on the right gaussian center.

    A gaussian chunk and a +25 shifted copy are stacked (in both orders)
    into two sequences, clustered with KMeans(2) and fed to a
    MarkovStateModel through an sklearn Pipeline.  After
    ``msm.draw_samples(..., 2000)`` the per-state means of the mapped
    samples must equal [0, 0, 0] and [25, 25, 25] to one decimal, both for
    raw arrays and for single-atom ``md.Trajectory`` inputs.
    """

    def check_state_means(observed):
        # First state is expected at the origin, second at the +25 offset.
        eq(observed, np.array([[0., 0., 0.0], [25., 25., 25.]]), decimal=1)

    # Make an ergodic dataset with two gaussian centers offset by 25 units.
    chunk = np.random.normal(size=(20000, 3))
    forward = np.vstack((chunk, chunk + 25))
    backward = np.vstack((chunk + 25, chunk))
    data = [forward, backward]

    clusterer = cluster.KMeans(n_clusters=2)
    msm = MarkovStateModel()
    pipeline = sklearn.pipeline.Pipeline(
        [("clusterer", clusterer), ("msm", msm)]
    )
    pipeline.fit(data)
    trimmed = pipeline.transform(data)

    # Make the output assignments start with zero at the first position
    # by swapping the two labels in place if they came out reversed.
    first_label = trimmed[0][0]
    if first_label == 1:
        for seq in trimmed:
            seq *= -1
            seq += 1

    pairs = msm.draw_samples(trimmed, 2000)

    samples = map_drawn_samples(pairs, data)
    check_state_means(np.mean(samples, axis=1))

    # We should make sure we can sample from Trajectory objects too:
    # create a fake topology with 1 atom to match the input dataset.
    top = md.Topology.from_dataframe(
        pd.DataFrame({
            "serial": [0], "name": ["HN"], "element": ["H"], "resSeq": [1],
            "resName": "RES", "chainID": [0]
        }),
        bonds=np.zeros(shape=(0, 2), dtype='int'),
    )
    # np.newaxis reshapes each array to 40000 frames, 1 atom, 3 xyz.
    trajectories = [md.Trajectory(arr[:, np.newaxis], top) for arr in data]

    trj_samples = map_drawn_samples(pairs, trajectories)
    check_state_means(np.array([trj.xyz.mean(0)[0] for trj in trj_samples]))
# Fragment of a sampling script: fit an MSM on the cluster labels, ask
# ad.findStarting for starting states, then for each one look up a sampled
# (trajectory index, frame) pair and start writing a 'cppASample_<n>.in'
# input file (presumably for cpptraj, given the parm/trajin lines -- confirm).
# NOTE(review): `cluster`, `n_samples`, `method`, `Trjs`, `name_sys`,
# `name_round`, `findTop` and `findRawtrj` are defined outside this excerpt.
clL = cluster.labels_
msm = MarkovStateModel(lag_time=10,n_timescales=10)
msm.fit_transform(clL)
trjs = clL
N = n_samples
inits = ad.findStarting([trjs], N, method=method)
# Sorted list of paths matching the `Trjs` glob pattern; indexed below by
# the first element of a drawn sample pair.
T = []
for trj in sorted(glob.glob(Trjs)):
    T.append(trj)
count = 0
for init in inits:
    # NOTE(review): draw_samples is called again on every iteration and the
    # result is indexed directly by `init` (no msm.mapping_ lookup) --
    # confirm that both are intended.
    structure = msm.draw_samples(clL, 1)[init]
    print structure
    top = findTop(T[structure[0][0]])
    rawTrj = findRawtrj(T[structure[0][0]])
    print top
    #f = md.load(rawTrj, top=top, frame=structure[0][1])
    #f.save_pdb(name_sys+str(count)+'_'+name_round+'.pdb')
    #shutil.copy(top, name_sys+str(count)+'_'+name_round+'.prmtop')
    #f.save_mdcrd(name_sys+str(count)+'_'+name_round+'.mdcrd')
    frame = structure[0][1]
    # Output names built from system name, running counter and round name.
    # NOTE(review): `count` is not incremented in the visible part of the
    # loop; if it is not incremented further down, every iteration reuses
    # the same file names.
    newTop = name_sys+str(count)+'_'+name_round+'.prmtop'
    newrst = name_sys+str(count)+'_'+name_round+'-00.rst'
    # NOTE(review): the handle is opened without a context manager and no
    # close() is visible in this excerpt.
    f = open('cppASample_'+str(count)+'.in', 'w')
    f.write('parm ' + top + '\n')
    f.write('trajin ' + rawTrj + '\n')
# findStarting(trjs, N, method='random')
import adaptivsamplingMSM as ad
from msmbuilder.msm import MarkovStateModel

# Load the pickled clustering object; its labels_ are the input to both the
# starting-state search and the MSM.  The context manager guarantees the
# file handle is closed.
with open('clustering.pkl', 'rb') as pkl_file:
    cluster = pickle.load(pkl_file)
trjs = cluster.labels_
N = n_samples

# Sorted list of raw trajectory files; indexed below by the trajectory
# index of each drawn sample.
T = sorted(glob.glob('rawTrj/MD1-rwTrj/*.mdcrd'))

inits = ad.findStarting([trjs], N, method='leastPop')
msm = MarkovStateModel(lag_time=1, n_timescales=10)
msm.fit_transform(cluster.labels_)

# One drawn sample per MSM state, drawn once and reused for every init.
OPF = []
structure = msm.draw_samples(trjs, 1)
for i in range(n_samples):
    try:
        init = structure[msm.mapping_[inits[i]]]
    except KeyError:
        # The chosen state has no entry in msm.mapping_.  Skip it instead of
        # falling through: the original code carried on and either reused
        # `init` from the previous iteration or hit a NameError on the first.
        print("state %r is not in msm.mapping_; skipping" % (inits[i],))
        continue
    traj = T[init[0][0]]
    # Coerce to a plain int so json.dump does not choke on numpy integers.
    frame = int(init[0][1])
    OPF.append({'traj': traj, 'frame': frame})

# Write through a context manager so the data is flushed and the handle
# closed before the file is read back below.
with open("ClsInf.txt", 'w') as out_file:
    json.dump(OPF, out_file)

### Step 5: making the CPPtraj inputs
import json
topFile = 'mytopfile.top'
with open("ClsInf.txt") as in_file:
    inf = json.load(in_file)