def respawn_from_clusterer(self, n_spawns, percentile=0.5):
    """
    Pick trajectory frames from which new simulations can be spawned.

    The clusterer's per-state counts are searched for sparsely populated
    cluster centers, and frames of the trajectories that lie near those
    centers are sampled.

    Parameters
    ----------
    n_spawns: int,
        Number of spawns to generate; handed to the percentile search as
        its ``desired_length``.
    percentile: float,
        The percentile below which a microstate of the clusterer is
        considered to have a low population.

    Returns
    -------
    The value returned by ``sample_states`` for the selected centers,
    described by the original author as a list of tuples, each tuple
    being (traj_id, frame_id).
    """
    clusterer = retrieve_clusterer(self.model)
    sparse_ids = apply_percentile_search(
        count_array=clusterer.counts_,
        percentile=percentile,
        desired_length=n_spawns,
        search_type='clusterer',
    )

    # Fill self.ttrajs through get_tica_trajs() on first use only; later
    # calls reuse the stored value.
    if self.ttrajs is None:
        self.ttrajs = self.get_tica_trajs()

    sparse_centers = clusterer.cluster_centers_[sparse_ids]
    return sample_states(trajs=self.ttrajs, state_centers=sparse_centers)
def respawn_from_MSM(self, n_spawns, percentile=0.5, search_type='populations'):
    """
    Pick trajectory frames from which new simulations can be spawned,
    ranking the microstates with quantities taken from the MSM.

    Two rankings are available:
      i) 'populations': the MSM equilibrium populations, so that frames
         near low-population regions are chosen;
      ii) 'counts': the number of transitions counted out of each
          microstate, so that rarely-left microstates are chosen.

    Parameters
    ----------
    n_spawns: int,
        Number of spawns to generate; handed to the percentile search as
        its ``desired_length``.
    percentile: float,
        The percentile below which a microstate of the MSM is considered
        to be lowly populated.
    search_type: str,
        Either 'populations' or 'counts'.

    Returns
    -------
    selected_states:
        The value returned by ``sample_states`` for the selected centers,
        described by the original author as a list of tuples, each tuple
        being (traj_id, frame_id).

    Raises
    ------
    ValueError
        When ``search_type`` is neither 'populations' nor 'counts'.
    """
    msm = retrieve_MSM(self.model)
    clusterer = retrieve_clusterer(self.model)

    if search_type == 'counts':
        # Summing the count matrix along axis 1 gives, per microstate, the
        # amount of transitions out of it.
        state_scores = numpy.sum(msm.countsmat_, axis=1)
    elif search_type == 'populations':
        # Equilibrium population (stationary eigenvector) of transmat_.
        state_scores = msm.populations_
    else:
        raise ValueError("search_type is not 'populations' or 'counts'")

    sparse_ids = apply_percentile_search(
        count_array=state_scores,
        percentile=percentile,
        desired_length=n_spawns,
        search_type='msm',
        msm=msm,
    )

    # Fill self.ttrajs through get_tica_trajs() on first use only; later
    # calls reuse the stored value.
    if self.ttrajs is None:
        self.ttrajs = self.get_tica_trajs()

    # Look up the frames of the trajectories that are close to the chosen
    # cluster centers (sample_states is called with its default k).
    sparse_centers = clusterer.cluster_centers_[sparse_ids]
    selected_states = sample_states(trajs=self.ttrajs,
                                    state_centers=sparse_centers)
    return selected_states
please cite msmbuilder in any publications """ import mdtraj as md import os from msmbuilder.io.sampling import sample_states from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic ## Load meta, ttrajs = load_trajs('ttrajs') kmeans = load_generic("kmeans.pickl") ## Sample inds = sample_states(ttrajs, kmeans.cluster_centers_, k=10) save_generic(inds, "cluster-sample-inds.pickl") ## Make trajectories top = preload_top(meta) out_folder = "cluster_samples" backup(out_folder) os.mkdir(out_folder) for state_i, state_inds in enumerate(inds): traj = md.join( md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) for traj_i, frame_i in state_inds) traj.save("{}/{}.xtc".format(out_folder, state_i))
# Recompute a mean point for each of 100 states from the labelled
# trajectories, sample frames near a subset of those means, and plot
# `count_sum` per state.
# NOTE(review): `selected`, `ttrajs5`, `kmeans`, `score`, `count_sum`, `np`
# and `plt` are not defined in this excerpt — presumably set up earlier in
# the script; confirm before running this section on its own.

# Per-state accumulators; the state count (100) is hard-coded throughout.
selected_txx = [0] * 100
middles0 = [0] * 100
cluster_counts = [0] * 100
for i in range(100):
    # One slot per trajectory for the frames assigned to state i.
    selected[i] = [0] * len(ttrajs5)
    for j in range(len(ttrajs5)):
        # Positions in entry j of kmeans.labels_ whose label equals i
        # (presumably the per-frame labels of trajectory j — verify).
        test = np.where(kmeans.labels_[j] == i)
        selected[i][j] = ttrajs5[j][test]
    # Pool the per-trajectory selections for state i, then record their
    # mean along axis 0 and how many entries were pooled.
    selected_txx[i] = np.concatenate(selected[i])
    middles0[i] = selected_txx[i].mean(axis=0)
    cluster_counts[i] = len(selected_txx[i])
middles = np.asarray(middles0)
# Sample around the means picked by the first 10 entries of `score`:
# k=5 is only printed, k=1 is what gets written to the text file.
out = sample_states(ttrajs5, middles[score[:10]], k=5)
print('out is ', out)
out2 = sample_states(ttrajs5, middles[score[:10]], k=1)
np.savetxt('out_round9_cen_70_100_16.txt', out2, fmt='%6d')
# Plot count_sum against the state index and save the figure as EPS.
state = range(100)
plt.plot(state, count_sum)
plt.xlabel('State', fontsize=16)
plt.ylabel('Count Sum', fontsize=16)
plt.savefig('counts_cen_sum_30_100_5_16.eps')
# Clear the current figure after saving.
plt.clf()
- ../../trajs
"""

import mdtraj as md
import os

from msmbuilder.io.sampling import sample_states
from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic

## Load
# Trajectories stored under 'ttrajs' together with their metadata, plus the
# pickled k-means model.
meta, ttrajs = load_trajs('ttrajs')
kmeans = load_generic("kmeans.pickl")

## Sample
# Ask for 10 samples around every cluster center and persist the indices.
inds = sample_states(ttrajs, kmeans.cluster_centers_, k=10)
save_generic(inds, "cluster-sample-inds.pickl")

## Make trajectories
top = preload_top(meta)
out_folder = "cluster_samples"
# backup() is called on the folder name before the folder is created anew.
backup(out_folder)
os.mkdir(out_folder)

# For every state, load each sampled (traj_i, frame_i) frame and join the
# frames into a single trajectory.
for state_i, state_inds in enumerate(inds):
    traj = md.join(
        md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
        for traj_i, frame_i in state_inds
    )
    # NOTE(review): `traj` is not used after this point in the visible
    # excerpt — the loop body may continue beyond it (e.g. saving the
    # joined trajectory); confirm against the full script.