def sample_region(data, pt_dict, n_frames):
    """Find the frames nearest to a point in a chosen subset of dimensions.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica)
    pt_dict : dict
        Maps a dimension index to the coordinate of the query point along
        that dimension, e.g. ``{0: 0.15, 4: 0.2}``. Only the dimensions
        listed here are used; all others are dropped before the search.
    n_frames : int
        Number of frames required

    Returns
    -------
    list of tuples where first number is the trajectory index and
    second is the frame index
    """
    dims = list(pt_dict.keys())
    # Keep only the requested columns of every trajectory.
    projected = [traj[:, dims] for traj in data]
    # Build the query point in the same column order as the projection.
    query_point = [pt_dict[dim] for dim in dims]
    _, nearest = KDTree(projected).query(query_point, n_frames)
    return nearest
def sample_region(data, pt_dict, n_frames):
    """Find the frames nearest to a point in a chosen subset of dimensions.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica)
    pt_dict : dict
        Maps a dimension index to the coordinate of the query point along
        that dimension, e.g. ``{0: 0.15, 4: 0.2}``. Only the dimensions
        listed here are used; all others are dropped before the search.
    n_frames : int
        Number of frames required

    Returns
    -------
    list of tuples where first number is the trajectory index and
    second is the frame index
    """
    selected_dims = list(pt_dict.keys())
    # Restrict every trajectory to the columns named in pt_dict.
    reduced = [traj[:, selected_dims] for traj in data]
    search_tree = KDTree(reduced)
    # Coordinates are listed in the same order as the selected columns.
    target = [pt_dict[dim] for dim in selected_dims]
    _, frame_indices = search_tree.query(target, n_frames)
    return frame_indices
def sample_dimension(data, dimension, n_frames, scheme="linear"):
    """Sample frames along one dimension of the data using one of 3 schemes.

    All other dimensions are ignored.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica)
    dimension : int
        dimension to sample on
    n_frames : int
        Number of frames required
    scheme : string
        One of either "linear", "random" or "edge". Linear samples the tic
        linearly between its minimum and maximum, random draws observed
        values at random (thereby taking approximate free energies into
        account), and edge takes the ``n_frames / 2`` (rounded down)
        smallest and the same number of largest values only, so an odd
        ``n_frames`` yields ``n_frames - 1`` points.

    Returns
    -------
    list with one entry per sampled point, each being the index returned by
    the KDTree query for that point (trajectory index and frame index)

    Raises
    ------
    ValueError
        If ``scheme`` is not one of the supported names.
    """
    d_data = [i[:, dimension][:, np.newaxis] for i in data]

    # Sort it because all three sampling schemes use it.
    all_vals = np.sort([val for traj in d_data for val in traj.flatten()])

    if scheme == "linear":
        # Linearly placed points spanning the observed range.
        spaced_points = np.linspace(all_vals[0], all_vals[-1], n_frames)
    elif scheme == "random":
        spaced_points = np.sort(np.random.choice(all_vals, n_frames))
    elif scheme == "edge":
        # Built-in int replaces np.int, which was removed in NumPy 1.24.
        _cut_point = int(n_frames / 2)
        if _cut_point > 0:
            spaced_points = np.hstack(
                (all_vals[:_cut_point], all_vals[-_cut_point:]))
        else:
            # all_vals[-0:] would select every value, so return no points
            # when fewer than two frames are requested.
            spaced_points = all_vals[:0]
    else:
        raise ValueError("Scheme has be to one of linear, random or edge")

    tree = KDTree(d_data)

    return_vec = []
    for pt in spaced_points:
        _, ind = tree.query([pt])
        return_vec.append(ind)
    return return_vec
def sample_dimension(data, dimension, n_frames, scheme="linear"):
    """Sample frames along one dimension of the data using one of 3 schemes.

    All other dimensions are ignored.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica)
    dimension : int
        dimension to sample on
    n_frames : int
        Number of frames required
    scheme : string
        One of either "linear", "random" or "edge". Linear samples the tic
        linearly between its minimum and maximum, random draws observed
        values at random (thereby taking approximate free energies into
        account), and edge takes the ``n_frames / 2`` (rounded down)
        smallest and the same number of largest values only, so an odd
        ``n_frames`` yields ``n_frames - 1`` points.

    Returns
    -------
    list with one entry per sampled point, each being the index returned by
    the KDTree query for that point (trajectory index and frame index)

    Raises
    ------
    ValueError
        If ``scheme`` is not one of the supported names.
    """
    d_data = [i[:, dimension][:, np.newaxis] for i in data]

    # Sort it because all three sampling schemes use it.
    all_vals = np.sort([val for traj in d_data for val in traj.flatten()])

    if scheme == "linear":
        # Linearly placed points spanning the observed range.
        spaced_points = np.linspace(all_vals[0], all_vals[-1], n_frames)
    elif scheme == "random":
        spaced_points = np.sort(np.random.choice(all_vals, n_frames))
    elif scheme == "edge":
        # Built-in int replaces np.int, which was removed in NumPy 1.24.
        _cut_point = int(n_frames / 2)
        if _cut_point > 0:
            spaced_points = np.hstack(
                (all_vals[:_cut_point], all_vals[-_cut_point:]))
        else:
            # all_vals[-0:] would select every value, so return no points
            # when fewer than two frames are requested.
            spaced_points = all_vals[:0]
    else:
        raise ValueError("Scheme has be to one of linear, random or edge")

    tree = KDTree(d_data)

    return_vec = []
    for pt in spaced_points:
        _, ind = tree.query([pt])
        return_vec.append(ind)
    return return_vec
def kdtree_maker(prt, stride=1):
    """Build a KDTree over a strided selection of ``prt.tica_data``.

    Parameters
    ----------
    prt : object
        Object exposing a ``tica_data`` mapping.
    stride : int, optional
        Step used when slicing the list of ``tica_data`` keys; the default
        of 1 keeps every key.

    Returns
    -------
    tuple
        ``(key_list, tree)`` where ``key_list`` holds the selected keys and
        ``tree`` is the KDTree built from their values, in the same order.
    """
    tica_data = prt.tica_data
    selected_keys = list(tica_data.keys())[::stride]
    selected_values = [tica_data[key] for key in selected_keys]
    return selected_keys, KDTree(selected_values)
# Fit a continuous-time MSM on the state assignments.
msm_mdl = ContinuousTimeMSM(lag_time=10, ergodic_cutoff=1/10)
assignments.fit_with(msm_mdl)
# Bare expression: has no effect beyond the attribute lookup unless this is
# run in an interactive session that echoes the value.
msm_mdl.percent_retained_

### CALCULATING 10 MOST POPULATED CONFORMATIONS
# Persist a fixed set of model attributes. The context manager closes the
# file even if building the dict or pickling raises.
with open("msm_mdl.pkl", 'wb') as f:
    pickle.dump(
        {k: msm_mdl.__dict__[k] for k in [
            'lag_time', 'n_timescales', 'ergodic_cutoff', 'verbose',
            'sliding_window', 'guess', 'theta_', 'ratemat_', 'transmat_',
            'countsmat_', 'n_states_', 'mapping_', 'populations_',
            'information_', 'loglikelihoods_', 'eigenvalues_',
            'left_eigenvectors_', 'right_eigenvectors_', 'percent_retained_',
        ]},
        f,
    )

q = msm_mdl.__dict__.get('populations_')
# Indices of the 10 largest populations, then ordered by ascending population.
ind = np.argpartition(q, -10)[-10:]
p = ind[np.argsort(q[ind])]
print("The most populated conformations:", q[p])

### EXTRACTING REPRESENTATIVE CONFORMATIONS OF MACROSTATE
ktree = KDTree(tica_features)
trj_ds = dataset("*.nc", topology="s.pdb")
trj_list = []
# NOTE(review): p holds positions in populations_; these are used directly as
# row indices into kmeans_mdl.cluster_centers_. If the MSM dropped states,
# the two numberings may differ -- confirm against msm_mdl.mapping_.
for cind in p:
    pt = kmeans_mdl.cluster_centers_[cind]
    # Nearest frame to the cluster center: t = trajectory, f = frame
    # (f rebinds the already-closed file handle name from above).
    _, (t, f) = ktree.query(pt)
    print("The trajectory and frame number of representative macrostate:", t, f)
    trj_list.append(trj_ds[t][f])

#CALCULATING COMMON FREE ENERGY LANDSCAPE FOR ALL PH
# Population value looked up for every frame via its concatenated assignment.
pi_0 = msm_mdl.__dict__.get('populations_')[np.concatenate(assignments, axis=0)]
data = np.concatenate(tica_trajs, axis=0)
clip = [(-np.inf, np.inf), (-np.inf, np.inf)]
levels = np.linspace(0, 2, 21)


def _thermo_transform(Z, temperature):
    """Return ``-THERMO_CONSTANT * temperature * log(Z)``.

    Parameters
    ----------
    Z : array_like
        Values passed to ``np.log``.
        # presumably probabilities/densities -- TODO confirm with callers
    temperature : float
        Multiplier applied together with the module-level THERMO_CONSTANT.

    Returns
    -------
    Same shape as ``np.log(Z)``.
    """
    return - THERMO_CONSTANT * temperature * np.log(Z)