def setUpClass(cls):
    """Build the shared fixture: a 2-D Gaussian-mixture trajectory and three clusterings of it.

    Sets on the class:
      * ``dtraj_dir`` - a fresh temporary directory created with ``tempfile.mkdtemp``.
      * ``T``         - total number of frames (``len(means) * nsample``).
      * ``X``         - array of shape ``(T, 2)``; each consecutive run of ``nsample``
                        rows is drawn around one mean with the matching widths.
      * ``km``, ``rs``, ``rt`` - results of ``coor.cluster_kmeans`` (k=100),
                        ``coor.cluster_regspace`` (dmin=0.5) and
                        ``coor.cluster_uniform_time`` (k=100) on ``X``.
      * ``cl``        - list ``[km, rs, rt]``.
    """
    super(TestCluster, cls).setUpClass()
    cls.dtraj_dir = tempfile.mkdtemp()

    # generate Gaussian mixture
    means = [
        np.array([-3, 0]),
        np.array([-1, 1]),
        np.array([0, 0]),
        np.array([1, -1]),
        np.array([4, 2]),
    ]
    widths = [np.array([0.3, 2]) for _ in means]

    # continuous trajectory
    nsample = 1000
    cls.T = len(means) * nsample
    cls.X = np.zeros((cls.T, 2))
    for i, (mean, width) in enumerate(zip(means, widths)):
        rows = slice(i * nsample, (i + 1) * nsample)
        # Draw one independent variate per frame. A bare randn() returns a
        # single scalar that would be broadcast over the whole run, leaving
        # only len(means) distinct points instead of a Gaussian mixture.
        cls.X[rows, 0] = width[0] * np.random.randn(nsample) + mean[0]
        cls.X[rows, 1] = width[1] * np.random.randn(nsample) + mean[1]

    # cluster in different ways
    cls.km = coor.cluster_kmeans(data=cls.X, k=100)
    cls.rs = coor.cluster_regspace(data=cls.X, dmin=0.5)
    cls.rt = coor.cluster_uniform_time(data=cls.X, k=100)
    cls.cl = [cls.km, cls.rs, cls.rt]
def setUpClass(cls):
    """Build the shared fixture: a 2-D Gaussian-mixture trajectory and three clusterings of it.

    Sets on the class:
      * ``dtraj_dir`` - a fresh temporary directory created with ``tempfile.mkdtemp``.
      * ``T``         - total number of frames (``len(means) * nsample``).
      * ``X``         - array of shape ``(T, 2)``; each consecutive run of ``nsample``
                        rows is drawn around one mean with the matching widths.
      * ``km``, ``rs``, ``rt`` - results of ``coor.cluster_kmeans`` (k=100),
                        ``coor.cluster_regspace`` (dmin=0.5) and
                        ``coor.cluster_uniform_time`` (k=100) on ``X``.
      * ``cl``        - list ``[km, rs, rt]``.
    """
    super(TestCluster, cls).setUpClass()
    cls.dtraj_dir = tempfile.mkdtemp()

    # generate Gaussian mixture
    means = [
        np.array([-3, 0]),
        np.array([-1, 1]),
        np.array([0, 0]),
        np.array([1, -1]),
        np.array([4, 2]),
    ]
    widths = [np.array([0.3, 2]) for _ in means]

    # continuous trajectory
    nsample = 1000
    cls.T = len(means) * nsample
    cls.X = np.zeros((cls.T, 2))
    for i, (mean, width) in enumerate(zip(means, widths)):
        rows = slice(i * nsample, (i + 1) * nsample)
        # Draw one independent variate per frame. A bare randn() returns a
        # single scalar that would be broadcast over the whole run, leaving
        # only len(means) distinct points instead of a Gaussian mixture.
        cls.X[rows, 0] = width[0] * np.random.randn(nsample) + mean[0]
        cls.X[rows, 1] = width[1] * np.random.randn(nsample) + mean[1]

    # cluster in different ways
    cls.km = coor.cluster_kmeans(data=cls.X, k=100)
    cls.rs = coor.cluster_regspace(data=cls.X, dmin=0.5)
    cls.rt = coor.cluster_uniform_time(data=cls.X, k=100)
    cls.cl = [cls.km, cls.rs, cls.rt]
def __init__(self, ic, data=None, pickle_file=None):
    """Load a stored clustering, or compute one from ``data`` and store it.

    Parameters
    ----------
    ic : object
        Only ``ic.outname`` is read, and only when ``data`` is given; it is
        the prefix of the pickle file that the new clustering is written to
        (``<outname>_cl_full.pickle``).
    data : optional
        Input handed to ``coor.cluster_regspace`` (``max_centers=1000``,
        ``dmin=0.025``). When ``None``, nothing is computed and the
        clustering is read from ``pickle_file`` instead.
    pickle_file : optional
        Path of a previously pickled clustering; used only when ``data`` is
        ``None``.

    The resulting object is stored in ``self.cluster``.
    """
    if data is None:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(pickle_file, 'rb') as fh:
            self.cluster = pickle.load(fh)
    else:
        self.cluster = coor.cluster_regspace(data, max_centers=1000, dmin=0.025)
        # Context manager guarantees the file is flushed and closed even if
        # pickling fails part-way.
        with open(ic.outname + '_cl_full.pickle', 'wb') as fh:
            pickle.dump(self.cluster, fh)
feat.add_backbone_torsions(selstr=None, deg=True, cossin=True) # in degrees #List of all the angles #print(feat.describe()) #Number of dregree of freedom #print(feat.dimension()) inp = coor.source(traj, feat) sincos = inp.get_output()[0] ############# #Use a regular space clustering. Cluster centers are at least in distance of #dmin to each other according to the given metric.Then Voronoi discretization #with the computed centers is used to partition the data cl_space = coor.cluster_regspace(sincos, dmin=arg.dmin, max_centers=100000) clustCenters = cl_space.clustercenters #angle for each centroid #We now discretize the trajectory to either set of cluster centers #assign structure's cluster number Sspace = coor.assign_to_centers(sincos, clustCenters) #assign for each cluster their frames number indexClusters = cl_space.index_clusters clustCentersFrameNo = -1 * np.ones(len(clustCenters), dtype='int32') #Find the centroid (euclideen distance) for ind_clust in range(len(indexClusters)): #Frames which compose the cluster frameNumber = indexClusters[ind_clust][:, 1] cosinusSinus = sincos[frameNumber, :] print(str(ind_clust)) for j in range(len(cosinusSinus)):
# Print the backbone atoms in ascending index order.
backbone.sort()
for i in backbone:
    print(topology.atom(i))

# PCA on the pairwise distances between all atoms of trajectory `t`.
# NOTE(review): an earlier `atom_pairs = list(combinations(backbone, 2))` was
# overwritten by the assignment below before any use, so that dead store has
# been dropped. If backbone-only pairs were intended, replace
# `range(t.n_atoms)` with `backbone`.
pca = PCA(n_components=8)
atom_pairs = list(combinations(range(t.n_atoms), 2))
pairwise_distances = md.geometry.compute_distances(t, atom_pairs)
print(pairwise_distances.shape)
reduced_distances = pca.fit_transform(pairwise_distances)
# (Two bare `pca.components_` / `pca.explained_variance_ratio_` expression
# statements stood here; outside an interactive session they do nothing and
# were removed.)
tmp = deepcopy(pca.components_)

# Regular-space clustering directly on the distance vectors.
dist_space = coor.cluster_regspace(pairwise_distances, dmin=55)
# NOTE(review): np.sort(..., axis=0) sorts every column independently, so the
# rows of `centers_space` are in general no longer the cluster centers found
# above - confirm this is intended before relying on `Sspace`.
centers_space = np.sort(dist_space.clustercenters, axis=0)
Sspace = coor.assign_to_centers(pairwise_distances, centers_space)
test = dist_space.dtrajs

# Scatter of PC1 vs PC3, coloured by the regspace cluster index of each frame.
plt.figure()
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=t.time)
#plt.scatter(reduced_distances[:, 0], reduced_distances[:,1], marker='x', c=[1]*len(reduced_distances[:,1]))
plt.scatter(reduced_distances[:, 0],
            reduced_distances[:, 2],
            marker='o',
            c=test[0] * 5,
            alpha=0.3)
plt.xlabel('PC1')
plt.ylabel('PC3')
plt.title('Pairwise distance PCA: cyclic peptide')
# Stack three neighbouring per-residue discrete trajectories column-wise so
# that every frame becomes a 3-component integer vector.
dtrajs_nnn_234 = []
dtrajs_nnn_345 = []
dtrajs_nnn_456 = []
# NOTE(review): the loop length is taken from `dtraj_rama_2` while the body
# indexes `dtrajs_rama_2`; confirm both names exist and have equal length.
for i in range(len(dtraj_rama_2)):
    # ndarray.astype returns a new array. The original called it on the
    # already-appended element and discarded the result, so the int64
    # conversion never took effect; it is now applied before appending.
    dtrajs_nnn_234.append(
        np.vstack((dtrajs_rama_2[i], dtrajs_rama_3[i], dtrajs_rama_4[i])).T.astype('int64'))
    dtrajs_nnn_345.append(
        np.vstack((dtrajs_rama_3[i], dtrajs_rama_4[i], dtrajs_rama_5[i])).T.astype('int64'))
    dtrajs_nnn_456.append(
        np.vstack((dtrajs_rama_4[i], dtrajs_rama_5[i], dtrajs_rama_6[i])).T.astype('int64'))

# In[18]:

# Collapse each 3-component vector into a single state index.
n_clusters = 8
clustering_nnn_234 = coor.cluster_regspace(dtrajs_nnn_234, max_centers=n_clusters, dmin=0.5)
clustering_nnn_345 = coor.cluster_regspace(dtrajs_nnn_345, max_centers=n_clusters, dmin=0.5)
clustering_nnn_456 = coor.cluster_regspace(dtrajs_nnn_456, max_centers=n_clusters, dmin=0.5)

# In[19]:

dtrajs_1D_234 = clustering_nnn_234.dtrajs
dtrajs_1D_345 = clustering_nnn_345.dtrajs
dtrajs_1D_456 = clustering_nnn_456.dtrajs

# In[20]:

# shift the cluster indices so they are all consistent
# NOTE(review): if `clustercenters` is a NumPy array, `[:]` yields a view, not
# a copy - in-place edits of cc_* would then also change the clustering
# objects. Use `.copy()` if that is not wanted; TODO confirm.
cc_234 = clustering_nnn_234.clustercenters[:]
cc_345 = clustering_nnn_345.clustercenters[:]
cc_456 = clustering_nnn_456.clustercenters[:]
ss]) # prune the data to lighten the load # calculate the HMSM on subsets of the trajectories for traj_frac in range(Ntraj_0, Ntraj_f + 1): if (rank == 0): print 'Starting trajfrac ' + str(traj_frac) + ' of ' + str(Ntraj_sets) # get the subset dtraj_CN_act = dtraj_CN[traj_frac * Nparam_traj / Ntraj_sets:(traj_frac + 1) * Nparam_traj / Ntraj_sets] # clustering n_clusters = n_Estates # number of clusters clustering = coor.cluster_regspace(dtraj_CN_act, max_centers=n_clusters, dmin=dmin) save_object( 'clustering' + sys_nm + '_trajfrac-' + str(traj_frac) + '.pkl', clustering) # already did this, read it in #with open('clustering'+sys_nm+'_trajfrac-'+str(traj_frac)+'.pkl', 'rb') as f: # clustering = pickle.load(f) dtrajs = clustering.dtrajs cc = clustering.clustercenters[:, 0] print 'n_clusters = ' + str(len(cc)) else: dtrajs = None # send the dtraj info dtrajs = comm.bcast(dtrajs, root=0)
# Collect the observables: `qdata` first, then one distance (or inverse
# distance, where `inverse[i]` is set) per fitted pair.
obs_data = [qdata]
for i in range(num_pairs):
    dist = md.compute_distances(traj, [fit_pairs[i]], periodic=False)[:, 0]
    if inverse[i]:
        obs_data.append(1. / dist)
    else:
        obs_data.append(dist)

# Load the observable object that calculates the observables of a set of
# simulation data.
# Do a simple discretization of the data into equilibrium distribution states.
# In theory, the user will be able to specify any sort of equilibrium states
# for their data.
all_dist = np.array(obs_data).transpose()
reg_space_obj = coor.cluster_regspace(all_dist, dmin=0.05)
dtrajs = np.array(reg_space_obj.dtrajs)[0, :]
assert np.min(dtrajs) == 0
assert np.shape(dtrajs)[0] == np.shape(data)[0]
# State labels start at 0 (asserted above), so the number of states is
# max + 1 - the same bound the loop below iterates over. The original printed
# max, which under-reports by one. The parenthesised form prints identically
# under Python 2 and Python 3.
print("Number of equilibrium states are : %d" % (np.max(dtrajs) + 1))

# Group the frame indices by the state they were assigned to.
equilibrium_frames = []
indices = np.arange(np.shape(data)[0])
for i in range(np.max(dtrajs) + 1):
    state_data = indices[dtrajs == i]
    if not state_data.size == 0:
        equilibrium_frames.append(state_data)

# Sanity check: every frame must belong to exactly one state.
total_check = 0
for set_of_frames in equilibrium_frames:
    total_check += len(set_of_frames)
assert total_check == np.shape(data)[0]
plt.xlabel('time / ns')
plt.ylabel('IC 3')
plt.yticks(np.arange(-4, 6, 2))

# for shorter trajectory, ideal number of clusters is 100
# optimal lag_time = 750?
# optimal lag_time = 1000 timesteps
# NOTE(review): the lag actually passed below is 380 - confirm which is current.
clustering = coor.cluster_kmeans(Y, k=100)
dtrajs = clustering.dtrajs
msm = pyemma.msm.estimate_markov_model(dtrajs, 380)
pyemma.plots.plot_cktest(msm.cktest(3, err_est=True), marker='.')

# TRIALS - reg_space clustering and kmeans comparison - kmeans by far better
clustering_reg = coor.cluster_regspace(Y, dmin=2, max_centers=100)
cr_x = clustering_reg.clustercenters[:, 0]
# Second coordinate of the regspace centers. The original read column 0 again
# (copy-paste slip), which put every regspace center on the diagonal; column 1
# mirrors how cc_y is taken from the k-means centers below.
cr_y = clustering_reg.clustercenters[:, 1]
cc_x = clustering.clustercenters[:, 0]
cc_y = clustering.clustercenters[:, 1]
c_reg = [cr_x, cr_y]
c = [cc_x, cc_y]
print(len(clustering_reg.clustercenters))

# One panel per clustering: k-means centers first, regspace centers second.
fig, axes = plt.subplots(1, 2, figsize=(10, 4), sharex=True, sharey=True)
for ax, cls in zip(axes.flat, [c, c_reg]):
    pyemma.plots.plot_density(xall, yall, ax=ax, cbar=False, alpha=0.1, logscale=True)
def test_exceptions(self):
    """Every malformed call to ``estimate_umbrella_sampling`` must raise ``ValueError``.

    Covered cases: argument lists of unequal length (umbrella and unbiased),
    trajectories whose length differs from their discrete trajectories,
    only one of ``md_trajs`` / ``md_dtrajs`` being given, and single
    trajectories passed instead of lists.
    """
    centers = [1.1, 1.3]
    force_constants = [1.0, 1.0]
    biased = [
        np.array([1.0, 1.1, 1.2, 1.1, 1.0, 1.1]),
        np.array([1.3, 1.2, 1.3, 1.4, 1.4, 1.3]),
    ]
    unbiased = [
        np.array([0.9, 1.0, 1.1, 1.2, 1.3, 1.4]),
        np.array([1.5, 1.4, 1.3, 1.4, 1.4, 1.5]),
    ]
    # One shared discretization; the first two dtrajs belong to the umbrella
    # runs, the remaining ones to the unbiased runs.
    clustering = cluster_regspace(data=biased + unbiased, max_centers=10, dmin=0.15)
    biased_d = clustering.dtrajs[:2]
    unbiased_d = clustering.dtrajs[2:]

    def rejects(*args, **kwargs):
        # The estimator must refuse this argument combination.
        with self.assertRaises(ValueError):
            estimate_umbrella_sampling(*args, **kwargs)

    # unmatching number of us trajectories / us parameters
    rejects(biased[:-1], biased_d, centers, force_constants)
    rejects(biased, biased_d[:-1], centers, force_constants)
    rejects(biased, biased_d, centers[:-1], force_constants)
    rejects(biased, biased_d, centers, force_constants[:-1])

    # unmatching number of md trajectories
    rejects(biased, biased_d, centers, force_constants,
            md_trajs=unbiased[:-1], md_dtrajs=unbiased_d)
    rejects(biased, biased_d, centers, force_constants,
            md_trajs=unbiased, md_dtrajs=unbiased_d[:-1])

    # unmatching trajectory lengths (one frame shorter than the dtrajs)
    biased_short = [
        np.array([1.0, 1.1, 1.2, 1.1, 1.0]),
        np.array([1.3, 1.2, 1.3, 1.4, 1.4]),
    ]
    unbiased_short = [
        np.array([0.9, 1.0, 1.1, 1.2, 1.3]),
        np.array([1.5, 1.4, 1.3, 1.4, 1.4]),
    ]
    rejects(biased_short, biased_d, centers, force_constants)
    rejects(biased, biased_d, centers, force_constants,
            md_trajs=unbiased_short, md_dtrajs=unbiased_d)

    # unmatching md_trajs/md_dtrajs cases
    rejects(biased, biased_d, centers, force_constants,
            md_trajs=None, md_dtrajs=unbiased_d)
    rejects(biased, biased_d, centers, force_constants,
            md_trajs=unbiased, md_dtrajs=None)

    # single trajectory cases
    rejects(biased[0], biased_d[0], centers[0], force_constants[0])
    rejects(biased, biased_d, centers, force_constants,
            md_trajs=unbiased[0], md_dtrajs=unbiased_d[0])
def setUp(self):
    """Cluster six short integer trajectories with regular-space clustering.

    The trajectories are three distinct runs of three consecutive integers,
    each appearing twice; the result of ``coor.cluster_regspace`` with
    ``dmin=0.5`` is kept in ``self.cluster_obj``.
    """
    first_values = (0, 3, 6, 0, 3, 6)
    self.input_trajs = [[start, start + 1, start + 2] for start in first_values]
    self.cluster_obj = coor.cluster_regspace(data=self.input_trajs, dmin=0.5)
#f = coor.featurizer(topfile)
#f.add_distances_ca()
## load trajectories and colvar files
#inp = coor.source(traj_list, f)
#tica = coor.tica(inp, lag=500, dim=3, commute_map=True, kinetic_map=False, skip=20000, stride=10 )

# Column 1 of every colvar array, dropping the first 20000 rows and then
# keeping every 10th row.
col = import_colvar('mix')
col_skip = [i[20000::10, 1] for i in col.col]
#print('shape of col_skip is ', len(col_skip[0]))
print('min and max of col_skip ', np.min(col_skip), np.max(col_skip))

clust_col_skip_obj = coor.cluster_regspace(col_skip, max_centers=1000, dmin=0.025)
clust_col_skip_dtraj = clust_col_skip_obj.dtrajs
#clust_col_skip_dtraj = pickle.load(open('clust_col_skip_dtraj_cl_full.pickle', 'rb'))

# Persist both the clustering object and its discrete trajectories. The
# context managers make sure each file is flushed and closed; the original
# passed bare open() handles that were never closed.
with open('clust_col_skip_obj_cl_full.pickle', 'wb') as fh:
    pickle.dump(clust_col_skip_obj, fh, protocol=pickle.HIGHEST_PROTOCOL)
with open('clust_col_skip_dtraj_cl_full_try2.pickle', 'wb') as fh:
    pickle.dump(clust_col_skip_dtraj, fh, protocol=pickle.HIGHEST_PROTOCOL)

print('length of clust_col_skip_dtraj is ', len(clust_col_skip_dtraj))
print('length of clust_col_skip_dtraj[0] is ', len(clust_col_skip_dtraj[0]))
#Y = tica.get_output()
# One COLVAR file per umbrella window; the window centers run from 0.7 up to
# (but excluding) 15.5 in steps of 0.1.
center = np.arange(0.7, 15.5, 0.1)
center2 = center.tolist()
colvar_list = [indir + "/comboCOLVAR{:2.1f}".format(i) for i in center]
col = [np.loadtxt(f, skiprows=1) for f in colvar_list]
length = len(center)
print(length)
# NOTE(review): `force` and `kt` are not used anywhere in this section - the
# estimator below receives force_list (all 500) and kT=2.496. Confirm which
# values are the intended ones.
force = 119.503
force_list = [500] * length

## Start doing stuff
max_centers = 1500
dmin = 0.015
kt = 0.596
#cv = list(col[20000:,1])
# Column 1 of every window, dropping the first 20000 rows and keeping every
# 10th row; the strided slices are then copied into C-ordered arrays.
cv = [i[20000::10, 1] for i in col]
cv2 = [i.copy(order='C') for i in cv]

us_cluster = coor.cluster_regspace(cv2, max_centers=max_centers, dmin=dmin)
w = thermo.estimate_umbrella_sampling(cv2,
                                      us_cluster.dtrajs,
                                      center2,
                                      force_list,
                                      kT=2.496,
                                      maxiter=50000,
                                      lag=200,
                                      dt_traj='10 ps',
                                      save_convergence_info=200,
                                      estimator='dtram')

# Context managers make sure both pickle files are flushed and closed; the
# original passed bare open() handles that were never closed.
with open(clust_out, 'wb') as fh:
    pickle.dump(us_cluster, fh)
with open(out, 'wb') as fh:
    pickle.dump(w, fh)
def clusterRegularSpace(trajectories, dmin, stride=1):
    """Run regular-space clustering on the given trajectories.

    Regular-space clustering is a modified version of Hartigan's leader
    algorithm. All three arguments are forwarded unchanged, by keyword, to
    ``coor.cluster_regspace``.

    :param trajectories: data to cluster (forwarded as ``data``)
    :param dmin: forwarded as ``dmin``
    :param stride: forwarded as ``stride``; defaults to 1
    :return: whatever ``coor.cluster_regspace`` returns
    """
    clustering = coor.cluster_regspace(data=trajectories, dmin=dmin, stride=stride)
    return clustering