def build_msm(self, lag_time=None):
    """Assign every frame to its closest generator and build an MSM.

    If ``lag_time`` is None the value stored in ``self.good_lag_time`` is
    used; otherwise the supplied value is used and is also written back to
    ``self.good_lag_time``.

    Returns the transition matrix (second item of ``msml.build_msm``).
    """
    if lag_time is not None:
        self.good_lag_time = lag_time
    else:
        lag_time = self.good_lag_time

    shim_trajs = get_data.get_shimtraj_from_trajlist(self.traj_list)
    metric = classic.Euclidean2d()

    # One row per trajectory, padded to the longest one; entries that are
    # never assigned keep the fill value -1.
    n_rows = len(self.traj_list)
    n_cols = max(t.shape[0] for t in self.traj_list)
    assignments = np.empty((n_rows, n_cols), dtype='int')
    assignments.fill(-1)

    # The generators are prepared once and reused for every frame.
    prepared_gens = metric.prepare_trajectory(
        self.clusterer.get_generators_as_traj())

    for traj_index, traj in enumerate(shim_trajs):
        prepared_traj = metric.prepare_trajectory(traj)
        for frame_index in xrange(len(traj)):
            distances = metric.one_to_all(prepared_traj, prepared_gens,
                                          frame_index)
            assignments[traj_index, frame_index] = np.argmin(distances)

    counts = msml.get_count_matrix_from_assignments(
        assignments, n_states=None, lag_time=lag_time)
    _rev_counts, t_matrix, _populations, _mapping = msml.build_msm(counts)
    return t_matrix
def test_1(self):
    """Check build_msm with MLE symmetrization and ergodic trimming."""
    counts = MSMLib.get_count_matrix_from_assignments(self.assignments, 2)
    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        counts, symmetrize="MLE", ergodic_trimming=True)

    expected_rev_counts = np.matrix([[6.46159184, 4.61535527],
                                     [4.61535527, 2.30769762]])
    expected_t_matrix = np.matrix([[0.58333689, 0.41666311],
                                   [0.66666474, 0.33333526]])
    expected_populations = np.array([0.61538595, 0.38461405])
    expected_mapping = np.array([0, 1])

    eq(rev_counts.todense(), expected_rev_counts, decimal=4)
    eq(t_matrix.todense(), expected_t_matrix, decimal=4)
    eq(populations, expected_populations, decimal=5)
    eq(mapping, expected_mapping)
def test_4(self):
    """Check build_msm at lag time 2 with a sliding window, unsymmetrized."""
    result = MSMLib.build_msm(self.assignments, lag_time=2,
                              symmetrize=None, sliding_window=True)
    counts, rev_counts, t_matrix, populations, mapping = result

    expected_t_matrix = np.matrix([[ 0.63636364, 0.36363636],
                                   [ 0.6, 0.4]])

    npt.assert_array_equal(counts.todense(), np.matrix('7 4; 3 2'))
    npt.assert_array_almost_equal(rev_counts.todense(),
                                  np.matrix('7 4; 3 2'))
    npt.assert_array_almost_equal(t_matrix.todense(), expected_t_matrix)
    # No populations are expected when symmetrize is None.
    assert populations is None
    npt.assert_array_equal(mapping, [0, 1])
def test_3(self):
    """Check build_msm with 'Transpose' symmetrization."""
    result = MSMLib.build_msm(self.assignments, self.lag_time,
                              symmetrize='Transpose')
    counts, rev_counts, t_matrix, populations, mapping = result

    expected_rev_counts = np.matrix([[ 7, 4.5],
                                     [ 4.5, 2]])
    expected_t_matrix = np.matrix([[ 0.60869565, 0.39130435],
                                   [ 0.69230769, 0.30769231]])
    expected_populations = [ 0.63888889, 0.36111111]

    npt.assert_array_equal(counts.todense(), np.matrix('7 5; 4 2'))
    npt.assert_array_almost_equal(rev_counts.todense(), expected_rev_counts)
    npt.assert_array_almost_equal(t_matrix.todense(), expected_t_matrix)
    npt.assert_array_almost_equal(populations, expected_populations)
    npt.assert_array_equal(mapping, [0, 1])
def test_2(self):
    """Check build_msm without symmetrization."""
    result = MSMLib.build_msm(self.assignments, self.lag_time,
                              symmetrize=None)
    counts, rev_counts, t_matrix, populations, mapping = result

    expected_rev_counts = np.matrix([[ 7, 5],
                                     [ 4, 2]])
    expected_t_matrix = np.matrix([[ 0.58333333, 0.41666667],
                                   [ 0.66666667, 0.33333333]])

    npt.assert_array_equal(counts.todense(), np.matrix('7 5; 4 2'))
    npt.assert_array_almost_equal(rev_counts.todense(), expected_rev_counts)
    npt.assert_array_almost_equal(t_matrix.todense(), expected_t_matrix)
    # No populations are expected when symmetrize is None.
    assert populations is None
    npt.assert_array_equal(mapping, [0, 1])
def test_1(self):
    """Check build_msm with 'MLE' symmetrization."""
    result = MSMLib.build_msm(self.assignments, self.lag_time,
                              symmetrize='MLE')
    counts, rev_counts, t_matrix, populations, mapping = result

    expected_rev_counts = np.matrix([[ 6.46159184, 4.61535527],
                                     [ 4.61535527, 2.30769762]])
    expected_t_matrix = np.matrix([[ 0.58333689, 0.41666311],
                                   [ 0.66666474, 0.33333526]])
    expected_populations = [ 0.61538595, 0.38461405]

    npt.assert_array_equal(counts.todense(), np.matrix('7 5; 4 2'))
    npt.assert_array_almost_equal(rev_counts.todense(), expected_rev_counts)
    npt.assert_array_almost_equal(t_matrix.todense(), expected_t_matrix)
    npt.assert_array_almost_equal(populations, expected_populations)
    npt.assert_array_equal(mapping, [0, 1])
def parallel_get_matrix(input):
    """Resample a matrix row by row and build an MSM from the new counts.

    Each row of ``Ttest`` is normalised into a probability vector, a
    multinomial sample of ``multinom`` draws is taken from it, and the
    sampled counts are accumulated into a sparse count matrix that is then
    passed to ``MSMLib.build_msm`` (MLE symmetrization, ergodic trimming).

    Parameters
    ----------
    input : tuple
        ``(Ttest, multinom, NumStates)`` packed into a single argument.
        (Presumably so the function can be used with a pool ``map`` --
        TODO confirm against the caller.)

    Returns
    -------
    rev_counts, t_matrix, Populations, Mapping
        The four values returned by ``MSMLib.build_msm``.
    """
    print("working")
    Ttest, multinom, NumStates = input

    newT = scipy.sparse.lil_matrix((int(NumStates), int(NumStates)),
                                   dtype='float32')
    # Column indices are the same for every row, so build them once.
    columns = numpy.arange(0, NumStates)

    # NOTE(review): the loop bound is the column count while rows are
    # indexed; equivalent only for a square Ttest -- confirm.
    for i in range(0, Ttest.shape[1]):
        row = Ttest[i]
        # Sum the row once instead of once per element.
        row_total = sum(row)
        pvals = numpy.array([x / row_total for x in row])
        counts = numpy.random.multinomial(int(multinom), pvals, size=1)

        # 2 x NumStates index array: (row i, every column).
        transitions = numpy.vstack((numpy.array([i] * NumStates), columns))
        newT = newT + scipy.sparse.coo_matrix(
            (counts[0], transitions), shape=(NumStates, NumStates))

    rev_counts, t_matrix, Populations, Mapping = MSMLib.build_msm(
        newT, symmetrize='MLE', ergodic_trimming=True)
    return rev_counts, t_matrix, Populations, Mapping
def msm(traj_list, n_clusters, n_medoid_iters=10, lag_time=1, distance_cutoff=None):
    """Cluster the trajectories and return the resulting transition matrix.

    The clusterer comes from ``cluster``; its assignments are turned into a
    count matrix at ``lag_time`` and handed to ``msml.build_msm``.
    """
    print("Building a classic MSM")
    clusterer = cluster(traj_list, n_clusters, n_medoid_iters, distance_cutoff)

    assignments = clusterer.get_assignments()
    counts = msml.get_count_matrix_from_assignments(assignments, n_clusters,
                                                    lag_time)
    _rev_counts, t_matrix, _populations, _mapping = msml.build_msm(counts)
    return t_matrix
def test_2_point_1(self):
    """Same as test_2 but with get_populations=True (noted as not working)."""
    result = MSMLib.build_msm(self.assignments, self.lag_time,
                              symmetrize=None, get_populations=True)
    counts, rev_counts, t_matrix, populations, mapping = result

    expected_rev_counts = np.matrix([[ 7, 5],
                                     [ 4, 2]])
    expected_t_matrix = np.matrix([[ 0.58333333, 0.41666667],
                                   [ 0.66666667, 0.33333333]])
    expected_populations = [ 0.61538595, 0.38461405]

    npt.assert_array_equal(counts.todense(), np.matrix('7 5; 4 2'))
    npt.assert_array_almost_equal(rev_counts.todense(), expected_rev_counts)
    npt.assert_array_almost_equal(t_matrix.todense(), expected_t_matrix)
    npt.assert_array_almost_equal(populations, expected_populations)
    npt.assert_array_equal(mapping, [0, 1])
def run(lagtime, assignments, symmetrize='MLE', input_mapping="None", trim=True, out_dir="./Data/"):
    """Build an MSM from ``assignments`` and write the results to ``out_dir``.

    Parameters
    ----------
    lagtime : int
        Lag time passed to the count-matrix builder; must be positive.
    assignments : ndarray
        State assignments, with -1 marking unassigned entries. The mapping
        helpers are called for their effect on this array (their return
        value is ignored), and the array is saved afterwards.
    symmetrize : str or None
        Forwarded to ``MSMLib.build_msm``.
    input_mapping : ndarray or "None"
        Optional mapping applied to the assignments before counting. The
        string ``"None"`` is the "no mapping" sentinel.
    trim : bool
        Forwarded as ``ergodic_trimming``; when true the resulting mapping
        is applied to the assignments and the discarded fraction is logged.
    out_dir : str
        Directory receiving tProb.mtx, tCounts.mtx, Mapping.dat,
        Assignments.Fixed.h5 and Populations.dat.
    """
    # set the filenames for output
    FnTProb = os.path.join(out_dir, "tProb.mtx")
    FnTCounts = os.path.join(out_dir, "tCounts.mtx")
    FnMap = os.path.join(out_dir, "Mapping.dat")
    FnAss = os.path.join(out_dir, "Assignments.Fixed.h5")
    FnPops = os.path.join(out_dir, "Populations.dat")

    # make sure none are taken
    outputlist = [FnTProb, FnTCounts, FnMap, FnAss, FnPops]
    arglib.die_if_path_exists(outputlist)

    # Check for valid lag time
    assert lagtime > 0, 'Please specify a positive lag time.'

    # Comparing an ndarray with the "None" sentinel is elementwise on newer
    # NumPy and cannot be used as a truth value, so arrays are recognised
    # by type first; every other input is compared exactly as before.
    has_input_mapping = (isinstance(input_mapping, np.ndarray)
                         or input_mapping != "None")

    # if given, apply mapping to assignments
    if has_input_mapping:
        MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_assigns_before_trim = int(np.count_nonzero(assignments != -1))

    counts = MSMLib.get_count_matrix_from_assignments(
        assignments, lag_time=lagtime, sliding_window=True)
    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        counts, symmetrize=symmetrize, ergodic_trimming=trim)

    if trim:
        MSMLib.apply_mapping_to_assignments(assignments, mapping)
        n_assigns_after_trim = int(np.count_nonzero(assignments != -1))

        # if had input mapping, then update it
        if has_input_mapping:
            mapping = mapping[input_mapping]

        # Report how much data was discarded in trimming
        percent = (1.0 - float(n_assigns_after_trim)
                   / float(n_assigns_before_trim)) * 100.0
        logger.warning("Ergodic trimming discarded: %f percent of your data", percent)
    else:
        logger.warning("No ergodic trimming applied")

    # Save all output
    np.savetxt(FnPops, populations)
    np.savetxt(FnMap, mapping, "%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return
def run(LagTime, assignments, Symmetrize='MLE', input_mapping="None", Prior=0.0, OutDir="./Data/"):
    """Build a trimmed MSM from ``assignments`` and write results to ``OutDir``.

    Parameters
    ----------
    LagTime : int
        Lag time forwarded to ``MSMLib.build_msm``.
    assignments : ndarray
        State assignments, with -1 marking unassigned entries. The mapping
        helpers are called for their effect on this array (their return
        value is ignored), and the array is saved afterwards.
    Symmetrize : str or None
        Forwarded to ``MSMLib.build_msm`` as ``symmetrize``.
    input_mapping : ndarray or "None"
        Optional mapping applied to the assignments before building. The
        string ``"None"`` is the "no mapping" sentinel.
    Prior : float
        Accepted but not used by this function.
    OutDir : str
        Directory receiving tProb.mtx, tCounts.mtx, Mapping.dat,
        Assignments.Fixed.h5 and Populations.dat.
    """
    # set the filenames for output
    FnTProb = os.path.join(OutDir, "tProb.mtx")
    FnTCounts = os.path.join(OutDir, "tCounts.mtx")
    FnMap = os.path.join(OutDir, "Mapping.dat")
    FnAss = os.path.join(OutDir, "Assignments.Fixed.h5")
    FnPops = os.path.join(OutDir, "Populations.dat")

    # make sure none are taken
    outputlist = [FnTProb, FnTCounts, FnMap, FnAss, FnPops]
    arglib.die_if_path_exists(outputlist)

    # Comparing an ndarray with the "None" sentinel is elementwise on newer
    # NumPy and cannot be used as a truth value, so arrays are recognised
    # by type first; every other input is compared exactly as before.
    has_input_mapping = (isinstance(input_mapping, np.ndarray)
                         or input_mapping != "None")

    # if given, apply mapping to assignments
    if has_input_mapping:
        MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_assigns_before_trim = int(np.count_nonzero(assignments != -1))

    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        assignments, lag_time=LagTime, symmetrize=Symmetrize,
        sliding_window=True, trim=True)

    MSMLib.apply_mapping_to_assignments(assignments, mapping)
    n_assigns_after_trim = int(np.count_nonzero(assignments != -1))

    # if had input mapping, then update it
    if has_input_mapping:
        mapping = mapping[input_mapping]

    # Report how much data was discarded in trimming
    percent = (1.0 - float(n_assigns_after_trim)
               / float(n_assigns_before_trim)) * 100.0
    logger.warning("Ergodic trimming discarded: %f percent of your data", percent)

    # Save all output
    np.savetxt(FnPops, populations)
    np.savetxt(FnMap, mapping, "%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    msmbuilder.io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return
def build_classic_from_memberships(memberships, lag_time=1):
    """Build a classic MSM by hard-assigning each membership vector.

    Every row of ``memberships`` is reduced to the index of its largest
    entry, and the resulting state sequence is counted with the msmbuilder
    code. Use this for comparing quantized versions of the fuzzy count
    matrix building for consistency.

    Returns the four values produced by ``msm.build_msm``:
    ``(rev_counts, t_matrix, populations, mapping)``.
    """
    n_states = memberships.shape[1]

    # One state index per row: the position of the largest membership.
    states = np.array([np.argmax(row) for row in memberships], dtype='int')

    counts = msm.get_counts_from_traj(states, n_states, lag_time)
    rev_counts, t_matrix, populations, mapping = msm.build_msm(counts)
    return rev_counts, t_matrix, populations, mapping
def classic(trajs, n_clusters, n_medoid_iters, metric, dim=2, lag_time=1, show=False, desc=None):
    """Cluster with HybridKMedoids, plot the centroids and analyze the MSM.

    ``desc`` defaults to a label containing ``n_clusters``. The first
    ``dim`` coordinates of atom 0 of each generator are used as the
    centroid positions for plotting and analysis.

    Returns the transition matrix from ``msml.build_msm``.
    """
    if desc is None:
        desc = "Classic, n_clusters=%d" % n_clusters

    clusterer = clustering.HybridKMedoids(metric, trajs, k=n_clusters,
                                          local_num_iters=n_medoid_iters)
    generators = clusterer.get_generators_as_traj()
    centroid_coords = generators['XYZList'][:, 0, 0:dim]

    plot_centroids(centroid_coords)
    if show:
        pp.show()

    assignments = clusterer.get_assignments()
    counts = msml.get_count_matrix_from_assignments(assignments, n_clusters,
                                                    lag_time)
    _rev_counts, t_matrix, _populations, _mapping = msml.build_msm(counts)

    analyze_msm(t_matrix, centroid_coords, desc, show=show)
    return t_matrix
def get_eigenvalues(count_matrix):
    """Return the eigenvalues of the MSM built from ``count_matrix``.

    States whose row sum is zero get a count of one on the diagonal before
    the MSM is built. The symmetrization method and the number of
    eigenvalues come from the module-level ``args``.

    Returns
    -------
    ndarray or None
        Eigenvalues sorted in descending order, or None when
        ``MSMLib.build_msm`` raises.
    """
    bad_states = np.array(np.where(count_matrix.sum(axis=1) == 0)[0]).flatten()

    # Existing entries plus one diagonal entry per empty state.
    # NOTE(review): this assumes count_matrix.data lines up one-to-one with
    # nonzero() (i.e. no explicitly stored zeros) -- confirm.
    nz_rows, nz_cols = count_matrix.nonzero()
    i_ary = np.concatenate((nz_rows, bad_states))
    j_ary = np.concatenate((nz_cols, bad_states))
    new_data = np.concatenate((count_matrix.data, np.ones(len(bad_states))))

    print("%s %s %s %s" % (i_ary.shape, count_matrix.data.shape,
                           new_data.shape, len(bad_states)))

    count_matrix = scipy.sparse.csr_matrix((new_data, (i_ary, j_ary)))

    print("%s %s" % (count_matrix.data.shape, count_matrix.nonzero()[0].shape))

    try:
        t_matrix = MSMLib.build_msm(count_matrix, symmetrize=args.symmetrize)[1]
    except Exception:
        # Best effort: a failed build yields no eigenvalues. Catching
        # Exception rather than everything lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

    vals = msm_analysis.get_eigenvectors(t_matrix, args.num_vals, epsilon=1)[0]
    vals.sort()
    return vals[::-1]
def __init__(self):
    """Build the four-state model from a fixed count matrix.

    The counts are converted to a sparse integer matrix and passed through
    ``msml.build_msm`` (MLE symmetrization, ergodic trimming). The
    transition matrix, the raw counts and the reversible counts are stored
    on the instance, and ``step_func`` is set to ``self.step_sparse``.
    """
    super(FourStateTmat, self).__init__()

    counts = np.array([
        [100, 30, 1, 1],
        [30, 100, 1, 1],
        [3, 3, 100, 30],
        [3, 3, 30, 100],
    ])
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy, so the builtin is used directly.
    counts = scipy.sparse.csr_matrix(counts, dtype=int)

    rev_counts, tmat, populations, mapping = msml.build_msm(
        counts, symmetrize='MLE', ergodic_trimming=True)

    self.n_states = tmat.shape[0]
    self.tmat = tmat
    self.counts = counts
    self.rev_counts = rev_counts
    self.step_func = self.step_sparse
def build_from_memberships(memberships, lag_time=1):
    """Build an MSM from a time-ordered array of membership vectors.

    Membership vectors ``lag_time`` steps apart are paired with a sliding
    window, the pairs are counted with ``get_counts_from_pairs``, and the
    MSM is built without ergodic trimming.

    Returns ``(rev_counts, t_matrix, populations, mapping)``.
    """
    # Sliding window: entry k of `origins` is paired with entry k of
    # `destinations`, which lies lag_time steps later.
    origins = memberships[:-lag_time:1]
    destinations = memberships[lag_time::1]
    n_pairs = len(origins)
    assert n_pairs == len(destinations)

    n_clusters = memberships.shape[1]

    # Axis 1 holds the two time points of each pair (0: from, 1: to).
    pairs = np.zeros((n_pairs, 2, n_clusters))
    pairs[:, 0, :] = origins
    pairs[:, 1, :] = destinations

    counts = get_counts_from_pairs(pairs, n_clusters)
    rev_counts, t_matrix, populations, mapping = msm.build_msm(
        counts, ergodic_trimming=False)
    return rev_counts, t_matrix, populations, mapping
def build_new(centroids, trajs, fuzziness, dist, soft=True, neigen=4, show=False, desc=None):
    """Build and analyze an MSM from points and centroids.

    Membership-based time pairs are generated first. With ``soft`` false the
    membership vectors are 'quantized' to mirror the hard clustering case;
    otherwise the fuzzy nature of the clusters is used in building the MSM.

    The result is passed to ``analyze_msm``; nothing is returned.
    """
    n_states = len(centroids)

    time_pairs = get_giant_state_list(centroids, trajs, fuzziness, dist,
                                      soft=soft)
    print("Got state list")

    counts_mat = buildmsm.get_counts_from_pairs(time_pairs, n_states)
    print("Got count matrix")

    _rev_counts, t_matrix, _populations, _mapping = msml.build_msm(counts_mat)

    # Default label depends on whether soft memberships were used.
    if desc is None:
        desc = 'New, Fuzzy' if soft else 'New, not-so-fuzzy'

    analyze_msm(t_matrix, centroids, desc=desc, show=show, neigen=neigen)
def set_coordinate_as_committors(self, lag_time=1, symmetrize='transpose'):
    """
    Set the reaction coordinate to be the committors (pfolds).

    Employs the reactant, product states provided as the sources, sinks
    respectively for the committor calculation. The result is stored in
    `self.reaction_coordinate_values`.

    Parameters
    ----------
    lag_time : int
        The MSM lag time (in units of frames).
        NOTE: this argument is not used by the body; the transition matrix
        is estimated from `self.counts` as it is.
    symmetrize : str {'mle', 'transpose', 'none'}
        Which symmetrization method to employ in the estimation of the
        MSM transition probability matrix from the `counts` matrix.
    """
    # build_msm returns (rev_counts, t_matrix, populations, mapping) when
    # given a count matrix; only the transition matrix is needed here.
    _rev_counts, t_matrix, _populations, _mapping = MSMLib.build_msm(
        self.counts, symmetrize)

    self.reaction_coordinate_values = tpt.calculate_committors(
        [self.reactant], [self.product], t_matrix)
    return
def __init__(self, link_prob_f, link_prob_b):
    """Build a model with twice the states by coupling two copies.

    The existing ``self.counts`` is combined, via an outer product, with a
    2x2 connectivity matrix whose off-diagonal entries are ``link_prob_b``
    and ``link_prob_f``. The doubled count matrix is then passed through
    ``msml.build_msm`` (MLE symmetrization, ergodic trimming).

    Parameters
    ----------
    link_prob_f : float
        Weight in the lower-left entry of the connectivity matrix.
    link_prob_b : float
        Weight in the upper-right entry of the connectivity matrix.
    """
    super(EightStateTmat, self).__init__()
    # self.n_states and self.counts are read here, so they must already be
    # set once the parent initializer has run.
    n = self.n_states * 2

    # Do outer product
    connecty_mat = np.array([
        [1.0, link_prob_b],
        [link_prob_f, 1.0]
    ])
    double_counts = np.multiply.outer(connecty_mat, self.counts.todense())

    # Turn it into a 2d matrix
    double_counts = np.swapaxes(double_counts, 1, 2)
    double_counts = np.reshape(double_counts, (n, n))

    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy, so the builtin is used directly. The integer
    # dtype discards any fractional part of the scaled counts.
    double_counts = scipy.sparse.csr_matrix(double_counts, dtype=int)

    rev_counts, tmat, populations, mapping = msml.build_msm(
        double_counts, symmetrize='MLE', ergodic_trimming=True)

    # Record that we now have twice as many states
    self.n_states = n
    self.tmat = tmat
    self.counts = double_counts
    self.rev_counts = rev_counts
# Output locations, all inside args.outdir.
ratemtx_fn = pjoin(args.outdir, "K.mtx")
tcounts_fn = pjoin(args.outdir, "tCounts.mtx")
unsym_fn = pjoin(args.outdir, "tCounts.UnSym.mtx")
mapping_fn = pjoin(args.outdir, "Mapping.dat")
fixed_fn = pjoin(args.outdir, "Assignments.Fixed.h5")
pops_fn = pjoin(args.outdir, "Populations.dat")

if not os.path.exists(args.outdir):
    os.mkdir(args.outdir)

# Every file written below is listed, Mapping.dat included, so that no
# existing output is silently overwritten and each one is logged.
outlist = [ratemtx_fn, tcounts_fn, unsym_fn, mapping_fn, fixed_fn, pops_fn]
for e in outlist:
    arglib.die_if_path_exists(e)

# if lag time is not one, there's going to be a unit mismatch between
# what you get and what you're expecting.
lag_time = 1
counts, rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
    assignments, lag_time=lag_time, symmetrize=args.symmetrize)

K = MSMLib.estimate_rate_matrix(rev_counts, assignments)

np.savetxt(pops_fn, populations)
np.savetxt(mapping_fn, mapping, "%d")
scipy.io.mmwrite(ratemtx_fn, K)
scipy.io.mmwrite(tcounts_fn, rev_counts)
scipy.io.mmwrite(unsym_fn, counts)
Serializer.SaveData(fixed_fn, assignments)

for e in outlist:
    logger.info("Saved %s", e)
def get_implied_timescales_helper(args):
    """Helper function to compute implied timescales with multiprocessing

    Does not work in interactive mode

    Parameters
    ----------
    args : tuple
        Six values packed into one argument, in this order:

        assignments_fn : str
            Path to the assignments file on disk (read with key 'arr_0',
            falling back to 'Data')
        lag_time : int
            Lag time at which to calculate the timescales
        n_implied_times : int
            Number of implied timescales to calculate
        sliding_window : bool
            Use sliding window
        trimming : bool
            Use ergodic trimming
        symmetrize : {'MLE', 'Transpose', None}
            Symmetrization method

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    Notes
    -----
    A ValueError raised while building the MSM is logged and ends the
    process with exit status 1.

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """
    (assignments_fn, lag_time, n_implied_times,
     sliding_window, trimming, symmetrize) = args

    try:
        assignments = io.loadh(assignments_fn, 'arr_0')
    except KeyError:
        assignments = io.loadh(assignments_fn, 'Data')

    # Imported at call time, as in the original code.
    from msmbuilder import MSMLib

    try:
        counts = MSMLib.get_count_matrix_from_assignments(
            assignments, lag_time=lag_time, sliding_window=sliding_window)
        rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
            counts, symmetrize, trimming)
    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    # TJL: set Epsilon high, should not raise err here
    n_eigenvectors = n_implied_times + 1
    e_values = get_eigenvectors(t_matrix, n_eigenvectors, epsilon=1)[0]

    # Fewer eigenvalues than requested may come back (e.g. after trimming);
    # shrink the expected sizes so both result vectors keep equal length.
    n_eigenvectors = min(n_eigenvectors, len(e_values))
    n_implied_times = n_eigenvectors - 1

    # make sure to leave off equilibrium distribution
    lag_times = lag_time * np.ones((n_implied_times))
    imp_times = -lag_times / np.log(e_values[1:n_eigenvectors])

    return (lag_times, imp_times)
def get_implied_timescales_helper(args):
    """Compute implied timescales at one lag time (multiprocessing helper).

    Does not work in interactive mode.

    Parameters
    ----------
    args : tuple
        ``(assignments_fn, lag_time, n_implied_times, sliding_window,
        trimming, symmetrize)``: the assignments file path, the lag time,
        the number of implied timescales wanted, whether to use a sliding
        window, whether to use ergodic trimming, and the symmetrization
        method ({'MLE', 'Transpose', None}).

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """
    (assignments_fn, lag_time, n_implied_times,
     sliding_window, trimming, symmetrize) = args
    logger.info("Calculating implied timescales at lagtime %d" % lag_time)

    # The array may be stored under either of two keys.
    try:
        assignments = io.loadh(assignments_fn, "arr_0")
    except KeyError:
        assignments = io.loadh(assignments_fn, "Data")

    try:
        from msmbuilder import MSMLib

        counts = MSMLib.get_count_matrix_from_assignments(
            assignments, lag_time=lag_time, sliding_window=sliding_window)
        rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
            counts, symmetrize, trimming)
    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    n_requested = n_implied_times + 1
    if symmetrize not in ["MLE", "Transpose"]:
        solved = get_eigenvectors(t_matrix, n_requested, epsilon=1)
    else:
        solved = get_reversible_eigenvectors(t_matrix, n_requested,
                                             populations=populations)
    e_values = solved[0]

    # Correct for possible change in the number of eigenvalues from trimming
    n_times = len(e_values) - 1

    # The first eigenvalue (equilibrium distribution) is left off.
    lag_times = lag_time * np.ones((n_times))
    imp_times = -lag_times / np.log(e_values[1:])

    return (lag_times, imp_times)
def run(lag_time, assignments_list, symmetrize='MLE', input_mapping="None", out_dir="./Data/"):
    """Build one MSM from several assignment arrays and write the results.

    Count matrices are computed per assignment array (without a sliding
    window), summed, and passed to ``MSMLib.build_msm`` with ergodic
    trimming.

    Parameters
    ----------
    lag_time : int
        Lag time passed to the count-matrix builder.
    assignments_list : list of ndarray
        Assignment arrays. The mapping helpers are called for their effect
        on these arrays (their return value is ignored), and the arrays
        are saved afterwards.
    symmetrize : str or None
        Forwarded to ``MSMLib.build_msm``.
    input_mapping : ndarray or "None"
        Optional mapping applied to every assignment array first. The
        string ``"None"`` is the "no mapping" sentinel.
    out_dir : str
        Directory receiving tProb.mtx, tCounts.mtx, Mapping.dat,
        Populations.dat and one Assignments.Fixed*.h5 per input array.
    """
    # set the filenames for output
    tProb_fn = os.path.join(out_dir, "tProb.mtx")
    tCounts_fn = os.path.join(out_dir, "tCounts.mtx")
    map_fn = os.path.join(out_dir, "Mapping.dat")
    pops_fn = os.path.join(out_dir, "Populations.dat")
    if len(assignments_list) == 1:
        assignments_fn_list = [os.path.join(out_dir, "Assignments.Fixed.h5")]
    else:
        assignments_fn_list = [
            os.path.join(out_dir, "Assignments.Fixed.%d.h5" % i)
            for i in range(len(assignments_list))]

    # make sure none are taken
    output_list = [tProb_fn, tCounts_fn, map_fn, pops_fn] + assignments_fn_list
    arglib.die_if_path_exists(output_list)

    # Comparing an ndarray with the "None" sentinel is elementwise on newer
    # NumPy and cannot be used as a truth value, so arrays are recognised
    # by type first; every other input is compared exactly as before.
    has_input_mapping = (isinstance(input_mapping, np.ndarray)
                         or input_mapping != "None")

    # if given, apply mapping to assignments
    if has_input_mapping:
        for assignments in assignments_list:
            MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_assigns_before_trim = get_num_assignments(assignments_list)

    num_states = np.max([np.max(ass) for ass in assignments_list]) + 1

    # Every count matrix is built with the same n_states so the matrices
    # can be added even when the first array lacks the highest state.
    counts = MSMLib.get_count_matrix_from_assignments(
        assignments_list[0], n_states=num_states, lag_time=lag_time,
        sliding_window=False)
    for i in range(1, len(assignments_list)):
        print(i)
        counts = counts + MSMLib.get_count_matrix_from_assignments(
            assignments_list[i], n_states=num_states, lag_time=lag_time,
            sliding_window=False)

    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        counts, symmetrize=symmetrize, ergodic_trimming=True)

    for assignments in assignments_list:
        MSMLib.apply_mapping_to_assignments(assignments, mapping)

    n_assigns_after_trim = get_num_assignments(assignments_list)

    # if had input mapping, then update it
    if has_input_mapping:
        mapping = mapping[input_mapping]

    # Report how much data was discarded in trimming
    percent = (1.0 - float(n_assigns_after_trim)
               / float(n_assigns_before_trim)) * 100.0
    logger.warning("Ergodic trimming discarded: "
                   "%f percent of your data", percent)

    # Save all output
    scipy.io.mmwrite(tProb_fn, t_matrix)
    scipy.io.mmwrite(tCounts_fn, rev_counts)
    np.savetxt(map_fn, mapping, "%d")
    np.savetxt(pops_fn, populations)

    for assignments_fn, assignments in zip(assignments_fn_list, assignments_list):
        msmbuilder.io.saveh(assignments_fn, assignments)

    for output in output_list:
        logger.info("Wrote: %s", output)

    return