def main():
    """Build an MSM movie from command-line arguments and save it.

    Registers the command-line arguments, loads the assignments (trying the
    'arr_0' key first and falling back to 'Data'), samples a sequence of
    states from the transition matrix, draws one random conformation per
    sampled state, and writes the stitched coordinates to ``args.output``.
    """
    cli = arglib.ArgumentParser(description=""" Create an MSM movie by sampling a sequence of states and sampling a random conformation from each state in the sequence. """)

    # (argument name, keyword options) in registration order.
    argument_specs = [
        ('project', {}),
        ('assignments', {'default': 'Data/Assignments.Fixed.h5'}),
        ('tprob', {'default': 'Data/tProb.mtx'}),
        ('num_steps', {}),
        ('starting_state', {
            'type': int,
            'help': '''Which state to start trajectory from.''',
        }),
        ('output', {
            'default': 'sample_traj.pdb',
            'help': """The filename of your output trajectory. The filetype suffix will be used to select the output file format.""",
        }),
    ]
    for arg_name, arg_options in argument_specs:
        cli.add_argument(arg_name, **arg_options)
    args = cli.parse_args()

    # Assignment files may store the array under either key.
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    n_steps = int(args.num_steps)
    first_state = int(args.starting_state)

    project = Project.load_from(args.project)
    transition_matrix = scipy.io.mmread(args.tprob).tocsr()

    state_sequence = msm_analysis.sample(transition_matrix, first_state, n_steps)
    per_state_confs = project.get_random_confs_from_states(
        assignments, state_sequence, 1)

    # Reuse the first sampled trajectory object as the container and replace
    # its coordinates with the first frame of every sampled trajectory.
    first_frames = []
    for conf in per_state_confs:
        first_frames.append(conf["XYZList"][0])
    movie = per_state_confs[0]
    movie["XYZList"] = np.array(first_frames)
    movie.save(args.output)
def run(tProb, start_state, steps, project, out_pdb, out_xtc):
    """Sample a state path from ``tProb`` and save one random frame per step.

    Parameters
    ----------
    tProb : transition matrix passed straight to ``msm_analysis.sample``
    start_state : state the sampled path starts from
    steps : number of steps to sample
    project : object providing ``load_frame(traj_index, frame_index)``
    out_pdb : path the first frame is written to (PDB)
    out_xtc : path the full sampled trajectory is written to (XTC)

    Notes
    -----
    ``assignments`` is not a parameter; it is read from the enclosing
    (module) scope and must be defined before this function is called.
    """
    state_traj = msm_analysis.sample(tProb, start_state, steps)
    print("Sampled tProb.")

    # Number of frames assigned to each state; entries equal to -1 are
    # excluded from the count.
    assigned_only = assignments[np.where(assignments != -1)]
    state_sizes = np.bincount(assigned_only)

    # For every step of the path, a random member index within its state.
    member_picks = [np.random.randint(state_sizes[s]) for s in state_traj]

    uniq_states = np.unique(state_traj)
    state_lookup = dict(zip(uniq_states, np.arange(uniq_states.shape[0])))

    print("Translating to trajectory, frame pairs...")
    # One (traj, frame) table per distinct state visited.
    frames_by_state = [
        np.array(np.where(assignments == s)).T for s in uniq_states
    ]
    # Pick the chosen (traj, frame) row for each step of the path.
    which_traj = np.array([
        frames_by_state[state_lookup[s]][pick]
        for s, pick in zip(state_traj, member_picks)
    ])

    print("Loading frames from the Trajectory.")
    first_pair = which_traj[0]
    traj = project.load_frame(first_pair[0], first_pair[1])
    for i in range(1, len(which_traj)):
        pair = which_traj[i]
        traj += project.load_frame(pair[0], pair[1])
        print(i)

    traj[0].save_to_pdb(out_pdb)
    traj.save_to_xtc(out_xtc)
    print("Saved output to %s and %s" % (out_pdb, out_xtc))
def __init__(self):
    """Set up tolerances, a two-state count matrix and a sampled trajectory."""
    self.epsilon = 1e-7

    # Confidence level used for the uncertainty estimate. The test is
    # stochastic, so failures are expected about 0.1 % of the time.
    self.alpha = 0.001

    max_lag = 100
    self.max_lag = max_lag
    self.times = np.arange(max_lag)

    self.num_steps = 100000

    # Two-state count matrix and the transition matrix estimated from it.
    counts = np.array([[500, 2],
                       [2, 50]])
    self.C = counts
    self.T = MSMLib.estimate_transition_matrix(counts)

    # State sequence sampled from T, starting in state 0.
    sampled = msm_analysis.sample(self.T, 0, self.num_steps)
    self.state_traj = np.array(sampled)
def main(modeldir, start, type):
    """Build (or resume) a long MSM movie starting from an unbound state.

    Parameters
    ----------
    modeldir : str
        Model directory. Files such as ``tProb.mtx`` and
        ``Assignments.Fixed.h5`` are read from it; the part of the path
        before the first 'Data' is used to locate ``ProjectInfo.yaml`` and
        the ``fkbp*xtal.pdb`` structure.
    start : value convertible to int
        Index into the list loaded from ``unbound_<type>_states.txt``.
    type : str
        Used to build the ``tpt-<type>`` directory and file names.
        (Shadows the builtin ``type`` inside this function.)

    The sampled state sequence is cached in a ``.states.dat`` file and the
    movie is checkpointed to ``*.xtc.state<n>chkpt`` files while it is built.
    """
    start=int(start)
    # NOTE(review): `data` is never read again in this function.
    data=dict()
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    files=glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    # First glob match is used as the PDB; raises IndexError if none matched.
    pdb=files[0]
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T=mmread('%s/tProb.mtx' % modeldir)
    startstate=unbound[start]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    steps=100000
    print "on start state %s" % startstate
    # Reuse a previously saved state sequence if one exists; otherwise sample
    # a new one and save it.
    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj=numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj=msm_analysis.sample(T, int(startstate),int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)
    print "checking for chkpt file"
    checkfile=glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        # Resume: load the checkpointed movie and recover the state index n
        # from the text between 'xtc.state' and 'chkpt' in the file name.
        movie=Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n=int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        # Move the loaded checkpoint aside to '<prefix>.chkpt.cp'.
        # NOTE(review): shell command built from file names; paths with
        # spaces or shell metacharacters would break it - consider os.rename.
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile=checkfile[0]
        restart=True
    else:
        restart=False
        n=0
        movie=project.empty_traj()
    # NOTE(review): on resume the loop starts at the checkpoint's own index n,
    # so frames for state n appear to be appended a second time - confirm
    # whether resuming at n+1 was intended.
    while n < len(traj):
        print "on state %s" % n
        state=int(traj[n])
        # Only t[0] is used below; presumably the trailing 10 is the number
        # of conformations requested per state - verify against the API.
        t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 10)
        if n==0:
            # First step: initialise the movie coordinates.
            movie['XYZList']=t[0]['XYZList']
            n+=1
            continue
        elif n % 100==0:
            # Every 100th step: append the frames, then write a checkpoint.
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart==True:
                # Only when resumed: move the previous checkpoint aside first.
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile='%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n+=1
            continue
        elif n!=0:
            # Any other step: just append the frames.
            movie['XYZList']=numpy.vstack((movie['XYZList'], 
                                           t[0]['XYZList']))
            n+=1
            continue
    # Write the finished movie.
    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
def raw_msm_correlation(trans_matrix, observable, assignments, n_steps=10000,
                        starting_state=0):
    """Calculate an autocorrelation function from an MSM.

    Parameters
    ----------
    trans_matrix : sparse or dense matrix
        Transition matrix
    observable : np.ndarray, shape=[n_trajs, max_n_frames]
        Value of the observable in each conf
    assignments : np.ndarray, shape=[n_trajs, max_n_frames]
        State membership for each conf
    n_steps : int
        Number of steps to simulate
    starting_state : int
        State to start the trajectory from

    Notes
    -----
    This function works by first generating a 'sample' trajectory from the
    MSM. This approach is necessary as it allows treatment of intra-state
    dynamics.

    For every step, a random integer below ``assignments.size`` is drawn and
    wrapped (``mode='wrap'``) onto the observable values of that step's
    state. When the state's population does not divide ``assignments.size``
    the wrap makes low indices slightly more likely than high ones.

    Returns
    -------
    correlation : np.ndarray, shape=[n_steps]
        The autocorrelation of the observable
    traj : np.ndarray, shape=[n_steps]
        The simulated trajectory, represented as a sequence of states
    obs_traj : np.ndarray, shape=[n_steps]
        The observable signal, as a sequence of values from traj. Each entry
        is the one-element result of a single-index ``take``.
    """
    traj = msm_analysis.sample(trans_matrix, starting_state, n_steps)

    # Upper bound of the random draw: the total number of entries in
    # `assignments`. `.size` gives the same count as summing an array of
    # ones, but as an exact integer and without a temporary array.
    max_int = assignments.size

    # The observable values belonging to a state do not change between
    # steps, so look them up once per distinct state instead of rebuilding
    # the boolean mask on every step of the trajectory.
    values_by_state = {}
    samples = []
    for state in traj:
        if state not in values_by_state:
            values_by_state[state] = observable[np.where(assignments == state)]
        draw = np.random.randint(max_int)
        samples.append(values_by_state[state].take([draw], mode='wrap'))
    obs_traj = np.array(samples)

    corr = fft_acf(obs_traj)
    return corr, traj, obs_traj
def raw_msm_correlation(trans_matrix, observable, assignments, n_steps=10000,
                        starting_state=0):
    """Calculate an autocorrelation function from an MSM.

    Parameters
    ----------
    trans_matrix : sparse or dense matrix
        Transition matrix
    observable : np.ndarray, shape=[n_trajs, max_n_frames]
        Value of the observable in each conf
    assignments : np.ndarray, shape=[n_trajs, max_n_frames]
        State membership for each conf
    n_steps : int
        Number of steps to simulate
    starting_state : int
        State to start the trajectory from

    Notes
    -----
    This function works by first generating a 'sample' trajectory from the
    MSM. This approach is necessary as it allows treatment of intra-state
    dynamics.

    For every step, a random integer below ``assignments.size`` is drawn and
    wrapped (``mode='wrap'``) onto the observable values of that step's
    state. When the state's population does not divide ``assignments.size``
    the wrap makes low indices slightly more likely than high ones.

    Returns
    -------
    correlation : np.ndarray, shape=[n_steps]
        The autocorrelation of the observable
    traj : np.ndarray, shape=[n_steps]
        The simulated trajectory, represented as a sequence of states
    obs_traj : np.ndarray, shape=[n_steps]
        The observable signal, as a sequence of values from traj. Each entry
        is the one-element result of a single-index ``take``.
    """
    traj = msm_analysis.sample(trans_matrix, starting_state, n_steps)

    # Upper bound of the random draw: the total number of entries in
    # `assignments`. `.size` gives the same count as summing an array of
    # ones, but as an exact integer and without a temporary array.
    max_int = assignments.size

    # The observable values belonging to a state do not change between
    # steps, so look them up once per distinct state instead of rebuilding
    # the boolean mask on every step of the trajectory.
    values_by_state = {}
    samples = []
    for state in traj:
        if state not in values_by_state:
            values_by_state[state] = observable[np.where(assignments == state)]
        draw = np.random.randint(max_int)
        samples.append(values_by_state[state].take([draw], mode='wrap'))
    obs_traj = np.array(samples)

    corr = fft_acf(obs_traj)
    return corr, traj, obs_traj
def main():
    """Read the parsed command-line options and write an MSM movie.

    Uses the module-level ``parser``. Assignments are loaded from the
    'arr_0' key, falling back to 'Data'. A state sequence is sampled from the
    transition matrix, one random conformation is drawn per state, and the
    combined coordinates are saved to ``args.output``.
    """
    args = parser.parse_args()

    # Assignment files may store the array under either key.
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    n_steps = int(args.num_steps)
    first_state = int(args.starting_state)

    project = Project.load_from(args.project)
    transition_matrix = scipy.io.mmread(args.tprob).tocsr()

    state_sequence = msm_analysis.sample(transition_matrix, first_state, n_steps)
    per_state_confs = project.get_random_confs_from_states(
        assignments, state_sequence, 1)

    # Keep the first sampled trajectory object as the container and give it
    # the first frame of every sampled trajectory as its coordinates.
    first_frames = []
    for conf in per_state_confs:
        first_frames.append(conf["XYZList"][0])
    movie = per_state_confs[0]
    movie["XYZList"] = np.array(first_frames)
    movie.save(args.output)
def entry_point():
    """Read the parsed command-line options and write an MSM movie.

    Uses the module-level ``parser``. Assignments are loaded from the
    'arr_0' key, falling back to 'Data'. A state sequence is sampled from the
    transition matrix, one random conformation is drawn per state, and the
    combined coordinates are saved to ``args.output``.
    """
    args = parser.parse_args()

    # Assignment files may store the array under either key.
    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')

    n_steps = int(args.num_steps)
    first_state = int(args.starting_state)

    project = Project.load_from(args.project)
    sparse_tprob = scipy.io.mmread(args.tprob).tocsr()

    visited_states = msm_analysis.sample(sparse_tprob, first_state, n_steps)
    drawn_confs = project.get_random_confs_from_states(
        assignments, visited_states, 1)

    # The first drawn trajectory object becomes the output container; its
    # coordinates are replaced by frame 0 of every drawn trajectory.
    output_traj = drawn_confs[0]
    stacked_frames = np.array([conf["XYZList"][0] for conf in drawn_confs])
    output_traj["XYZList"] = stacked_frames
    output_traj.save(args.output)
triples = [ (0, 1, 2), # (waypoint, source, sink) to test (0, 1, 3), (0, 1, 4) ] # load in the transition matrx N = 11 T = np.transpose(np.genfromtxt('mat_1.dat')[:, :-3]) print(T) print(T.shape) print(T.sum(1)) # sample from it print("Making a len: %d traj" % steps) traj = msm_analysis.sample(T, np.random.randint(11), steps, force_dense=True) print("Generated traj") # count the fraction visits n_visited_waypoint = 0 n_notvisited_waypoint = 0 started = False visited = False for (waypoint, source, sink) in triples: for n, i in enumerate(traj): if n % 10000 == 0:
steps = 10**6 # sample steps triples = [ (0, 1, 2), # (waypoint, source, sink) to test (0, 1, 3), (0, 1, 4) ] # load in the transition matrx N = 11 T = np.transpose( np.genfromtxt('mat_1.dat')[:,:-3] ) print T print T.shape print T.sum(1) # sample from it print "Making a len: %d traj" % steps traj = msm_analysis.sample(T, np.random.randint(11), steps, force_dense=True) print "Generated traj" # count the fraction visits n_visited_waypoint = 0 n_notvisited_waypoint = 0 started = False visited = False for (waypoint, source, sink) in triples: for n,i in enumerate(traj): if n % 10000 == 0:
""" import os import sys import numpy as np import scipy.sparse from msmbuilder import io, msm_analysis, MSMLib from bayesmutant import SimpleMutantSampler P = np.loadtxt('base_transition_matrix.dat') mutant_transition_matrix = P + 0.2*scipy.sparse.rand(P.shape[0], P.shape[1], density=0.1).todense() mutant_transition_matrix /= np.sum(mutant_transition_matrix, axis=1) trajectory = np.array(msm_analysis.sample(P, 0, 5000)) base_counts = MSMLib.get_counts_from_traj(trajectory).todense() print 'base counts' print base_counts ms = SimpleMutantSampler(base_counts, mutant_transition_matrix) ms.step(5000) print 'observed counts' print ms.counts io.saveh('sampling.h5', base_counts=base_counts, samples=ms.samples, observed_counts=ms.counts, scores=ms.scores, transition_matrix=mutant_transition_matrix)
print "Cannot load msm data." tProb = mmread(args.tProb) try: raw_data = io.loadh(args.raw_data)["arr_0"] except: raw_data = io.loadh(args.raw_data)["Data"] num_frames = raw_data.shape[1] num_lagtimes = num_frames / args.lagtime # msm_acf = msm_analysis.msm_acf(tProb, msm_data, np.arange(num_lagtimes), # num_modes=args.num_modes) sampled_traj = msm_analysis.sample(tProb, np.random.randint(tProb.shape[0]), num_lagtimes) data_traj = msm_data[sampled_traj] msm_acf = autocorrelate.fft_autocorrelate(data_traj) raw_acfs = [] for i in xrange(np.max([raw_data.shape[0], 10])): max_non_neg = np.where(raw_data[i] != -1)[0].max() row = raw_data[i][: max_non_neg + 1] raw_acfs.append(autocorrelate.fft_autocorrelate(row)) figure() axes((0.18, 0.18, 0.72, 0.72)) raw_label = "Raw Data"
def _run_trial(arg_dict):
    """Run one simulated adaptive-sampling trial and score each round.

    Parameters
    ----------
    arg_dict : dict
        Every key is injected into the local namespace as a variable of the
        same name. The body reads the following names without assigning
        them first, so they are presumably expected as keys (or as module
        globals - cannot be told from this function alone):
        ``rounds_of_sampling``, ``simultaneous_samplers``,
        ``size_of_intial_data``, ``length_of_sampling_trajs``,
        ``transition_matrix``, ``num_states``, ``observable_function``,
        ``SamplerObject``.

    Returns
    -------
    distance_to_target : np.ndarray, length ``rounds_of_sampling``
        Per round: square root of the summed squared stored entries of
        ``T_pred - T``, divided by ``num_states``.
    obs_distance : np.ndarray, length ``rounds_of_sampling``
        Per round: ``abs(observable_function(T_mod) - obs_goal)``; stays zero
        when ``observable_function`` is falsy.
    """

    # inject the arg_dict into the local namespace - may be a bad idea...
    # NOTE(review): this executes strings built from the dict keys, so keys
    # must be trusted; it also relies on Python 2 semantics, where exec can
    # create function locals.
    for key in arg_dict.keys():
        exec(key + " = arg_dict['" + key + "']")

    # initialize data structures to hold output
    distance_to_target = np.zeros(rounds_of_sampling)
    obs_distance = np.zeros(rounds_of_sampling)

    # the assignments array will hold all of the output of all simulations
    # Row 0 holds the initial data, followed by one row per sampler per
    # round; unused entries stay at -1.0.
    assignments = -1.0 * np.ones((rounds_of_sampling * simultaneous_samplers + 1,
                                  max(size_of_intial_data, length_of_sampling_trajs+1) ))

    # initialize the "true" transition matrix
    # NOTE(review): `not transition_matrix` uses truthiness; for a
    # multi-element numpy array this raises ValueError - an `is None` test
    # may have been intended. Confirm what callers pass.
    if not transition_matrix:
        assert num_states > 0
        # Random integer counts, symmetrised by adding the transpose.
        C_rand = np.random.randint( 0, 100, (num_states, num_states) )
        C_rand += C_rand.T
        T = MSMLib.estimate_transition_matrix( C_rand )
    else:
        T = transition_matrix
        num_states = T.shape[0]
    T = sparse.csr_matrix(T)
    msm_analysis.check_transition(T)

    # Reference value of the observable on the "true" matrix.
    if observable_function:
        try:
            obs_goal = observable_function(T)
        except Exception as e:
            print >> sys.stderr, e
            raise Exception("Error evaluating function: %s" % observable_function.__name__)

    # Seed the data set with one trajectory sampled from T (start state None).
    assignments[0,:size_of_intial_data] = msm_analysis.sample(T, None, size_of_intial_data)

    # iterate, adding simulation time
    for sampling_round in range(rounds_of_sampling):

        # apply the adaptive sampling method - we need to be true to what a
        # real simulation would actually see for the counts matrix
        # Work on a copy so the renumbering does not touch `assignments`.
        mod_assignments = assignments.copy()
        mapping = MSMLib.renumber_states( mod_assignments )
        C_mod = MSMLib.get_count_matrix_from_assignments( mod_assignments )
        T_mod = MSMLib.estimate_transition_matrix(C_mod)
        adaptive_sampling_multivariate = SamplerObject.sample(C_mod)

        # choose the states to sample from (in the original indexing)
        state_inds = np.arange(len(adaptive_sampling_multivariate))
        sampler = stats.rv_discrete(name='sampler',
                                    values=[state_inds, adaptive_sampling_multivariate])
        starting_states = sampler.rvs( size=simultaneous_samplers )
        # Translate the drawn indices back through `mapping`.
        starting_states = mapping[starting_states]

        # start new 'simulations' in each of those states
        for i,init_state in enumerate(starting_states):
            # Row for this sampler: offset by 1 because row 0 is the
            # initial data.
            a_ind = sampling_round * simultaneous_samplers + i + 1
            s_ind = length_of_sampling_trajs + 1
            assignments[a_ind,:s_ind] = msm_analysis.sample(T, init_state, s_ind)

        # build a new MSM from all the simulation so far
        C_raw = MSMLib.get_count_matrix_from_assignments( assignments, n_states=num_states )
        C_raw = C_raw + C_raw.T # might want to add trimming, etc.
        T_pred = MSMLib.estimate_transition_matrix(C_raw)

        # calculate the error between the real transition matrix and our best prediction
        assert T.shape == T_pred.shape
        distance_to_target[sampling_round] = np.sqrt( ((T_pred - T).data ** 2).sum() ) \
                                             / float(num_states)

        # The observable is evaluated on T_mod (built before this round's
        # new trajectories were added), not on T_pred.
        if observable_function:
            obs_distance[sampling_round] = np.abs(observable_function(T_mod) - obs_goal)

    return distance_to_target, obs_distance