def main():
    global data2d
    global As
    # First I need to turn the assignments matrix into a 1D list of assignments
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    print "Reading in Assignments... from %s " % options.assFN
    As = io.loadh(options.assFN)['arr_0'].astype(int)
    print "Reading in data... from %s " % options.dataFN
    try:
        f = io.loadh(options.dataFN)
        try:
            data2d = f['arr_0']
        except KeyError:
            data2d = f['Data']
    except:
        data = load(options.dataFN)
        proj = Project.load_from(options.projFN)
        data2d = msmTools.reshapeRawData(data, proj)
    print "Calculating averages for:"
    pool = mp.Pool(options.procs)
    clusters = range(As.max() + 1)
    result = pool.map_async(calcAvg, clusters[:])
    result.wait()
    sol = result.get()
    sol = array(sol)
    savetxt(options.outFN, sol)
    return
def main(modeldir, genfile, type):
    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    data = dict()
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames = numpy.where(map != -1)[0]
    data['rmsd'] = numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd'] = data['rmsd'][frames]
    com = numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom = com[0]
    data['com'] = com[1:]
    data['com'] = numpy.array(data['com'])
    data['com'] = data['com'][frames]
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T = mmread('%s/tProb.mtx' % modeldir)
    paths = io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))
    for p in range(0, 20):
        movie = project.empty_traj()
        path = paths['Paths'][p]
        flux = paths['fluxes'][p] / paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        frames = numpy.where(path != -1)[0]
        path = numpy.array(path[frames], dtype=int)
        for (n, state) in enumerate(path):
            t = project.get_random_confs_from_states(ass['arr_0'], [int(state), ], 20)
            if n == 0:
                movie['XYZList'] = t[0]['XYZList']
            else:
                movie['XYZList'] = numpy.vstack((movie['XYZList'], t[0]['XYZList']))
        movie.save_to_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p))
def run(MinLagtime, MaxLagtime, Interval, NumEigen, AssignmentsFn, symmetrize, nProc, output):
    arglib.die_if_path_exists(output)

    # Setup some model parameters
    try:
        Assignments = io.loadh(AssignmentsFn, 'arr_0')
    except KeyError:
        Assignments = io.loadh(AssignmentsFn, 'Data')
    NumStates = max(Assignments.flatten()) + 1
    if NumStates <= NumEigen - 1:
        NumEigen = NumStates - 2
        logger.warning(
            "Number of requested eigenvalues exceeds the rank of the "
            "transition matrix! Defaulting to the maximum possible "
            "number of eigenvalues.")
    del Assignments

    logger.info("Getting %d eigenvalues (timescales) for each lagtime...", NumEigen)
    lagTimes = range(MinLagtime, MaxLagtime + 1, Interval)
    logger.info("Building MSMs at the following lag times: %s", lagTimes)

    # Get the implied timescales (eigenvalues)
    impTimes = msm_analysis.get_implied_timescales(AssignmentsFn, lagTimes,
                                                   n_implied_times=NumEigen,
                                                   sliding_window=True,
                                                   symmetrize=symmetrize,
                                                   n_procs=nProc)
    numpy.savetxt(output, impTimes)
    return
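# Hedged usage sketch (not from the original source): one way run() above
# might be driven. The lag-time range, file names, and process count are
# illustrative assumptions only.
run(MinLagtime=1, MaxLagtime=50, Interval=5, NumEigen=10,
    AssignmentsFn='Data/Assignments.h5', symmetrize='MLE',
    nProc=4, output='ImpliedTimescales.dat')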
def test_2(self):
    try:
        subprocess.Popen('ipcluster start --cluster-id=testclusterid --n=1 --daemonize',
                         shell=True)
        time.sleep(5)

        args = self.Args()
        args.output_dir = tempfile.mkdtemp()
        args.cluster_id = 'testclusterid'
        logger = AssignParallel.setup_logger()
        AssignParallel.main(args, self.metric, logger)

        assignments = io.loadh(os.path.join(args.output_dir, 'Assignments.h5'), 'arr_0')
        r_assignments = io.loadh(os.path.join(fixtures_dir(), 'Assignments.h5'), 'Data')
        distances = io.loadh(os.path.join(args.output_dir, 'Assignments.h5.distances'), 'arr_0')
        r_distances = io.loadh(os.path.join(fixtures_dir(), 'Assignments.h5.distances'), 'Data')

        npt.assert_array_equal(assignments, r_assignments)
        npt.assert_array_almost_equal(distances, r_distances)
    finally:
        shutil.rmtree(args.output_dir)
        subprocess.Popen('ipcluster stop --cluster-id=testclusterid', shell=True).wait()
def main(): """Parse command line inputs, load up files, and build a movie.""" parser = arglib.ArgumentParser(description=""" Create an MSM movie by sampling a sequence of states and sampling a random conformation from each state in the sequence. """) parser.add_argument('project') parser.add_argument('assignments', default='Data/Assignments.Fixed.h5') parser.add_argument('tprob', default='Data/tProb.mtx') parser.add_argument('num_steps') parser.add_argument('starting_state', type=int, help='''Which state to start trajectory from.''') parser.add_argument('output', default='sample_traj.pdb', help="""The filename of your output trajectory. The filetype suffix will be used to select the output file format.""") args = parser.parse_args() try: assignments = io.loadh(args.assignments, 'arr_0') except KeyError: assignments = io.loadh(args.assignments, 'Data') num_steps = int(args.num_steps) starting_state = int(args.starting_state) project = Project.load_from(args.project) T = scipy.io.mmread(args.tprob).tocsr() state_traj = msm_analysis.sample(T, starting_state, num_steps) sampled_traj = project.get_random_confs_from_states(assignments, state_traj, 1) traj = sampled_traj[0] traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj]) traj.save(args.output)
def load(tica_fn, metric):
    """
    Load a tICA solution to use in projecting data.

    Parameters
    ----------
    tica_fn : str
        filename pointing to a saved tICA solution
    metric : metrics.Vectorized subclass instance
        metric used to prepare trajectories
    """
    # The only variables we need to save are the two matrices
    # and the eigenvectors / values, as well as the lag time.
    logger.warn("NOTE: You can only use the tICA solution; you will "
                "not be able to continue adding data")
    f = io.loadh(tica_fn)

    tica_obj = tICA(f['lag'][0], prep_metric=metric)
    # the 'lag' entry is an array... with a single item

    tica_obj.timelag_corr_mat = f['timelag_corr_mat']
    tica_obj.cov_mat = f['cov_mat']

    tica_obj.vals = f['vals']
    tica_obj.vecs = f['vecs']

    tica_obj._sort()

    return tica_obj
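# Hedged usage sketch (assumed, not from the original source): restoring a
# saved tICA solution with load() above. The file name 'tica.h5' and the
# choice of a default-constructed Dihedral metric are illustrative
# assumptions; any metrics.Vectorized subclass should work.
from msmbuilder import metrics
dihedral_metric = metrics.Dihedral()
tica_obj = load('tica.h5', dihedral_metric)
# load() sorts the spectrum via tica_obj._sort(), so the leading entries
# are the slowest tICA components
print tica_obj.vals[:5]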
def run(tProb, observable, init_pops=None, num_vecs=10, output='evec_amps.h5'):
    if init_pops is None:
        init_pops = np.ones(tProb.shape[0]).astype(float) / float(tProb.shape[0])
    else:
        init_pops = init_pops.astype(float)
        init_pops /= init_pops.sum()

    assert (observable.shape[0] == init_pops.shape[0])
    assert (observable.shape[0] == tProb.shape[0])

    try:
        f = io.loadh('eigs%d.h5' % num_vecs)
        vals = f['vals']
        vecsL = f['vecs']
    except:
        vals, vecsL = msm_analysis.get_eigenvectors(tProb, num_vecs + 1, right=False)
        io.saveh('eigs%d.h5' % num_vecs, vals=vals, vecs=vecsL)

    equil = vecsL[:, 0] / vecsL[:, 0].sum()
    dyn_vecsL = vecsL[:, 1:]

    # normalize the left and right eigenvectors
    dyn_vecsL /= np.sqrt(np.sum(dyn_vecsL * dyn_vecsL / np.reshape(equil, (-1, 1)), axis=0))
    dyn_vecsR = dyn_vecsL / np.reshape(equil, (-1, 1))

    amps = dyn_vecsL.T.dot(observable) * dyn_vecsR.T.dot(init_pops)

    io.saveh(output, evals=vals[1:], amplitudes=amps)
    logger.info("saved output to %s" % output)
def main(file):
    ass = io.loadh(file)
    dir = os.path.dirname(file)
    base = os.path.basename(file)
    newdir = '%s/subsample' % dir
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    p = Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    data = dict()
    totals = dict()
    iterations = int(ass['arr_0'].shape[1] / 10.0)
    start = max(p.traj_lengths)
    for iter in range(0, iterations):
        new = start - 10
        if new < 10:
            break
        totals[new] = 0
        data[new] = -numpy.ones((ass['arr_0'].shape[0], new), dtype=int)
        for i in range(0, ass['arr_0'].shape[0]):
            data[new][i] = ass['arr_0'][i][:new]
            frames = numpy.where(data[new][i] != -1)[0]
            totals[new] += len(frames)
        start = new
    ohandle = open('%s/times.h5' % (newdir), 'w')
    for key in sorted(data.keys()):
        print data[key].shape
        print "total time is %s" % totals[key]
        ohandle.write('%s\t%s\t%s\n' % (data[key].shape[0], data[key].shape[1], totals[key]))
def load_from(cls, filename):
    """
    Load project from disk

    Parameters
    ----------
    filename : string
        filename_or_file can be a path to a legacy .h5 or current .yaml file.

    Returns
    -------
    project : the loaded project object
    """
    rootdir = os.path.abspath(os.path.dirname(filename))

    if filename.endswith('.yaml'):
        with open(filename) as f:
            ondisk = yaml.load(f)
            records = {
                'conf_filename': ondisk['conf_filename'],
                'traj_lengths': [],
                'traj_paths': [],
                'traj_converted_from': [],
                'traj_errors': []
            }
            for trj in ondisk['trajs']:
                records['traj_lengths'].append(trj['length'])
                records['traj_paths'].append(trj['path'])
                records['traj_errors'].append(trj['errors'])
                records['traj_converted_from'].append(trj['converted_from'])
    elif filename.endswith('.h5'):
        ondisk = io.loadh(filename, deferred=False)
        n_trajs = len(ondisk['TrajLengths'])
        records = {
            'conf_filename': str(ondisk['ConfFilename'][0]),
            'traj_lengths': ondisk['TrajLengths'],
            'traj_paths': [],
            'traj_converted_from': [None] * n_trajs,
            'traj_errors': [None] * n_trajs
        }
        for i in xrange(n_trajs):
            # this is the convention used in the hdf project format to get the traj paths
            path = os.path.join(
                ondisk['TrajFilePath'][0],
                ondisk['TrajFileBaseName'][0] + str(i) + ondisk['TrajFileType'][0])
            records['traj_paths'].append(path)
    else:
        raise ValueError('Sorry, I can only open files in .yaml'
                         ' or .h5 format: %s' % filename)

    return cls(records, validate=True, project_dir=rootdir)
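# Hedged usage sketch (assumption, mirroring calls made elsewhere in this
# file): loading a project record from a YAML file and reading the metadata
# fields that load_from() populates.
project = Project.load_from('ProjectInfo.yaml')
print project.conf_filename
print project.traj_lengths  # one entry per trajectory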
def main(assfile, lag, nproc):
    lag = int(lag)
    nproc = int(nproc)
    Assignments = io.loadh(assfile)
    num = int(assfile.split('Assignments_sub')[1].split('.h5')[0])
    dir = os.path.dirname(assfile)
    newdir = '%s/boot-sub%s' % (dir, num)
    ref_sub = numpy.loadtxt('%s/times.h5' % dir, usecols=(1,))
    ref_total = numpy.loadtxt('%s/times.h5' % dir, usecols=(2,))
    times = dict()
    for (i, j) in zip(ref_sub, ref_total):
        times[i] = j
    proj = Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom = int(times[num])
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments = Assignments['Data']
    else:
        Assignments = Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag),
                                                      sliding_window=True)
    Counts = Counts.todense()
    Counts = Counts * (1.0 / lag)
    T = numpy.array(Counts)
    frames = numpy.where(T == 0)
    T[frames] = 1
    Popsample = dict()
    iteration = 0
    total_iteration = 100 / nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain = 100 % nproc
    else:
        remain = False
    print "iterating thru tCount samples"
    count = 0
    while iteration < 100:
        if count * nproc > 100:
            nproc = remain
        print "sampling iteration %s" % iteration
        Tfresh = T.copy()
        input = zip([Tfresh] * nproc, [multinom] * nproc, range(0, NumStates))
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        pool.terminate()
        for c_matrix in all:
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), c_matrix)
            #rev_counts, t_matrix, Populations, Mapping=x
            #scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
            #numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
            #numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration += 1
        count += 1
        print "done with iteration %s" % (iteration * nproc)
def main(modeldir, genfile, type, write=False):
    proj = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames = numpy.where(map != -1)[0]
    data = dict()
    data['rmsd'] = numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd'] = data['rmsd'][frames]
    com = numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom = com[0]
    data['com'] = com[1:]
    data['com'] = numpy.array(data['com'][frames])
    residues = ['F36', 'H87', 'I56', 'I90', 'W59', 'Y82', 'hydrophob_dist', 'oxos_dist']
    loops = ['loop1', 'loop2', 'loop3']
    for loop in loops:
        data[loop] = numpy.loadtxt('%s.%srmsd.dat' % (genfile.split('.lh5')[0], loop))
        data[loop] = data[loop][frames]
    for res in residues:
        file = '%s_%spair.dat' % (genfile.split('.lh5')[0], res)
        if os.path.exists(file):
            data[res] = numpy.loadtxt(file)
            data[res] = data[res][frames]
    angles = ['phi', 'omega']
    for ang in angles:
        file = '%s_%s.dat' % (genfile.split('.lh5')[0], ang)
        if os.path.exists(file):
            data[ang] = numpy.loadtxt(file)
            data[ang] = data[ang][frames]
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T = mmread('%s/tProb.mtx' % modeldir)
    unbound = numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound = numpy.loadtxt('%s/tpt-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)
    Tdense = T.todense()
    Tdata = dict()
    for i in unbound:
        for j in unbound:
            if Tdense[i, j] != 0:
                if i not in Tdata.keys():
                    Tdata[i] = []
                Tdata[i].append(j)
    #print Tdata
    cm = pylab.cm.get_cmap('RdYlBu_r')  # blue will be negative components, red positive
    Q = tpt.calculate_committors(unbound, bound, T)
    ohandle = open('%s/commitor_states.txt' % modeldir, 'w')
    for i in range(0, len(Q)):
        if Q[i] > 0.40 and Q[i] < 0.6:
            ohandle.write('%s\n' % i)
            #t=project.get_random_confs_from_states(ass['arr_0'], [int(i),], 20)
            #t[0].save_to_xtc('%s/commottor_state%s.xtc' % (modeldir, i))
    if write == True:
        for op in sorted(data.keys()):
            pylab.figure()
            pylab.scatter(data['com'], data[op], c=Q, cmap=cm, alpha=0.7,
                          s=[map_size(i) for i in Q])
            pylab.xlabel('L RMSD')
            pylab.ylabel(op)
            pylab.colorbar()
            pylab.show()
def main(modeldir, genfile, type, write=False):
    data = dict()
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames = numpy.where(map != -1)[0]
    unbound = numpy.loadtxt('%s/tpt-rmsd-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound = numpy.loadtxt('%s/tpt-rmsd-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)
    dir = modeldir.split('Data')[0]
    name = glob.glob('%s/fkbp*xtal*pdb' % dir)
    pdb = Trajectory.load_from_pdb(name[0])
    paths = io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))
    committors = numpy.loadtxt('%s/commitor_states.txt' % modeldir, dtype=int)
    colors = ['red', 'orange', 'green', 'cyan', 'blue', 'purple']
    colors = colors * 40
    if type == 'strict':
        ref = 5
    elif type == 'super-strict':
        ref = 3
    elif type == 'medium':
        ref = 10
    elif type == 'loose':
        ref = 15
    #for p in range(0, 3):
    for p in range(0, 1):
        path = paths['Paths'][p]
        print "Bottleneck", paths['Bottlenecks'][p]
        flux = paths['fluxes'][p] / paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        frames = numpy.where(path != -1)[0]
        path = numpy.array(path[frames], dtype=int)
        print path
        if write == True:
            size = (paths['fluxes'][p] / paths['fluxes'][0]) * 1000
        traj = Trajectory.load_from_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p), Conf=pdb)
        data = build_metric(dir, pdb, traj)
        dir = modeldir.split('Data')[0]
        for op in sorted(data.keys()):
            #for op in residues:
            pylab.figure()
            pylab.scatter(data['rmsd'], data[op], c=colors[p], alpha=0.7)  # , s=size)
            for j in paths['Bottlenecks'][p]:
                frame = numpy.where(paths['Paths'][p] == j)[0]
                pylab.scatter(data['rmsd'][frame * 20], data[op][frame * 20],
                              marker='x', c='k', alpha=0.7, s=50)
                location = numpy.where(committors == paths['Paths'][p][frame])[0]
                if location.size:
                    print "path %s state %s bottleneck in committors" % (p, j)
                    print data['rmsd'][frame * 20], data[op][frame * 20]
            pylab.title('path %s' % p)
            pylab.xlabel('P-L RMSD')
            #pylab.xlabel('P-L COM')
            pylab.ylabel(op)
            pylab.xlim(0, max(data['rmsd']) + 5)
            #pylab.ylim(0,max(data[op])+5)
            pylab.show()
def check_container(filename):
    ondisk = io.loadh(filename, deferred=False)
    if n_vtrajs != len(ondisk['hashes']):
        raise ValueError('You asked for {} vtrajs, but your checkpoint '
                         'file has {}'.format(n_vtrajs, len(ondisk['hashes'])))
    if not np.all(ondisk['hashes'] == hashes):
        raise ValueError('Hash mismatch. Are these checkpoint files for '
                         'the right project?')
def dump_count_matrix(self, assignfn, lagtime=1, outfn="count_matrix.txt"):
    from msmbuilder import io
    from msmbuilder import MSMLib

    assignments = io.loadh(assignfn, 'arr_0')

    # returns sparse lil_matrix
    counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lagtime,
                                                      sliding_window=True)
    counts = counts.tocoo()
    np.savetxt(outfn, (counts.row, counts.col, counts.data))
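# Hedged standalone sketch (assumption): the same count-matrix dump written
# as a plain script, using only the calls that appear in the method above.
# The input and output file names are illustrative.
import numpy as np
from msmbuilder import io, MSMLib

assignments = io.loadh('Data/Assignments.h5', 'arr_0')
counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=10,
                                                  sliding_window=True).tocoo()
np.savetxt('count_matrix.txt', (counts.row, counts.col, counts.data))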
def main(assfile, lag, nproc):
    lag = int(lag)
    nproc = int(nproc)
    Assignments = io.loadh(assfile)
    dir = os.path.dirname(assfile)
    newdir = '%s/sample-counts' % dir
    proj = Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom = sum(proj.traj_lengths)
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    if 'Data' in Assignments.keys():
        Assignments = Assignments['Data']
    else:
        Assignments = Assignments['arr_0']
    print Assignments.shape
    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag),
                                                      sliding_window=True)
    Counts = Counts.todense()
    Counts = Counts * (1.0 / lag)
    T = numpy.array(Counts)
    frames = numpy.where(T == 0)
    T[frames] = 1
    Popsample = dict()
    iteration = 0
    total_iteration = 100 / nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain = 100 % nproc
    else:
        remain = False
    print "iterating thru tCount samples"
    count = 0
    while iteration < 100:
        if count * nproc > 100:
            nproc = remain
        print "sampling iteration %s" % iteration
        Tfresh = T.copy()
        counts = range(0, nproc)
        input = zip([Tfresh] * nproc, [multinom] * nproc, [NumStates] * nproc, counts)
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        print "computed resampled matrices"
        pool.terminate()
        for count_matrix in all:
            #rev_counts, t_matrix, Populations, Mapping=x
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), count_matrix)
            #scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
            #numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
            #numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration += 1
        count += 1
        print "done with iteration %s" % (iteration * nproc)
def main(modeldir, start, type):
    start = int(start)
    data = dict()
    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    files = glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    pdb = files[0]
    unbound = numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T = mmread('%s/tProb.mtx' % modeldir)
    startstate = unbound[start]
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    steps = 100000
    print "on start state %s" % startstate
    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj = numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj = msm_analysis.sample(T, int(startstate), int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)
    print "checking for chkpt file"
    checkfile = glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        movie = Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n = int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile = checkfile[0]
        restart = True
    else:
        restart = False
        n = 0
        movie = project.empty_traj()
    while n < len(traj):
        print "on state %s" % n
        state = int(traj[n])
        t = project.get_random_confs_from_states(ass['arr_0'], [int(state), ], 10)
        if n == 0:
            movie['XYZList'] = t[0]['XYZList']
            n += 1
            continue
        elif n % 100 == 0:
            movie['XYZList'] = numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart == True:
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile = '%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n += 1
            continue
        elif n != 0:
            movie['XYZList'] = numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            n += 1
            continue
    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
def main(modeldir, gensfile, rcut=None):
    mapdata = dict()
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    data = dict()
    data['rmsd'] = numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    com = numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    data['com'] = com[1:]
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    mapdata['rmsd'] = []
    mapdata['com'] = []
    for x in range(0, len(data['rmsd'])):
        if map[x] != -1:
            mapdata['com'].append(data['com'][x])
            mapdata['rmsd'].append(data['rmsd'][x])
    # RMSD cutoff
    cutoffs = numpy.arange(1, 30, 0.5)
    bound_pops = []
    for type in mapdata.keys():
        pylab.figure()
        ohandle = open('%s/%s_msm_frees.dat' % (modeldir, type), 'w')
        data = []
        for cutoff in cutoffs:
            bound_pops = []
            for (state, x) in enumerate(mapdata['rmsd']):
                if x < cutoff:
                    bound_pops.append(pops[state])
            ### calculate binding free energy from populations
            if len(bound_pops) == 0:
                dG = 100
            else:
                bound = numpy.sum(bound_pops)
                unbound = 1 - bound
                dG = -0.6 * numpy.log(bound / unbound)
                #dG=-0.6*numpy.log(bound/(unbound**2))
            ### calculate standard state correction, in angstroms here
            boxvolume = 244.80 * (10 ** 3)
            v0 = 1600
            corr = -0.6 * numpy.log(boxvolume / v0)
            dG_corr = dG + corr
            if cutoff == float(rcut):
                print cutoff, dG_corr
            data.append(dG_corr)
            ohandle.write('%s\t%s\n' % (cutoff, dG_corr))
        pylab.plot(cutoffs, data, label=type)
        pylab.legend()
        pylab.ylim(-8, (-1) * corr)
        pylab.show()
def load_from(cls, filename):
    """
    Load project from disk

    Parameters
    ----------
    filename : string
        filename_or_file can be a path to a legacy .h5 or current .yaml file.

    Returns
    -------
    project : the loaded project object
    """
    rootdir = os.path.abspath(os.path.dirname(filename))

    if filename.endswith('.yaml'):
        with open(filename) as f:
            ondisk = yaml.load(f)
            records = {'conf_filename': ondisk['conf_filename'],
                       'traj_lengths': [],
                       'traj_paths': [],
                       'traj_converted_from': [],
                       'traj_errors': []}
            for trj in ondisk['trajs']:
                records['traj_lengths'].append(trj['length'])
                records['traj_paths'].append(trj['path'])
                records['traj_errors'].append(trj['errors'])
                records['traj_converted_from'].append(trj['converted_from'])
    elif filename.endswith('.h5'):
        ondisk = io.loadh(filename, deferred=False)
        n_trajs = len(ondisk['TrajLengths'])
        records = {'conf_filename': str(ondisk['ConfFilename'][0]),
                   'traj_lengths': ondisk['TrajLengths'],
                   'traj_paths': [],
                   'traj_converted_from': [[None]] * n_trajs,
                   'traj_errors': [None] * n_trajs}
        for i in xrange(n_trajs):
            # this is the convention used in the hdf project format to get the traj paths
            path = os.path.join(ondisk['TrajFilePath'][0],
                                ondisk['TrajFileBaseName'][0] + str(i) + ondisk['TrajFileType'][0])
            records['traj_paths'].append(path)
    else:
        raise ValueError('Sorry, I can only open files in .yaml'
                         ' or .h5 format: %s' % filename)

    return cls(records, validate=False, project_dir=rootdir)
def main(modeldir):
    proj = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    data = dict()
    data['dist'] = numpy.loadtxt('%s/prot_lig_distance.dat' % modeldir, usecols=(1,))
    data['rmsd'] = numpy.loadtxt('%s/Gens.rmsd.dat' % modeldir, usecols=(2,))
    com = numpy.loadtxt('%s/Gens.vmd_com.dat' % modeldir, usecols=(1,))
    refcom = com[0]
    data['com'] = com[1:]
    data['com'] = numpy.array(data['com'])
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames = numpy.where(map != -1)[0]
    pylab.scatter(data['com'][frames], data['rmsd'][frames])
    pylab.scatter([refcom, ], [0, ], c='k', marker='x', s=100)
    pylab.xlabel('P-L COM')
    pylab.ylabel('P-L RMSD')
    pylab.show()
def main(modeldir):
    data = dict()
    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T = mmread('%s/tProb.mtx' % modeldir)
    if not os.path.exists('%s/adaptive-states/' % modeldir):
        os.mkdir('%s/adaptive-states/' % modeldir)
    for state in sorted(set(ass['arr_0'].flatten())):
        if state != -1:
            t = project.get_random_confs_from_states(ass['arr_0'], [int(state), ], 5)
            for i in range(0, 5):
                print state, i
                (a, b, c) = t[0]['XYZList'].shape
                movie = project.empty_traj()
                movie['XYZList'] = numpy.zeros((1, b, c), dtype=numpy.float32)
                movie['XYZList'][0] = t[0]['XYZList'][i]
                movie.save_to_pdb('%s/adaptive-states/state%s-%s.pdb' % (modeldir, int(state), i))
def load_from_disk(cls, filename):
    """Load up a clusterer from disk

    This is useful because computing the Z-matrix
    (done in __init__) is the most expensive part,
    and assigning is cheap

    Parameters
    ----------
    filename : str
        location to load from

    Raises
    ------
    TODO: Probably raises something if filename doesn't exist?
    """
    data = io.loadh(filename, deferred=False)
    Z, traj_lengths = data['z_matrix'], data['traj_lengths']
    # Next two lines are a hack to fix Serializer bug. KAB
    if np.rank(traj_lengths) == 0:
        traj_lengths = [traj_lengths]
    return cls(None, None, precomputed_values=(Z, traj_lengths))
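# Hedged usage sketch (assumption): how a clusterer restored by
# load_from_disk() might be used. 'Hierarchical' and get_assignments() are
# assumed names from msmbuilder's clustering module; adjust them to whatever
# class this method actually belongs to.
# clusterer = Hierarchical.load_from_disk('ZMatrix.h5')
# assignments = clusterer.get_assignments(cutoff_distance=0.25)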
def test_load_2(self):
    "load using deferred=False"
    TestData = io.loadh(self.filename1, deferred=False)['arr_0']
    npt.assert_array_equal(TestData, self.data)
def main_learn(args):
    "main method for the learn subcommand"
    s = io.loadh(args.triplets)
    metric_string = ''.join(s['metric'])

    if args.learn_method == 'diagonal':
        # type conversion
        alpha = float(args.alpha)

        rho, weights = lmmdm.optimize_diagonal(s['AtoB'], s['AtoC'], alpha, loss='huber')
        if metric_string == 'dihedral':
            metric = metrics.Dihedral(metric='seuclidean', V=weights)
        elif metric_string == 'drmsd':
            metric = metrics.AtomPairs(metric='seuclidean', V=weights,
                                       atom_pairs=s['atom_pairs'])
        elif metric_string == 'rmsd':
            metric = WRMSD(metric='seuclidean', V=weights)
        elif metric_string == 'recipcontact':
            metric = metrics.ContinuousContact(contacts='all', scheme='CA',
                                               metric='seuclidean', V=weights)
        else:
            raise NotImplementedError('Sorry')

        # save to disk
        pickle.dump(metric, open(args.metric, 'w'))
        print 'Saved metric pickle to {}'.format(args.metric)
        np.save(args.matrix, [weights, rho])
        print 'Saved weights as flat text to {}'.format(args.matrix)

    elif args.learn_method == 'dense':
        initialize = args.initialize
        if args.initialize not in ['euclidean', 'diagonal']:
            try:
                initialize = np.load(initialize)
            except IOError as e:
                print >> sys.stderr, '''-i --initialize must be either "euclidean", "diagonal", or the path to a flat text matrix'''
                print >> sys.stderr, e
                sys.exit(1)

        # type conversion
        alpha, epsilon = map(float, [args.alpha, args.epsilon])
        outer_iterations, inner_iterations = map(int, [args.outer_iterations,
                                                       args.inner_iterations])

        rho, weights = lmmdm.optimize_diagonal(s['AtoB'], s['AtoC'], alpha, loss='huber')
        rho, metric_matrix = lmmdm.optimize_dense(s['AtoB'], s['AtoC'], alpha, rho,
                                                  np.diag(weights), loss='huber',
                                                  epsilon=1e-5,
                                                  max_outer_iterations=outer_iterations,
                                                  max_inner_iterations=inner_iterations)

        if metric_string == 'dihedral':
            metric = metrics.Dihedral(metric='mahalanobis', VI=metric_matrix)
        elif metric_string == 'drmsd':
            metric = metrics.AtomPairs(metric='mahalanobis', VI=metric_matrix,
                                       atom_pairs=s['atom_pairs'])
        elif metric_string == 'rmsd':
            metric = WRMSD(metric='mahalanobis', VI=metric_matrix)
        elif metric_string == 'recipcontact':
            metric = metrics.ContinuousContact(contacts='all', scheme='CA',
                                               metric='mahalanobis', VI=metric_matrix)
        else:
            raise NotImplementedError('Sorry')

        # save to disk
        pickle.dump(metric, open(args.metric, 'w'))
        print 'Saved metric pickle to {}'.format(args.metric)
        np.save(args.matrix, [metric_matrix, rho])
        print 'Saved weights, rho to {}'.format(args.matrix)
def test_load_2(self):
    "load using deferred=True"
    deferred = io.loadh(self.filename1, deferred=True)
    npt.assert_array_equal(deferred['arr_0'], self.data)
    deferred.close()
def test_save(self):
    """Save HDF5 to disk and load it back up"""
    io.saveh(self.filename2, self.data)
    TestData = io.loadh(self.filename2, 'arr_0')
    npt.assert_array_equal(TestData, self.data)
def test_load_1(self):
    "Load by specifying array name"
    TestData = io.loadh(self.filename1, 'arr_0')
    npt.assert_array_equal(TestData, self.data)
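# Hedged standalone sketch (assumption): the saveh/loadh round trip that the
# tests above exercise, outside the test harness. 'example.h5' is illustrative.
import numpy as np
from msmbuilder import io

data = np.arange(100, dtype=np.float32)
io.saveh('example.h5', data)             # positional arrays land under 'arr_0'
restored = io.loadh('example.h5', 'arr_0')
assert np.all(restored == data)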
#!/usr/bin/env python
from msmbuilder import io, MSMLib
from scipy.io import mmwrite
import sys
from msmbuilder import arglib

parser = arglib.ArgumentParser()
parser.add_argument("assignments")
parser.add_argument("lagtime", type=int, default=1)
parser.add_argument("sliding_window", default=False, action="store_true")
parser.add_argument("output", default="tCounts.raw.mtx")
args = parser.parse_args()

try:
    ass = io.loadh(args.assignments)["Data"]
except KeyError:
    ass = io.loadh(args.assignments)["arr_0"]

C = MSMLib.get_count_matrix_from_assignments(ass, lag_time=args.lagtime,
                                             sliding_window=args.sliding_window)
print C.sum()
mmwrite(args.output, C)
def main(modeldir, gensfile, write=False):
    if not os.path.exists('%s/eig-states/' % modeldir):
        os.mkdir('%s/eig-states/' % modeldir)
    ohandle = open('%s/eig-states/eiginfo.txt' % modeldir, 'w')
    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    gens = Trajectory.load_from_lhdf(gensfile)
    T = mmread('%s/tProb.mtx' % modeldir)
    data = dict()
    data['rmsd'] = numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    com = numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    data['com'] = com[1:]
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    map_rmsd = []
    map_com = []
    for x in range(0, len(data['rmsd'])):
        if map[x] != -1:
            map_com.append(data['com'][x])
            map_rmsd.append(data['rmsd'][x])
    map_com = numpy.array(map_com)
    map_rmsd = numpy.array(map_rmsd)
    T = mmread('%s/tProb.mtx' % modeldir)
    eigs_m = msm_analysis.get_eigenvectors(T, 10)
    cm = pylab.cm.get_cmap('RdYlBu_r')  # blue will be negative components, red positive
    print numpy.shape(eigs_m[1][:, 1])
    for i in range(0, 1):
        order = numpy.argsort(eigs_m[1][:, i])
        if i == 0:
            maxes = []
            gen_maxes = []
            values = []
            ohandle.write('eig%s maxes\n' % i)
            ohandle.write('state\tgenstate\tmagnitude\trmsd\tcom\n')
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map == n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n, i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map == n)[0],
                                                        eigs_m[1][n, i], map_rmsd[n], map_com[n]))
            print "maxes at ", maxes, values
            maxes = numpy.array(maxes)
            if write == True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
        else:
            maxes = []
            gen_maxes = []
            values = []
            ohandle.write('eig%s maxes\n' % i)
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map == n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n, i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map == n)[0],
                                                        eigs_m[1][n, i], map_rmsd[n], map_com[n]))
            print "maxes at ", maxes, values
            order = numpy.argsort(eigs_m[1][:, i])
            mins = []
            gen_mins = []
            values = []
            ohandle.write('eig%s mins\n' % i)
            for n in order[:5]:
                gen_mins.append(numpy.where(map == n)[0])
                mins.append(n)
                values.append(eigs_m[1][n, i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map == n)[0],
                                                        eigs_m[1][n, i], map_rmsd[n], map_com[n]))
            print "mins at ", mins, values
            if write == True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
                get_structure(modeldir, i, gen_mins, mins, gens, project, ass, type='min')
        pylab.scatter(map_com[order], map_rmsd[order], c=eigs_m[1][order, i], cmap=cm,
                      s=1000 * abs(eigs_m[1][order, i]), alpha=0.5)
        print map_com[order][numpy.argmax(eigs_m[1][order, i])]
        print eigs_m[1][order, i][1]
        CB = pylab.colorbar()
        l, b, w, h = pylab.gca().get_position().bounds
        ll, bb, ww, hh = CB.ax.get_position().bounds
        CB.ax.set_position([ll, b + 0.1 * h, ww, h * 0.8])
        CB.set_label('Eig%s Magnitudes' % i)
        ylabel = pylab.ylabel(r'Ligand RMSD to Xtal ($\AA$)')
        xlabel = pylab.xlabel(r'P Active Site - L COM Distance ($\AA$)')
        pylab.legend(loc=8, frameon=False)
        pylab.savefig('%s/2deigs%i_com_prmsd.png' % (modeldir, i), dpi=300)
    the structures of each of the cluster centers. Produced using Cluster.py.''',
    default='Data/Gens.lh5')
parser.add_argument('output_dir', default='PDBs')
args = parser.parse_args()

if -1 in args.states:
    print "Ripping PDBs for all states"
    args.states = 'all'
if args.conformations_per_state == -1:
    print "Getting all PDBs for each state"
    args.conformations_per_state = 'all'

atom_indices = np.loadtxt(args.lprmsd_atom_indices, np.int)
assignments = io.loadh(args.assignments)
project = Project.load_from(args.project)

if args.lprmsd_permute_atoms == 'None':
    permute_indices = None
else:
    permute_indices = ReadPermFile(args.lprmsd_permute_atoms)

if args.lprmsd_alt_indices == 'None':
    alt_indices = None
else:
    alt_indices = np.loadtxt(args.lprmsd_alt_indices, np.int)

run(project, assignments, args.conformations_per_state, args.states,
    args.output_dir, args.generators, atom_indices, permute_indices,
    alt_indices, args.total_memory_gb)
    discards (expensive!) data, so should only be used if an optimal
    clustering is not available.

    Note: Check your cluster sizes with CalculateClusterRadii.py to get a
    handle on how big they are before you trim. Recall the radius is the
    *average* distance to the generator, here you are enforcing the
    *maximum* distance.

    Output: A trimmed assignments file (Assignments.Trimmed.h5).""")
parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
parser.add_argument('distances', default='Data/Assignments.h5.distances')
parser.add_argument('rmsd_cutoff', help="""distance value at which to trim,
    in. Data further than this value to its generator will be discarded.
    Note: this is measured with whatever distance metric you used to
    cluster""", type=float)
parser.add_argument('output', default='Data/Assignments.Trimmed.h5')
args = parser.parse_args()

arglib.die_if_path_exists(args.output)

try:
    assignments = io.loadh(args.assignments, 'arr_0')
    distances = io.loadh(args.distances, 'arr_0')
except KeyError:
    assignments = io.loadh(args.assignments, 'Data')
    distances = io.loadh(args.distances, 'Data')

trimmed = run(assignments, distances, args.rmsd_cutoff)

io.saveh(args.output, trimmed)
logger.info('Saved output to %s', args.output)
parser.add_argument('-N', dest='N', default=0, type=int,
                    help='Which eigenvector to look at.')
parser.add_argument('--double', dest='double', default=False, action='store_true',
                    help='Pass this flag if you used msmbuilder.metrics.Dihedrals, '
                         'which means there is a sin and cosine entry for each angle')
options = parser.parse_args()

import numpy as np
from msmbuilder import io, Trajectory
from msmbuilder import metrics
from msmbuilder.geometry import dihedral
import matplotlib
matplotlib.use('pdf')
from matplotlib.pyplot import *
import os, sys, re

pdb = Trajectory.load_trajectory_file(options.pdbFN)
pca = io.loadh(options.pcaFN)

decInd = np.argsort(pca['vals'])[::-1]
v0 = np.abs(pca['vecs'][:, decInd][:, options.N])

if options.double:
    if v0.shape[0] % 2:
        print "There are an odd number of entries, so --double should not be passed here, or something else has gone wrong."
        exit()
    n0 = v0.shape[0]
    v0 = v0[:n0 / 2] + v0[n0 / 2:]
def main(coarse_val, orig_val, rcut):
    data = dict()
    data['coarse'] = dict()
    data['orig'] = dict()
    dirs = dict()
    dirs['coarse'] = './d%s' % coarse_val
    dirs['orig'] = './d%s' % orig_val
    proj = Project.load_from('ProjectInfo.yaml')
    types = ['ass', 'rmsd', 'dist', 'gens']
    for key in ['coarse', 'orig']:
        for type in types:
            if 'ass' in type:
                ass = io.loadh('%s/Data/Assignments.h5' % dirs[key])
                data[key][type] = ass['arr_0']
            elif 'dist' in type:
                ass = io.loadh('%s/Data/Assignments.h5.distances' % dirs[key])
                data[key][type] = ass['arr_0']
            elif 'rmsd' in type:
                rmsd = numpy.loadtxt('%s/Gens.rmsd.dat' % dirs[key])
                data[key][type] = rmsd
            elif 'gens' in type:
                gens = Trajectory.load_from_lhdf('%s/Gens.lh5' % dirs[key])
                data[key][type] = gens
    unboundmap = dict()
    boundmap = dict()
    #unboundstates=dict()
    #unboundrmsd=dict()
    # build map dict for orig to coarse unbound states, bound will stay same
    newass = -1 * numpy.ones((data['orig']['ass'].shape[0], data['orig']['ass'].shape[1]), dtype=int)
    for j in range(0, data['orig']['ass'].shape[0]):
        for (n, i) in enumerate(data['orig']['ass'][j]):
            # if unbound
            if i != -1:
                if data['orig']['rmsd'][i] > float(rcut):
                    state = data['coarse']['ass'][j][n]
                    newass[j][n] = state + 10000
                else:
                    newass[j][n] = i
    count = 0
    unique = set(newass.flatten())
    boundmap = dict()
    unboundmap = dict()
    for x in unique:
        locations = numpy.where(newass == x)
        newass[locations] = count
        if x >= 10000:
            unboundmap[count] = (x - 10000)
        else:
            boundmap[count] = x
        count += 1
    io.saveh('%s/Coarsed_r%s_Assignments.h5' % (dirs['orig'], rcut), newass)
    subdir = '%s/Coarsed_r%s_gen/' % (dirs['orig'], rcut)
    if not os.path.exists(subdir):
        os.mkdir(subdir)
    ohandle = open('%s/Coarsed%s_r%s_Gens.rmsd.dat' % (subdir, coarse_val, rcut), 'w')
    b = data['orig']['gens']['XYZList'].shape[1]
    c = data['orig']['gens']['XYZList'].shape[2]
    dicts = [boundmap, unboundmap]
    names = ['bound', 'unbound']
    labels = ['orig', 'coarse']
    total = len(boundmap.keys()) + len(unboundmap.keys())
    structure = proj.empty_traj()
    structure['XYZList'] = numpy.zeros((total, b, c), dtype='float32')
    count = 0
    for (name, label, mapdata) in zip(names, labels, dicts):
        print "writing coarse gen %s out of %s pdbs" % (count, len(mapdata.keys()))
        for i in sorted(mapdata.keys()):
            macro = mapdata[i]
            structure['XYZList'][count] = data[label]['gens']['XYZList'][macro]
            ohandle.write('%s\t%s\t%s\n' % (name, count, data[label]['rmsd'][macro]))
            print name, count
            count += 1
    structure.save_to_xtc('%s/Coarsed%s_r%s_Gens.xtc' % (subdir, coarse_val, rcut))
import numpy as np
from scipy.optimize import fsolve
from msmbuilder import io
import argparse
import matplotlib
from matplotlib.pyplot import *
import IPython

parser = argparse.ArgumentParser()
parser.add_argument('-d', dest='data', help='data for each state')
parser.add_argument('-f', dest='eig', help='eigenvector value of each state')
args = parser.parse_args()

M = io.loadh(args.data, 'HB_maps')
if len(M.shape) > 2:
    M = M.reshape((M.shape[0], -1))
M = M - M.mean(0)
print M.shape

eig = io.loadh(args.eig, 'arr_0')
b = eig / np.sqrt(eig.dot(eig) / eig.shape[0])
b = np.reshape(b, (-1, 1))

sigma = M.T.dot(M)
pca_vals, pca_vecs = np.linalg.eig(sigma)
ind = np.where(pca_vals > 1E-8)[0]
parser.add_argument('output', help="""The name of the RandomConfs
    trajectory (.lh5) to write. XRandomConfs.lh5, where X=Number of
    Conformations.""", default='XRandomConfs')
parser.add_argument('conformations_per_state', help='''Number of
    conformations to randomly sample from your data per state''', type=int)
parser.add_argument('format', help='''Format to output the data in. Note
    that the PDB format is uncompressed and not efficient. For XTC, you
    can view the trajectory using your project's topology file''',
    default='lh5', choices=['pdb', 'xtc', 'lh5'])
args = parser.parse_args()

if args.output == 'XRandomConfs':
    args.output = '%dRandomConfs.%s' % (args.conformations_per_state, args.format)

try:
    assignments = io.loadh(args.assignments, 'arr_0')
except KeyError:
    assignments = io.loadh(args.assignments, 'Data')

project = Project.load_from(args.project)
random_confs = run(project, assignments, args.conformations_per_state)

if args.format == 'pdb':
    random_confs.SaveToPDB(args.output)
elif args.format == 'lh5':
    random_confs.SaveToLHDF(args.output)
elif args.format == 'xtc':
    random_confs.SaveToXTC(args.output)
else:
    raise ValueError('Unrecognized format')
def get_implied_timescales_helper(args):
    """Helper function to compute implied timescales with multiprocessing

    Does not work in interactive mode

    Parameters
    ----------
    assignments_fn : str
        Path to Assignments.h5 file on disk
    n_states : int
        Number of states
    lag_time : list
        List of lag times to calculate the timescales at
    n_implied_times : int, optional
        Number of implied timescales to calculate at each lag time
    sliding_window : bool, optional
        Use sliding window
    trimming : bool, optional
        Use ergodic trimming
    symmetrize : {'MLE', 'Transpose', None}
        Symmetrization method

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """
    assignments_fn, lag_time, n_implied_times, sliding_window, trimming, symmetrize = args

    try:
        assignments = io.loadh(assignments_fn, 'arr_0')
    except KeyError:
        assignments = io.loadh(assignments_fn, 'Data')

    try:
        from msmbuilder import MSMLib
        t_matrix = MSMLib.build_msm(assignments, lag_time, symmetrize,
                                    sliding_window, trimming)[1]
    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    # TJL: set epsilon high, should not raise err here
    n_eigenvectors = n_implied_times + 1
    e_values = get_eigenvectors(t_matrix, n_eigenvectors, epsilon=1)[0]

    # make sure to leave off equilibrium distribution
    lag_times = lag_time * np.ones((n_implied_times))
    imp_times = -lag_times / np.log(e_values[1:n_eigenvectors])

    # save intermediate result in case of failure
    # res = np.zeros((n_implied_times, 2))
    # res[:, 0] = lag_times
    # res[:, 1] = np.real(imp_times)

    return (lag_times, imp_times)
def get_implied_timescales_helper(args):
    """Helper function to compute implied timescales with multiprocessing

    Does not work in interactive mode

    Parameters
    ----------
    assignments_fn : str
        Path to Assignments.h5 file on disk
    n_states : int
        Number of states
    lag_time : list
        List of lag times to calculate the timescales at
    n_implied_times : int, optional
        Number of implied timescales to calculate at each lag time
    sliding_window : bool, optional
        Use sliding window
    trimming : bool, optional
        Use ergodic trimming
    symmetrize : {'MLE', 'Transpose', None}
        Symmetrization method

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """
    assignments_fn, lag_time, n_implied_times, sliding_window, trimming, symmetrize = args

    try:
        assignments = io.loadh(assignments_fn, 'arr_0')
    except KeyError:
        assignments = io.loadh(assignments_fn, 'Data')

    try:
        from msmbuilder import MSMLib
        counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lag_time,
                                                          sliding_window=sliding_window)
        rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(counts, symmetrize, trimming)
    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    # TJL: set epsilon high, should not raise err here
    n_eigenvectors = n_implied_times + 1
    e_values = get_eigenvectors(t_matrix, n_eigenvectors, epsilon=1)[0]

    # make sure to leave off equilibrium distribution
    lag_times = lag_time * np.ones((n_implied_times))
    imp_times = -lag_times / np.log(e_values[1:n_eigenvectors])

    # save intermediate result in case of failure
    # res = np.zeros((n_implied_times, 2))
    # res[:, 0] = lag_times
    # res[:, 1] = np.real(imp_times)

    return (lag_times, imp_times)
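# Hedged usage sketch (assumed): dispatching the helper above over several lag
# times with multiprocessing, matching the tuple order it unpacks. The file
# name and parameter values are illustrative.
import multiprocessing

arg_tuples = [('Data/Assignments.h5', lt, 10, True, True, 'MLE')
              for lt in range(5, 55, 5)]
pool = multiprocessing.Pool(processes=4)
results = pool.map(get_implied_timescales_helper, arg_tuples)
pool.close()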
dcds = numpy.loadtxt('mapped_trajs.txt', usecols=(0,), dtype=str)
xtcs = numpy.loadtxt('mapped_trajs.txt', usecols=(1,), dtype=str)
mapping = dict()
for (i, j) in zip(dcds, xtcs):
    name = i.split('_nowat_')[0]
    mapping[j] = name
#for j in mapping.keys():
#    total=mapping[j]+bw[j]
#    if total!=totals[j]:
#        print "problem"
#        import pdb
#        pdb.set_trace()
ohandle = open('d6/msml1000_coarse_r10_d20/traj_frames.txt', 'w')
ass = io.loadh('d6/msml1000_coarse_r10_d20/Assignments.Fixed.h5')
mapfile = numpy.loadtxt('d6/msml1000_coarse_r10_d20/Mapping.dat')
sample = False
for state in sorted(set(ass['arr_0'].flatten())):
    if state != -1:
        traj = numpy.where(ass['arr_0'] == state)[0]
        frames = numpy.where(ass['arr_0'] == state)[1]
        indices = numpy.random.random_integers(0, len(traj) - 1, len(traj))
        for ind in indices:
            traj_ind = traj[ind]
            mapped_traj = mapping['trj%s' % traj_ind]
            if mapped_traj in bw.keys():
                minval = bw[mapped_traj]
            else:
                minval = 0
            location = numpy.where((traj == traj_ind) & (frames > minval))[0]
from time import time
import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
sh = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(fmt="%(asctime)s - %(message)s", datefmt="%H:%M:%S")
sh.setFormatter(formatter)
logger.addHandler(sh)
logger.propagate = False

logger.info("start")

Proj = Project.load_from(args.proj_FN)
logger.info("loaded project info")

try:
    Ass = io.loadh(args.ass_FN)["arr_0"]
except KeyError:
    Ass = io.loadh(args.ass_FN)["Data"]

pdb = Trajectory.load_from_pdb(Proj.conf_filename)

which = np.loadtxt(args.which).astype(int)

distance_cutoff = 0.32
angle_cutoff = 120


def get_hb(traj):
    # get accH - donor distance:
    dists = contact.atom_distances(traj["XYZList"], atom_contacts=which[:, 1:])
import sys, os
import scipy.io
from msmbuilder import MSMLib
from msmbuilder.io import loadh, saveh

try:
    Assignments = loadh("%s" % (sys.argv[1]), 'arr_0').astype(int)
except KeyError:
    Assignments = loadh("%s" % (sys.argv[1]), 'Data').astype(int)

NumStates = max(Assignments.flatten()) + 1
LagTime = int(sys.argv[2])

Counts = MSMLib.get_count_matrix_from_assignments(Assignments, n_states=NumStates,
                                                  lag_time=LagTime, sliding_window=True)
scipy.io.mmwrite('%s' % (sys.argv[3]), Counts)
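# Hedged invocation sketch (assumption): how the script above might be run
# from the shell; the script and file names are illustrative.
#   python build_raw_counts.py Data/Assignments.h5 10 tCounts.mtx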