def test_fs_tent():
    """Yield the tent tests for two strided Fs-peptide trajectories.

    The dataset is fetched into a fresh temporary directory, two
    trajectories are loaded (every 100th frame, atoms of residues 3-8
    only), and the ``_test_tent_*`` checks are yielded with the pair of
    trajectories as their argument.  The working directory is restored
    and the temporary directory removed when the generator finishes.
    """
    cwd = os.path.abspath(os.curdir)
    dirname = tempfile.mkdtemp()
    try:
        # Fetch inside the ``try`` so a failed download does not leak the
        # temporary directory.
        FsPeptide(dirname).get()
        os.chdir(dirname)

        data_dir = os.path.join(dirname, 'fs_peptide')
        top = md.load(os.path.join(data_dir, 'fs-peptide.pdb'))
        wanted_residues = (3, 4, 5, 6, 7, 8)
        idx = [at.index for at in top.topology.atoms
               if at.residue.index in wanted_residues]

        traj = tuple(
            md.load(os.path.join(data_dir, name), stride=100, top=top,
                    atom_indices=idx)
            for name in ('trajectory-1.xtc', 'trajectory-2.xtc')
        )

        yield _test_tent_alpha, traj
        yield _test_tent_contact, traj
        yield _test_tent_dihedral, traj
    finally:
        os.chdir(cwd)
        shutil.rmtree(dirname)
def main(trj, topol, reference):
    """Print the atom indices selected by ``"residue 12 to 13"``.

    Parameters
    ----------
    trj : trajectory file passed to ``md.load``
    topol : topology passed as ``top`` when loading ``trj``
    reference : structure file; it is loaded but not used afterwards
    """
    target = md.load(trj, top=topol)
    # The reference is only loaded (an unreadable file still raises here);
    # nothing below reads it.
    reference_traj = md.load(reference)
    print(target.topology.select("residue 12 to 13"))
def get_cached(self):
    """Load the cached alanine-dipeptide trajectories from ``self.data_dir``.

    Returns
    -------
    Bunch
        ``trajectories`` holds one loaded trajectory per
        ``trajectory*.dcd`` file (in sorted filename order) and ``DESCR``
        holds ``self.description()``.
    """
    top = md.load(join(self.data_dir, 'ala2.pdb'))
    # glob() returns files in arbitrary order; sort so the trajectory list
    # is reproducible between runs and machines.
    pattern = join(self.data_dir, 'trajectory*.dcd')
    trajectories = [md.load(fn, top=top) for fn in sorted(glob(pattern))]
    return Bunch(trajectories=trajectories, DESCR=self.description())
def test_load():
    """Check ``discard_overlapping_frames`` when loading one or many files.

    Each file is loaded alone and as a list of ``num_block`` copies, with
    the flag on and off, and the frame counts are compared.
    """
    # NOTE(review): "traj.h5" is listed twice; kept as in the original.
    filenames = [
        "frame0.xtc", "frame0.trr", "frame0.dcd", "frame0.binpos",
        "traj.h5", "frame0.nc", "traj.h5", "frame0.lammpstrj",
        "frame0.xyz",
    ]
    num_block = 3
    for filename in filenames:
        path = get_fn(filename)
        repeated = [path] * num_block
        t0 = md.load(path, top=nat, discard_overlapping_frames=True)
        t1 = md.load(path, top=nat, discard_overlapping_frames=False)
        t2 = md.load(repeated, top=nat, discard_overlapping_frames=False)
        t3 = md.load(repeated, top=nat, discard_overlapping_frames=True)

        # these don't actually overlap, so discard_overlapping_frames should
        # have no effect. the overlap is between the last frame of one and the
        # first frame of the next.
        #
        # Bind the trajectories as default arguments: a bare lambda would
        # look the names up when called and could see a later iteration.
        yield lambda t0=t0, t1=t1: eq(t0.n_frames, t1.n_frames)
        yield lambda t0=t0, t2=t2: eq(t0.n_frames * num_block, t2.n_frames)
        yield lambda t2=t2, t3=t3: eq(t3.n_frames, t2.n_frames)
def calc_obs(traj):
    """Compute RMSD and distance observables for one trajectory file.

    Parameters
    ----------
    traj : path of the trajectory to load

    Returns
    -------
    dict
        ``fname`` (basename of ``traj``), heavy-atom RMSDs to the inactive
        and active reference structures, the RMSD over the atoms of
        residues 147-167 against frame 0 of the inactive reference, and
        two per-frame distance columns.
    """
    # Hard-coded atom indices used for the distance pairs.
    arg_cz_id = 2442
    glu_cd_id = 862
    lys_nz_id = 634
    tyr_oh_id = 2019  # defined but not used below

    inactive = mdt.load("./topologies/inactive.pdb")
    active = mdt.load("./topologies/active.pdb")

    aloop_atoms_list = []
    for residue in np.arange(147, 168):
        aloop_atoms_list.extend(
            atom.index for atom in inactive.topology.residue(residue).atoms)

    all_heavy = []
    for atom in inactive.topology.atoms:
        if atom.residue.is_protein and atom.element.name != "hydrogen":
            all_heavy.append(atom.index)

    print("Processing %s" % traj)
    # load the trajectory, restricted to the atoms of the reference
    trj = mdt.load(traj, atom_indices=np.arange(inactive.n_atoms))

    inactive_rms = mdt.rmsd(trj, inactive, atom_indices=all_heavy)
    active_rms = mdt.rmsd(trj, active, atom_indices=all_heavy)
    aloop_rms = mdt.rmsd(trj, inactive, frame=0,
                         atom_indices=aloop_atoms_list)

    atom_pairs = np.vstack(([arg_cz_id, glu_cd_id],
                            [lys_nz_id, glu_cd_id]))
    distances = mdt.compute_distances(trj, atom_pairs)

    observables = dict(
        fname=os.path.basename(traj),
        inactive_rmsd=inactive_rms,
        active_rmsd=active_rms,
        aloop_inactive_rmsd=aloop_rms,
        glu_arg=distances[:, 0],
        # Key says "gly" but the pair is (lys_nz_id, glu_cd_id); the key is
        # kept because callers may read it.
        gly_lys=distances[:, 1],
    )
    return observables
def test_dihedral_pbc():
    """Phi angles of the uncorrected and corrected trajectories must agree.

    The uncorrected trajectory is always evaluated with ``periodic=True``;
    the corrected one is evaluated with several combinations of ``opt``
    and ``periodic``.  Each pairing must match to within ``epsilon``.
    """
    topology_fn = get_fn('1am7_protein.pdb')
    traj_uncorrected = md.load(get_fn('1am7_uncorrected.xtc'), top=topology_fn)
    traj_corrected = md.load(get_fn('1am7_corrected.xtc'), top=topology_fn)

    epsilon = 6E-3

    # (opt for uncorrected, opt for corrected, periodic for corrected)
    cases = [
        (False, False, True),
        (True, True, True),
        (True, True, False),
        (False, False, False),
        (True, False, False),
        (False, True, True),
        (False, True, False),
    ]
    for opt_uncorrected, opt_corrected, periodic_corrected in cases:
        ang1 = md.geometry.compute_phi(
            traj_uncorrected, opt=opt_uncorrected, periodic=True)[1]
        ang2 = md.geometry.compute_phi(
            traj_corrected, opt=opt_corrected,
            periodic=periodic_corrected)[1]
        assert np.max(np.abs(ang1 - ang2)) < epsilon
def check():
    """Compare the three converted output files against ``TRAJ``.

    Reads the full, atom-subset and strided outputs from ``output_dir``
    and checks coordinates, and, where the formats involved carry them,
    unit cell vectors, time and topology.
    """
    out1 = md.load(os.path.join(output_dir, fn2), **load_kwargs_check1)
    out2 = md.load(os.path.join(output_dir, 'subset.' + fn2),
                   **load_kwargs_check2)
    out3 = md.load(os.path.join(output_dir, 'stride.' + fn2),
                   **load_kwargs_check1)

    # A looser tolerance is used whenever .lh5 is involved.
    decimal = 3 if '.lh5' in (ext1, ext2) else 6

    eq(out1.xyz, TRAJ.xyz, decimal=decimal)
    eq(out2.xyz, TRAJ.xyz[:, atom_indices], decimal=decimal)
    eq(out3.xyz, TRAJ.xyz[::3], decimal=decimal)

    no_unitcell = ('.binpos', '.lh5')
    if not (ext1 in no_unitcell or ext2 in no_unitcell):
        # binpos doesn't save unitcell information
        eq(out1.unitcell_vectors, TRAJ.unitcell_vectors, decimal=2)
        eq(out2.unitcell_vectors, TRAJ.unitcell_vectors, decimal=2)
        eq(out3.unitcell_vectors, TRAJ.unitcell_vectors[::3], decimal=2)

    timed = ('.xtc', '.trr', '.nc', '.h5')
    if ext1 in timed and ext2 in timed:
        # these formats contain time information
        eq(out1.time, TRAJ.time)
        eq(out2.time, TRAJ.time)
        eq(out3.time, TRAJ.time[::3])

    if ext2 in ('.pdb', '.h5', '.lh5'):
        # these formats contain a topology in the file that was
        # read from disk
        eq(out1.topology, TRAJ.topology)
        eq(out2.topology, TRAJ.topology.subset(atom_indices))
        eq(out3.topology, TRAJ.topology)
def main():
    """Compute custom features of a trajectory against a homology model.

    Command-line arguments: ``--ref`` (homology model pdb, also used as
    the topology), ``--trj`` (trajectory) and ``--mol2`` (parsed but not
    read in the code visible here).

    NOTE(review): in the code visible here the features are computed but
    neither returned nor written out.
    """
    parser = argparse.ArgumentParser(
        description='custom featurization of clc fah trjs')
    parser.add_argument('--ref', type=str, help='homology model pdb file')
    parser.add_argument('--trj', type=str, help='trajectory file')
    parser.add_argument(
        '--mol2', type=str,
        help='homology model mol2 file (charges needed for dipole calc)')
    args = parser.parse_args()

    # load system data
    trj = mdtraj.load(args.trj, top=args.ref)
    hmodel = mdtraj.load(args.ref)

    ### feature 0: protein RMSD from hmodel ###
    pi_noh = [atom.index for atom in trj.top.atoms
              if atom.residue.is_protein and atom.element.symbol != 'H']
    p_rmsd = mdtraj.rmsd(trj, hmodel, atom_indices=pi_noh)

    ### feature 1: GLU128 RMSD from hmodel ###
    e128 = res_ndxs(hmodel, vs_ri['glu128'])
    e128_rmsd = mdtraj.rmsd(trj, hmodel, atom_indices=e128)

    ### feature 2: LYS317 and GLU318 RMSD from hmodel ###
    tl = np.concatenate((res_ndxs(hmodel, vs_ri['lys317']),
                         res_ndxs(hmodel, vs_ri['glu318'])))
    tl_rmsd = mdtraj.rmsd(trj, hmodel, atom_indices=tl)

    ### feature 3: distance between ASP32 and LYS127 ###
    a32 = ele_ndxs(hmodel, vs_ri['asp32'], ['OD1', 'OD2'])
    l127 = ele_ndxs(hmodel, vs_ri['lys127'], ['NZ'])
    al_pairs = cartesian([a32, l127])
    # i think the asp oxygens are degenerate, so i'll look at the min here.
    # The minimum has to be taken over the per-frame pair *distances*;
    # reducing al_pairs itself would only return the smaller atom index.
    al_dist = np.amin(mdtraj.compute_distances(trj, al_pairs), axis=1)
def get_J3_HN_HA(traj, top, frame=None, model="Habeck", outname=None):
    '''Compute J3_HN_HA for frames in a trajectory.

    Parameters
    ----------
    traj: trajectory file
    top: topology file
    frame: sequence of frame indices to compute for; if None, all frames
        are used
    model: Karplus coefficient models
        ["Ruterjans1999","Bax2007","Bax1997","Habeck","Vuister","Pardi"]
    outname: if not None, the output will be saved with ``np.save`` and a
        file name (in the format of string) is required.

    Returns
    -------
    J
        With ``frame=None``, whatever ``compute_J3_HN_HA`` returns for the
        whole trajectory.  Otherwise a list holding element 0 of the first
        frame's result followed by element 1 of each frame's result.
    '''
    if frame is None:
        t = md.load(traj, top=top)
        J = compute_J3_HN_HA(t, model=model)
    else:
        J = []
        # Read the trajectory once and slice it, rather than re-reading the
        # whole file from disk for every requested frame.
        full = md.load(traj, top=top) if len(frame) else None
        for i, index in enumerate(frame):
            d = compute_J3_HN_HA(full[index], model=model)
            if i == 0:
                J.append(d[0])
            J.append(d[1])

    if outname is not None:
        print('saving output file...')
        np.save(outname, J)
        print('Done!')
    return J
def test_load_combination():
    """Yield checks that ``stride`` and ``atom_indices`` work per format."""
    # Test that the load function's stride and atom_indices work across
    # all trajectory formats
    topology = md.load(get_fn('native.pdb')).topology
    ainds = np.array([a.index for a in topology.atoms
                      if a.element.symbol == 'C'])
    filenames = ['frame0.binpos', 'frame0.dcd', 'frame0.trr', 'frame0.xtc',
                 'frame0.nc', 'frame0.h5', 'frame0.pdb',
                 'frame0.lammpstrj', 'frame0.xyz']
    if not (on_win and on_py3):
        filenames.append('legacy_msmbuilder_trj0.lh5')

    paths = [get_fn(fn) for fn in filenames]
    no_kwargs = [md.load(fn, top=topology) for fn in paths]
    strided3 = [md.load(fn, top=topology, stride=3) for fn in paths]
    subset = [md.load(fn, top=topology, atom_indices=ainds) for fn in paths]

    # Each lambda binds its trajectories as default arguments so that it
    # checks the pair it was created for, whenever it is eventually called.
    for t1, t2 in zip(no_kwargs, strided3):
        yield lambda t1=t1, t2=t2: eq(t1.xyz[::3], t2.xyz)
        yield lambda t1=t1, t2=t2: eq(t1.time[::3], t2.time)
        if t1.unitcell_vectors is not None:
            yield lambda t1=t1, t2=t2: eq(t1.unitcell_vectors[::3],
                                          t2.unitcell_vectors)
        yield lambda t1=t1, t2=t2: eq(t1.topology, t2.topology)

    for t1, t2 in zip(no_kwargs, subset):
        yield lambda t1=t1, t2=t2: eq(t1.xyz[:, ainds, :], t2.xyz)
        yield lambda t1=t1, t2=t2: eq(t1.time, t2.time)
        if t1.unitcell_vectors is not None:
            yield lambda t1=t1, t2=t2: eq(t1.unitcell_vectors,
                                          t2.unitcell_vectors)
        yield lambda t1=t1, t2=t2: eq(t1.topology.subset(ainds),
                                      t2.topology)
def calculate_rmsd(trajectory, topology, reference):
    """Return the frame count and last-frame RMSD, both as strings.

    Parameters
    ----------
    trajectory : trajectory file to load
    topology : topology passed as ``top`` when loading ``trajectory``
    reference : structure file the RMSD is computed against

    Returns
    -------
    dict
        ``{"step": str(n_frames), "rmsd": str(rmsd of the last frame)}``
    """
    import mdtraj

    traj = mdtraj.load(trajectory, top=topology)
    rmsd_per_frame = mdtraj.rmsd(traj, mdtraj.load(reference))
    return {
        "step": str(traj.n_frames),
        "rmsd": str(rmsd_per_frame[-1]),
    }
def test_atom_indices_1():
    """Loading an mdcrd with ``atom_indices`` must equal slicing afterwards."""
    atom_indices = np.arange(10)
    top = md.load(get_fn("native.pdb"))
    mdcrd_path = get_fn("frame0.mdcrd")

    everything = md.load(mdcrd_path, top=top)
    selected = md.load(mdcrd_path, top=top, atom_indices=atom_indices)

    eq(everything.xyz[:, atom_indices], selected.xyz)
def test_vsite_elements(get_fn):
    """Round-trip a TIP4P-Ew PDB through HDF5 and load it back."""
    # Test case for issue #265
    pdb_filename = get_fn('GG-tip4pew.pdb')
    md.load(pdb_filename).save_hdf5(temp)
    # The test passes as long as reloading does not raise.
    trj2 = md.load(temp, top=pdb_filename)
def load_trajs(trajins, topin):
    """Load every trajectory in ``trajins`` and return them as a list.

    Parameters
    ----------
    trajins : iterable of trajectory file names
    topin : topology passed as ``top`` to ``md.load``, or None to load
        each trajectory without an explicit topology

    Returns
    -------
    list
        The loaded trajectories, in the order of ``trajins``.
    """
    pretrajs = []
    for trajin in trajins:
        print("Loading {:s}".format(trajin))
        # Load raw information from each trajectory
        if topin is not None:
            # The "topology" (the .gro file) must be included if the
            # trajectory comes from GROMACS
            loaded = md.load(trajin, top=topin)
        else:
            loaded = md.load(trajin)
        pretrajs.append(loaded)
    return pretrajs
def fetch_fs_peptide(data_home=None, download_if_missing=True):
    """Loader for the Fs peptide dataset

    Parameters
    ----------
    data_home : optional, default: None
        Specify another download and cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.

    download_if_missing: optional, True by default
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    Bunch
        ``trajectories`` is the list of loaded ``trajectory*.xtc`` files in
        sorted filename order; ``DESCR`` is the module docstring.

    Raises
    ------
    IOError
        If the data is not cached and ``download_if_missing`` is False.
    """
    data_home = get_data_home(data_home=data_home)
    if not exists(data_home):
        makedirs(data_home)

    data_dir = join(data_home, TARGET_DIRECTORY)
    if not exists(data_dir):
        if not download_if_missing:
            # Honour the documented contract instead of downloading anyway.
            raise IOError('fs peptide data not found in %s and '
                          'download_if_missing is False' % data_dir)

        print('downloading fs peptide from %s to %s' % (DATA_URL, data_home))
        fhandle = urlopen(DATA_URL)
        try:
            buf = BytesIO(fhandle.read())
        finally:
            fhandle.close()

        with ZipFile(buf) as zip_file:
            # Create the directory only once the archive opened cleanly, so
            # a corrupt download does not leave an empty cache behind.
            makedirs(data_dir)
            for name in zip_file.namelist():
                zip_file.extract(name, path=data_dir)

    top = md.load(join(data_dir, 'fs_peptide.pdb'))
    trajectories = []
    for fn in sorted(glob(join(data_dir, 'trajectory*.xtc'))):
        print('loading %s...' % basename(fn))
        trajectories.append(md.load(fn, top=top))

    return Bunch(trajectories=trajectories, DESCR=__doc__)
def test_lprmsd_5(get_fn):
    """With an empty permute group, lprmsd must agree with plain rmsd."""
    path = get_fn('frame0.h5')
    t = md.load(path)
    t1 = md.load(path)

    expected = md.rmsd(t, t1, 0)
    observed = md.lprmsd(t, t1, 0, permute_groups=[[]], superpose=True)

    eq(observed, expected, decimal=3)
def _trj_load(file, top):
    """Load ``positions.xtc`` from a directory or from a bzip2 tar archive.

    Parameters
    ----------
    file : a directory containing ``positions.xtc``, or the path of a
        ``tar`` archive that is unpacked into the current working
        directory with ``tar -xvjf``
    top : topology passed as ``top`` to ``mdt.load``

    Raises
    ------
    subprocess.CalledProcessError
        If ``tar`` exits with a non-zero status.
    """
    if os.path.isdir(file):
        return mdt.load(os.path.join(file, "positions.xtc"), top=top)

    import subprocess

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.  check_call fails loudly on a broken archive
    # instead of going on to load a stale positions.xtc from an earlier run.
    subprocess.check_call(["tar", "-xvjf", file])
    return mdt.load("positions.xtc", top=top)
def test_lengths():
    """Concatenating with gmx trjcat must match mdtraj's own joining.

    The three input pieces are concatenated with ``gmx trjcat``, by
    manual slicing and joining, and by ``mdtraj.load`` on the list with
    ``discard_overlapping_frames=True``; all three must give identical
    strided coordinates.
    """
    num = 3
    stride = 8
    inptrajs = ['PROJ9761/RUN3/CLONE9/frame{}.xtc'.format(i)
                for i in range(num)]

    trjcat_cmd = ['gmx', 'trjcat', '-f'] + inptrajs + ['-o', 'catty.xtc']
    subprocess.check_call(trjcat_cmd, stderr=subprocess.STDOUT,
                          stdout=subprocess.DEVNULL)

    with mdtraj.open("catty.xtc") as xtc:
        n_frames = len(xtc)
    stridelen, remain = divmod(n_frames, stride)
    assert stridelen == num * PROJ61_LENGTH_PER_GEN, (stridelen, remain)

    top = mdtraj.load_prmtop("tops-p9712/4bw5.prmtop")
    traj1 = mdtraj.load("catty.xtc", top=top)[::stride]

    # blarg! the last frame is duplicatey, so drop it from every piece
    # except the final one.
    traj2 = None
    last = len(inptrajs) - 1
    for position, path in enumerate(inptrajs):
        piece = mdtraj.load(path, top=top)[::stride]
        if position != last:
            piece = piece[:-1]
        if traj2 is None:
            traj2 = piece
        else:
            traj2 += piece

    traj3 = mdtraj.load(inptrajs, top=top,
                        discard_overlapping_frames=True)[::stride]

    np.testing.assert_array_equal(traj1.xyz, traj3.xyz)
    np.testing.assert_array_equal(traj1.xyz, traj2.xyz)
def test_lprmsd_3(get_fn):
    """lprmsd must resolve a rotation and a permutation together.

    A copy of the structure has two groups of water atoms scrambled, a
    few backbone atoms slightly displaced, and the whole frame rotated;
    the LP-RMSD against the original must still be small.
    """
    # resolve rotation and permutation together
    t1 = md.load(get_fn('alanine-dipeptide-explicit.pdb'))[0]
    t2 = md.load(get_fn('alanine-dipeptide-explicit.pdb'))[0]

    h2o_o_indices = [a.index for a in t1.topology.atoms
                     if a.residue.name == 'HOH'
                     and a.element.symbol == 'O'][0:20]
    h2o_h_indices = [a.index for a in t1.topology.atoms
                     if a.residue.name == 'HOH'
                     and a.element.symbol == 'H'][0:20]
    backbone_indices = [a.index for a in t1.topology.atoms
                        if a.element.symbol in ['C', 'N']][:5]

    # scramble two groups of indices
    t2.xyz[:, random.permutation(h2o_o_indices)] = t2.xyz[:, h2o_o_indices]
    t2.xyz[:, random.permutation(h2o_h_indices)] = t2.xyz[:, h2o_h_indices]

    # offset the backbone indices slightly
    t2.xyz[:, backbone_indices] += 0.001 * random.randn(
        len(backbone_indices), 3)

    # rotate everything.  ``dot`` returns a new array, so the result has to
    # be written back or the frame is never actually rotated.
    rot = rotation_matrix_from_quaternion(uniform_quaternion())
    t2.xyz[0] = t2.xyz[0].dot(rot)

    print('raw distinguishable indices', backbone_indices)
    atom_indices = np.concatenate((h2o_o_indices, backbone_indices))
    value = md.lprmsd(t2, t1, atom_indices=atom_indices,
                      permute_groups=[h2o_o_indices])

    # NOTE(review): this perturbation happens after ``value`` was computed
    # and so cannot affect the assertion below; kept from the original.
    t1.xyz[:, h2o_o_indices] += random.randn(len(h2o_o_indices), 3)

    print('final value', value)
    assert value[0] < 1e-2
def featurize_pnas_distance(traj_dir, features_dir, ext, inactive_dir,
                            active_dir, inactive_distances_dir,
                            active_distances_dir, coords_dir,
                            inactive_distances_csv, active_distances_csv,
                            coords_csv, scale=7.14, residues_map=None):
    """Featurize every trajectory in ``traj_dir`` and dump the results.

    ``compute_pnas_coords_and_distance`` is mapped over the trajectory
    files in a 16-process pool.  Element ``[0]`` of each result is stored
    as coordinates, ``[1][0]`` as the inactive distances and ``[1][1]`` as
    the active distances.  The three lists are written with
    ``verbosedump``; coordinates and active distances are also written as
    CSV.

    NOTE(review): ``inactive_distances_csv`` is accepted but never
    written - confirm whether that is intentional.
    """
    if not os.path.exists(features_dir):
        os.makedirs(features_dir)

    inactive = md.load(inactive_dir)
    active = md.load(active_dir)

    trajs = get_trajectory_files(traj_dir, ext=ext)
    featurize_partial = partial(compute_pnas_coords_and_distance,
                                inactive=inactive, active=active,
                                scale=scale, residues_map=residues_map)

    pool = mp.Pool(16)
    try:
        features = pool.map(featurize_partial, trajs)
    finally:
        # Always release the worker processes, even if a worker raised.
        pool.terminate()

    coords = [f[0] for f in features]
    inactive_distances = [f[1][0] for f in features]
    active_distances = [f[1][1] for f in features]

    verbosedump(coords, coords_dir)
    verbosedump(inactive_distances, inactive_distances_dir)
    verbosedump(active_distances, active_distances_dir)

    write_map_to_csv(coords_csv, convert_np_to_map(coords),
                     ["frame", "tm3_tm6_dist", "rmsd_npxxy_inactive",
                      "rmsd_npxxy_active", "rmsd_connector_inactive",
                      "rmsd_connector_active"])
    write_map_to_csv(active_distances_csv,
                     convert_np_to_map(active_distances),
                     ["frame", "pnas_distance_active"])
    print("Completed featurizing")
def featurize_pnas_distance_pdbs(traj_dir, new_filename, features_dir,
                                 inactive_dir, active_dir,
                                 inactive_distances_dir,
                                 active_distances_dir, coords_dir):
    """Join the PDB files in ``traj_dir`` into one file and featurize it.

    Every ``.pdb`` file is loaded in a process pool, the results are
    joined and saved as HDF5 to ``new_filename``, and
    ``compute_pnas_coords_and_distance`` is run on that file.  The
    results are written with ``verbosedump``.

    NOTE(review): ``features`` is a single call result here but is
    iterated as if it were a list of per-trajectory results - confirm
    the return shape of ``compute_pnas_coords_and_distance``.
    """
    #if not os.path.exists(features_dir): os.makedirs(features_dir)
    inactive = md.load(inactive_dir)
    active = md.load(active_dir)

    samples = get_trajectory_files(traj_dir, ext=".pdb")

    pool = mp.Pool(mp.cpu_count())
    try:
        trajs = pool.map(load_pdb_traj, samples)
    finally:
        # The pool was previously never shut down, leaking one worker
        # process per CPU on every call.
        pool.terminate()

    trajs_joined = trajs[0].join(trajs[1:])
    trajs_joined.save_hdf5(new_filename)

    features = compute_pnas_coords_and_distance(new_filename, inactive,
                                                active)
    coords = [f[0] for f in features]
    inactive_distances = [f[1][0] for f in features]
    active_distances = [f[1][1] for f in features]

    verbosedump(coords, coords_dir)
    verbosedump(inactive_distances, inactive_distances_dir)
    verbosedump(active_distances, active_distances_dir)
    print("Completed featurizing")
def test_cpptraj(get_fn):
    """A file converted by cpptraj must match the file mdtraj wrote.

    ``frame0.dcd`` is saved to ``temp``, converted to ``temp2`` with the
    ``cpptraj`` executable, and both files are read back and compared
    with the in-memory trajectory.
    """
    top = get_fn("frame0.pdb")
    trj0 = md.load(get_fn('frame0.dcd'), top=top)
    trj0.save(temp)

    subprocess.check_call(['cpptraj', '-p', top, '-y', temp, '-x', temp2])

    trj1 = md.load(temp, top=top)
    trj2 = md.load(temp2, top=top)

    almost_equal = np.testing.assert_array_almost_equal
    for written in (trj0, trj1):
        almost_equal(written.xyz, trj2.xyz)
    for written in (trj0, trj1):
        almost_equal(written.unitcell_vectors, trj2.unitcell_vectors)
    almost_equal(trj0.time, trj1.time)
    almost_equal(trj0.time, trj2.time)
    almost_equal(trj1.time, trj2.time)
def MDTRAJwrite(mol, filename):
    """Write ``mol`` to ``filename`` using one of mdtraj's savers.

    ``mol`` is first written to temporary files (a PDB, plus an XTC for
    trajectory formats), those are loaded with mdtraj, and the resulting
    trajectory is saved to ``filename``.  The format is taken from the
    file extension; for ``.gz`` the last two extensions are used.

    Raises
    ------
    ValueError
        For an unknown extension, or wrapping any other failure.
    """
    try:
        import mdtraj as md
        from htmd.util import tempname

        ext = os.path.splitext(filename)[1][1:]
        if ext == 'gz':
            pieces = filename.split('.')
            ext = '{}.{}'.format(pieces[-2], pieces[-1])

        tmpfiles = []
        try:
            if ext in _MDTRAJ_TOPOLOGY_SAVERS:
                tmppdb = tempname(suffix='.pdb')
                tmpfiles.append(tmppdb)
                mol.write(tmppdb)
                traj = md.load(tmppdb)
            elif ext in _MDTRAJ_TRAJECTORY_SAVERS:
                tmppdb = tempname(suffix='.pdb')
                tmpxtc = tempname(suffix='.xtc')
                tmpfiles.extend([tmppdb, tmpxtc])
                mol.write(tmppdb)
                mol.write(tmpxtc)
                traj = md.load(tmpxtc, top=tmppdb)
            else:
                raise ValueError(
                    'Unknown file type for file {}'.format(filename))
        finally:
            # Remove the temporaries even when writing or loading failed.
            for tmp in tmpfiles:
                if os.path.exists(tmp):
                    os.remove(tmp)

        traj.save(filename)
    except Exception as e:
        # This is the write path; the message used to say "reader".
        raise ValueError(
            'MDtraj writer failed for file {} with error "{}"'.format(
                filename, e))
def main(opts):
    """Assign every trajectory frame to its closest cell by RMSD.

    Reads atom indices from ``opts.ndx``, builds the cells from
    ``opts.topol`` with coordinates from ``load_cells_gps(opts.cells)``,
    loads ``opts.trajs``, and writes the per-frame assignment to
    ``opts.assignments`` and the per-cell counts to ``opts.populations``.
    """
    print('Loading atom indices file for trajectories %s' % (opts.ndx,))
    ndx = np.loadtxt(opts.ndx, dtype=int)

    print('Loading cells from %s' % (opts.cells,))
    cells = mdtraj.load(opts.topol, atom_indices=ndx)
    cells.xyz = load_cells_gps(opts.cells)

    print('Loading trajectories %s' % (' '.join(opts.trajs),))
    traj = mdtraj.load(opts.trajs, top=opts.topol, atom_indices=ndx)

    n_cells = len(cells)
    n_frames = len(traj)
    print('Assigning to {} cells'.format(n_cells))

    # One row of RMSDs per cell, then flipped to one row per frame.
    rmsds = -np.ones((n_cells, n_frames))
    for cell in range(n_cells):
        rmsds[cell] = mdtraj.rmsd(traj, cells, frame=cell)
    rmsds = rmsds.T

    # Index of the closest cell for each frame.
    A = rmsds.argmin(axis=1)
    np.savetxt(opts.assignments, A, fmt='%d')

    print('Computing populations')
    P = np.bincount(A)
    np.savetxt(opts.populations, P, fmt='%d')
def test_load_combination(ref_traj, get_fn):
    """``stride`` and ``atom_indices`` on load must match slicing afterwards."""
    # Test that the load function's stride and atom_indices work across
    # all trajectory formats
    topology = md.load(get_fn('native.pdb')).topology
    ainds = np.array([atom.index for atom in topology.atoms
                      if atom.element.symbol == 'C'])

    traj_path = get_fn(ref_traj.fn)
    no_kwargs = md.load(traj_path, top=topology)
    strided3 = md.load(traj_path, top=topology, stride=3)
    subset = md.load(traj_path, top=topology, atom_indices=ainds)

    has_unitcell = no_kwargs.unitcell_vectors is not None

    # test 1: loading with stride=3 equals taking every third frame
    assert eq(no_kwargs.xyz[::3], strided3.xyz)
    assert eq(no_kwargs.time[::3], strided3.time)
    if has_unitcell:
        assert eq(no_kwargs.unitcell_vectors[::3], strided3.unitcell_vectors)
    assert eq(no_kwargs.topology, strided3.topology)

    # test 2: loading with atom_indices equals slicing the atoms
    assert eq(no_kwargs.xyz[:, ainds, :], subset.xyz)
    assert eq(no_kwargs.time, subset.time)
    if has_unitcell:
        assert eq(no_kwargs.unitcell_vectors, subset.unitcell_vectors)
    assert eq(no_kwargs.topology.subset(ainds), subset.topology)
def deprecated_test_fah_core17_1():
    """``fah.concatenate_core17`` must build and then extend an HDF5 file.

    Two identical result archives are concatenated and checked, then a
    third is added and the concatenation is run again; the output must
    grow from two to three times the frame count of the source.
    """
    from mdtraj.testing import get_fn, eq

    filename = get_fn('frame0.xtc')
    tempdir = tempfile.mkdtemp()
    try:
        tar_filename = os.path.join(tempdir, "results-000.tar.bz2")
        # Open the archive once.  The original opened it twice for
        # writing and never closed the first handle.
        with tarfile.open(tar_filename, "w:bz2") as tar:
            tar.add(filename, arcname="positions.xtc")

        shutil.copy(tar_filename,
                    os.path.join(tempdir, "results-001.tar.bz2"))

        trj0 = md.load(get_fn("frame0.xtc"), top=get_fn("frame0.h5"))

        output_filename = os.path.join(tempdir, "traj.h5")
        fah.concatenate_core17(tempdir, trj0, output_filename)

        trj = md.load(output_filename)
        eq(trj.n_atoms, trj0.n_atoms)
        eq(trj.n_frames, trj0.n_frames * 2)

        shutil.copy(tar_filename,
                    os.path.join(tempdir, "results-002.tar.bz2"))

        # Should notice the new file and append it to the HDF file.
        fah.concatenate_core17(tempdir, trj0, output_filename)

        trj = md.load(output_filename)
        eq(trj.n_atoms, trj0.n_atoms)
        eq(trj.n_frames, trj0.n_frames * 3)
    finally:
        # Do not leave the scratch directory behind.
        shutil.rmtree(tempdir)
def test_pdbwrite(get_fn):
    """Coordinates must survive a save/load round trip through ``temp``."""
    original = load(get_fn('native.pdb'))
    original.save(temp)
    reloaded = load(temp)
    eq(original.xyz, reloaded.xyz)
def main():
    """Combine a protein and a ligand into one PDB and optionally filter it.

    Reads ``args.pro``, ``args.lig``, ``args.clean_lig``, ``args.ref`` and
    ``args.cut`` from the module-level ``args``.  The combined PDB is
    named ``<protein prefix>_<ligand prefix>.pdb``.  When ``args.ref`` is
    given, the distance between the first atom matching
    ``resid <ref> and name C`` and the first atom matching
    ``not protein and name S`` is appended to ``dists.dat``, and the
    combined PDB is removed if that distance exceeds ``args.cut``.
    """
    p = args.pro
    ppref = p.split('/')[-1].split('.')[0]
    l = args.lig
    lpref = l.split('.')[0]

    if args.clean_lig:
        # NOTE(review): the tidied copy is written, but the steps below
        # still use the original ligand file - confirm this is intended.
        mdtraj.load(l).save_pdb('tidy_' + l)

    comb = sr + 'clc/dock/analysis/comb_pl.py ' + p + ' ' + l
    call_chimera(comb)

    # keep remarks w binding info
    fn = ppref + '_' + lpref + '.pdb'
    get_remarks = 'grep REMARK ' + l + '>> ' + fn
    call_cl(get_remarks)

    ref = args.ref
    if ref is not None:
        import numpy as np

        # read in combined pdb
        lp = mdtraj.load(fn)

        # find distance between reference residue and ligand
        pi = lp.topology.select('resid ' + ref + ' and name C')
        li = lp.topology.select('not protein and name S')
        lq = lp.atom_slice(li).xyz[0][0]
        pq = lp.atom_slice(pi).xyz[0][0]
        dist = np.linalg.norm(pq - lq)

        # log results; the ``with`` block closes the file on exit
        with open('dists.dat', 'a') as f:
            f.write(lpref + ': ' + str(dist) + '\n')

        # remove comb pdb if beyond cutoff distance
        if dist > args.cut:
            os.remove(fn)
def test_load_frame():
    """``md.load_frame`` must return the same frame as indexing a full load.

    For each format a random frame index is drawn and one test is yielded
    comparing coordinates, unit cell vectors and time.  A multi-model PDB
    is then checked directly.
    """
    files = [
        "frame0.nc", "frame0.h5", "frame0.xtc", "frame0.trr", "frame0.dcd",
        "frame0.mdcrd", "frame0.binpos", "frame0.xyz", "frame0.lammpstrj",
    ]
    if not (on_win and on_py3):
        files.append("legacy_msmbuilder_trj0.lh5")

    trajectories = [md.load(get_fn(f), top=get_fn("native.pdb"))
                    for f in files]
    rand = [np.random.randint(len(t)) for t in trajectories]
    frames = [md.load_frame(get_fn(f), index=r, top=get_fn("native.pdb"))
              for f, r in zip(files, rand)]

    for traj, frame, r, f in zip(trajectories, frames, rand, files):
        # Default arguments freeze this iteration's values; otherwise the
        # closure would read the loop variables (and the ``r`` rebound
        # further down) at call time.
        def test(traj=traj, frame=frame, r=r, f=f):
            eq(traj[r].xyz, frame.xyz)
            eq(traj[r].unitcell_vectors, frame.unitcell_vectors)
            eq(traj[r].time, frame.time,
               err_msg="%d, %d: %s" % (traj[r].time[0], frame.time[0], f))

        test.description = "test_load_frame: %s" % f
        yield test

    t1 = md.load(get_fn("2EQQ.pdb"))
    r = np.random.randint(len(t1))
    t2 = md.load_frame(get_fn("2EQQ.pdb"), r)
    eq(t1[r].xyz, t2.xyz)
def test_standardize_water():
    """Test utility function standardize_water.

    The water bonds must be recognized even when residue names do not
    match the standard definition in mdtraj.formats.pdb.data.residues.xml.

    """
    water_filepath = utils.get_data_filename("chemicals/water/water.mol2")
    water_traj = md.load(water_filepath)

    # mkstemp creates the file atomically, unlike the race-prone mktemp.
    fd, water_pdb_filepath = tempfile.mkstemp(suffix='.pdb')
    os.close(fd)
    try:
        # Store in pdb format and lose CONECT records.
        water_traj.save_pdb(water_pdb_filepath)
        with open(water_pdb_filepath, 'r') as f:
            pdb_lines = f.readlines()
        with open(water_pdb_filepath, 'w') as f:
            f.writelines(line for line in pdb_lines
                         if line[:6] != 'CONECT')

        # Test pre-condition: MDTraj cannot detect water bonds automatically.
        water_traj = md.load(water_pdb_filepath)
        assert water_traj.topology.n_bonds == 0

        # The function modifies the Trajectory and bonds are now recognized.
        assert packmol.standardize_water(water_traj) is True
        assert water_traj.topology.n_bonds == 2
    finally:
        # Remove temporary file, even when an assertion above failed.
        os.remove(water_pdb_filepath)
def feat(irow):
    """Featurize one ``(index, row)`` pair.

    ``row['traj_fn']`` is loaded with the topology looked up in ``tops``
    under ``row['top_fn']`` and passed through
    ``dihed_feat.partial_transform``.

    Returns
    -------
    tuple
        ``(index, transformed trajectory)``
    """
    index, row = irow
    traj_fn = row['traj_fn']
    topology = tops[row['top_fn']]
    loaded = md.load(traj_fn, top=topology)
    return index, dihed_feat.partial_transform(loaded)
def test_2():
    """Yield one DSSP comparison per frame of 2EQQ.

    Each test passes ``call_dssp`` on the frame and the first element of
    ``md.compute_dssp(..., simplified=False)`` to ``assert_``.
    """
    t = md.load(get_fn('2EQQ.pdb'))
    for i in range(len(t)):
        # ``i=i`` pins the frame index; a bare closure would read ``i`` at
        # call time and could test a different frame.
        yield lambda i=i: assert_(
            call_dssp(t[i]), md.compute_dssp(t[i], simplified=False)[0])
def test_6():
    """Simplified DSSP must give 'C' for protein residues and 'NA' otherwise.

    A residue counts as protein here when its atom names include all of
    C, N, O and CA.
    """
    t = md.load(get_fn('alanine-dipeptide-explicit.pdb'))
    assignments = md.compute_dssp(t, simplified=True)

    required_names = ('C', 'N', 'O', 'CA')
    has_backbone = []
    for residue in t.topology.residues:
        atom_names = set(atom.name for atom in residue.atoms)
        has_backbone.append(atom_names.issuperset(required_names))
    protein_residues = np.array(has_backbone)

    assert np.unique(assignments[:, protein_residues]) == "C"
    assert np.unique(assignments[:, np.logical_not(protein_residues)]) == 'NA'
def test_show_mdtraj():
    """Building a view from an mdtraj trajectory must not raise."""
    import mdtraj as md
    from mdtraj.testing import get_fn

    # Load the bundled PDB file and hand the trajectory to the viewer.
    trajectory = md.load(nv.datafiles.PDB)
    view = nv.show_mdtraj(trajectory)
'Root for naming PDB files: root + _ + frame + .pdb (e.g. trj_1.pdb)') ap.add_argument('--stride', default=1, type=int, help='Read only i-th frame. Default: reads all (i=1)') cmd = ap.parse_args() # Read/Parse Topology topology_fpath = check_file(cmd.topology) if topology_fpath.endswith('cif'): structure = app.PDBxFile(topology_fpath) topology = md.Topology.from_openmm(structure.topology) else: structure = md.load(cmd.topology) topology = structure.topology logging.info('Read topology from file: {}'.format(topology_fpath)) # Read trajectory trajectory_fpath = check_file(cmd.trajectory) logging.info('Reading trajectory from file: {}'.format(trajectory_fpath)) trj = md.load(trajectory_fpath, top=topology, stride=cmd.stride) logging.info('Removing PBCs and imaging molecules') topology.create_standard_bonds() anchors = topology.find_molecules() sorted_bonds = sorted(topology.bonds, key=lambda x: x[0].index) sorted_bonds = np.asarray([[b0.index, b1.index]
plt.axes().tick_params(labelsize=14) # plt.ylim([0, 0.6]) plt.tight_layout() plt.savefig("%s_density.pdf" % regions[i]) plt.show() if args.solvate: colors = ['xkcd:red', 'xkcd:green', 'blue', 'xkcd:yellow'] else: colors = ['red', 'green', 'blue'] if not args.load: print('Loading trajectory...', end="") t = md.load('%s' % args.traj, top='%s' % args.gro)[args.begin:args.end] print('done') box = t.unitcell_vectors nT = t.n_frames npores = 4 r_max = 0 if args.solvate: results = np.zeros([len(regions) + 1, args.bins]) else: results = np.zeros([len(regions), args.bins]) #keep = [a.index for a in t.topology.atoms if a.residue.name != 'HOH'] # everything kept if system not solvated components = ['C', 'C1', 'C2', 'C3', 'C4', 'C5'] comp = [a.index for a in t.topology.atoms if a.name in components]
md_npt.loadFile(inpf) sysgro, systop = md_npt.preparation(sysdir='SYS/', mddir='MD') simulation_npt = md_npt.setup(sysgro, systop) context = simulation_npt.context ofname = 'energy' + index + '_openmm_trr.dat' #fname = 'MD/md' + index + '.xtc' fname = 'MD/mdtrr' + index + '.trr' #fname = 'MD/copies' +index + '.trr' with open(ofname, 'wt') as f: f.write('# step\tEnergy(kJ/mol)\n') # load trajcetory traj = md.load(fname,top=sysgro) # read box informations ucell_vs = traj.unitcell_vectors for i in range(len(traj)): # read and set positions context.setPositions(traj.openmm_positions(i)) # set periodicbixvectors ucell_v = ucell_vs[i] context.setPeriodicBoxVectors(ucell_v[0],ucell_v[1],ucell_v[2]) state = context.getState(getEnergy=True) energyval = state.getPotentialEnergy() energyval /= kilojoules/mole print(i, energyval) with open(ofname, 'a+') as f:
import os, sys
import numpy as np
import mdtraj as md
from sklearn.externals import joblib

# Draw 5 sample frames per state from the pickled model and write each one
# out as PDBs-combined/State<state>-<n>.pdb.

msm_fn = 'MSMs-combined-macro40/MSMs-combined.pkl'
traj_log_fn = '../../trajectories.log'
assignment_fn = 'Assignments.fixed.Map40.npy'

msm = joblib.load(msm_fn)
a = np.load(assignment_fn)

# For each state, a sequence of (trajectory id, frame id) pairs.
selected_pairs_by_state = msm.draw_samples(a, 5)

PDBs_dir = 'PDBs-combined'
if not os.path.exists(PDBs_dir):
    os.makedirs(PDBs_dir)

with open(traj_log_fn, 'r') as logfn:
    # One trajectory path per line.  Strip only the newline: slicing with
    # [:-1] would eat a real character from a final line that has no
    # trailing newline.
    trajlog = [line.rstrip('\n') for line in logfn]

for state_id in range(selected_pairs_by_state.shape[0]):
    for i, (traj_id, frame_id) in enumerate(selected_pairs_by_state[state_id]):
        traj_fn = trajlog[traj_id]
        # Last four characters of the trajectory's parent directory name
        # select the matching Gen0 topology.
        p_id = traj_fn.split('/')[-2][-4:]
        pdb_fn = '../../Gen0-%s.pdb' % p_id
        pdb = md.load_pdb(pdb_fn)
        traj = md.load(traj_fn, top=pdb)
        output_fn = os.path.join(PDBs_dir, "State%d-%d.pdb" % (state_id, i))
        traj[frame_id].save_pdb(output_fn)
if not int_type in ["ALL", "BB-BB", "BB-SC", "SC-SC"]: printUsage() BB_names = [ "CA", "C", "N", "O", "H", "OP1", "OP2", "O5'", "O2'", "HO2'", "O3'" ] def checkType(n1, n2): if int_type == "ALL": return True if int_type == "BB-BB": return n1 in BB_names and n2 in BB_names if int_type == "BB-SC": return n1 in BB_names != n2 in BB_names if int_type == "SC-SC": return not n1 in BB_names and not n2 in BB_names return True print("Reading md-trajectory ..") t = md.load(trj_file, top=top_file) frame_count = len(t) print("Analyzing hbond network in %d frames .." % frame_count) hbonds_allframes = md.wernet_nilsson(t) hbond_frames = defaultdict(set) for f, frame in enumerate(t[:]): #hbonds = md.baker_hubbard(frame, periodic=True) hbonds = hbonds_allframes[f] print("Frame %d .. %d hbonds" % (f, hbonds.shape[0])) for hbond in hbonds: a1 = t.topology.atom(hbond[0]) a2 = t.topology.atom(hbond[2]) if not checkType(a1.name, a2.name): continue
def build_mixture_prmtop(mol2_filenames, frcmod_filenames, box_filename,
                         prmtop_filename, inpcrd_filename):
    """Create a prmtop and inpcrd from a collection of mol2 and frcmod files
    as well as a single box PDB. We have used this for setting up
    simulations of binary mixtures.

    - Original code by : Chodera Lab / Openmoltools project
      (https://github.com/choderalab/openmoltools)

    Parameters
    ----------
    mol2_filenames : list(str)
        Filenames of GAFF flavored mol2 files. Each must contain exactly
        ONE ligand.
    frcmod_filenames : list(str)
        Filenames of input GAFF frcmod files, one per mol2 file.
    box_filename : str
        Filename of PDB containing an arbitrary box of the mol2 molecules.
    prmtop_filename : str
        output prmtop filename. Should have suffix .prmtop
    inpcrd_filename : str
        output inpcrd filename. Should have suffix .inpcrd

    Returns
    -------
    tleap_commands : str
        The string of commands piped to tleap for building the prmtop
        and inpcrd files. This will *already* have been run, but the
        output can be useful for debugging or archival purposes. All
        file names in it are absolute paths.

    Raises
    ------
    ValueError
        If a mol2 file has more than one residue name, if residue names
        are not unique across mol2 files, or if the numbers of mol2 and
        frcmod files differ.

    Notes
    -----
    This can be easily broken if there are missing, duplicated, or
    inconsistent ligand residue names in your box, mol2, and frcmod files.
    You can use mdtraj to edit the residue names with something like
    this: trj.top.residue(0).name = "L1"
    """
    # Check for one residue name per mol2 file and uniqueness between all
    # mol2 files.  The names are kept so the files need not be read again.
    residue_names = []
    for filename in mol2_filenames:
        t = md.load(filename)
        names = set(r.name for r in t.top.residues)
        if len(names) != 1:
            raise ValueError(
                "Must have a SINGLE residue name in each mol2 file.")
        residue_names.append(names.pop())

    if len(set(residue_names)) != len(mol2_filenames):
        raise ValueError("Must have UNIQUE residue names in each mol2 file.")
    if len(mol2_filenames) != len(frcmod_filenames):
        raise ValueError(
            "Must provide an equal number of frcmod and mol2 file names.")

    # tleap runs from a temporary working directory, so every path it is
    # given must be absolute.  The original computed absolute input paths
    # but then used the caller's (possibly relative) names, which no longer
    # resolve after the directory change.
    mol2_paths = [os.path.abspath(filenm) for filenm in mol2_filenames]
    frcmod_paths = [os.path.abspath(filenm) for filenm in frcmod_filenames]
    box_path = os.path.abspath(box_filename)
    prmtop_filename = os.path.abspath(prmtop_filename)
    inpcrd_filename = os.path.abspath(inpcrd_filename)

    # Use temporary directory and do the setup
    with md.utils.enter_temp_directory():
        mol2_section = "\n".join(
            "%s = loadmol2 %s" % (name, path)
            for name, path in zip(residue_names, mol2_paths))
        amberparams_section = "\n".join(
            "loadamberparams %s" % path for path in frcmod_paths)

        tleap_commands = TLEAP_TEMPLATE % dict(
            mol2_section=mol2_section,
            amberparams_section=amberparams_section,
            box_filename=box_path,
            prmtop_filename=prmtop_filename,
            inpcrd_filename=inpcrd_filename)
        print(tleap_commands)

        with open('tleap_commands', 'w') as file_handle:
            file_handle.write(tleap_commands)

        logger.debug('Running tleap in temporary directory.')
        cmd = "tleap -f %s " % file_handle.name
        logger.debug(cmd)

        output = getoutput(cmd)
        logger.debug(output)

    return tleap_commands
def main():
    """Compute a 2D donor-acceptor distribution and hydrogen-bond statistics.

    Positional command-line arguments (read from ``sys.argv``):

    1. topology file
    2. trajectory file
    3. output file for the 2D (D-A distance, cos(angle)) distribution,
       written through ``pm3d_print``
    4. output file for the histogram of short-time averaged H-bond counts
    5. output file for the time evolution of the number of "solid" molecules

    All other settings (atom names, distance range, bin sizes, H-bond
    cutoffs, frame skipping, averaging window) are read interactively from
    standard input.  Distances are entered in angstroms and converted with
    the module-level ``angtonm`` factor; the angle cutoff is entered in
    degrees and converted with ``degtorad``.

    The trajectory is processed in fragments to limit memory use.  A
    donor-H...acceptor triplet counts as a hydrogen bond when the D-A
    distance is within the cutoff and the D-H-A angle lies within the
    requested width of 180 degrees.
    """
    # Part to load coordinate file
    topfile = sys.argv[1]
    trjfile = sys.argv[2]
    outfile = sys.argv[3]
    nhboutfile = sys.argv[4]  # short time n_HB ave outfile
    nsoutfile = sys.argv[5]  # number of solid molecules outfile (time evolution)

    aname1 = input("atom names for donors? ex) OW \n")
    aname2 = input("atom names for hydrogens? ex) HW1 HW2 \n")
    aname3 = input("atom names for acceptors? ex) UO \n")
    rrange = input("minimum, maximum D-A distance in angstroms? ex) 1.00 5.00 \n")
    rsplit = rrange.split()
    rmin, rmax = float(rsplit[0]) * angtonm, float(rsplit[1]) * angtonm
    rbin = float(input("r bin size in angstrom? ex) 0.02 \n")) * angtonm
    abin = float(input("cosine(theta) bin size? ex) 0.02 \n"))
    rnbin, anbin = int((rmax - rmin) / rbin), int(2 / abin)
    hbrcut = float(input("cutoff distance(D-A) for hydrogen bond in angstroms? ex) 3.5 ? \n")) * angtonm
    # In the donor-H -- acceptor triplet, D - A RDF r_min should be considered.
    hbacut = float(input("cutoff angle in degree width from 180, for hydrogen bond? ex) 30 \n"))
    hbamin, hbamax = (180.0 - hbacut) * degtorad, (180.0 + hbacut) * degtorad  # in radian.
    tskip = int(input("Once in how many frames do you want to take? ex) 10 \n"))
    teq = int(input("How many initial frames do you want to cut as equilibration? ex) 5000 \n"))
    thbave = float(input("How many ps to calculate short-time n_HB average? ex) 20 \n"))
    nhbscut = float(input("Desired short-time n_HB cutoff for solid identification? ex) 2.50 \n"))

    start_time = timeit.default_timer()

    # input 1 : load surf traj. (big file)
    traj = md.load(trjfile, top=topfile)
    traj = traj[teq::tskip]
    topology = traj.topology
    nstep, totnmon = traj.n_frames, topology.n_residues
    elapsed = timeit.default_timer() - start_time
    print('finished trajectory loading {}'.format(elapsed))
    print(nstep,' frames ')
    # Number of (already strided) frames in one short-time averaging window.
    nthbave = int(thbave / traj.timestep)

    # Build "name A or name B ..." selection strings for the three atom groups.
    text1 = ' or '.join('name ' + word for word in aname1.split())
    text2 = ' or '.join('name ' + word for word in aname2.split())
    text3 = ' or '.join('name ' + word for word in aname3.split())
    seld = topology.select(text1)
    selh = topology.select(text2)
    sela = topology.select(text3)
    n_atomd, n_atomh, n_atoma = len(seld), len(selh), len(sela)
    print(n_atomd,n_atomh,n_atoma)

    # Collect bonded (donor, hydrogen) index pairs; sets give O(1) membership.
    donor_set, hydrogen_set = set(seld.tolist()), set(selh.tolist())
    dhpairs = []
    for bond in topology.bonds:
        first, second = bond[0].index, bond[1].index
        if first in hydrogen_set and second in donor_set:
            dhpairs.append([second, first])
        elif first in donor_set and second in hydrogen_set:
            dhpairs.append([first, second])

    # Extend every D-H pair with every acceptor on a different residue
    # (should avoid intramolecular atomic pair).
    fulllist_angles, donmonindex_angles = [], []
    for row in dhpairs:
        hydrogen_residue = topology.atom(row[1]).residue
        donor_residue_index = topology.atom(row[0]).residue.index
        for i in sela:
            if hydrogen_residue != topology.atom(i).residue:
                extrow = row.copy()
                extrow.append(i)
                fulllist_angles.append(extrow)
                donmonindex_angles.append(donor_residue_index)
    fulllist_angles, donmonindex_angles = numpy.array(fulllist_angles), numpy.array(donmonindex_angles)
    uniqhbmon = numpy.unique(donmonindex_angles)
    nmon = len(uniqhbmon)  # number of donor residues involved in the Hbond topology tracking.
    print(nmon,'hydrogen bond donating monomers ')
    n_angles_full = len(fulllist_angles)
    print("dhpairs # = {}, full list # angles = {} ".format(len(dhpairs),n_angles_full))

    # prepare fragmental traj calc.(memory saver)
    nfrag = 200 if nstep >= 200 else 1

    # Per-residue, per-frame hydrogen bond counts (indexed by donor residue).
    allmon_hbond_count = numpy.zeros((totnmon, nstep), dtype=int)
    hbtot = 0
    totrdf_2d = None
    for ifrag in range(nfrag):
        # Integer fragment boundaries that together cover every frame, so no
        # trailing frames are lost when nstep is not a multiple of nfrag.
        bstart = (ifrag * nstep) // nfrag
        bend = ((ifrag + 1) * nstep) // nfrag
        fragtraj = traj[bstart:bend]

        # calculate distances between donors and acceptors, angle
        dist = md.compute_distances(fragtraj, fulllist_angles[:, [0, 2]]).flatten()
        angl = md.compute_angles(fragtraj, fulllist_angles).flatten()

        # recalculate NaN values of angles
        for nan_item in numpy.argwhere(numpy.isnan(angl)).reshape(-1):
            # The flattened index is relative to this fragment, so the
            # coordinates must come from fragtraj, not the full trajectory.
            i_frame = nan_item // n_angles_full
            i_angle = nan_item % n_angles_full
            i_abc = fulllist_angles[i_angle]
            frame_xyz = fragtraj.xyz[i_frame]
            a = frame_xyz[i_abc[0]]
            b = frame_xyz[i_abc[1]]
            c = frame_xyz[i_abc[2]]
            print(" <-- position: \n {} \n {} \n {}".format(a,b,c))
            # NOTE(review): this fallback uses raw coordinates without any
            # minimum-image handling - confirm that is acceptable.
            ba = a - b
            bc = c - b
            cosine_angle = numpy.dot(ba, bc) / (numpy.linalg.norm(ba) * numpy.linalg.norm(bc))
            # Rounding can push the cosine marginally outside [-1, 1], which
            # would make arccos return NaN again.
            cosine_angle = numpy.clip(cosine_angle, -1.0, 1.0)
            print(" distance= {}".format(numpy.linalg.norm(bc)))
            angle = numpy.arccos(cosine_angle)
            print(" get correct value from NaN, {} (rad) {} (deg)".format(angle, angle*180.0/numpy.pi))
            angl[nan_item] = angle

        # Flat indices of triplets satisfying both distance and angle criteria.
        is_hbond = (dist <= hbrcut) & (hbamin <= angl) & (angl <= hbamax)
        i_hb = numpy.flatnonzero(is_hbond)
        hbtot += len(i_hb)
        cosangl = numpy.cos(angl)

        # hbond monomer counting section.
        hb_frames = i_hb // n_angles_full + bstart
        hb_monomers = donmonindex_angles[i_hb % n_angles_full]
        # add.at accumulates correctly when a (monomer, frame) pair repeats.
        numpy.add.at(allmon_hbond_count, (hb_monomers, hb_frames), 1)

        # spatd array section - should regard gnuplot pm3d-compatible format.
        # hold x. increment y. when a full cycle of y range ends, make an empty line.
        # histogram
        counts_2d, edge_r, edge_cosa = numpy.histogram2d(
            dist, cosangl, bins=[rnbin, anbin], range=[[rmin, rmax], [-1.0, 1.0]])
        # volume in each radial shell
        vol = numpy.power(edge_r[1:], 3) - numpy.power(edge_r[:-1], 3)
        vol *= 4 / 3.0 * numpy.pi
        # Average number density
        box_vol = numpy.average(fragtraj.unitcell_volumes)
        density = n_angles_full / box_vol
        rdf_2d = (counts_2d * anbin / nstep) / (density * vol[:, None])
        if totrdf_2d is None:
            totrdf_2d = numpy.copy(rdf_2d)
        else:
            totrdf_2d += rdf_2d

        if ifrag % 10 == 0:
            elapsed = timeit.default_timer() - start_time
            print('finished snapshot {} time {}'.format(ifrag,elapsed))

    pm3d_print(outfile, totrdf_2d, rmin, rbin, -1.0, abin)

    # average number of H-bonds normalization : when D-H -- A exist, number of
    # A atoms satisfies criterion, per one D-H.
    # therefore, (Total count in the whole trajectory)/((# of frames)*(total# of D-H pairs in 1 system))
    hbavg = hbtot / (nstep * len(dhpairs))
    print('Detected H-bond rcut {:8.3f} A angcut {:8.3f} deg totcount {} avg {:11.4f}'.format(hbrcut/angtonm,hbacut,hbtot,hbavg))

    # Short-time n_HB average: per-snapshot counts -> window averages ->
    # probability distribution.  A molecule whose window average reaches
    # nhbscut is counted as solid for that window.
    nstint = int(nstep / nthbave)
    print(nstint,' number of short interval ')
    stint, nsol = numpy.zeros(nstint), numpy.zeros(nstint)
    mon_hbond_count = allmon_hbond_count[uniqhbmon]
    print(numpy.sum(mon_hbond_count), ' double check total HB count over molecules ')
    window_averages = []
    for k in range(nstint):
        stint[k] = thbave * k
        stave = mon_hbond_count[:, k * nthbave:(k + 1) * nthbave].mean(axis=1)
        window_averages.append(stave)
        nsol[k] = numpy.count_nonzero(stave >= nhbscut)
    stave_nhb = numpy.concatenate(window_averages) if window_averages else numpy.empty(0)

    hist_stave_nhb, bin_edges = numpy.histogram(stave_nhb, bins=40, range=[0.0, 4.0], density=True)
    # Two columns: left bin edge, probability density.
    numpy.savetxt(nhboutfile, numpy.column_stack((bin_edges[:-1], hist_stave_nhb)))
    # Two columns: window start time, number of solid molecules.
    numpy.savetxt(nsoutfile, numpy.column_stack((stint, nsol)))

    elapsed = timeit.default_timer() - start_time
    print('finished job {}'.format(elapsed))
def equilibrate(self, ff_name, water_name):
    """Solvate, minimize and equilibrate the system for one force field / water pair.

    Does nothing if the equilibrated PDB file already exists.  Otherwise the
    initial PDB is solvated, a PME system with HBonds constraints and a
    Monte Carlo barostat is built, and a Langevin simulation is run on the
    CUDA platform for ``self.n_equil_steps`` steps.  Afterwards the last DCD
    frame is written as the equilibrated PDB, and the atoms with
    ``chainID == 0`` are written to the protein-only PDB.
    """
    initial_pdb = self.get_initial_pdb_filename(ff_name, water_name)
    final_pdb = self.get_equil_pdb_filename(ff_name, water_name)
    final_dcd = self.get_equil_dcd_filename(ff_name, water_name)
    protein_pdb = self.get_equil_protein_pdb_filename(ff_name, water_name)

    utils.make_path(final_pdb)

    # Guard clause: a previous run already produced the equilibrated structure.
    if os.path.exists(final_pdb):
        return

    forcefield = app.ForceField('%s.xml' % ff_name, '%s.xml' % water_name)
    starting_structure = app.PDBFile(initial_pdb)

    solvated = app.Modeller(starting_structure.topology,
                            starting_structure.positions)
    solvated.addSolvent(forcefield,
                        model=water_mapping[water_name],
                        padding=self.padding,
                        ionicStrength=self.ionic_strength)
    topology = solvated.getTopology()
    positions = solvated.getPositions()

    system = forcefield.createSystem(topology,
                                     nonbondedMethod=app.PME,
                                     nonbondedCutoff=self.cutoff,
                                     constraints=app.HBonds)
    integrator = mm.LangevinIntegrator(self.temperature,
                                       self.equil_friction,
                                       self.equil_timestep)
    barostat = mm.MonteCarloBarostat(self.pressure, self.temperature,
                                     self.barostat_frequency)
    system.addForce(barostat)

    # The CUDA device is taken from the environment.
    platform = mm.Platform.getPlatformByName("CUDA")
    platform.setPropertyDefaultValue("CudaDeviceIndex",
                                     os.environ["CUDA_VISIBLE_DEVICES"])

    simulation = app.Simulation(topology, system, integrator,
                                platform=platform)
    simulation.context.setPositions(positions)

    print('Minimizing.')
    simulation.minimizeEnergy()
    simulation.context.setVelocitiesToTemperature(self.temperature)

    print('Equilibrating.')
    pdb_reporter = app.PDBReporter(final_pdb, self.n_equil_steps - 1)
    simulation.reporters.append(pdb_reporter)
    dcd_reporter = app.DCDReporter(final_dcd, self.equil_output_frequency)
    simulation.reporters.append(dcd_reporter)
    simulation.step(self.n_equil_steps)

    del simulation
    del system

    # Overwrite the reporter's PDB with the last frame of the DCD output.
    last_frame = md.load(final_dcd, top=final_pdb)[-1]
    last_frame.save(final_pdb)

    # Keep only the atoms of chain 0 for the protein-only structure.
    atom_table = last_frame.top.to_dataframe()[0]
    chain0_atoms = atom_table.index[atom_table.chainID == 0].values
    last_frame.restrict_atoms(chain0_atoms)
    last_frame.save(protein_pdb)
totalEnergy=True, potentialEnergy=True, temperature=True, density=True, progress=True, remainingTime=True, speed=True, totalSteps=totaltime, systemMass=totalMass, separator='\t') simulation_refs.reporters.append(log_reporter) # Run it0 = (int(numiter) - 1) * nframe itN = it0 + nframe ts = md.load(traj, top=sysgro) ucell_vs = ts.unitcell_vectors for it, t in enumerate(ts): if it < it0: continue elif it == itN: break # set velocities set_vel(context) # set positions pos = t.openmm_positions(0) print('################') print('frame = {0:04d}'.format(it)) print(pos[0]) context.setPositions(pos) # set periodicbixvectors
def atoms_from_topology(topology):
    """Return the ``element`` column of a topology file's atom table as a list.

    ``topology`` is passed directly to ``md.load``.
    """
    # to_dataframe() returns (atom table, bonds); only the atom table is used.
    atom_table = md.load(topology).topology.to_dataframe()[0]
    return atom_table['element'].tolist()
def parse(filepath):
    """Load ``filepath`` with ``md.load`` and return the result.

    The loaded object was previously discarded, so the function always
    returned ``None``; callers that ignored the return value are unaffected.
    """
    return md.load(filepath)
# List all possible user input inputs=parser.add_argument_group('Input arguments') inputs.add_argument('-h', '--help', action='help') inputs.add_argument('-top', action='store', dest='structure',help='Structure file corresponding to trajectory',type=str,required=True) inputs.add_argument('-traj', action='store', dest='trajectory',help='Trajectory',type=str,required=True) inputs.add_argument('-sel', action='store', dest='sel', help='Atom selection',type=str,default='name CA') inputs.add_argument('-min', action='store', dest='minimum_membership', help='Minimum number of frames in a cluster',type=int,default=2) inputs.add_argument('-cutoff', action='store', dest='cutoff', help='maximum cluster radius',type=float,required=True) inputs.add_argument('-o', action='store', dest='out_name',help='Output directory',type=str,required=True) # Parse into useful form UserInput=parser.parse_args() topology = UserInput.structure trajectory = UserInput.trajectory t = md.load(trajectory,top=topology) n_frames = t.n_frames sel = t.topology.select(UserInput.sel) t = t.atom_slice(sel) tempfile = tempfile.NamedTemporaryFile() distances = np.memmap(tempfile.name, dtype=float, shape=(n_frames,n_frames)) #distances = np.empty((n_frames, n_frames), dtype=float) t.center_coordinates() for i in range(n_frames): distances[i] = md.rmsd(target=t, reference=t, frame=i, precentered=True) t = None cutoff_mask = distances <= UserInput.cutoff distances = None centers = []
def test_mismatch():
    """Loading an xtc with a topology of a different size must raise ValueError."""

    # loading a 22 atoms xtc with a topology that has 2,000 atoms
    # some kind of error should happen!
    def load_with_wrong_topology():
        return md.load(get_fn('frame0.xtc'), top=get_fn('4K6Q.pdb'))

    assert_raises(ValueError, load_with_wrong_topology)
import numpy as np import mdtraj as md import glob import os for file in glob.glob( '/home/jf8/NarK-MD/strip-trajs-combined/NarK_stripped_dcd/*.dcd'): t = md.load(file, top='/home/jf8/NarK-MD/strip-trajs-combined/NarK-strip.pdb') dist = md.compute_contacts(t, contacts=[[112, 333], [249, 377], [54, 85], [125, 213], [54, 67], [133, 423], [358, 371], [220, 371], [109, 419], [193, 374], [35, 245], [9, 198], [62, 335], [123, 183], [280, 325], [225, 326], [25, 117], [4, 359], [383, 397], [208, 234], [9, 164], [157, 291], [162, 420], [124, 439], [48, 171], [241, 320], [52, 147], [29, 436], [53, 445], [125, 155], [294, 392], [99, 189], [230, 274], [92, 128], [91, 151], [97, 338], [356, 425], [200, 444], [351, 439], [138, 197], [42, 169], [234, 425], [110, 331], [36, 43], [240, 347], [160, 245], [6, 50], [293, 396], [287, 299], [25, 158], [13, 233], [22, 321], [210, 369], [29, 204], [230, 421], [256, 275], [205, 424], [237, 313], [117, 146], [34, 63], [377, 443]],
def test_seek_read_mode():
    """Test the seek/tell capacity of the different TrajectoryFile objects in
    read mode.

    Basically, we just seek around the files and read different segments,
    keeping track of our location manually and checking with both tell() and
    by checking that the right coordinates are actually returned.
    """
    # (TrajectoryFile class, data file) pairs to exercise.
    # NOTE(review): the BINPOS entry appears twice - presumably unintended.
    files = [
        (md.NetCDFTrajectoryFile, 'frame0.nc'),
        (md.HDF5TrajectoryFile, 'frame0.h5'),
        (md.XTCTrajectoryFile, 'frame0.xtc'),
        (md.TRRTrajectoryFile, 'frame0.trr'),
        (md.DCDTrajectoryFile, 'frame0.dcd'),
        (md.MDCRDTrajectoryFile, 'frame0.mdcrd'),
        (md.BINPOSTrajectoryFile, 'frame0.binpos'),
        (md.BINPOSTrajectoryFile, 'frame0.binpos'),
        (md.LH5TrajectoryFile, 'legacy_msmbuilder_trj0.lh5'),
    ]

    for a, b in files:
        # ``point`` is the frame index we expect the file handle to be at.
        point = 0
        # Reference coordinates loaded in one go.
        xyz = md.load(get_fn(b), top=get_fn('native.pdb')).xyz
        length = len(xyz)
        kwargs = {}
        if a is md.MDCRDTrajectoryFile:
            # The MDCRD reader is given the atom count explicitly.
            kwargs = {'n_atoms': 22}

        with a(get_fn(b), **kwargs) as f:
            for i in range(100):
                r = np.random.rand()

                if r < 0.25:
                    # Relative seek; fall back to rewinding when the target
                    # would leave the open interval (0, length).
                    offset = np.random.randint(-5, 5)
                    if 0 < point + offset < length:
                        point += offset
                        f.seek(offset, 1)
                    else:
                        f.seek(0)
                        point = 0
                # Note: this is ``if``, not ``elif`` - when r < 0.25 the read
                # below runs in addition to the relative seek above.
                if r < 0.5:
                    offset = np.random.randint(1, 10)
                    if point + offset < length:
                        read = f.read(offset)
                        if a not in [
                                md.BINPOSTrajectoryFile, md.LH5TrajectoryFile
                        ]:
                            # For the other readers the first element of the
                            # returned value is used as the coordinates.
                            read = read[0]
                        readlength = len(read)
                        # Convert from the file's distance unit to nanometers
                        # before comparing with the reference coordinates.
                        read = mdtraj.utils.in_units_of(
                            read, f.distance_unit, 'nanometers')
                        eq(xyz[point:point + offset], read)
                        point += readlength
                elif r < 0.75:
                    # Seek relative to the end of the file.
                    offset = np.random.randint(low=-100, high=0)
                    try:
                        f.seek(offset, 2)
                        point = length + offset
                    except NotImplementedError:
                        # not all of the *TrajectoryFiles currently support
                        # seeking from the end, so we'll let this pass if they
                        # say that they dont implement this.
                        pass
                else:
                    # Absolute seek from the start of the file.
                    offset = np.random.randint(100)
                    f.seek(offset, 0)
                    point = offset

                # After every step the handle must report the tracked position.
                eq(f.tell(), point)
def test_load_pdb_box():
    """Yield checks of the unit cell read from ``native2.pdb``."""
    box_traj = md.load(get_fn('native2.pdb'))

    def check_lengths():
        return eq(box_traj.unitcell_lengths[0], np.array([0.1, 0.2, 0.3]))

    def check_angles():
        return eq(box_traj.unitcell_angles[0], np.array([90.0, 90.0, 90.0]))

    def check_vectors():
        expected = np.array([[0.1, 0, 0], [0, 0.2, 0], [0, 0, 0.3]])
        return eq(box_traj.unitcell_vectors[0], expected)

    yield check_lengths
    yield check_angles
    yield check_vectors
def test_slice2():
    """Check that integer and list indexing select the same first frame."""
    t = md.load(get_fn('traj.h5'))

    def check_first_frame_matches():
        # The comparison result used to be returned without being asserted,
        # so this check could never fail.
        assert t[0] == t[[0, 1]][0]

    yield check_first_frame_matches
def f():
    """Check that ``len`` of an opened trajectory file matches the loaded trajectory.

    ``file`` and ``kwargs`` come from the enclosing scope.  Raises
    ``SkipTest`` when the operation is reported as not implemented.
    """
    try:
        # Use the handle as a context manager so it is closed even when
        # len() raises NotImplementedError.
        with md.open(get_fn(file), **kwargs) as handle:
            n_frames_open = len(handle)
        eq(n_frames_open,
           len(md.load(get_fn(file), top=get_fn('native.pdb'))))
    except NotImplementedError as e:
        raise SkipTest(e)
# Locations taken from the setup options.
setup_directory = setup_options['output_directory']
ligand_filename = setup_options['ligand_filename']

# Enumerate every ordered pair of distinct ligand indices and pick the pair
# addressed by molecule_index.
n_ligand_range = list(range(n_ligands))
ligand_permutations = list(itertools.permutations(n_ligand_range, 2))
ligand_pair_to_compute = ligand_permutations[molecule_index]

initial_ligand = ligand_pair_to_compute[0]
proposal_ligand = ligand_pair_to_compute[1]
use_sterics = False
temperature = 300.0*unit.kelvin

# Snapshots for the initial ligand, stored as "<prefix>_<index>.h5".
equilibrium_snapshots_filename = os.path.join(equilibrium_output_directory, "{}_{}.h5".format(project_prefix, initial_ligand))
configuration_traj = md.load(equilibrium_snapshots_filename)

file_to_read = os.path.join(setup_directory, "{}_{}_initial.npy".format(project_prefix, initial_ligand))

# NOTE(review): unpacking four heterogeneous objects suggests the .npy file
# holds a pickled object array; recent numpy versions refuse to load those
# unless allow_pickle=True is passed, and unpickling must only ever be done
# on trusted files - confirm before changing.
positions, topology, system, initial_smiles = np.load(file_to_read)
# Convert the stored topology to an OpenMM one and copy the system's default box.
topology = topology.to_openmm()
topology.setPeriodicBoxVectors(system.getDefaultPeriodicBoxVectors())

# NOTE(review): the input stream is not closed within this fragment.
ifs = oechem.oemolistream()
ifs.open(ligand_filename)

# get the list of molecules
mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()]

# Title each molecule by its position in the list: MOL0, MOL1, ...
for idx, mol in enumerate(mol_list):
    mol.SetTitle("MOL{}".format(idx))
def f():
    """Save ``t`` to ``e``, reload it, and compare coordinates and times.

    ``t``, ``e`` and ``nat`` come from the enclosing scope.
    """
    t.save(e)
    reloaded = md.load(e, top=nat)
    # Same two comparisons, in the same order: xyz first, then time.
    for attribute in ('xyz', 'time'):
        eq(getattr(t, attribute), getattr(reloaded, attribute), err_msg=e)
def analyze(self, debug=False, **kwargs):
    """Analyze an NVT vapor-liquid run from ``nvt.edr`` / ``nvt.xtc`` / ``nvt.tpr``.

    The trajectory after an initial convergence point is cut into 200 ps
    pieces.  For each piece the density profile is computed, the gas phase
    is moved to the center of the box, and a tanh profile is fitted to both
    interfaces to obtain liquid and gas densities and phase thicknesses.

    :param debug: if True, print intermediate values, keep the density xvg
        files, track per-molecule phases, and store extra series on ``self``.
    :param kwargs: accepted but not used here.
    :return: ``{'failed': True, 'reason': ...}`` when the system is frozen or
        too few pieces show an interface; ``None`` when the densities are not
        converged over the required time span; otherwise a dict of properties
        (see the return statement for keys and units).
    """
    import pandas as pd
    from ...panedr import edr_to_df
    from ...analyzer.series import is_converged
    from ...analyzer.structure import check_vle_density

    df = edr_to_df('nvt.edr')
    potential_series = df.Potential
    length = potential_series.index[-1]

    ### Check structure freezing using Diffusion of COM of molecules. Only use last 400 ps of data
    diffusion, _ = self.gmx.diffusion('nvt.xtc', 'nvt.tpr', mol=True, begin=length - 400)
    if diffusion < 1E-8:  # cm^2/s
        return {'failed': True, 'reason': 'freeze'}

    # use potential to do a initial determination
    # use at least 4/5 of the data
    _, when = is_converged(potential_series, frac_min=0)
    when = min(when, length * 0.2)

    # cut trj to pieces, analyze the change of density of liq and gas phases
    dt = potential_series.index[1] - potential_series.index[0]
    n_frame = len(potential_series.loc[when:])
    dt_piece = 200  # ps
    # use int in stead of math.ceil. make sure each peace has at least dt_piece
    n_pieces = int((potential_series.index[-1] - when) / dt_piece)
    if debug:
        print('n_frame=%i dt=%f dt_piece=%f n_pieces=%i' % (n_frame, dt, dt_piece, n_pieces))

    # Thickness of the liquid phase, gas phase and interfaces, per piece.
    self.lz_liq_series = pd.Series()
    self.lz_gas_series = pd.Series()
    self.lz_int_series = pd.Series()

    import mdtraj
    time: [float] = []  # time of frames, saved to calculate vaporize
    Ngas: [int] = []  # number of molecules in gas phases
    phases: [[str]] = []  # [['l', 'g','i'], ...]
    # n_atoms * n_frames

    # Both helpers read idxmax, lz, r1, r2, s1, s2 and mul from the enclosing
    # scope, i.e. the values of the piece currently being processed.
    def in_which_phase(z):
        z -= idxmax
        while z < 0:
            z += lz
        while z > lz:
            z -= lz

        if z < r1 - s1 * mul or z > r2 + s2 * mul:
            return 'l'  # liquid phase
        elif z > r1 + s1 * mul + 1 and z < r2 - s2 * mul - 1:  # shrink 1 nm for gas phase near interface
            return 'g'  # gas phase
        else:
            return 'i'  # interface

    def in_gas(z):
        z -= idxmax
        while z < 0:
            z += lz
        while z > lz:
            z -= lz

        if z > r1 + s1 * mul and z < r2 - s2 * mul:
            return True
        return False

    dliq_series = pd.Series()  # density of liquid phase timeseries
    dgas_series = pd.Series()  # density of gas phase timeseries
    for n in range(n_pieces):
        xvg = 'density-%i.xvg' % n
        begin = when + dt_piece * n
        end = when + dt_piece * (n + 1) - dt
        if debug:
            print('Time: ', begin, end)
        self.gmx.density('nvt.xtc', 'nvt.tpr', xvg=xvg, begin=begin, end=end, silent=True)
        # Raw string: '\s' in a plain string literal is an invalid escape sequence.
        df = pd.read_csv(xvg, skiprows=24, names=['Density'], index_col=0, sep=r'\s+')
        density_series = df.Density
        if not debug:
            os.remove(xvg)

        dz = density_series.index[1] - density_series.index[0]
        lz = dz * len(density_series)

        # Move gas phase to center. Series index starts from 0
        idxmax = density_series.idxmax()
        density_series.index -= density_series.idxmax()
        _index_list = density_series.index.tolist()
        for i, v in enumerate(_index_list):
            if v < 0:
                _index_list[i] += lz
        density_series.index = _index_list
        density_series = density_series.sort_index()

        is_interface, is_gas_center, nodes = check_vle_density(density_series)
        if not is_interface or not is_gas_center:
            continue

        # Move gas phase to exact center. Series index not starts from 0
        center = sum(nodes) / 2
        _index_list = density_series.index.tolist()
        if center < lz / 2:
            shift = lz / 2 - center
            for i, v in enumerate(_index_list):
                if v > lz - shift:
                    _index_list[i] -= lz
        else:
            shift = center - lz / 2
            for i, v in enumerate(_index_list):
                if v < shift:
                    _index_list[i] += lz
        density_series.index = _index_list
        density_series = density_series.sort_index()

        # Fit Tanh to determine the thickness of liquid phase
        dens_series_left = density_series.loc[:center]
        dens_series_righ = density_series.loc[center:]
        _max = max(density_series)
        _min = min(density_series)
        _c = (_max + _min) / 2
        _A = (_max - _min) / 2
        _coef1, _score1 = fit_vle_tanh(dens_series_left.index, dens_series_left, guess=[_c, -_A, nodes[0], 1])
        _coef2, _score2 = fit_vle_tanh(dens_series_righ.index, dens_series_righ, guess=[_c, _A, nodes[1], 1])
        c1, A1, r1, s1 = _coef1  # A1 is negative
        c2, A2, r2, s2 = _coef2  # A2 is positive

        # Check if density fluctuate along the z axis
        if abs(c1 - A1 - density_series.max()) > 50 / 1000 * density_series.max() or \
                abs(c2 + A2 - density_series.max()) > 50 / 1000 * density_series.max() or \
                abs(c1 + A1 - density_series.min()) > 100 / 1000 * density_series.max() or \
                abs(c2 - A2 - density_series.min()) > 100 / 1000 * density_series.max():
            continue

        mul = 2.6466524123622457  # arctanh(0.99)
        lz_liq = r1 - density_series.index[0] + density_series.index[-1] - r2 + dz - s1 * mul - s2 * mul
        lz_gas = r2 - r1 - s1 * mul - s2 * mul
        lz_int = (s1 + s2) * mul * 2  # thickness of two interfaces
        if debug:
            print('Thickness of liquid, gas and interfaces: ', lz_liq, lz_gas, lz_int)

        self.lz_liq_series.at[begin] = lz_liq
        self.lz_gas_series.at[begin] = lz_gas
        self.lz_int_series.at[begin] = lz_int

        # Liquid phase should be at least 2 nm. Gas phase should be at least 5 nm
        if lz_liq < 2 or lz_gas < 5:
            continue

        d_liq = (c1 - A1 + c2 + A2) / 2
        d_gas = (c1 + A1 + c2 - A2) / 2
        d_gas = max(d_gas, 0)
        dliq_series.at[begin] = d_liq
        dgas_series.at[begin] = d_gas

        if debug:
            # Classify every center-of-mass position of this piece by phase.
            gro_com = 'com-%i.gro' % n
            self.gmx.traj_com('nvt.xtc', 'nvt.tpr', gro_com, begin=begin, end=end, silent=True)
            trj: mdtraj.Trajectory = mdtraj.load(gro_com)
            if phases == []:
                phases = [[] for i in range(trj.n_atoms)]

            for n_frame, xyz_array in enumerate(trj.xyz):
                time.append(trj.time[n_frame])
                Ngas.append(0)
                for n_atom, xyz in enumerate(xyz_array):
                    phase = in_which_phase(xyz[2])
                    phases[n_atom].append(phase)
                    if in_gas(xyz[2]):
                        Ngas[-1] += 1

    if debug:
        self.dliq_series = dliq_series
        self.dgas_series = dgas_series
        self.Ngas_series = pd.Series(Ngas, time)
        self.phase_series_list = [pd.Series(l, time) for l in phases]
        print(dliq_series)
        print(dgas_series)

    # Failed if more than 1/4 pieces do not have interface
    # Failed if pieces at last 1/5 time span have no interface
    # An empty series means no piece was accepted at all; check it first so
    # that index[-1] is never evaluated on an empty index.
    if dliq_series.empty or len(dliq_series) < n_pieces * 0.75 \
            or dliq_series.index[-1] < length * 0.8:
        return {'failed': True, 'reason': 'no_interface'}

    _, when_liq = is_converged(dliq_series, frac_min=0)
    _, when_gas = is_converged(dgas_series, frac_min=0)
    # Convergence should be at least 4 ns
    if when_liq > length - 4000:
        return None
    if when_gas > length - 4000:
        if dgas_series.loc[when_gas:].mean() > 5:
            return None
        else:
            # Even if not converge. The density is so small < 5 kg/m^3. Considered as converged.
            when_gas = length - 4000

    when = max(when_liq, when_gas)

    if debug:
        self.N_vaporize = self.N_condense = 0
        for phase_series in self.phase_series_list:
            n_vap, n_con = N_vaporize_condense(phase_series.loc[when:])
            self.N_vaporize += n_vap
            self.N_condense += n_con

    temperature_and_stderr, pressure_and_stderr, pzz_and_stderr, st_and_stderr, potential_and_stderr = \
        self.gmx.get_properties_stderr('nvt.edr',
                                       ['Temperature', 'Pressure', 'Pres-ZZ', '#Surf*SurfTen', 'Potential'],
                                       begin=when)
    return {
        'length': length,
        'converge': when,
        'temperature': temperature_and_stderr,  # K
        'pressure': pressure_and_stderr,  # bar
        'pzz': pzz_and_stderr,  # bar
        'st': [i / 20 for i in st_and_stderr],  # mN/m
        'potential': potential_and_stderr,  # kJ/mol
        'dliq': list(block_average(dliq_series.loc[when:] / 1000)),  # g/mL
        'dgas': list(block_average(dgas_series.loc[when:] / 1000)),  # g/mL
    }
args = get_options()

#=======================================
# assign the passed arguments and read the trajectory
#=======================================
traj = args.trj
topology = args.topology

print('Reading trajectory ', args.trj, '...')
try:
    pca_traj = md.load(traj, top=topology)
except Exception as err:
    # Catch Exception rather than everything so KeyboardInterrupt/SystemExit
    # pass through, and chain the cause so the real failure stays visible.
    raise IOError('Could not open trajectory {0} for reading. \n'.format(traj)) from err

top = pca_traj.topology

atm_name = args.atm_grp
sele_grp = get_trajectory(atm_name, top)
atom_indices = args.atom_indices

# take the input trj name for output directory:
# the last '/'-separated component of out_dir, prefixed with 'out_'
out_dir = 'out_' + args.out_dir.split('/')[-1]
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(out_dir, exist_ok=True)
def generate_scan_input(root, filetype, mol_name, method, basis_set, dihedral=None, charge=0, multiplicity=1,
                        symmetry=None, geom_opt=True, sp_energy=False, mem=None):
    """
    Write a psi4 input file next to every structure file under a directory.

    The directory tree below ``root`` is walked and every file matching
    ``*.<filetype>`` is converted.  The dihedral angle to fix is taken from
    the name of the directory that directly contains each file.  Each output
    is written alongside its input with the extension replaced by ``dat``.

    :param root: str
        path to files
    :param filetype: str
        input filetypes (file extension without the leading dot)
    :param mol_name: str
        molecule name
    :param method: list of str
        QM method (see psi4 website for options)
    :param basis_set: list of str
        see psi4 website for options
    :param dihedral: str
        index of atoms that should remain fixed. format '1 2 3 4'. When not
        given, it is built from the first four '_'-separated fields of the
        last component of ``root``.
    :param charge: int
        default 0
    :param multiplicity: int
        default 1
    :param symmetry: str
        symmetry of molecule. default None
    :param geom_opt: bool
        if True, run geometry optimization
    :param sp_energy: bool
        if True, run a single point energy calculation after geomoetry optimization
    :param mem: str
        memory allocation
    """
    if not dihedral:
        fields = list(filter(None, root.split('/')))[-1].split('_')
        # Explicit indexing keeps the IndexError for names with < 4 fields.
        dihedral = ' '.join(fields[i] for i in range(4))

    pattern = "*.{}".format(filetype)
    input_files = [
        os.path.join(path, name)
        for path, subdir, files in os.walk(root)
        for name in files
        if fnmatch(name, pattern)
    ]

    # These exact angles are nudged by 0.001 before being written.
    nudged_angles = {'0': '0.001', '180': '180.001', '360': '360.001'}

    for f in input_files:
        # The containing directory's name is the fixed dihedral angle.
        fixed_dih_angle = f.split('/')[-2]
        fixed_dih_angle = nudged_angles.get(fixed_dih_angle, fixed_dih_angle)
        dihedral_string = dihedral + ' ' + fixed_dih_angle

        mol = md.load(f)
        # Convert to Angstroms (once per file rather than once per atom).
        xyz = mol.xyz[0] * 10
        starting_geom = "".join(
            " {} {:05.3f} {:05.3f} {:05.3f}\n".format(
                atom.element.symbol, xyz[i][0], xyz[i][1], xyz[i][2])
            for i, atom in enumerate(mol.topology.atoms))

        output = pdb_to_psi4(starting_geom=starting_geom, mol_name=mol_name, method=method,
                             basis_set=basis_set, charge=charge, multiplicity=multiplicity,
                             symmetry=symmetry, geom_opt=geom_opt, sp_energy=sp_energy,
                             fixed_dih=dihedral_string, mem=mem)

        # Swap only the trailing extension.  str.replace would also rewrite
        # any directory or file-name part that happens to contain the
        # extension text (e.g. 'pdb_scan/30/mol.pdb').
        filename = f[:len(f) - len(filetype)] + 'dat'
        with open(filename, 'w') as psi4_input:
            psi4_input.write(output)
import mdtraj
import numpy as np
import os
import glob

# Make sure the output directory exists
if not os.path.isdir('pdbfiles'):
    os.mkdir('pdbfiles')

# Trajectory prefixes are derived from the *_gaff2.nc file names
trajfiles_gaff2 = glob.glob('trajectories/*_gaff2.nc')
traj_prefixes = [path.replace('_gaff2.nc', '') for path in trajfiles_gaff2]

# Convert every (trajectory type, molecule) combination
for ttype in ['', '_gaff', '_gaff2']:
    for tprefix in traj_prefixes:
        molname = os.path.basename(tprefix)

        # NetCDF trajectory for this molecule and trajectory type
        trajfile = 'trajectories/' + molname + ttype + '.nc'
        # The chemical contents come from a previously stored PDB file,
        # which is used as the topology
        topfile = 'minimized/' + molname + '_smirff.pdb'
        traj = mdtraj.load(trajfile, top=topfile)

        # Superpose on frame 0, then write a PDB for viewing
        traj.superpose(traj[0])
        outname = molname + ttype + '.pdb'
        traj.save(os.path.join('pdbfiles', outname))
import mdtraj
import numpy

# Parent state and segment trajectory share the same topology file.
parent = mdtraj.load('parent.xml', top='bstate.pdb')
traj = mdtraj.load('seg.dcd', top='bstate.pdb')

# Periodic distance between atoms 0 and 1 in both.
dist_parent = mdtraj.compute_distances(parent, [[0, 1]], periodic=True)
dist_traj = mdtraj.compute_distances(traj, [[0, 1]], periodic=True)

# numpy.append flattens both arrays into one 1-D series, parent first.
dist = numpy.append(dist_parent, dist_traj)

# Scale by 10 before writing.
d_arr = numpy.asarray(dist) * 10
numpy.savetxt("dist.dat", d_arr)
from simtk import unit

# Expect exactly seven arguments after the script name.
if len(sys.argv) != 8:
    # The usage template contains %s for the program name; fill it in
    # instead of printing the literal placeholder.
    print(
        'usage %s <cuda device index> < temp K > < t_equil ns > < t_sim ns > < fric 1/ps > < pdb > < frame > '
        % sys.argv[0])
    # sys.exit works even where the interactive ``exit`` helper is absent.
    sys.exit(1)

temp = float(sys.argv[2])
t_equil = float(sys.argv[3])
t_sim = float(sys.argv[4])
fric = float(sys.argv[5])
pdb_str = sys.argv[6]
pdb_frame = int(sys.argv[7])

pdb = md.load(pdb_str)
# Convert the topology once and reuse it for the system and the simulation.
openmm_topology = pdb.topology.to_openmm()

forcefield = app.ForceField('amber99sbildn.xml', 'amber99_obc.xml')

system = forcefield.createSystem(openmm_topology,
                                 nonbondedMethod=app.CutoffNonPeriodic,
                                 nonbondedCutoff=2.0 * unit.nanometers,
                                 constraints=app.HBonds)
integrator = mm.LangevinIntegrator(temp * unit.kelvin,
                                   fric / unit.picoseconds,
                                   2.0 * unit.femtoseconds)
integrator.setConstraintTolerance(0.00001)

platform = mm.Platform.getPlatformByName('CUDA')
# The CUDA device index is the first command-line argument.
properties = {'CudaPrecision': 'mixed', 'CudaDeviceIndex': sys.argv[1]}
simulation = app.Simulation(openmm_topology, system, integrator,
                            platform, properties)
# Start from the requested frame of the loaded structure file.
simulation.context.setPositions(pdb.xyz[pdb_frame])