def main():
    """Command-line entry point: fit a trajectory and write the result.

    Reads the template trajectory (``--trr``) with its topology, replaces the
    coordinates with the array loaded from ``--trj``, runs ``preprocess`` and
    ``recursive_fitting``, and saves the fitted coordinates. The output format
    follows the extension of ``--out``: ``.trr`` or ``.npy`` writes that single
    file; any other name writes both ``<out>.trr`` and ``<out>.npy``.
    """
    cli = argparse.ArgumentParser(description='fitting trajectory.')
    # (flags, keyword settings) in the order they appear in --help.
    option_specs = (
        (('-t', '--trr'),
         dict(default=MAINCHAIN_TRR, help='trajectory file (.trr)')),
        (('--trj',),
         dict(required=True, help='.npy')),
        (('-p', '--topology'),
         dict(required=True, help='topology file (.gro, .pdb)')),
        (('-r', '--recursive'),
         dict(action='store_true', default=False, help='do fitting 2 times')),
        (('-o', '--out'),
         dict(required=True,
              help='output file path (filename.trr or filename.npy or filename)')),
        (('-w', '--max_wokers'),
         dict(default=2, type=int, help='max_wokers of multi-process')),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    args = cli.parse_args()

    # --- read input ---
    loaded = md.load_trr(args.trr, top=args.topology)
    if args.trj:
        loaded.xyz = load_trj(args.trj)
    frame_total = loaded.n_frames
    print(f'Trajectory Info ({frame_total} frames, {loaded.n_atoms} atoms)')

    # --- preprocess ---
    prepared, atomlist, wlist = preprocess(loaded)

    # --- fitting ---
    fitted_xyz = recursive_fitting(prepared.xyz, wlist, args.max_wokers, args.recursive)

    # --- wrap the fitted array back into a trajectory ---
    fitted = md.Trajectory(fitted_xyz, prepared.topology)

    # --- save: choose targets from the output extension ---
    out_path = args.out
    suffix = os.path.splitext(out_path)[1]
    if suffix == ".trr":
        trr_target, npy_target = out_path, None
    elif suffix == ".npy":
        trr_target, npy_target = None, out_path
    else:
        trr_target, npy_target = out_path + '.trr', out_path + '.npy'

    if trr_target is not None:
        fitted.save_trr(trr_target)
    if npy_target is not None:
        np.save(npy_target, fitted.xyz)
def main(argv):
    """Parse command-line options and compute correlators for one run.

    Options (see the getopt specification below):
      -h                 print usage and exit
      -i / --ifile       run name (required)
      -o / --ofile       output file; used for the first q value only, later
                         q values get an auto-generated file name
      -q / --q_inv       one q_inverse value, or several separated by '/'
      -p / --n_phi       number of phi values (integer)
      -r / --phi_range   'start/end' of phi, in units of pi
      -s / --fstart      first frame used (integer, default 1)
      -e / --fend        slice end of the frame window (integer, exclusive)

    :arg argv: argument list without the program name
    :type argv: list

    Exits with status 2 on an invalid option or option value, on a missing
    run name, or when the requested frame window is empty.
    """
    # default values for options
    run_name = None
    outputfile = None
    number_qs = 10
    frame_start = 1
    frame_end = None
    q_range = [0.3]
    phi_range = [0, 1.0]

    try:
        opts, args = getopt.getopt(argv, "hi:o:q:r:p:s:e:", [
            "ifile=", "ofile=", "q_inv=", "n_phi=", "phi_range=", "fstart=",
            "fend="
        ])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            run_name = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-q", "--q_inv"):
            qs = arg.split('/')
            print(qs)
            try:
                q_range = [float(this_q) for this_q in qs]
            except ValueError:
                if len(qs) > 1:
                    print('Enter a single value of q of a range of values separated by /')
                else:
                    print('Enter a single value of q of a range of values separated by space')
                sys.exit(2)
        elif opt in ("-r", "--phi_range"):
            phis = arg.split('/')
            try:
                if len(phis) != 2:
                    raise ValueError(arg)
                phi_range = [float(this_phi) for this_phi in phis]
            except ValueError:
                print('Enter two values (in units of pi) for starting and ending phi separated by /. ')
                sys.exit(2)
        elif opt in ("-p", "--n_phi", "-s", "--fstart", "-e", "--fend"):
            # Integer options: report bad input the same way as the float
            # options instead of dying with a traceback.
            try:
                int_value = int(arg)
            except ValueError:
                print('Option %s expects an integer value, got %r' % (opt, arg))
                sys.exit(2)
            if opt in ("-p", "--n_phi"):
                number_qs = int_value
            elif opt in ("-s", "--fstart"):
                frame_start = int_value
            else:
                frame_end = int_value

    print('Input run is %s' % run_name)
    print('Output file is %s' % outputfile)
    print('Number of phi used is %d from phi = %.3g pi to phi = %.3g pi' % (
        number_qs, phi_range[0], phi_range[1]))
    print('Computing correlators for the following q_inverse values in nm:')
    print(q_range)

    if run_name is None:
        print('<runname> must be provided.')
        usage()
        sys.exit(2)

    # data_path = os.getcwd()+'/data'
    data_path = '/home/shenglan/MD_simulations/water_box/cubic_2nm_' + run_name
    traj = md.load_trr(data_path + '/nvt-pr_' + run_name + '.trr',
                       top=data_path + '/water-sol_' + run_name + '.gro')
    print('here is some info about the trajectory we are looking at:')
    print(traj)
    run = WaterStats(traj, run_name, read_mod='r')

    # time.clock was removed in Python 3.8; process_time is its CPU-time
    # replacement. Fall back to clock only where process_time is missing.
    timer = getattr(time, 'process_time', None) or time.clock

    try:
        if frame_start >= run.n_frames:
            print('Starting frame cannot be greater than the number of frames in simulation.')
            usage()
            sys.exit(2)
        elif frame_end is None:
            frames = np.arange(run.n_frames)[frame_start:]
        else:
            frames = np.arange(run.n_frames)[frame_start:frame_end]

        if len(frames) == 0:
            # frames[0] below would raise IndexError on an empty window.
            print('No frames selected between fstart=%s and fend=%s.' % (frame_start, frame_end))
            usage()
            sys.exit(2)
        print("frames %d to %d are used for averaging." % (frames[0], frames[-1]))

        # wavelength of laser
        wavelength = 0.1
        phi = np.linspace(phi_range[0] * np.pi, phi_range[1] * np.pi, number_qs)

        for q_inverse in q_range:
            print('computing for q_invers = %.3g nm' % q_inverse)
            q = 1 / q_inverse * np.pi * 2.0
            if outputfile is None:
                outputfile = ('corr_' + run_name + '_' + str(q_inverse) + 'q_'
                              + str(number_qs) + 'p_' + str(frames[0]) + '.csv')
            tic = timer()
            run.correlator(q, wavelength, frames, phi, cut_off=0.5, output=outputfile)
            toc = timer()
            print("Correlator process time for %.3g nm: %.2f" % (q_inverse, (toc - tic)))
            # Reset so every following q value gets its own generated name.
            outputfile = None
    finally:
        # Close the stores even when a correlator call fails or we exit early.
        run.all_tthds.close()
        run.nearest_tthds.close()
from water_stats import WaterStats
import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt
import os
import time

##############################################################################
# Code
##############################################################################

# data_path='/Users/shenglanqiao/Documents/GitHub/waterMD/data'
# Input files are read from ./data relative to the current working directory.
data_path = os.getcwd()+'/data'
traj = md.load_trr(data_path+'/nvt-pr_run1.trr', top = data_path+'/water-sol_run1.gro')
print ('here is some info about the trajectory we are looking at:')
print traj
# Module-level WaterStats object shared by the test functions below.
test = WaterStats(traj,'run1',read_mod = 'r')

R_water = 0.3

# output_path = '/Users/shenglanqiao/Documents/GitHub/waterMD/output'
output_path = '/home/shenglan/GitHub/waterMD/output'

def test_rdf(r_range):
    """Plot ``test.rdf`` with error bars after calling ``test.radial_dist``.

    ``r_range`` is forwarded unchanged to ``test.radial_dist``; presumably
    that call populates ``test.rdf`` (confirm in WaterStats).
    """
    test.radial_dist(r_range)
    # rdf rows are used as: [0] x values, [1] y values, [2] y error bars.
    rs, g_R, g_err = test.rdf[0],test.rdf[1],test.rdf[2]
    fig = plt.figure()
    plt.errorbar(rs,g_R, yerr=g_err)
    plt.title('gn(r)')
def main(argv):
    """Parse command-line options and compute correlators for one run.

    Options (see the getopt specification below):
      -h                 print usage and exit
      -i / --ifile       run name (required)
      -o / --ofile       output file; used for the first q value only, later
                         q values get an auto-generated file name
      -q / --q_inv       one q_inverse value, or several separated by "/"
      -p / --n_phi       number of phi values (integer)
      -r / --phi_range   "start/end" of phi, in units of pi
      -s / --fstart      first frame used (integer, default 1)
      -e / --fend        slice end of the frame window (integer, exclusive)

    :arg argv: argument list without the program name
    :type argv: list

    Exits with status 2 on an invalid option or option value, on a missing
    run name, or when the requested frame window is empty.
    """
    # default values for options
    run_name = None
    outputfile = None
    number_qs = 10
    frame_start = 1
    frame_end = None
    q_range = [0.3]
    phi_range = [0, 1.0]

    try:
        opts, args = getopt.getopt(
            argv,
            "hi:o:q:r:p:s:e:",
            ["ifile=", "ofile=", "q_inv=", "n_phi=", "phi_range=", "fstart=", "fend="],
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            usage()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            run_name = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-q", "--q_inv"):
            qs = arg.split("/")
            print(qs)
            try:
                q_range = [float(this_q) for this_q in qs]
            except ValueError:
                if len(qs) > 1:
                    print("Enter a single value of q of a range of values separated by /")
                else:
                    print("Enter a single value of q of a range of values separated by space")
                sys.exit(2)
        elif opt in ("-r", "--phi_range"):
            phis = arg.split("/")
            try:
                if len(phis) != 2:
                    raise ValueError(arg)
                phi_range = [float(this_phi) for this_phi in phis]
            except ValueError:
                print("Enter two values (in units of pi) for starting and ending phi separated by /. ")
                sys.exit(2)
        elif opt in ("-p", "--n_phi", "-s", "--fstart", "-e", "--fend"):
            # Integer options: report bad input the same way as the float
            # options instead of dying with a traceback.
            try:
                int_value = int(arg)
            except ValueError:
                print("Option %s expects an integer value, got %r" % (opt, arg))
                sys.exit(2)
            if opt in ("-p", "--n_phi"):
                number_qs = int_value
            elif opt in ("-s", "--fstart"):
                frame_start = int_value
            else:
                frame_end = int_value

    print("Input run is %s" % run_name)
    print("Output file is %s" % outputfile)
    print("Number of phi used is %d from phi = %.3g pi to phi = %.3g pi" % (number_qs, phi_range[0], phi_range[1]))
    print("Computing correlators for the following q_inverse values in nm:")
    print(q_range)

    if run_name is None:
        print("<runname> must be provided.")
        usage()
        sys.exit(2)

    # data_path = os.getcwd()+'/data'
    data_path = "/home/shenglan/MD_simulations/water_box/cubic_2nm_" + run_name
    traj = md.load_trr(data_path + "/nvt-pr_" + run_name + ".trr", top=data_path + "/water-sol_" + run_name + ".gro")
    print("here is some info about the trajectory we are looking at:")
    print(traj)
    run = WaterStats(traj, run_name, read_mod="r")

    # time.clock was removed in Python 3.8; process_time is its CPU-time
    # replacement. Fall back to clock only where process_time is missing.
    timer = getattr(time, "process_time", None) or time.clock

    try:
        if frame_start >= run.n_frames:
            print("Starting frame cannot be greater than the number of frames in simulation.")
            usage()
            sys.exit(2)
        elif frame_end is None:
            frames = np.arange(run.n_frames)[frame_start:]
        else:
            frames = np.arange(run.n_frames)[frame_start:frame_end]

        if len(frames) == 0:
            # frames[0] below would raise IndexError on an empty window.
            print("No frames selected between fstart=%s and fend=%s." % (frame_start, frame_end))
            usage()
            sys.exit(2)
        print("frames %d to %d are used for averaging." % (frames[0], frames[-1]))

        # wavelength of laser
        wavelength = 0.1
        phi = np.linspace(phi_range[0] * np.pi, phi_range[1] * np.pi, number_qs)

        for q_inverse in q_range:
            print("computing for q_invers = %.3g nm" % q_inverse)
            q = 1 / q_inverse * np.pi * 2.0
            if outputfile is None:
                outputfile = (
                    "corr_" + run_name + "_" + str(q_inverse) + "q_" + str(number_qs) + "p_" + str(frames[0]) + ".csv"
                )
            tic = timer()
            run.correlator(q, wavelength, frames, phi, cut_off=0.5, output=outputfile)
            toc = timer()
            print("Correlator process time for %.3g nm: %.2f" % (q_inverse, (toc - tic)))
            # Reset so every following q value gets its own generated name.
            outputfile = None
    finally:
        # Close the stores even when a correlator call fails or we exit early.
        run.all_tthds.close()
        run.nearest_tthds.close()
import mdtraj as md
import h5py
from water_stats import WaterStats
import numpy as np
import os
import time

##############################################################################
# Code
##############################################################################

# Fixed run: trajectory and topology live in the run9 simulation directory.
run_name = 'run9'
data_path = '/home/shenglan/MD_simulations/water_box/cubic_2nm_run9'
traj = md.load_trr(data_path + '/nvt-pr.trr',
                   top=data_path + '/water-sol.gro')
print('here is some info about the trajectory we are looking at:')
print(traj)

ws = WaterStats(traj, run_name)
cut_off = 0.5

tic = time.clock()
for this_frame in range(ws.n_frames):
    tic_loop = time.clock()
    print('finding tthds for frame %d' % this_frame)
    # Only frames without a stored dataset are computed.
    if str(this_frame) not in ws.nearest_tthds:
        tthds = ws.make_nearest_nb_tthds(cut_off, this_frame)
        ws.nearest_tthds.create_dataset(str(this_frame), data=tthds)
    toc_loop = time.clock()
print(__name__)
if __name__ == "__main__":
    # cutoff parameters and stuff, can change
    water_contact_cutoff = 0.3
    n_res_per_chain = 44
    n_chains = 11
    z_slab_updown = 0.5

    # water loading and z selection
    watergro = 'water_heavy.gro'
    watertrr = 'water_heavy.trr'
    watertraj = md.load_trr(watertrr, top=watergro)
    # select_z_slab is applied to the raw water coordinates; its result is
    # converted by z_bool_to_indices.
    water_slab_bool = select_z_slab(watertraj.xyz, z_slab_updown)
    water_slab_list = z_bool_to_indices(water_slab_bool)

    # protein loading and z selection - on a residue COM basis, not atomic
    protgro = 'prot_heavy.gro'
    prottrr = 'prot_heavy.trr'
    prottraj = md.load_trr(prottrr, top=protgro)
    prot_rescoms = calc_res_coms(prottraj)
    # Same slab selection as for water, with the same z_slab_updown value.
    prot_slab_bool = select_z_slab(prot_rescoms, z_slab_updown)
    prot_slab_list = z_bool_to_indices(prot_slab_bool)

    # protein filtering based on water contacts
    prot_water_mindist = load_respertime('mindist_by_res.xvg')
    # True where the loaded minimum distance is below water_contact_cutoff.
    prot_water_contact_bool = prot_water_mindist < water_contact_cutoff
    prot_chain_indices = assign_prot_chains(n_res_per_chain, n_chains)
############################################################################## import mdtraj as md import numpy as np from scipy.fftpack import fft from itertools import combinations import matplotlib.pyplot as plt ############################################################################## # Code ############################################################################## data_path = '/Users/shenglanqiao/Documents/GitHub/waterMD/data' # data_path='/home/shenglan/MD_simulations/water_box/cubic_2nm' traj = md.load_trr(data_path+'/nvt-pr.trr', top = data_path+'/water-sol.gro') print ('here is some info about the trajectory we are looking at:') print traj # time_step = 1 # in ps time_step=traj.timestep # in ps #atom.index for all Oxygen of the water molecules, get pariwise distances water_inds = traj.topology.select_atom_indices(selection='water') water_pairs = np.array(list(combinations(sorted(water_inds),2))) water_dist = md.compute_distances(traj,water_pairs) # unit in nm #examine statisitics for every frame mean_dist = np.mean(water_dist,axis=1) #sd_dist = np.std(water_dist,axis=1) traj_time = np.array(range(len(mean_dist)))*time_step
def load_trr(self):
    """Load ``self.trr_file`` with mdtraj, passing ``self.top_file`` as the
    second positional argument, and return the result."""
    trajectory = md.load_trr(self.trr_file, self.top_file)
    return trajectory
def _resolveGromacsFile(run_path, fname, arg_name):
    """Return *fname* if it is an existing file, else *run_path* + *fname*.

    *arg_name* is the keyword name used in error messages.
    Raises :exc:`TypeError` when *fname* is not a string and
    :exc:`ValueError` when neither candidate path is a file."""

    if not isinstance(fname, str):
        raise TypeError('{0} should be a string'.format(arg_name))

    if isfile(fname):
        return fname

    if isfile(run_path + fname):
        return run_path + fname

    raise ValueError(('{0} should point be a path to a file '
                      'either relative to run_path or an absolute one')
                     .format(arg_name))


def parseGromacsModes(run_path, title="", model='nma', **kwargs):
    """Returns :class:`.NMA` containing eigenvectors and eigenvalues parsed from a run directory
    containing results from gmx covar or gmx nmeig followed by gmx anaeig
    including eigenvalues in an xvg file and eigenvectors in pdb files
    (see http://www.strodel.info/index_files/lecture/html/analysis-9.html).

    :arg run_path: path to the run directory
    :type run_path: str

    :arg title: title for resulting object
        Default is ``""``
    :type title: str

    :arg model: type of calculated that was performed. It can be either ``"nma"``
        or ``"pca"``. If it is not changed to ``"pca"`` then ``"nma"`` will be assumed.
    :type model: str

    :arg eigval_fname: filename or path for xvg file containing eigenvalues
        Default is ``"eigenval.xvg"`` as this is the default from Gromacs
    :type eigval_fname: str

    :arg eigvec_fname: filename or path for trr file containing eigenvectors
        Default is ``"eigenvec.trr"`` as this is the default from Gromacs
    :type eigvec_fname: str

    :arg pdb_fname: filename or path for pdb file containing the reference structure
        Default is ``"average.pdb"`` although this is probably suboptimal
    :type pdb_fname: str

    Each file name is used as given when it exists, otherwise it is looked
    up inside *run_path*.

    :raises ImportError: if mdtraj is not installed
    :raises TypeError: if *run_path*, *title* or a file name is not a string
    :raises ValueError: if one of the three files cannot be found
    """
    try:
        from mdtraj import load_trr
    except ImportError:
        raise ImportError(
            'Please install mdtraj in order to use parseGromacsModes.')

    if not isinstance(run_path, str):
        raise TypeError('run_path should be a string')

    if not run_path.endswith('/'):
        run_path += '/'

    if not isinstance(title, str):
        raise TypeError('title should be a string')

    if model == 'pca':
        result = PCA(title)
    else:
        if model != 'nma':
            LOGGER.warn('model not recognised so using NMA')
        result = NMA(title)

    # Resolve each input through the same helper. The previous hand-copied
    # branches assigned the eigenvalue file to the eigenvector and pdb paths
    # whenever those files existed as given.
    vals_fname = _resolveGromacsFile(
        run_path, kwargs.get('eigval_fname', 'eigenval.xvg'), 'eigval_fname')
    vecs_fname = _resolveGromacsFile(
        run_path, kwargs.get('eigvec_fname', 'eigenvec.trr'), 'eigvec_fname')
    pdb = _resolveGromacsFile(
        run_path, kwargs.get('pdb_fname', 'average.pdb'), 'pdb_fname')

    eigvals = []
    with open(vals_fname, 'r') as fi:
        for line in fi:
            fields = line.split()
            # Skip blank lines and xvg header/comment lines.
            if not fields or line.startswith(('@', '#')):
                continue
            # The eigenvalue is the last column.
            eigvals.append(float(fields[-1]) * 100)  # convert to A**2 from nm**2

    eigvals = np.array(eigvals)

    # Parse eigenvectors trr with mdtraj, which uses nm so doesn't rescale
    vecs_traj = load_trr(vecs_fname, top=pdb)

    # format vectors appropriately, skipping initial and average structures
    vectors = np.array([frame.xyz.flatten() for frame in vecs_traj[2:]]).T

    result.setEigens(vectors, eigvals)
    return result
from water_stats import WaterStats
import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt
import os
import time

##############################################################################
# Code
##############################################################################

# data_path='/Users/shenglanqiao/Documents/GitHub/waterMD/data'
# Input files are read from ./data relative to the current working directory.
data_path = os.getcwd() + '/data'
traj = md.load_trr(data_path + '/nvt-pr_run1.trr', top=data_path + '/water-sol_run1.gro')
print('here is some info about the trajectory we are looking at:')
print traj
# Module-level WaterStats object shared by the test functions below.
test = WaterStats(traj, 'run1', read_mod='r')

R_water = 0.3

# output_path = '/Users/shenglanqiao/Documents/GitHub/waterMD/output'
output_path = '/home/shenglan/GitHub/waterMD/output'

def test_rdf(r_range):
    """Plot ``test.rdf`` with error bars after calling ``test.radial_dist``.

    ``r_range`` is forwarded unchanged to ``test.radial_dist``; presumably
    that call populates ``test.rdf`` (confirm in WaterStats).
    """
    test.radial_dist(r_range)
    # rdf rows are used as: [0] x values, [1] y values, [2] y error bars.
    rs, g_R, g_err = test.rdf[0], test.rdf[1], test.rdf[2]
    fig = plt.figure()
    plt.errorbar(rs, g_R, yerr=g_err)
#!/usr/bin/env python # coding: utf-8 import numpy as np import mdtraj as md import os home = os.getcwd() fTyr = md.load_trr("fTyr_md.trr", top="fTyr_md.gro").remove_solvent() # each frame is 100 ps configs = [c for c, t in zip(fTyr, fTyr.time) if t % 1000 == 0] for i, c in enumerate(configs): name = "config_" + str(i) if not os.path.isdir(name): os.mkdir(name) file = name + "/geo.pdb" c.save_pdb(file) os.chdir(home)
##############################################################################
import mdtraj as md
import h5py
from water_stats import WaterStats
import numpy as np
import os

##############################################################################
# Code
##############################################################################

# Input files are read from ./data relative to the current working directory.
run_name = "run4"
data_path = os.getcwd() + "/data"
traj = md.load_trr(
    data_path + "/nvt-pr_" + run_name + ".trr",
    top=data_path + "/water-sol_" + run_name + ".gro",
)
print("here is some info about the trajectory we are looking at:")
print(traj)

ws = WaterStats(traj, run_name)
cut_off = 0.5

for this_frame in range(ws.n_frames):
    # Frames that already have a stored dataset are left untouched.
    if str(this_frame) in ws.all_tthds:
        continue

    # Collect the results of make_tthd for every water index of this frame
    # and store them under the frame number.
    tthds = []
    for this_water in ws.water_inds:
        tthds.extend(ws.make_tthd(this_water, cut_off, this_frame))
    ws.all_tthds.create_dataset(str(this_frame), data=tthds)
    # print len(tthds)
##############################################################################
import mdtraj as md
import h5py
from water_stats import WaterStats
import numpy as np
import os

##############################################################################
# Code
##############################################################################

# Input files are read from ./data relative to the current working directory.
run_name = 'run4'
data_path = os.getcwd() + '/data'
traj = md.load_trr(
    data_path + '/nvt-pr_' + run_name + '.trr',
    top=data_path + '/water-sol_' + run_name + '.gro',
)
print('here is some info about the trajectory we are looking at:')
print(traj)

ws = WaterStats(traj, run_name)
cut_off = 0.5

for this_frame in range(ws.n_frames):
    # Frames that already have a stored dataset are left untouched.
    if str(this_frame) in ws.all_tthds:
        continue

    # Collect the results of make_tthd for every water index of this frame
    # and store them under the frame number.
    tthds = []
    for this_water in ws.water_inds:
        tthds.extend(ws.make_tthd(this_water, cut_off, this_frame))
    ws.all_tthds.create_dataset(str(this_frame), data=tthds)
    # print len(tthds)
import mdtraj as md import h5py from water_stats import WaterStats import numpy as np import os import time ############################################################################## # Code ############################################################################## run_name = 'run5' data_path = os.getcwd()+'/data' traj = md.load_trr(data_path+'/nvt-pr_'+run_name+'.trr', top = data_path+'/water-sol_'+run_name+'.gro') print ('here is some info about the trajectory we are looking at:') print traj ws = WaterStats(traj,run_name) cut_off = 0.5 frame_ind = 1 half_box = ws.traj.unitcell_lengths[0][0]/2.*10 inds = range(1001)[101:] count = 33897 for frame_ind in inds: nbs = ws.find_nearest_nbs(cut_off,frame_ind,3) xyz_pos = ws.traj[frame_ind].xyz with open(os.getcwd()+'/output_data/tthd_pdb_1000.pdb','a') as f:
def extract_aligned_prot_lig_wat_traj(md_components, flask, trj_fn, opt, nmax=30, water_cutoff=15.0):
    """
    Extracts the aligned protein trajectory and aligned ligand trajectory and aligned
    Water trajectory from a MD trajectory of a larger system that includes other
    components (eg water).
    The passed in setup mol must have the topology that matches the trajectory, and its xyz
    coordinates are the reference for the alignment. The alignment is done on the
    binding-site atoms: atoms matching the mdtraj selection "backbone and element C"
    that lie within a fixed 5 A cutoff from the ligand in the first frame.
    Once the alignment is done, the protein and ligand trajectories
    are each placed into a separate OEMol, one conformer per trajectory frame.
    Water trajectory is selecting the nmax waters from the ligand and protein binding-site
    atoms within the cutoff distance for each trajectory snapshot

    Inputs:
        md_components: MDComponents object
            The md components carrying the setup starting flask.

        flask: OEMol
            The system flask. Its coordinates are overwritten with every
            trajectory frame while the waters are extracted.

        trj_fn: String
            The filename of the hdf5-format MD trajectory or Gromacs .trr file format

        opt: Dictionary
            Only opt['Logger'] is read here; its warn method is called when
            nmax exceeds the value returned by nmax_waters.

        water_cutoff: Float
            The cutoff distance between the PL binding site and the waters in angstroms

        nmax: Integer
            max number of waters to select

    Outputs:
        multi_conf_protein: A multi conformer OEMol for the protein, one conformer per frame.
        multi_conf_ligand: A multi conformer OEMol for the ligand, one conformer per frame.
        multi_conf_water: A multi conformer OEMol for the waters, one conformer per frame.

    Raises:
        ValueError: if the trajectory extension is neither .h5 nor .trr, if a frame
            yields a number of selected waters different from nmax, or if the number
            of extracted water atoms in the first frame is not a multiple of 3.
    """

    # Extract protein, ligand, water and excipients from the flask
    # protein, ligand, water, excipients = oeommutils.split(flask, ligand_res_name="LIG")

    # map_dic provides the 'ligand' and 'protein' atom index lists used below.
    set_up_flask, map_dic = md_components.create_flask
    protein = md_components.get_protein
    ligand = md_components.get_ligand

    check_nmax = nmax_waters(protein, ligand, water_cutoff)

    # Warning only: processing continues with the requested nmax.
    if check_nmax < nmax:
        opt['Logger'].warn(
            "The selected number of max waters cannot fit around the protein binding site: {} vs {}"
            .format(nmax, check_nmax))

    void, traj_ext = os.path.splitext(trj_fn)

    traj_dir = os.path.dirname(trj_fn)

    if traj_ext == '.h5':
        trj = md.load_hdf5(trj_fn)

    elif traj_ext == '.trr':
        # The topology comes from the first *.pdb file found next to the trajectory.
        # NOTE(review): [0] raises IndexError when the directory has no pdb file.
        pdb_fn = glob.glob(os.path.join(traj_dir, '*.pdb'))[0]
        trj = md.load_trr(trj_fn, top=pdb_fn)
        # The first frame of a .trr trajectory is dropped.
        # NOTE(review): the reason is not stated here; confirm with the writer.
        trj = trj[1:]
    else:
        raise ValueError(
            "Trajectory file format {} not recognized in the trajectory {}".
            format(traj_ext, trj_fn))

    # System topology
    top_trj = trj.topology

    # Ligand indexes
    # lig_idx = top_trj.select("resname LIG")
    lig_idx = map_dic['ligand']

    # Protein indexes
    # prot_idx = top_trj.select("protein")

    # It is safer to use OE toolkits than mdtraj which is missing the protein caps
    prot_idx = map_dic['protein']
    # for at in protein.GetAtoms():
    #     prot_idx.append(at.GetIdx())

    # Water oxygen indexes
    water_O_idx = top_trj.select("water and element O")

    # Protein carbon alpha indexes
    # NOTE(review): the query is "backbone and element C", not "name CA";
    # confirm whether other backbone carbons are meant to be included.
    prot_ca_idx = top_trj.select("backbone and element C")

    # Cutoff for the selection of the binding site atoms in A
    cutoff_bs = 5.0

    # Carbon alpha binding site indexes
    # (cutoff converted from A to nm; evaluated on the first frame only)
    ca_bs_idx = md.compute_neighbors(trj[0],
                                     cutoff_bs / 10.0,
                                     lig_idx,
                                     haystack_indices=prot_ca_idx,
                                     periodic=True)[0]

    # Carbon alpha binding site and ligand indexes
    ca_bs_lig_idx = np.concatenate((ca_bs_idx, lig_idx))

    # Image the protein-ligand trajectory so the complex does not jump across box boundaries
    protlig = trj[0].atom_slice(np.concatenate((prot_idx, lig_idx)))
    protligAtoms = [atom for atom in protlig.topology.atoms]

    # stderr is redirected to os.devnull while image_molecules runs.
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stderr(devnull):
            trjImaged = trj.image_molecules(inplace=False,
                                            anchor_molecules=[protligAtoms],
                                            make_whole=True)

    # trjImaged = trj.image_molecules(inplace=False, anchor_molecules=[protligAtoms], make_whole=True)

    count = 0
    # One entry per frame: list of (water oxygen index, metric) pairs, sorted by metric.
    water_max_frames = []

    # TODO DEBUG
    # trjImaged = trjImaged[:10]

    # First pass: per frame, pick the nmax waters with the smallest metric.
    for frame in trjImaged:
        # print(count, flush=True)

        # Water oxygen binding site indexes
        # (compute_neighbors result is indexed with [0] below for this single frame)
        water_O_bs_idx = md.compute_neighbors(frame,
                                              water_cutoff / 10.0,
                                              ca_bs_lig_idx,
                                              haystack_indices=water_O_idx,
                                              periodic=True)

        # Pair combination water indexes times ligand indexes
        wat_lig_pairs = np.array(np.meshgrid(water_O_bs_idx, lig_idx)).T.reshape(-1, 2)

        # Distances between the waters and the ligand in nm
        wat_lig_distances = md.compute_distances(frame, wat_lig_pairs, periodic=True, opt=True)

        # Reshape the wat_lig_distances
        ns = np.reshape(wat_lig_distances, (len(water_O_bs_idx[0]), len(lig_idx)))

        # Min distances in nm between the oxygen waters and the ligand
        min_wat_O_lig_distances = np.min(ns, axis=1)

        # Pair combination water indexes times protein binding site carbon alpha indexes
        wat_ca_bs_pairs = np.array(np.meshgrid(water_O_bs_idx, ca_bs_idx)).T.reshape(-1, 2)

        # Distances between the waters and the protein binding site carbon alpha in nm
        wat_ca_bs_distances = md.compute_distances(frame, wat_ca_bs_pairs, periodic=True, opt=True)

        # Reshape the wat_ca_bs_distances
        ns = np.reshape(wat_ca_bs_distances, (len(water_O_bs_idx[0]), len(ca_bs_idx)))

        # Min distances in nm between the oxygen waters and the protein binding site carbon alpha
        min_wat_O_ca_bs_distances = np.min(ns, axis=1)

        # Ranking metric: sum of the two minimum distances (smaller is closer).
        metrics = min_wat_O_lig_distances + min_wat_O_ca_bs_distances

        metric_distances = dict()

        for wat_idx, m in zip(water_O_bs_idx[0], metrics):
            metric_distances[int(wat_idx)] = m

        water_list_sorted_max = sorted(metric_distances.items(), key=lambda x: x[1])[:nmax]

        # Fewer than nmax waters inside the cutoff is treated as an error.
        if len(water_list_sorted_max) != nmax:
            raise ValueError(
                "The ordered water list has the wrong size {} vs expected {} for the frame {}"
                .format(len(water_list_sorted_max), nmax, count))

        water_max_frames.append(water_list_sorted_max)

        # print(min_wat_O_ca_bs_distances)
        # print(pairs[:len(lig_idx), :])
        # for p,d in zip(wat_ca_bs_pairs, wat_ca_bs_distances[0]):
        #     print(p,d)

        count += 1

    # Put the reference mol xyz into the 1-frame topologyTraj to use as a reference in the fit
    setup_mol_array_coords = oechem.OEDoubleArray(3 * set_up_flask.GetMaxAtomIdx())
    set_up_flask.GetCoords(setup_mol_array_coords)

    setup_mol_xyzArr = np.array(setup_mol_array_coords)
    setup_mol_xyzArr.shape = (-1, 3)

    trj_reference = trjImaged[0]
    # convert from angstroms to nanometers
    trj_reference.xyz[0] = setup_mol_xyzArr / 10.0

    # Fitting
    trjImaged.superpose(trj_reference, 0, ca_bs_idx)

    # Delete Original Trajectory to save memory
    del trj

    # Molecule copies
    ligand_reference = oechem.OEMol(ligand)
    protein_reference = oechem.OEMol(protein)

    count = 0

    # Create the multi conformer protein, ligand and water molecules
    # NOTE(review): the multi_conf_* names are only bound inside this loop, so
    # a trajectory with zero frames would fail at the return statement.
    for frame in trjImaged.xyz:
        # print("Trj Image loop", count, flush=True)

        # Extract coordinates in A
        xyz = frame * 10

        # Set flask Coordinates as the current frame for the water extraction
        flask.SetCoords(xyz.flatten())
        water_list_sorted_max = water_max_frames[count]

        # print(water_list_sorted_max)

        # TODO The following solution to extract the waters do not
        #  keep the water order

        # Mark the close water atoms and extract them
        # NOTE(review): the vector is created with nmax * 3 bits but bits are set at
        # flask atom indices; confirm OEBitVector grows as needed.
        bv = oechem.OEBitVector(nmax * 3)
        water_idx = []

        for pair in water_list_sorted_max:

            ow = flask.GetAtom(oechem.OEHasAtomIdx(pair[0]))

            # Select the whole water molecule
            for atw in oechem.OEGetResidueAtoms(ow):
                bv.SetBitOn(atw.GetIdx())
                water_idx.append(atw.GetIdx())

        pred_vec = oechem.OEAtomIdxSelected(bv)
        water_nmax_reference = oechem.OEMol()
        oechem.OESubsetMol(water_nmax_reference, flask, pred_vec)

        # TODO The following solution to extract the waters
        #  keep the water order but is it seems extremely inefficient

        # water_list = []
        # for pair in water_list_sorted_max:
        #     bv = oechem.OEBitVector(3)
        #     water_idx = []
        #     ow = flask.GetAtom(oechem.OEHasAtomIdx(pair[0]))
        #
        #     # Select the whole water molecule
        #     for atw in oechem.OEGetResidueAtoms(ow):
        #         bv.SetBitOn(atw.GetIdx())
        #         water_idx.append(atw.GetIdx())
        #
        #     pred_vec = oechem.OEAtomIdxSelected(bv)
        #     water = oechem.OEMol()
        #     oechem.OESubsetMol(water, flask, pred_vec)
        #
        #     water_list.append(water)
        #
        #
        # # print(len(water_list))
        #
        # water_nmax_reference = oechem.OEMol()

        # for w in water_list:
        #     oechem.OEAddMols(water_nmax_reference, w)

        # ligand and protein conf coordinates
        # (frame is in nm; the factor 10 converts to A)
        lig_xyz_list = [10 * frame[idx] for idx in lig_idx]
        lig_confxyz = oechem.OEFloatArray(np.array(lig_xyz_list).ravel())

        prot_xyz_list = [10 * frame[idx] for idx in prot_idx]
        prot_confxyz = oechem.OEFloatArray(np.array(prot_xyz_list).ravel())

        # Initialize the protein, ligand and water molecule topologies
        if count == 0:

            multi_conf_water = oechem.OEMol(water_nmax_reference)

            if multi_conf_water.NumAtoms() % 3 != 0:
                raise ValueError("Number of Water atoms is not multiple of 3")

            # Clean ResNumber and Chain on the multi conf water molecule
            # oechem.OEPerceiveResidues(multi_conf_water, oechem.OEPreserveResInfo_All)
            multi_conf_water.SetTitle("Water_" + str(nmax))

            # Renumber: one residue number per group of three consecutive atoms.
            # NOTE(review): the residue from OEAtomGetResidue is modified but no
            # OEAtomSetResidue call is visible; confirm the edits reach the atom.
            res_num = 0
            i = 0
            for at in multi_conf_water.GetAtoms():

                res = oechem.OEAtomGetResidue(at)
                res.SetSerialNumber(i)
                res.SetName("HOH")
                res.SetChainID("Z")
                if i % 3 == 0:
                    res_num += 1
                res.SetResidueNumber(res_num)
                i += 1

            ligand_reference.SetCoords(lig_confxyz)
            protein_reference.SetCoords(prot_confxyz)
            multi_conf_ligand = oechem.OEMol(ligand_reference)
            multi_conf_protein = oechem.OEMol(protein_reference)

        # Attach the conformers on the multi conformer protein, ligand and water molecules
        else:
            water_confxyz = oechem.OEFloatArray(
                water_nmax_reference.NumAtoms() * 3)
            water_nmax_reference.GetCoords(water_confxyz)

            multi_conf_water.NewConf(water_confxyz)
            multi_conf_ligand.NewConf(lig_confxyz)
            multi_conf_protein.NewConf(prot_confxyz)

        count += 1

    return multi_conf_protein, multi_conf_ligand, multi_conf_water