Пример #1
0
def main():
    """Fit a trajectory from the command line and write the fitted result.

    Steps: parse the CLI options, load the ``.trr`` trajectory with its
    topology, replace the coordinates with those returned by ``load_trj``
    for ``--trj``, run ``preprocess`` and ``recursive_fitting``, then save.

    The output format follows the extension of ``--out``: ``.trr`` writes a
    trr file, ``.npy`` writes the coordinate array, and any other extension
    (or none) writes both ``<out>.trr`` and ``<out>.npy``.
    """
    cli = argparse.ArgumentParser(description='fitting trajectory.')
    cli.add_argument('-t', '--trr', default=MAINCHAIN_TRR, help='trajectory file (.trr)')
    cli.add_argument('--trj', required=True, help='.npy')
    cli.add_argument('-p', '--topology', required=True, help='topology file (.gro, .pdb)')
    cli.add_argument('-r', '--recursive', action='store_true', default=False, help='do fitting 2 times')
    cli.add_argument('-o', '--out', required=True, help='output file path (filename.trr or filename.npy or filename)')
    # NOTE: the option is spelled "max_wokers" in the public CLI; kept as is.
    cli.add_argument('-w', '--max_wokers', default=2, type=int, help='max_wokers of multi-process')
    args = cli.parse_args()

    # --- read input ---
    traj = md.load_trr(args.trr, top=args.topology)
    if args.trj:
        traj.xyz = load_trj(args.trj)
    print(f'Trajectory Info ({traj.n_frames} frames, {traj.n_atoms} atoms)')

    # --- preprocess ---
    traj, _atomlist, weights = preprocess(traj)

    # --- fitting ---
    fitted_xyz = recursive_fitting(traj.xyz, weights, args.max_wokers, args.recursive)

    # --- wrap the fitted coordinates back into a trajectory object ---
    fitted = md.Trajectory(fitted_xyz, traj.topology)

    # --- save ---
    suffix = os.path.splitext(args.out)[1]
    if suffix not in (".trr", ".npy"):
        # Unknown or missing extension: write both formats.
        fitted.save_trr(args.out + '.trr')
        np.save(args.out + '.npy', fitted.xyz)
        return

    if suffix == ".trr":
        fitted.save_trr(args.out)
    else:
        np.save(args.out, fitted.xyz)
Пример #2
0
def main(argv):
    """Compute correlators for one water-box run, driven by CLI options.

    Recognised options (short / long):
        -h                 print usage and exit
        -i / --ifile       run name (required); selects data directory and files
        -o / --ofile       output file handed to ``run.correlator``; it is used
                           for the first q value only, later q values get an
                           auto-generated name
        -q / --q_inv       one inverse-q value in nm, or several separated by '/'
        -p / --n_phi       number of phi samples
        -r / --phi_range   start and end phi in units of pi, separated by '/'
        -s / --fstart      first frame index used for averaging
        -e / --fend        end frame index (exclusive, slice semantics)

    :arg argv: option list without the program name (e.g. ``sys.argv[1:]``)

    Exits with status 2 on malformed options, a missing run name, or an
    empty frame selection.
    """
    # default values for options
    run_name = None
    outputfile = None
    number_qs = 10
    frame_start = 1
    frame_end = None
    q_range = [0.3]
    phi_range = [0, 1.0]

    try:
        opts, _ = getopt.getopt(argv, "hi:o:q:r:p:s:e:", [
            "ifile=", "ofile=", "q_inv=", "n_phi=", "phi_range=", "fstart=",
            "fend="
        ])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            run_name = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-q", "--q_inv"):
            # A single value is just a one-element '/'-separated list.
            qs = arg.split('/')
            print(qs)
            try:
                q_range = [float(this_q) for this_q in qs]
            except ValueError:
                print('Enter a single value of q or a range of values separated by /')
                sys.exit(2)
        elif opt in ("-p", "--n_phi"):
            number_qs = int(arg)
        elif opt in ("-r", "--phi_range"):
            phis = arg.split('/')
            try:
                if len(phis) != 2:
                    raise ValueError(arg)
                phi_range = [float(this_phi) for this_phi in phis]
            except ValueError:
                print('Enter two values (in units of pi) for starting and ending phi separated by /. ')
                sys.exit(2)
        elif opt in ("-s", "--fstart"):
            frame_start = int(arg)
        elif opt in ("-e", "--fend"):
            frame_end = int(arg)

    print('Input run is %s' % run_name)
    print('Output file is %s' % outputfile)
    print('Number of phi used is %d from phi = %.3g pi to phi = %.3g pi' % (
        number_qs, phi_range[0], phi_range[1]))
    print('Computing correlators for the following q_inverse values in nm:')
    print(q_range)

    if run_name is None:
        print('<runname> must be provided.')
        usage()
        sys.exit(2)

    #     data_path = os.getcwd()+'/data'
    data_path = '/home/shenglan/MD_simulations/water_box/cubic_2nm_' + run_name
    traj = md.load_trr(data_path + '/nvt-pr_' + run_name + '.trr',
                       top=data_path + '/water-sol_' + run_name + '.gro')
    print('here is some info about the trajectory we are looking at:')
    print(traj)

    # time.clock() was removed in Python 3.8; only fall back to it where
    # time.process_time() does not exist (Python 2).
    clock = getattr(time, 'process_time', None) or time.clock

    run = WaterStats(traj, run_name, read_mod='r')
    try:
        if frame_start >= run.n_frames:
            print('Starting frame cannot be greater than the number of frames in simulation.')
            usage()
            sys.exit(2)

        # frame_end=None slices to the end, so one expression covers both cases.
        frames = np.arange(run.n_frames)[frame_start:frame_end]
        if len(frames) == 0:
            print('No frames selected; check the starting and ending frame.')
            usage()
            sys.exit(2)

        print("frames %d to %d are used for averaging." % (frames[0], frames[-1]))

        # wavelength of laser
        wavelength = 0.1
        phi = np.linspace(phi_range[0] * np.pi, phi_range[1] * np.pi, number_qs)
        for q_inverse in q_range:
            print('computing for q_invers = %.3g nm' % q_inverse)
            q = 1 / q_inverse * np.pi * 2.0

            if outputfile is None:
                outputfile = ('corr_' + run_name + '_' + str(q_inverse) + 'q_'
                              + str(number_qs) + 'p_' + str(frames[0]) + '.csv')

            tic = clock()
            run.correlator(q,
                           wavelength,
                           frames,
                           phi,
                           cut_off=0.5,
                           output=outputfile)
            toc = clock()

            print("Correlator process time for %.3g nm: %.2f" % (q_inverse,
                                                                 (toc - tic)))
            # Reset so the next q value gets its own auto-generated file name
            # instead of overwriting this one.
            outputfile = None
    finally:
        # Release the data files even if the computation fails part-way.
        run.all_tthds.close()
        run.nearest_tthds.close()
Пример #3
0
from water_stats import WaterStats

import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt

import os
import time

##############################################################################
# Code
##############################################################################

# data_path='/Users/shenglanqiao/Documents/GitHub/waterMD/data'
# Input files are read from ./data relative to the current working directory.
data_path = os.getcwd()+'/data'
traj = md.load_trr(data_path+'/nvt-pr_run1.trr', top = data_path+'/water-sol_run1.gro')
print ('here is some info about the trajectory we are looking at:')
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(traj)
# Shared WaterStats instance used by the test_* helpers in this script.
test = WaterStats(traj,'run1',read_mod = 'r')

R_water = 0.3

# output_path = '/Users/shenglanqiao/Documents/GitHub/waterMD/output'
output_path = '/home/shenglan/GitHub/waterMD/output'

def test_rdf(r_range):
    """Compute the radial distribution over *r_range* and plot it with error bars.

    Uses the module-level ``test`` object: ``radial_dist`` is called first,
    then the first three entries of ``test.rdf`` are plotted as x, y and
    y-error on a new figure titled ``gn(r)``.
    """
    test.radial_dist(r_range)
    radii = test.rdf[0]
    g_of_r = test.rdf[1]
    g_of_r_err = test.rdf[2]
    plt.figure()
    plt.errorbar(radii, g_of_r, yerr=g_of_r_err)
    plt.title('gn(r)')
Пример #4
0
def main(argv):
    """Compute correlators for one water-box run, driven by CLI options.

    Recognised options (short / long):
        -h                 print usage and exit
        -i / --ifile       run name (required); selects data directory and files
        -o / --ofile       output file handed to ``run.correlator``; it is used
                           for the first q value only, later q values get an
                           auto-generated name
        -q / --q_inv       one inverse-q value in nm, or several separated by '/'
        -p / --n_phi       number of phi samples
        -r / --phi_range   start and end phi in units of pi, separated by '/'
        -s / --fstart      first frame index used for averaging
        -e / --fend        end frame index (exclusive, slice semantics)

    :arg argv: option list without the program name (e.g. ``sys.argv[1:]``)

    Exits with status 2 on malformed options, a missing run name, or an
    empty frame selection.
    """
    # default values for options
    run_name = None
    outputfile = None
    number_qs = 10
    frame_start = 1
    frame_end = None
    q_range = [0.3]
    phi_range = [0, 1.0]

    try:
        opts, _ = getopt.getopt(
            argv, "hi:o:q:r:p:s:e:", ["ifile=", "ofile=", "q_inv=", "n_phi=", "phi_range=", "fstart=", "fend="]
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            usage()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            run_name = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-q", "--q_inv"):
            # A single value is just a one-element '/'-separated list.
            qs = arg.split("/")
            print(qs)
            try:
                q_range = [float(this_q) for this_q in qs]
            except ValueError:
                print("Enter a single value of q or a range of values separated by /")
                sys.exit(2)
        elif opt in ("-p", "--n_phi"):
            number_qs = int(arg)
        elif opt in ("-r", "--phi_range"):
            phis = arg.split("/")
            try:
                if len(phis) != 2:
                    raise ValueError(arg)
                phi_range = [float(this_phi) for this_phi in phis]
            except ValueError:
                print("Enter two values (in units of pi) for starting and ending phi separated by /. ")
                sys.exit(2)
        elif opt in ("-s", "--fstart"):
            frame_start = int(arg)
        elif opt in ("-e", "--fend"):
            frame_end = int(arg)

    print("Input run is %s" % run_name)
    print("Output file is %s" % outputfile)
    print("Number of phi used is %d from phi = %.3g pi to phi = %.3g pi" % (number_qs, phi_range[0], phi_range[1]))
    print("Computing correlators for the following q_inverse values in nm:")
    print(q_range)

    if run_name is None:
        print("<runname> must be provided.")
        usage()
        sys.exit(2)

    #     data_path = os.getcwd()+'/data'
    data_path = "/home/shenglan/MD_simulations/water_box/cubic_2nm_" + run_name
    traj = md.load_trr(data_path + "/nvt-pr_" + run_name + ".trr", top=data_path + "/water-sol_" + run_name + ".gro")
    print("here is some info about the trajectory we are looking at:")
    print(traj)

    # time.clock() was removed in Python 3.8; only fall back to it where
    # time.process_time() does not exist (Python 2).
    clock = getattr(time, "process_time", None) or time.clock

    run = WaterStats(traj, run_name, read_mod="r")
    try:
        if frame_start >= run.n_frames:
            print("Starting frame cannot be greater than the number of frames in simulation.")
            usage()
            sys.exit(2)

        # frame_end=None slices to the end, so one expression covers both cases.
        frames = np.arange(run.n_frames)[frame_start:frame_end]
        if len(frames) == 0:
            print("No frames selected; check the starting and ending frame.")
            usage()
            sys.exit(2)

        print("frames %d to %d are used for averaging." % (frames[0], frames[-1]))

        # wavelength of laser
        wavelength = 0.1
        phi = np.linspace(phi_range[0] * np.pi, phi_range[1] * np.pi, number_qs)
        for q_inverse in q_range:
            print("computing for q_invers = %.3g nm" % q_inverse)
            q = 1 / q_inverse * np.pi * 2.0

            if outputfile is None:
                outputfile = (
                    "corr_" + run_name + "_" + str(q_inverse) + "q_" + str(number_qs) + "p_" + str(frames[0]) + ".csv"
                )

            tic = clock()
            run.correlator(q, wavelength, frames, phi, cut_off=0.5, output=outputfile)
            toc = clock()

            print("Correlator process time for %.3g nm: %.2f" % (q_inverse, (toc - tic)))
            # Reset so the next q value gets its own auto-generated file name
            # instead of overwriting this one.
            outputfile = None
    finally:
        # Release the data files even if the computation fails part-way.
        run.all_tthds.close()
        run.nearest_tthds.close()
Пример #5
0
import mdtraj as md

import h5py
from water_stats import WaterStats
import numpy as np
import os

import time

##############################################################################
# Code
##############################################################################

run_name = 'run9'
data_path = '/home/shenglan/MD_simulations/water_box/cubic_2nm_run9'
traj = md.load_trr(data_path+'/nvt-pr.trr', top = data_path+'/water-sol.gro')
print ('here is some info about the trajectory we are looking at:')
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(traj)
ws = WaterStats(traj,run_name)
cut_off = 0.5

# time.clock() was removed in Python 3.8; only fall back to it where
# time.process_time() does not exist (Python 2).
_clock = getattr(time, 'process_time', None) or time.clock

tic = _clock()
for this_frame in range(ws.n_frames):
    tic_loop = _clock()
    print('finding tthds for frame %d' % this_frame)
    # Only compute and store frames that have no dataset yet, so the script
    # can be re-run without redoing finished frames.
    if str(this_frame) not in ws.nearest_tthds:
        tthds = ws.make_nearest_nb_tthds(cut_off,this_frame)
        ws.nearest_tthds.create_dataset(str(this_frame),data = tthds)
    toc_loop = _clock()
Пример #6
0

# Echo the module name (it is "__main__" when the file is run as a script).
print(__name__)

if __name__ == "__main__":
    # Script driver: load the water and protein trajectories, select the
    # entries inside a z slab for each, then load per-residue minimum
    # distances to water and derive a contact mask from them.

    # cutoff parameters and stuff, can change
    # NOTE(review): units are not stated here; presumably nm to match mdtraj
    # coordinates — confirm.
    water_contact_cutoff = 0.3
    n_res_per_chain = 44
    n_chains = 11
    z_slab_updown = 0.5

    # water loading and z selection
    watergro = 'water_heavy.gro'
    watertrr = 'water_heavy.trr'
    watertraj = md.load_trr(watertrr, top=watergro)
    water_slab_bool = select_z_slab(watertraj.xyz, z_slab_updown)
    water_slab_list = z_bool_to_indices(water_slab_bool)

    # protein loading and z selection - on a residue COM basis, not atomic
    protgro = 'prot_heavy.gro'
    prottrr = 'prot_heavy.trr'
    prottraj = md.load_trr(prottrr, top=protgro)
    prot_rescoms = calc_res_coms(prottraj)
    prot_slab_bool = select_z_slab(prot_rescoms, z_slab_updown)
    prot_slab_list = z_bool_to_indices(prot_slab_bool)

    # protein filtering based on water contacts
    prot_water_mindist = load_respertime('mindist_by_res.xvg')
    # True wherever the loaded minimum distance is below the contact cutoff.
    prot_water_contact_bool = prot_water_mindist < water_contact_cutoff
    prot_chain_indices = assign_prot_chains(n_res_per_chain, n_chains)
Пример #7
0
##############################################################################

import mdtraj as md

import numpy as np
from scipy.fftpack import fft
from itertools import combinations
import matplotlib.pyplot as plt

##############################################################################
# Code
##############################################################################

data_path = '/Users/shenglanqiao/Documents/GitHub/waterMD/data'
# data_path='/home/shenglan/MD_simulations/water_box/cubic_2nm'
traj = md.load_trr(data_path+'/nvt-pr.trr', top = data_path+'/water-sol.gro')
print ('here is some info about the trajectory we are looking at:')
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(traj)

# time_step = 1 # in ps
time_step=traj.timestep # in ps

# atom indices returned by the 'water' selection, and all pairwise distances
# NOTE(review): the original comment says these are the water oxygens —
# confirm against the mdtraj `select_atom_indices` documentation.
water_inds = traj.topology.select_atom_indices(selection='water')
water_pairs = np.array(list(combinations(sorted(water_inds),2)))
water_dist = md.compute_distances(traj,water_pairs) # unit in nm

# examine statistics for every frame
mean_dist = np.mean(water_dist,axis=1)
#sd_dist = np.std(water_dist,axis=1)
# One time stamp per frame: frame index times the trajectory time step.
traj_time = np.arange(len(mean_dist))*time_step
    def load_trr(self):
        """Load and return the trajectory at ``self.trr_file``.

        ``self.top_file`` is passed to ``md.load_trr`` as its second
        positional argument.
        """
        trajectory = md.load_trr(self.trr_file, self.top_file)
        return trajectory
Пример #9
0
def _resolveGromacsFile(fname, run_path, arg_name):
    """Return *fname* if it is an existing file, otherwise ``run_path + fname``.

    *arg_name* is only used to build the error messages.

    :raises TypeError: if *fname* is not a string
    :raises ValueError: if neither candidate path is an existing file
    """
    if not isinstance(fname, str):
        raise TypeError('{0} should be a string'.format(arg_name))

    if isfile(fname):
        return fname

    candidate = run_path + fname
    if isfile(candidate):
        return candidate

    raise ValueError('{0} should point be a path to a file '
                     'either relative to run_path or an absolute one'
                     .format(arg_name))


def parseGromacsModes(run_path, title="", model='nma', **kwargs):
    """Returns :class:`.NMA` containing eigenvectors and eigenvalues parsed from a run directory 
    containing results from gmx covar or gmx nmeig followed by gmx anaeig 
    including eigenvalues in an xvg file and eigenvectors in pdb files
    (see http://www.strodel.info/index_files/lecture/html/analysis-9.html).

    :arg run_path: path to the run directory
    :type run_path: str
    
    :arg title: title for resulting object
        Default is ``""``
    :type title: str

    :arg model: type of calculation that was performed. It can be either ``"nma"`` 
        or ``"pca"``. If it is not changed to ``"pca"`` then ``"nma"`` will be assumed.
    :type model: str

    :arg eigval_fname: filename or path for xvg file containing eigenvalues
        Default is ``"eigenval.xvg"`` as this is the default from Gromacs
    :type eigval_fname: str

    :arg eigvec_fname: filename or path for trr file containing eigenvectors
        Default is ``"eigenvec.trr"`` as this is the default from Gromacs
    :type eigvec_fname: str

    :arg pdb_fname: filename or path for pdb file containing the reference structure
        Default is ``"average.pdb"`` although this is probably suboptimal
    :type pdb_fname: str

    Each file name is first tried as given and then relative to *run_path*.

    :raises ImportError: if mdtraj is not installed
    :raises TypeError: if *run_path*, *title* or a file name is not a string
    :raises ValueError: if one of the three files cannot be found
    """
    try:
        from mdtraj import load_trr
    except ImportError:
        raise ImportError(
            'Please install mdtraj in order to use parseGromacsModes.')

    if not isinstance(run_path, str):
        raise TypeError('run_path should be a string')

    if not run_path.endswith('/'):
        run_path += '/'

    if not isinstance(title, str):
        raise TypeError('title should be a string')

    if model == 'pca':
        result = PCA(title)
    else:
        if model != 'nma':
            LOGGER.warn('model not recognised so using NMA')
        result = NMA(title)

    # Resolve the three input files through one helper so each variable is
    # derived from its own keyword argument.
    vals_fname = _resolveGromacsFile(
        kwargs.get('eigval_fname', 'eigenval.xvg'), run_path, 'eigval_fname')
    vecs_fname = _resolveGromacsFile(
        kwargs.get('eigvec_fname', 'eigenvec.trr'), run_path, 'eigvec_fname')
    pdb = _resolveGromacsFile(
        kwargs.get('pdb_fname', 'average.pdb'), run_path, 'pdb_fname')

    with open(vals_fname, 'r') as fi:
        lines = fi.readlines()

    # Data lines carry the eigenvalue in the last column; '@' and '#' lines
    # are xvg header/comment lines, and blank lines are skipped.
    eigvals = np.array([
        float(line.split()[-1]) * 100  # convert to A**2 from nm**2
        for line in lines
        if line.strip() and not line.startswith(('@', '#'))
    ])

    # Parse eigenvectors trr with mdtraj, which uses nm so doesn't rescale
    vecs_traj = load_trr(vecs_fname, top=pdb)

    # format vectors appropriately, skipping initial and average structures
    vectors = np.array([frame.xyz.flatten() for frame in vecs_traj[2:]]).T

    result.setEigens(vectors, eigvals)
    return result
Пример #10
0
from water_stats import WaterStats

import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt

import os
import time

##############################################################################
# Code
##############################################################################

# data_path='/Users/shenglanqiao/Documents/GitHub/waterMD/data'
# Input files are read from ./data relative to the current working directory.
data_path = os.getcwd() + '/data'
traj = md.load_trr(data_path + '/nvt-pr_run1.trr',
                   top=data_path + '/water-sol_run1.gro')
print('here is some info about the trajectory we are looking at:')
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(traj)
# Shared WaterStats instance used by the test_* helpers in this script.
test = WaterStats(traj, 'run1', read_mod='r')

R_water = 0.3

# output_path = '/Users/shenglanqiao/Documents/GitHub/waterMD/output'
output_path = '/home/shenglan/GitHub/waterMD/output'


def test_rdf(r_range):
    """Compute the radial distribution over *r_range* and plot it with error bars.

    Uses the module-level ``test`` object: ``radial_dist`` is called first,
    then the first three entries of ``test.rdf`` are plotted as x, y and
    y-error on a new figure.
    """
    test.radial_dist(r_range)
    radii = test.rdf[0]
    g_of_r = test.rdf[1]
    g_of_r_err = test.rdf[2]
    plt.figure()
    plt.errorbar(radii, g_of_r, yerr=g_of_r_err)
Пример #11
0
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import mdtraj as md
import os

# Remember the starting directory; it is restored at the end of the script.
home = os.getcwd()

# Load the trajectory and drop the solvent before extracting snapshots.
fTyr = md.load_trr("fTyr_md.trr", top="fTyr_md.gro").remove_solvent()

# each frame is 100 ps
# Keep the frames whose time stamp is a multiple of 1000.
# NOTE(review): this is an exact comparison on the time values; if they are
# floats that are not exactly representable, frames may be missed — confirm.
configs = [c for c, t in zip(fTyr, fTyr.time) if t % 1000 == 0]

# Write each selected frame to config_<i>/geo.pdb, creating the directory
# when it does not exist yet.
for i, c in enumerate(configs):
    name = "config_" + str(i)
    if not os.path.isdir(name):
        os.mkdir(name)
    # Named pdb_path rather than `file` to avoid shadowing the builtin.
    pdb_path = name + "/geo.pdb"
    c.save_pdb(pdb_path)

os.chdir(home)
Пример #12
0
##############################################################################

import mdtraj as md

import h5py
from water_stats import WaterStats
import numpy as np
import os

##############################################################################
# Code
##############################################################################

run_name = "run4"
# Input files are read from ./data relative to the current working directory.
data_path = os.getcwd() + "/data"
traj = md.load_trr(data_path + "/nvt-pr_" + run_name + ".trr", top=data_path + "/water-sol_" + run_name + ".gro")
print("here is some info about the trajectory we are looking at:")
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(traj)
ws = WaterStats(traj, run_name)
cut_off = 0.5

for this_frame in range(ws.n_frames):
    # Frames that already have a dataset are skipped, so the script can be
    # re-run without redoing finished frames.
    if str(this_frame) in ws.all_tthds:
        continue

    # Collect the results of make_tthd for every water index in this frame
    # and store them as one dataset keyed by the frame number.
    tthds = []
    for this_water in ws.water_inds:
        tthds.extend(ws.make_tthd(this_water, cut_off, this_frame))
    ws.all_tthds.create_dataset(str(this_frame), data=tthds)

# print len(tthds)
Пример #13
0
##############################################################################

import mdtraj as md

import h5py
from water_stats import WaterStats
import numpy as np
import os

##############################################################################
# Code
##############################################################################

run_name = 'run4'
# Input files are read from ./data relative to the current working directory.
data_path = os.getcwd() + '/data'
traj = md.load_trr(data_path + '/nvt-pr_' + run_name + '.trr',
                   top=data_path + '/water-sol_' + run_name + '.gro')
print('here is some info about the trajectory we are looking at:')
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(traj)
ws = WaterStats(traj, run_name)
cut_off = 0.5

for this_frame in range(ws.n_frames):
    # Frames that already have a dataset are skipped, so the script can be
    # re-run without redoing finished frames.
    if str(this_frame) in ws.all_tthds:
        continue

    # Collect the results of make_tthd for every water index in this frame
    # and store them as one dataset keyed by the frame number.
    tthds = []
    for this_water in ws.water_inds:
        tthds.extend(ws.make_tthd(this_water, cut_off, this_frame))
    ws.all_tthds.create_dataset(str(this_frame), data=tthds)

# print len(tthds)
Пример #14
0
import mdtraj as md

import h5py
from water_stats import WaterStats
import numpy as np
import os

import time

##############################################################################
# Code
##############################################################################

run_name = 'run5'
# Input files are read from ./data relative to the current working directory.
data_path = os.getcwd()+'/data'
traj = md.load_trr(data_path+'/nvt-pr_'+run_name+'.trr', top = data_path+'/water-sol_'+run_name+'.gro')
print ('here is some info about the trajectory we are looking at:')
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(traj)
ws = WaterStats(traj,run_name)
cut_off = 0.5
frame_ind = 1

# Half of the first unit-cell length of the first frame, scaled by 10.
# NOTE(review): the factor 10 is presumably an nm -> angstrom conversion — confirm.
half_box = ws.traj.unitcell_lengths[0][0]/2.*10

# Frame indices 101..1000 inclusive.
inds = range(1001)[101:]
count = 33897
for frame_ind in inds:
    nbs = ws.find_nearest_nbs(cut_off,frame_ind,3)
    xyz_pos = ws.traj[frame_ind].xyz
    
    with open(os.getcwd()+'/output_data/tthd_pdb_1000.pdb','a') as f:
Пример #15
0
def extract_aligned_prot_lig_wat_traj(md_components,
                                      flask,
                                      trj_fn,
                                      opt,
                                      nmax=30,
                                      water_cutoff=15.0):
    """
    Extracts the aligned protein trajectory and aligned ligand trajectory and aligned
    Water trajectory from a MD trajectory of a larger system that includes other
    components (eg water).
    The passed in setup mol must have the topology that matches the trajectory, and its xyz
    coordinates are the reference for the alignment. The alignment is done on the
    alpha carbons (atom name CA) of the active site residues within cutoff
    from the ligand. Once the alignment is done, the protein and ligand trajectories
    are each placed into a separate OEMol, one conformer per trajectory frame.
    Water trajectory is selecting the nmax waters from the ligand and protein CA
    within the cutoff distance for each trajectory snapshot

    Inputs:
        md_components: MDComponents object
            The md components carrying the setup starting flask.

        flask: OEMol
            The system flask. Its coordinates are overwritten with each
            trajectory frame while the waters are extracted.

        trj_fn: String
            The filename of the hdf5-format MD trajectory or Gromacs .trr file format.
            For a .trr file the topology is taken from the first ``*.pdb`` file
            found in the same directory and the first frame is dropped.
        opt: Dictionary
            Only ``opt['Logger']`` is used here, to emit a warning.
        water_cutoff: Float
            The cutoff distance between the PL binding site and the waters in angstroms
        nmax: Integer
            max number of waters to select
    Outputs:
        multi_conf_protein: A multi conformer OEMol for the protein, one conformer per frame.
        multi_conf_ligand: A multi conformer OEMol for the ligand, one conformer per frame.
        multi_conf_water: A multi conformer OEMol for the waters, one conformer per frame.

    Raises:
        ValueError: if the trajectory extension is neither ``.h5`` nor ``.trr``,
            if fewer than ``nmax`` waters are found for a frame, or if the
            number of extracted water atoms is not a multiple of 3.
    """

    # Extract protein, ligand, water and excipients from the flask
    # protein, ligand, water, excipients = oeommutils.split(flask, ligand_res_name="LIG")

    set_up_flask, map_dic = md_components.create_flask
    protein = md_components.get_protein
    ligand = md_components.get_ligand

    check_nmax = nmax_waters(protein, ligand, water_cutoff)

    # Only a warning: processing continues, and the per-frame size check
    # inside the first loop below raises if a frame yields fewer than nmax waters.
    if check_nmax < nmax:
        opt['Logger'].warn(
            "The selected number of max waters cannot fit around the protein binding site: {} vs {}"
            .format(nmax, check_nmax))

    void, traj_ext = os.path.splitext(trj_fn)

    traj_dir = os.path.dirname(trj_fn)

    if traj_ext == '.h5':
        trj = md.load_hdf5(trj_fn)

    elif traj_ext == '.trr':
        # NOTE(review): the first glob match is used as topology; an IndexError
        # is raised here when the directory contains no .pdb file.
        pdb_fn = glob.glob(os.path.join(traj_dir, '*.pdb'))[0]
        trj = md.load_trr(trj_fn, top=pdb_fn)
        # The first frame of a .trr trajectory is discarded.
        trj = trj[1:]
    else:
        raise ValueError(
            "Trajectory file format {} not recognized in the trajectory {}".
            format(traj_ext, trj_fn))

    # System topology
    top_trj = trj.topology

    # Ligand indexes
    # lig_idx = top_trj.select("resname LIG")
    lig_idx = map_dic['ligand']

    # Protein indexes
    # prot_idx = top_trj.select("protein")

    # It is safer to use OE toolkits than mdtraj which is missing the protein caps
    prot_idx = map_dic['protein']

    # for at in protein.GetAtoms():
    #     prot_idx.append(at.GetIdx())

    # Water oxygen indexes
    water_O_idx = top_trj.select("water and element O")

    # Protein carbon alpha indexes
    # NOTE(review): the selection is by element, not by atom name CA, so it
    # presumably also matches the backbone carbonyl carbon — confirm against
    # the mdtraj selection reference before relying on "CA only".
    prot_ca_idx = top_trj.select("backbone and element C")

    # Cutoff for the selection of the binding site atoms in A
    cutoff_bs = 5.0

    # Carbon alpha binding site indexes
    # (cutoff divided by 10 to convert from A to the nm used by mdtraj)
    ca_bs_idx = md.compute_neighbors(trj[0],
                                     cutoff_bs / 10.0,
                                     lig_idx,
                                     haystack_indices=prot_ca_idx,
                                     periodic=True)[0]

    # Carbon alpha binding site and ligand indexes
    ca_bs_lig_idx = np.concatenate((ca_bs_idx, lig_idx))

    # Image the protein-ligand trajectory so the complex does not jump across box boundaries
    protlig = trj[0].atom_slice(np.concatenate((prot_idx, lig_idx)))
    protligAtoms = [atom for atom in protlig.topology.atoms]

    # stderr is redirected to the null device while imaging, so anything
    # written there during image_molecules is discarded.
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stderr(devnull):
            trjImaged = trj.image_molecules(inplace=False,
                                            anchor_molecules=[protligAtoms],
                                            make_whole=True)

    # trjImaged = trj.image_molecules(inplace=False, anchor_molecules=[protligAtoms], make_whole=True)

    count = 0
    # One entry per frame: list of (water oxygen index, metric) pairs sorted
    # by increasing metric and truncated to nmax entries.
    water_max_frames = []

    # TODO DEBUG
    # trjImaged = trjImaged[:10]

    # First pass: for every frame pick the nmax waters closest to the ligand
    # and the binding site.
    for frame in trjImaged:
        # print(count, flush=True)

        # Water oxygen binding site indexes
        water_O_bs_idx = md.compute_neighbors(frame,
                                              water_cutoff / 10.0,
                                              ca_bs_lig_idx,
                                              haystack_indices=water_O_idx,
                                              periodic=True)

        # Pair combination water indexes times ligand indexes
        wat_lig_pairs = np.array(np.meshgrid(water_O_bs_idx,
                                             lig_idx)).T.reshape(-1, 2)

        # Distances between the waters and the ligand in nm
        wat_lig_distances = md.compute_distances(frame,
                                                 wat_lig_pairs,
                                                 periodic=True,
                                                 opt=True)

        # Reshape the wat_lig_distances
        # to one row per water and one column per ligand atom
        ns = np.reshape(wat_lig_distances,
                        (len(water_O_bs_idx[0]), len(lig_idx)))

        # Min distances in nm between the oxygen waters and the ligand
        min_wat_O_lig_distances = np.min(ns, axis=1)

        # Pair combination water indexes times protein binding site carbon alpha indexes
        wat_ca_bs_pairs = np.array(np.meshgrid(water_O_bs_idx,
                                               ca_bs_idx)).T.reshape(-1, 2)

        # Distances between the waters and the protein binding site carbon alpha in nm
        wat_ca_bs_distances = md.compute_distances(frame,
                                                   wat_ca_bs_pairs,
                                                   periodic=True,
                                                   opt=True)

        # Reshape the wat_ca_bs_distances
        # to one row per water and one column per binding site atom
        ns = np.reshape(wat_ca_bs_distances,
                        (len(water_O_bs_idx[0]), len(ca_bs_idx)))

        # Min distances in nm between the oxygen waters and the protein binding site carbon alpha
        min_wat_O_ca_bs_distances = np.min(ns, axis=1)

        # Ranking metric: sum of the two minimum distances (smaller is closer)
        metrics = min_wat_O_lig_distances + min_wat_O_ca_bs_distances

        metric_distances = dict()

        for wat_idx, m in zip(water_O_bs_idx[0], metrics):
            metric_distances[int(wat_idx)] = m

        water_list_sorted_max = sorted(metric_distances.items(),
                                       key=lambda x: x[1])[:nmax]

        # Fewer than nmax waters inside the cutoff for this frame is an error.
        if len(water_list_sorted_max) != nmax:
            raise ValueError(
                "The ordered water list has the wrong size {} vs expected {} for the frame {}"
                .format(len(water_list_sorted_max), nmax, count))

        water_max_frames.append(water_list_sorted_max)

        # print(min_wat_O_ca_bs_distances)
        # print(pairs[:len(lig_idx), :])
        # for p,d in zip(wat_ca_bs_pairs, wat_ca_bs_distances[0]):
        #     print(p,d)

        count += 1

    # Put the reference mol xyz into the 1-frame topologyTraj to use as a reference in the fit
    setup_mol_array_coords = oechem.OEDoubleArray(3 *
                                                  set_up_flask.GetMaxAtomIdx())
    set_up_flask.GetCoords(setup_mol_array_coords)

    setup_mol_xyzArr = np.array(setup_mol_array_coords)
    setup_mol_xyzArr.shape = (-1, 3)

    trj_reference = trjImaged[0]
    # convert from angstroms to nanometers
    trj_reference.xyz[0] = setup_mol_xyzArr / 10.0

    # Fitting
    trjImaged.superpose(trj_reference, 0, ca_bs_idx)

    # Delete Original Trajectory to save memory
    del trj

    # Molecule copies
    ligand_reference = oechem.OEMol(ligand)
    protein_reference = oechem.OEMol(protein)

    count = 0

    # Create the multi conformer protein, ligand and water molecules
    # Second pass over the aligned coordinates, one array per frame.
    for frame in trjImaged.xyz:
        # print("Trj Image loop", count, flush=True)

        # Extract coordinates in A
        xyz = frame * 10

        # Set flask Coordinates as the current frame for the water extraction
        flask.SetCoords(xyz.flatten())
        water_list_sorted_max = water_max_frames[count]

        # print(water_list_sorted_max)

        # TODO The following solution to extract the waters do not
        #  keep the water order

        # Mark the close water atoms and extract them
        # NOTE(review): the bit vector is created with nmax * 3 bits but is
        # indexed by flask atom indices below — confirm OEBitVector handles
        # indices beyond its initial size.
        bv = oechem.OEBitVector(nmax * 3)
        # Filled in the loop below but not read afterwards in this function.
        water_idx = []

        for pair in water_list_sorted_max:

            # pair[0] is the water oxygen atom index selected in the first pass
            ow = flask.GetAtom(oechem.OEHasAtomIdx(pair[0]))

            # Select the whole water molecule
            for atw in oechem.OEGetResidueAtoms(ow):
                bv.SetBitOn(atw.GetIdx())
                water_idx.append(atw.GetIdx())

        pred_vec = oechem.OEAtomIdxSelected(bv)
        water_nmax_reference = oechem.OEMol()
        oechem.OESubsetMol(water_nmax_reference, flask, pred_vec)

        # TODO The following solution to extract the waters
        #  keep the water order but is it seems extremely inefficient

        # water_list = []
        # for pair in water_list_sorted_max:
        #     bv = oechem.OEBitVector(3)
        #     water_idx = []
        #     ow = flask.GetAtom(oechem.OEHasAtomIdx(pair[0]))
        #
        #     # Select the whole water molecule
        #     for atw in oechem.OEGetResidueAtoms(ow):
        #         bv.SetBitOn(atw.GetIdx())
        #         water_idx.append(atw.GetIdx())
        #
        #     pred_vec = oechem.OEAtomIdxSelected(bv)
        #     water = oechem.OEMol()
        #     oechem.OESubsetMol(water, flask, pred_vec)
        #
        #     water_list.append(water)
        #
        #
        # # print(len(water_list))
        #
        # water_nmax_reference = oechem.OEMol()

        # for w in water_list:
        #     oechem.OEAddMols(water_nmax_reference, w)

        # ligand and protein conf coordinates
        # (scaled by 10 to convert from nm to A)
        lig_xyz_list = [10 * frame[idx] for idx in lig_idx]
        lig_confxyz = oechem.OEFloatArray(np.array(lig_xyz_list).ravel())

        prot_xyz_list = [10 * frame[idx] for idx in prot_idx]
        prot_confxyz = oechem.OEFloatArray(np.array(prot_xyz_list).ravel())

        # Initialize the protein, ligand and water molecule topologies
        # from the first frame; later frames are attached as new conformers.
        if count == 0:

            multi_conf_water = oechem.OEMol(water_nmax_reference)

            if multi_conf_water.NumAtoms() % 3 != 0:
                raise ValueError("Number of Water atoms is not multiple of 3")

            # Clean ResNumber and Chain on the multi conf water molecule
            # oechem.OEPerceiveResidues(multi_conf_water, oechem.OEPreserveResInfo_All)
            multi_conf_water.SetTitle("Water_" + str(nmax))

            # The residue number is incremented on every third atom, which
            # assumes atoms are grouped as three consecutive atoms per water.
            # NOTE(review): the modified residue object `res` is not written
            # back to the atom in the visible code — confirm whether
            # OEAtomGetResidue returns a copy.
            res_num = 0
            i = 0
            for at in multi_conf_water.GetAtoms():

                res = oechem.OEAtomGetResidue(at)
                res.SetSerialNumber(i)
                res.SetName("HOH")
                res.SetChainID("Z")
                if i % 3 == 0:
                    res_num += 1
                res.SetResidueNumber(res_num)
                i += 1

            ligand_reference.SetCoords(lig_confxyz)
            protein_reference.SetCoords(prot_confxyz)
            multi_conf_ligand = oechem.OEMol(ligand_reference)
            multi_conf_protein = oechem.OEMol(protein_reference)

        # Attach the conformers on the multi conformer protein, ligand and water molecules
        else:
            water_confxyz = oechem.OEFloatArray(
                water_nmax_reference.NumAtoms() * 3)
            water_nmax_reference.GetCoords(water_confxyz)

            multi_conf_water.NewConf(water_confxyz)
            multi_conf_ligand.NewConf(lig_confxyz)
            multi_conf_protein.NewConf(prot_confxyz)

        count += 1

    return multi_conf_protein, multi_conf_ligand, multi_conf_water