Пример #1
0
    def __init__(self, engine="GROMACS", box_length_complex=9, box_length_morph=4, ion_conc=0.154, shell=0,
                 neutralise=True, centre=True, protein_ff=_PC.AMBERDEFAULTPROTEINFF, ligand_ff=_PC.AMBERDEFAULTLIGANDFF,
                 water_ff=_PC.AMBERDEFAULTWATERFF, protein=None, ligand_ref=None, morphs=None, workdir=None):
        """
        Stores the preparation options and the molecular systems.

        Parameters
        ----------
        engine : str
            Stored as the writing option ``self.engine``.
        box_length_complex, box_length_morph : number
            Solvation box lengths stored for the complex and for the morph respectively.
        ion_conc, shell, neutralise, centre
            Remaining solvation options, stored unchanged.
        protein_ff, ligand_ff, water_ff
            Force fields forwarded to the parametrisation ``Params`` object.
        protein : Protein or other
            Used as is when it is already a Protein, otherwise passed to the Protein constructor.
        ligand_ref
            Forwarded to the Protein constructor only when ``protein`` has to be wrapped.
        morphs
            Stored unchanged.
        workdir : str or None
            Work directory. A falsy value means the current directory.
        """
        # work directory (current directory when nothing usable was passed)
        self.workdir = _fileio.Dir(workdir if workdir else ".")

        # writing options
        self.engine = engine

        # solvation options
        self.box_length_complex = box_length_complex
        self.box_length_morph = box_length_morph
        self.ion_conc = ion_conc
        self.shell = shell
        self.neutralise = neutralise
        self.centre = centre

        # parametrisation options
        self.params = _parametrise.Params(
            protein_ff=protein_ff,
            ligand_ff=ligand_ff,
            water_ff=water_ff,
        )

        # molecular systems: only wrap the protein when it is not a Protein already
        self.protein = protein if isinstance(protein, Protein) else Protein(protein, ligand_ref=ligand_ref)
        self.morphs = morphs
        self._complex_template = None
        self.systems_prep = {}
Пример #2
0
 def __init__(self,
              code=None,
              pdb_file=None,
              ligands=None,
              ligand_ref=None,
              fasta_file=None,
              complex_template=None,
              name=None,
              workdir=None):
     """
     Sets up the protein inside its work directory and filters its contents.

     Parameters
     ----------
     code
         Stored as ``self.code``; also used as the name when ``name`` is None.
     pdb_file
         Stored as ``self.pdb``. When falsy, ``complex_template`` is used instead.
     ligands
         Stored as ``self.ligands``.
     ligand_ref
         Stored as ``self.ligand_ref`` after the filtering step.
     fasta_file
         Stored as ``self.fasta``.
     complex_template
         Stored as ``self.complex_template``.
     name
         Name of the protein. None means use ``code``.
     workdir
         Work directory. A falsy value means a directory called after the name.
     """
     self.name = code if name is None else name
     self.workdir = _fileio.Dir(workdir if workdir else self.name)

     # every filter category is requested with the same "all" setting
     filter_options = dict.fromkeys(
         ("missing_residues", "chains", "waters", "simple_anions",
          "complex_anions", "simple_cations", "complex_cations", "ligands",
          "cofactors"),
         "all")

     with self.workdir:
         self.code = code
         self.complex_template = complex_template
         self.pdb = pdb_file if pdb_file else complex_template
         self.fasta = fasta_file
         self.ligands = ligands
         self.cofactors = []
         # remove ligands from PDB file and non-ligands from SDF files
         self.filter(**filter_options)
         # the reference ligand is only assigned once the filtering is done
         self.ligand_ref = ligand_ref
Пример #3
0
    def __init__(self, name, gro_files, top_files, work_dir=None, **lambda_dict):
        """
        Registers the GRO / TOP files for every lambda value.

        Parameters
        ----------
        name : str
            Name of the subdirectory of work_dir used as the work directory.
        gro_files : list or other
            Either a list with one GRO file per lambda value or a single value which is reused for all of them.
        top_files : list or other
            Either a list with one TOP file per lambda value or a single value which is reused for all of them.
        work_dir : str or None
            Parent directory. None means the current working directory.
        lambda_dict
            Keyword arguments holding the lambda arrays. All non-empty arrays must have the same length.

        Raises
        ------
        ValueError
            If the non-empty lambda arrays do not share exactly one length.
        """
        try:
            # unpacking a one-element set enforces a single common non-zero length
            (len_arr,) = {len(values) for values in lambda_dict.values() if len(values)}
        except ValueError:
            raise ValueError("Need lists of the same non-zero size")

        if isinstance(gro_files, list):
            assert len(gro_files) == len_arr, "Number of GRO files does not match number of lambda values"
        else:
            gro_files = [gro_files] * len_arr

        if isinstance(top_files, list):
            assert len(top_files) == len_arr, "Number of TOP files does not match number of lambda values"
        else:
            top_files = [top_files] * len_arr

        parent_dir = _os.getcwd() if work_dir is None else work_dir

        # one file dictionary per lambda value, keyed by file type
        self.files = [{} for _ in range(len_arr)]
        for entry, gro_file, top_file in zip(self.files, gro_files, top_files):
            entry.update(gro=_os.path.abspath(gro_file),
                         top=_os.path.abspath(top_file))
        self._workdir = _fileio.Dir("%s/%s" % (parent_dir, name))
        self.lambda_dict = lambda_dict
        self.protocols = []
        self.mbar_data = []
Пример #4
0
def saveAsGromacs(filebase, system):
    """
    Writes the input object as a GRO / TOP file pair.

    The files are first written inside a temporary directory and then moved
    to the directory exposed by its ``workdirname`` attribute.

    Parameters
    ----------
    filebase : str
        Base of the output filenames.
    system : BioSimSpace.System or parmed.structure.Structure
        Input object to be saved.

    Returns
    -------
    files : [str, str]
        A list containing the absolute paths of the TOP and GRO files, in
        this order.

    Raises
    ------
    TypeError
        If the input object is neither a BioSimSpace nor a ParmEd / Morph
        object.
    """
    gro_name = filebase + ".gro"
    top_name = filebase + ".top"

    tmp_name = _os.path.basename(_tempfile.TemporaryDirectory().name)
    with _fileio.Dir(dirname=tmp_name, temp=True) as tmp_dir:
        if "BioSimSpace" in str(type(system)):
            _BSS.IO.saveMolecules(filebase, system, "Gro87,GroTop")
            # rewrite the coordinates through ParmEd to obtain the .gro file
            _pmd.load_file(filebase + ".gro87").save(gro_name, combine="all")
            to_move = [(gro_name, gro_name), (filebase + ".grotop", top_name)]
        elif isinstance(system, (_pmd.Structure, _PC.Morph.Morph)):
            system.save(gro_name)
            system.save(top_name)
            to_move = [(gro_name, gro_name), (top_name, top_name)]
        else:
            raise TypeError(
                "Passed object to save as GROMACS not recognised. Please pass BioSimSpace or ParmEd "
                "objects only.")

        # (written name, final name) pairs, GRO first and TOP second
        for written, final in to_move:
            _shutil.move(written, _os.path.join(tmp_dir.workdirname, final))

    return [_os.path.abspath(top_name), _os.path.abspath(gro_name)]
Пример #5
0
    def saveSystems(self, systems=None):
        """
        Writes the morph and the complex(es) of every system as GROMACS files.

        Each system is saved in a subdirectory of the work directory which is
        named after the system. A single complex is written as "complex_final",
        several complexes are numbered as "complex_final0", "complex_final1"...

        Parameters
        ----------
        systems : dict or None
            Maps a name to a (morph, complexes) pair. None means use
            self.systems_prep.
        """
        if systems is None:
            systems = self.systems_prep
        # TODO support other engines
        _logging.info("Saving solvated complexes as GROMACS...")
        with self.workdir:
            for name, (morph, complexes) in systems.items():
                with _fileio.Dir(name):
                    _IO.GROMACS.saveAsGromacs("morph", morph)

                    # the index suffix is only used when there are several complexes
                    if len(complexes) == 1:
                        to_save = [("complex_final", complexes[0])]
                    else:
                        to_save = [("complex_final%d" % idx, system)
                                   for idx, system in enumerate(complexes)]

                    for filebase, system in to_save:
                        _IO.GROMACS.saveAsGromacs(filebase, system)
Пример #6
0
    def __init__(self,
                 input,
                 parametrised_files=None,
                 name=None,
                 protonated=False,
                 minimise=None,
                 workdir="."):
        """
        Initialises the ligand from parametrised files, a file, a string or an RDKit molecule.

        Parameters
        ----------
        input : str or rdkit.Chem.rdchem.Mol
            An existing filename, any other string (stored as ``self.string``) or an RDKit molecule. When
            parametrised_files is given, it is passed as the template when reading them.
        parametrised_files
            Parametrised files. When truthy, the molecule is read from them and written to "<name>.pdb".
        name : str or None
            Name of the ligand.
        protonated : bool
            Whether an input file is already protonated. Only consulted when input is an existing file.
        minimise
            Passed on when the molecule is read from a non-protonated input file.
        workdir : str
            Work directory.

        Raises
        ------
        TypeError
            If no parametrised files are given and input is neither a string nor an RDKit molecule.
        """
        self.name = name
        self.workdir = _fileio.Dir(workdir)
        self.minimise = minimise
        # always set protonated to False and if a valid protonated file is given it is automatically set to True
        self.protonated_filename = None

        with self.workdir:
            if parametrised_files:
                self.molecule = _rdkit.openAsRdkit(
                    parametrised_files, removeHs=False, minimise=False, template=input)
                pdb_name = "{}.pdb".format(name)
                self.protonated_filename = _rdkit.saveFromRdkit(self.molecule, pdb_name)
            elif isinstance(input, str):
                if not _os.path.exists(input):
                    # not a file on disk, so keep it as a plain string
                    self.string = input
                elif protonated:
                    self.protonated_filename = input
                else:
                    self.molecule = _rdkit.openAsRdkit(input, minimise=minimise)
            elif isinstance(input, _rdchem.Mol):
                self.molecule = input
            else:
                raise TypeError(
                    "Need a SMILES, InChI string, filename or an RDKit object as an input"
                )
            self.parametrised_files = parametrised_files
            self.minimise = False
Пример #7
0
    def generateMBARData(self, n_cores=None, n_nodes=1, cont=True):
        """
        Manually generates input data for MBAR using gmx mdrun -rerun using N
        topologies and N trajectories generated from N simulations (N squared
        energy evaluations are needed). Doesn't work with constrained
        simulations. For most cases one should instead use the XVG files
        readily generated by GROMACS during the simulation.

        Parameters
        ----------
        n_cores : int or None, optional
            Number of cores used. None runs mdrun directly without MPI.
        n_nodes : int, optional
            Number of nodes used. Only relevant when n_cores is given.
        cont : bool, optional
            If True, an existing energy file is reused as long as the next
            one in the sequence exists as well, i.e. only the last generated
            file (which might be incomplete) and the missing ones are
            recomputed. If False, everything is recomputed.

        Returns
        -------
        mbar_data : [[float]]
            One list per topology containing the concatenated energies read
            from the XVG files of all trajectories. This is the data which is
            to be passed to pymbar.
        """
        # GROMACS might generate a bunch of warnings when we apply a non-dummy Hamiltonian to a trajectory with dummies
        # due to clashes and backup too many structures. Here we suppress this backing up
        env_key = "GMX_SUPPRESS_DUMP"
        previous_value = _os.environ.get(env_key)
        _os.environ[env_key] = "1"
        self.mbar_data = []
        n_states = len(self.files)

        _logging.info("Generating Energy files...")
        try:
            with self._workdir:
                with _fileio.Dir("MBAR"):
                    for i, topology_files in enumerate(self.files):
                        self.mbar_data += [[]]
                        gro, top = topology_files["gro"], topology_files["top"]
                        for j, trajectory_files in enumerate(self.files):
                            run = True
                            trr = trajectory_files["trr"]
                            filebase = "Energy_%d_%d" % (i, j)
                            # we only overwrite the last generated energies, because they might be incomplete
                            if cont:
                                filebase_next = "Energy_%d_%d" % (
                                    i + (j + 1) // n_states,
                                    (j + 1) % n_states)
                                files_current = _glob.glob(
                                    "%s/%s.xvg" % (_os.getcwd(), filebase))
                                files_next = _glob.glob(
                                    "%s/%s.xvg" % (_os.getcwd(), filebase_next))
                                if len(files_current) and len(files_next):
                                    run = False
                            if run:
                                # use the last protocol or create a default one
                                if len(self.protocols):
                                    protocol = self.protocols[-1]
                                else:
                                    protocol = _Protocol.Protocol(
                                        use_preset="default", **self.lambda_dict)
                                protocol.init_lambda_state = i
                                protocol.skip_positions = 0
                                protocol.skip_velocities = 0
                                protocol.skip_forces = 0
                                protocol.write_derivatives = False
                                protocol.random_velocities = False
                                protocol.constraint = "no"
                                # these option names are not valid Python identifiers, hence setattr
                                setattr(protocol, "continuation", "yes")
                                setattr(protocol, "dhdl-print-energy", "potential")
                                setattr(protocol, "calc-lambda-neighbors", "0")

                                mdp = protocol.write("GROMACS", filebase=filebase)
                                tpr = filebase + ".tpr"

                                grompp_command = "%s grompp -maxwarn 10 -f '%s' -c '%s' -p '%s' -o '%s'" % (
                                    _PC.GROMACSEXE, mdp, gro, top, tpr)
                                _runexternal.runExternal(grompp_command,
                                                         procname="gmx grompp")

                                if n_cores is None:
                                    mdrun_command = "%s mdrun -s '%s' -rerun '%s' -deffnm '%s'" % (
                                        _PC.GROMACSEXE, tpr, trr, filebase)
                                else:
                                    ppn = n_cores // n_nodes
                                    mdrun_command = "{0} -np {1} --map-by ppr:{2}:node {3} mdrun -s '{4}' -rerun '{5}' " \
                                                    "-deffnm {6}".format(_PC.MPIEXE, n_cores, ppn, _PC.GROMACSMPIEXE, tpr,
                                                                         trr, filebase)

                                _runexternal.runExternal(mdrun_command,
                                                         procname="gmx mdrun")

                            self.mbar_data[i] += list(
                                _MDAnalysis.auxiliary.XVG.XVGReader(
                                    filebase + ".xvg")._auxdata_values[:, 1])
        finally:
            # restore the original environment variable even if one of the external commands fails
            if previous_value is None:
                _os.environ.pop(env_key, None)
            else:
                _os.environ[env_key] = previous_value

        return self.mbar_data
Пример #8
0
    def runSimulation(self,
                      name,
                      multi=False,
                      multidir=False,
                      single_lambda=None,
                      use_mpi=False,
                      mdrun_mpi=False,
                      gpu_id=None,
                      use_preset=None,
                      replex=None,
                      plumed_file=None,
                      n_cores_per_process=None,
                      n_nodes=1,
                      n_processes=None,
                      dlb=False,
                      gmx_kwargs=None,
                      **protocol_params):
        """
        Runs a simulation in GROMACS.

        Parameters
        ----------
        name : str
            The name of the simulation.
        multi : bool, optional
            Whether to run the simulations in parallel, using -multi.
        multidir : bool, optional
            Whether to run the simulations in parallel, using -multidir. Overrides multi.
        single_lambda : None, int, optional
            An integer (including 0) runs a simulation at a single lambda value and overrides multi, multidir and
            replex. None runs all lambda values.
        use_mpi : bool, optional
            Whether to use ProtoCaller.GROMACSEXE or GROMACSMPIEXE.
        mdrun_mpi : bool, optional
            Whether to call mdrun as "mdrun" or as "mdrun_mpi".
        gpu_id : str, optional
            Which GPU id to be used, if applicable.
        use_preset : str, Protocaller.Protocol.Protocol, None
            Which default preset to use. One of: "minimisation", "equilibration_nvt", "equilibration_npt", "production"
            "vacuum". You can alternatively pass a custom protocol here, in which case **protocol_params will not be
            used.
        replex : int or None, optional
            Attempts replica exchange after replex number of steps using PLUMED. None means no replica exchange.
            Overrides use_mpi and multi if not None.
        plumed_file : str, optional
            If replex is not None, this file is used as a configuration for PLUMED. None means that "plumed.dat" is
            used and created if it doesn't exist.
        n_cores_per_process : int or None, optional
            Number of cores used per process. Default: let GROMACS decide.
        n_nodes : int, optional
            Number of physical nodes used.
        n_processes : int, optional
            Number of processes. Default: 1 for a single lambda, otherwise the number of lambda values.
        dlb : bool
            Whether to enable dynamic load balancing. Default is False due to some possible instabilities with
            gmx_mpi mdrun in some cases.
        gmx_kwargs : dict
            Additional arguments to be passed to mdrun. The keys of the dictionary need to be the name of the option,
            e.g. "cpt" for checkpoint interval, while the values need to be the value of the option if it permits one
            or None if it doesn't. If the values contain "{}" while the user is running the lambda windows in serial,
            this will be replaced by the lambda number. The passed dictionary is not modified.
        protocol_params
            Keyword arguments passed to ProtoCaller.Protocol.Protocol.
        """
        # perform some checks and initialise some default values
        if n_processes is None:
            # explicit None check: lambda index 0 is a valid single lambda but is falsy
            n_processes = 1 if single_lambda is not None else self.lambda_size
        if n_nodes > 1 or multi or multidir:
            use_mpi = True
        if single_lambda is not None:
            replex, multi, multidir = None, False, False
        if replex is not None:
            use_mpi, multi = True, True

        if mdrun_mpi:
            MDRUNEXE = _os.path.dirname(_PC.GROMACSMPIEXE) + "/mdrun_mpi"
        elif use_mpi:
            MDRUNEXE = _PC.GROMACSMPIEXE + " mdrun"
        else:
            MDRUNEXE = _PC.GROMACSEXE + " mdrun"

        if use_mpi or mdrun_mpi:
            base_mdrun_command = f"{_PC.MPIEXE} -np {n_processes} --map-by ppr:{n_processes // n_nodes}:node {MDRUNEXE}"
        else:
            base_mdrun_command = MDRUNEXE

        # work on a copy so that the dictionary of the caller is left untouched
        gmx_kwargs = {} if gmx_kwargs is None else dict(gmx_kwargs)
        gmx_kwargs["dlb"] = "yes" if dlb else "no"
        if n_cores_per_process is not None:
            gmx_kwargs["ntomp"] = n_cores_per_process
        if gpu_id is not None:
            gmx_kwargs["gpu_id"] = gpu_id

        # this is due to incompatibility between GROMACS 2019+ and multi
        multidir = True if self._gmx_version(
            _PC.GROMACSMPIEXE) >= 2019 and multi else multidir

        # initialise the protocol
        if isinstance(use_preset, _Protocol.Protocol):
            protocol = use_preset
        else:
            protocol = _Protocol.Protocol(use_preset=use_preset,
                                          **protocol_params,
                                          **self.lambda_dict)
        self.protocols += [protocol]

        # run the simulations
        _logging.info(f"Running {name}...")
        with self._workdir:
            with _fileio.Dir(name, overwrite=False):
                # run single lambda if needed
                if single_lambda is not None:
                    it = [single_lambda]
                else:
                    it = range(self.lambda_size)

                # call grompp for each lambda
                for i in it:
                    filebase = name if multidir else f"{name}_{i}"
                    dirname = f"Lambda_{i}" if multidir else "."
                    with _fileio.Dir(dirname):
                        protocol.init_lambda_state = i
                        self.files[i]["mdp"] = _os.path.abspath(
                            protocol.write(engine="GROMACS",
                                           filebase=filebase))
                        # grompp option -> file type; only the file types we have are passed
                        grompp_args = {
                            "-f": "mdp",
                            "-c": "gro",
                            "-p": "top",
                            "-t": "cpt",
                        }

                        grompp_command = f"{_PC.GROMACSEXE} grompp -maxwarn 10 -o {filebase}.tpr"
                        for grompp_arg, filetype in grompp_args.items():
                            if filetype in self.files[i]:
                                grompp_command += f" {grompp_arg} '{self.files[i][filetype]}'"
                        _runexternal.runExternal(grompp_command,
                                                 procname="gmx grompp")

                # call the simulations consecutively for each lambda if multi is not specified
                if not (multi or multidir):
                    for i in it:
                        filebase = f"{name}_{i}"
                        gmx_kwargs["s"] = f"'{filebase}.tpr'"
                        gmx_kwargs["deffnm"] = f"'{filebase}'"

                        # run GROMACS
                        mdrun_command = self._dict_to_arguments(
                            base_mdrun_command, gmx_kwargs, i)
                        _runexternal.runExternal(mdrun_command,
                                                 procname="gmx mdrun")

                        # update files after the run
                        self._update_files(i, filebase)
                # alternatively, run all simulations in parallel
                else:
                    filebase = name if multidir else name + "_"
                    gmx_kwargs["s"] = f"'{filebase}.tpr'"
                    gmx_kwargs["deffnm"] = f"'{filebase}'"
                    if multidir:
                        gmx_kwargs["multidir"] = " ".join(f"Lambda_{i}"
                                                          for i in it)
                    else:
                        gmx_kwargs["multi"] = f"{self.lambda_size}"

                    # run replica exchange with PLUMED if needed
                    if replex is not None:
                        if plumed_file is None:
                            plumed_file = "plumed.dat"
                            # make sure the default PLUMED file exists without truncating it
                            open(plumed_file, "a").close()
                        gmx_kwargs["plumed"] = f"'{plumed_file}'"
                        gmx_kwargs["replex"] = replex
                        gmx_kwargs["hrex"] = None

                    # run GROMACS
                    # NOTE(review): i is the last index left over from the grompp loop above - confirm that
                    # _dict_to_arguments is meant to receive it in the parallel case
                    mdrun_command = self._dict_to_arguments(
                        base_mdrun_command, gmx_kwargs, i)
                    _runexternal.runExternal(mdrun_command,
                                             procname="gmx mdrun")

                    # update files after the run
                    for i in it:
                        filebase = name if multidir else f"{name}_{i}"
                        dirname = f"Lambda_{i}" if multidir else "."
                        with _fileio.Dir(dirname):
                            self._update_files(i, filebase)
Пример #9
0
    def runSimulation(self, name, multi=False, single_lambda=None, use_mpi=False, use_preset=None, replex=None,
                      n_cores_per_process=None, n_nodes=1, n_processes=None, dlb=False, **protocol_params):
        """
        Runs a simulation in GROMACS.

        Parameters
        ----------
        name : str
            The name of the simulation.
        multi : bool, optional
            Whether to run the simulations in parallel, using -multi.
        single_lambda : None, int, optional
            An integer (including 0) runs a simulation at a single lambda value and overrides multi and replex.
            None runs all lambda values.
        use_mpi : bool, optional
            Whether to use ProtoCaller.GROMACSEXE or GROMACSMPIEXE.
        use_preset : str, None
            Which default preset to use. One of: "minimisation", "equilibration_nvt", "equilibration_npt", "production"
            "vacuum".
        replex : int or None, optional
            Attempts replica exchange after replex number of steps using PLUMED. None means no replica exchange.
            Overrides use_mpi and multi if not None.
        n_cores_per_process : int or None, optional
            Number of cores used per process. Default: let GROMACS decide.
        n_nodes : int, optional
            Number of physical nodes used.
        n_processes : int, optional
            Number of processes. Default: 1 for a single lambda, otherwise the number of lambda values.
        dlb : bool
            Whether to enable dynamic load balancing. Default is False due to some possible instabilities with
            gmx_mpi mdrun in some cases.
        protocol_params
            Keyword arguments passed to ProtoCaller.Protocol.Protocol.
        """
        def _collect_outputs(index, base):
            # keep only the topology and register every other output file, except the TPR, under its extension
            self.files[index] = {"top": self.files[index]["top"]}
            for output_file in _glob.glob("%s.*" % base):
                ext = output_file.split(".")[-1].lower()
                if ext != "tpr":
                    self.files[index][ext] = _os.path.abspath(output_file)

        if n_processes is None:
            # explicit None check: lambda index 0 is a valid single lambda but is falsy
            n_processes = 1 if single_lambda is not None else self.lambda_size
        ppn = n_processes // n_nodes
        if n_nodes > 1:
            use_mpi = True
        if single_lambda is not None:
            replex = None
            multi = False
        if replex is not None:
            multi = True
            use_mpi = True
        dlb = "yes" if dlb else "no"

        protocol = _Protocol.Protocol(use_preset=use_preset, **protocol_params, **self.lambda_dict)
        self.protocols += [protocol]

        _logging.info("Running %s..." % name)
        with self._workdir:
            with _fileio.Dir(name, overwrite=True):
                # run single lambda if needed
                if single_lambda is not None:
                    it = [single_lambda]
                else:
                    it = range(self.lambda_size)

                # call grompp for every lambda
                for i in it:
                    filebase = "%s_%d" % (name, i)
                    protocol.init_lambda_state = i
                    self.files[i]["mdp"] = _os.path.abspath(protocol.write(engine="GROMACS", filebase=filebase))
                    # grompp option -> file type; only the file types we have are passed
                    grompp_args = {
                        "-f": "mdp",
                        "-c": "gro",
                        "-p": "top",
                        "-t": "cpt",
                    }

                    grompp_command = "%s grompp -maxwarn 10 -o %s.tpr" % (_PC.GROMACSEXE, filebase)
                    for grompp_arg, filetype in grompp_args.items():
                        if filetype in self.files[i]:
                            grompp_command += " %s '%s'" % (grompp_arg, self.files[i][filetype])
                    _runexternal.runExternal(grompp_command, procname="gmx grompp")

                    # call the simulation consecutively for every lambda if multi is not specified
                    if not multi:
                        if not use_mpi:
                            mdrun_command = "{0} mdrun -s '{1}.tpr' -deffnm {1} -dlb {2}".format(
                                _PC.GROMACSEXE, filebase, dlb)
                        else:
                            mdrun_command = "{0} -np {1} --map-by ppr:{2}:node {3} " \
                                            "mdrun -s '{4}.tpr' -deffnm {4} -dlb {5}".format(
                                _PC.MPIEXE, n_processes, ppn, _PC.GROMACSMPIEXE, filebase, dlb)

                        if n_cores_per_process:
                            mdrun_command += " -ntomp {}".format(n_cores_per_process)

                        _runexternal.runExternal(mdrun_command, procname="gmx mdrun")

                        # update files after the run
                        _collect_outputs(i, filebase)

                # alternatively, run all simulations in parallel
                if multi:
                    filebase = name + "_"
                    mdrun_command = "{0} -np {1} --map-by ppr:{2}:node {3} mdrun -multi {4} -s {5}.tpr " \
                                    "-deffnm {5} -dlb {6}".format(
                        _PC.MPIEXE, n_processes, ppn, _PC.GROMACSMPIEXE, self.lambda_size, filebase, dlb)

                    # run replica exchange with PLUMED if needed
                    if replex is not None:
                        # make sure an (empty) PLUMED file exists without truncating an existing one
                        open("plumed.dat", "a").close()
                        mdrun_command += " -plumed plumed.dat -replex {} -hrex".format(replex)

                    if n_cores_per_process:
                        mdrun_command += " -ntomp {}".format(n_cores_per_process)

                    _runexternal.runExternal(mdrun_command, procname="gmx mdrun")

                    # update files after the run
                    for i in range(self.lambda_size):
                        _collect_outputs(i, "%s_%d" % (name, i))
Пример #10
0
    def prepareComplexes(self,
                         replica_temps=None,
                         scale_dummy_bonds=1,
                         dummy_bond_smarts="[*]~[*]",
                         intermediate_files=False,
                         store_complexes=False,
                         output_files=True):
        """
        Batch prepares all complexes with an option to output files for REST(2).

        For every morph the ligands are aligned and merged, the result is combined with the complex template of the
        protein, both the complex and the morph alone are solvated and, if several replica temperatures are given,
        one rescaled copy of the solvated complex is created per temperature.

        Parameters
        ----------
        replica_temps : [float] or None
            A list of replica temperatures. Everything is normalised with respect to the lowest temperature. None
            (or a single temperature) means only output the normal files.
        scale_dummy_bonds : float
            Sets the dummy bond length distance as a fraction of the real bond length distance. 1 means no rescaling.
        dummy_bond_smarts : str
            SMARTS string which indicates which dummy bonds are to be affected by scale_dummy_bonds.
        intermediate_files : bool
            Whether to store all intermediate files. If True, the work is done in a directory named after the morph,
            otherwise in a temporary directory.
        store_complexes : bool
            Whether to store the final systems in self.systems_prep as (solvated morph, complexes) pairs keyed by
            the morph name.
        output_files : bool
            Whether to write output files immediately or later via saveSystems.
        """
        # make sure the proteins / ligands are parametrised before proceeding
        with self.workdir:
            self.protein.parametrise(params=self.params, reparametrise=False)
            for morph in self.morphs:
                morph.ligand1.parametrise(params=self.params,
                                          reparametrise=False)
                morph.ligand2.parametrise(params=self.params,
                                          reparametrise=False)

            # take care of the replicas if there are any
            if replica_temps is None or len(replica_temps) == 1:
                scales = [1]
            else:
                sorted_list = sorted(replica_temps)
                if sorted_list != replica_temps:
                    _warnings.warn(
                        "Input replica temperatures were not in ascending order. Sorting..."
                    )
                    replica_temps = sorted_list
                # scaling factors relative to the lowest temperature, so the first one is always 1
                scales = [replica_temps[0] / elem for elem in replica_temps]

            for i, morph in enumerate(self.morphs):
                # named directory if the intermediate files are kept, otherwise a randomly named temporary one
                if intermediate_files:
                    curdir = _fileio.Dir(morph.name, overwrite=True)
                else:
                    name = _os.path.basename(
                        _tempfile.TemporaryDirectory().name)
                    curdir = _fileio.Dir(name, overwrite=True, temp=True)

                with curdir:
                    _logging.info("Creating morph %s..." % morph.name)
                    morph_BSS, mcs = morph.alignAndCreateMorph(
                        self.protein.ligand_ref)
                    morph_BSS = _BSS._SireWrappers.System(morph_BSS)
                    # copy the "space" property of the complex template onto the morph system
                    box = self.protein.complex_template._sire_object.property(
                        "space")
                    morph_BSS._sire_object.setProperty("space", box)

                    # here we scale the equilibrium bond lengths if needed
                    if scale_dummy_bonds != 1:
                        n1 = morph.ligand1.molecule.GetNumAtoms()
                        n2 = morph.ligand2.molecule.GetNumAtoms()
                        # presumably mcs holds (ligand1 index, ligand2 index) pairs, so inv_map goes from
                        # ligand2 indices to ligand1 indices - TODO confirm against alignAndCreateMorph
                        inv_map = {y: x for x, y in mcs}
                        # ligand2 atoms which are not part of the mapping
                        du2 = [i for i in range(n2) if i not in inv_map.keys()]
                        # unmapped ligand2 atoms get indices starting from n1; existing entries take precedence
                        inv_map = {
                            **{x: n1 + y
                               for x, y in zip(du2, range(n2 - n1))},
                            **inv_map
                        }
                        mcs_smarts = _Chem.MolFromSmarts(dummy_bond_smarts)
                        matches_lig1 = morph.ligand1.molecule.\
                            GetSubstructMatches(mcs_smarts)
                        matches_lig2 = morph.ligand2.molecule.\
                            GetSubstructMatches(mcs_smarts)

                        # here we take care of the index transformation
                        # (matches containing an atom without an entry in inv_map are dropped)
                        matches_lig2 = [(inv_map[x[0]], inv_map[x[1]])
                                        for x in matches_lig2
                                        if set(x).issubset(inv_map.keys())]
                        matches_total = [*matches_lig1, *matches_lig2]
                        morph_BSS = _BSSwrap.rescaleBondedDummies(
                            morph_BSS, scale_dummy_bonds,
                            {"Merged_Molecule": matches_total})

                    complexes = [self.protein.complex_template + morph_BSS]

                    # solvate and save the prepared complex and morph with the appropriate box size
                    _logging.info("Solvating...")
                    complexes = [
                        _solvate.solvate(complexes[0],
                                         self.params,
                                         box_length=self.box_length_complex,
                                         shell=self.shell,
                                         neutralise=self.neutralise,
                                         ion_conc=self.ion_conc,
                                         centre=self.centre,
                                         work_dir=curdir.path,
                                         filebase="complex")
                    ]
                    morph_sol = _solvate.solvate(
                        morph_BSS,
                        self.params,
                        box_length=self.box_length_morph,
                        shell=self.shell,
                        neutralise=self.neutralise,
                        ion_conc=self.ion_conc,
                        centre=self.centre,
                        work_dir=curdir.path,
                        filebase="morph")

                    # rescale complexes for replica exchange if needed
                    # (one rescaled copy of the solvated complex per scaling factor)
                    if len(scales) != 1:
                        _logging.info("Creating replicas...")
                        complexes = [
                            _BSSwrap.rescaleSystemParams(
                                complexes[0],
                                scale,
                                includelist=["Merged_Molecule"])
                            for scale in scales
                        ]

                    if store_complexes:
                        self.systems_prep[morph.name] = (morph_sol, complexes)

                # the files are written after leaving curdir but still inside self.workdir
                if output_files:
                    self.saveSystems({morph.name: (morph_sol, complexes)})
Пример #11
0
def solvate(complex,
            params=None,
            box_length=8,
            shell=0,
            neutralise=True,
            ion_conc=0.154,
            centre=True,
            work_dir=None,
            filebase="complex"):
    """
    Uses gmx solvate and gmx genion to solvate the system and (optionally) add NaCl ions. This function preserves the
    crystal water molecules.

    Parameters
    ----------
    complex : BioSimSpace.System, BioSimSpace.Molecule or parmed.structure.Structure
        The input unsolvated system. A BioSimSpace molecule is converted to a system first.
    params : ProtoCaller.Parametrise.Params
        The input force field parameters. Default: a default-constructed Params object.
    box_length : float, iterable
        Size of the box in nm. A single number is expanded to a cubic box.
    shell : float
        Places a layer of water of the specified thickness in nm around the solute.
    neutralise : bool
        Whether to add counterions to neutralise the system.
    ion_conc : float
        Ion concentration of NaCl in mol/L.
    centre : bool
        Whether to centre the system.
    work_dir : str
        Work directory. Default: a uniquely named temporary directory.
    filebase : str
        Output base name of the file.

    Returns
    -------
    complex : BioSimSpace.System or parmed.structure.Structure
        The solvated system, i.e. the input system plus the added waters (and ions, if any were requested).

    Raises
    ------
    TypeError
        If the input is neither a ParmEd structure nor a BioSimSpace molecule / system.
    """
    if params is None:
        params = _parametrise.Params()

    # pick the backend-specific helpers depending on the type of the input
    if isinstance(complex, _pmd.Structure):
        centrefunc = _pmdwrap.centre
        chargefunc = lambda x: round(sum([atom.charge for atom in x.atoms]))
        readfunc = _pmdwrap.openFilesAsParmed
    elif _PC.BIOSIMSPACE and isinstance(complex,
                                        (_BSS._SireWrappers._molecule.Molecule,
                                         _BSS._SireWrappers._system.System)):
        if isinstance(complex, _BSS._SireWrappers._molecule.Molecule):
            complex = complex.toSystem()
        centrefunc = _BSSwrap.centre
        chargefunc = lambda x: round(x.charge().magnitude())
        readfunc = _BSS.IO.readMolecules
    else:
        raise TypeError("Cannot solvate object of type %s" % type(complex))

    # a scalar box length means a cubic box
    if not isinstance(box_length, _Iterable):
        box_length = 3 * [box_length]

    if work_dir is None:
        # only the unique name generated by tempfile is used here; the directory itself is handled by _fileio.Dir
        work_dir = _os.path.basename(_tempfile.TemporaryDirectory().name)
        temp = True
    else:
        temp = False

    # ions are added (and waters are named SOL for GROMACS) if any of these options is set
    add_ions = any([neutralise, ion_conc, shell])

    with _fileio.Dir(dirname=work_dir, temp=temp):
        # centre
        if centre:
            complex, box_length, _ = centrefunc(complex, box_length)

        # solvate with gmx solvate and load unparametrised waters into memory
        files = _PC.IO.GROMACS.saveAsGromacs(filebase, complex)
        # reloading the complex fixes some problems with ParmEd
        if isinstance(complex, _pmd.Structure):
            complex = _pmdwrap.openFilesAsParmed(files)
        new_gro = filebase + "_solvated.gro"
        command = "{0} solvate -shell {1} -box {2[0]} {2[1]} {2[2]} -cp \"{3}\" -o \"{4}\"".format(
            _PC.GROMACSEXE, shell, box_length, files[1], new_gro)
        if params.water_points == 4:
            command += " -cs tip4p.gro"
        _runexternal.runExternal(command, procname="gmx solvate")
        complex_solvated = _pmd.load_file(new_gro, skip_bonds=True)
        waters = complex_solvated[":SOL"]

        # prepare waters for tleap and parametrise
        for residue in waters.residues:
            residue.name = "WAT"
        for atom in waters.atoms:
            if "H" in atom.name:
                # keep the first and the third character, e.g. "HW1" -> "H1"
                atom.name = atom.name[0] + atom.name[2]
            elif "O" in atom.name:
                atom.name = "O"
            else:
                # anything that is neither H nor O is named as an extra point
                atom.name = "EPW"

        # here we only parametrise a single water molecule in order to gain performance
        waters_prep_filenames = [
            filebase + "_waters.top", filebase + "_waters.gro"
        ]
        waters[":1"].save(filebase + "_single_wat.pdb")
        water = _parametrise.parametriseAndLoadPmd(
            params, filebase + "_single_wat.pdb", "water")
        _pmdwrap.saveFilesFromParmed(waters, [waters_prep_filenames[1]],
                                     combine="all")
        _pmdwrap.saveFilesFromParmed(water, [waters_prep_filenames[0]])
        # patch the molecule count of the single-water topology so that it covers all waters
        for line in _fileinput.input(waters_prep_filenames[0], inplace=True):
            line_new = line.split()
            if len(line_new) == 2 and line_new == ["WAT", "1"]:
                line = line.replace(
                    "1",
                    "{}".format(len(waters.positions) // params.water_points))
            print(line, end="")
        waters_prep = _pmdwrap.openFilesAsParmed(waters_prep_filenames)

        waters_prep.box = _pmd.load_file(files[1], skip_bonds=True).box
        if add_ions:
            for residue in waters_prep.residues:
                residue.name = "SOL"
        _pmdwrap.saveFilesFromParmed(waters_prep, waters_prep_filenames)

        # without ions the solvated system is simply the input plus the parametrised waters
        if not add_ions:
            return complex + readfunc(waters_prep_filenames)

        # add ions
        # write an MDP file
        _protocol.Protocol(use_preset="default").write("GROMACS", "ions")

        # neutralise if needed
        charge = chargefunc(complex) if neutralise else 0
        # box lengths are in nm and 1 nm^3 = 10^-24 L
        volume = box_length[0] * box_length[1] * box_length[2] * 10**-24
        n_Na, n_Cl = [
            int(volume * 6.022 * 10**23 * ion_conc) - abs(charge) // 2
        ] * 2
        if neutralise:
            if charge < 0:
                n_Na -= charge
            else:
                n_Cl += charge

        # at low ion concentration and high absolute charge one of the counts can become negative;
        # shifting both counts by the same amount keeps the net ionic charge and makes them valid
        deficit = -min(n_Na, n_Cl)
        if deficit > 0:
            n_Na += deficit
            n_Cl += deficit

        # add ions with gmx genion
        ions_prep_filenames = [
            filebase + "_ions.top", filebase + "_ions.gro"
        ]
        command = "{0} grompp -f ions.mdp -p {1} -c {2} -o \"{3}_solvated.tpr\"".format(
            _PC.GROMACSEXE, *waters_prep_filenames, filebase)
        _runexternal.runExternal(command, procname="gmx grompp")

        # "echo 2" answers the interactive group prompt of genion (presumably the solvent group -- confirm)
        command = "{{ echo 2; }} | {0} genion -s \"{1}_solvated.tpr\" -o \"{2}\" -nn {3} -np {4}".format(
            _PC.GROMACSEXE, filebase, ions_prep_filenames[1], n_Cl, n_Na)
        _runexternal.runExternal(command, procname="gmx genion")

        # prepare waters for tleap
        ions = _pmd.load_file(ions_prep_filenames[1], skip_bonds=True)
        for residue in ions.residues:
            if residue.name == "SOL":
                residue.name = "WAT"

        # here we only parametrise single ions to gain performance
        ion = ions[":WAT"][":1"] + ions[":NA"][":1"] + ions[":CL"][":1"]
        max_len = len(ion.residues)
        ion.save(filebase + "_single_ion.pdb")
        ion = _parametrise.parametriseAndLoadPmd(
            params, filebase + "_single_ion.pdb", "water")
        _pmdwrap.saveFilesFromParmed(ions, [ions_prep_filenames[1]],
                                     combine="all")
        _pmdwrap.saveFilesFromParmed(ion, [ions_prep_filenames[0]])
        # patch the molecule counts of the single-molecule topology; the matching lines are buffered
        # until all of them have been seen and are then written out in a fixed order
        mol_dict = {}
        for line in _fileinput.input(ions_prep_filenames[0], inplace=True):
            line_new = line.split()
            if len(line_new) == 2 and line_new[0] in [
                    "WAT", "NA", "CL"
            ] and line_new[1] == "1":
                n_mols = len(ions[":{}".format(line_new[0])].positions)
                if line_new[0] == "WAT":
                    n_mols //= params.water_points
                line = line.replace("1", "{}".format(n_mols))
                mol_dict[line_new[0]] = line
                # preserve the order of water, sodium and chloride
                if len(mol_dict) == max_len:
                    for x in ["WAT", "NA", "CL"]:
                        if x in mol_dict:
                            print(mol_dict[x], end="")
                    mol_dict = {}
            else:
                print(line, end="")

        return complex + readfunc(ions_prep_filenames)