Пример #1
0
    def test_chemical_environments_matches_RDK(self):
        """Exercise Topology.chemical_environment_matches with the RDKit wrapper.

        A topology is built from the cyclohexane/ethanol packmol box and three
        SMARTS queries are checked for their number of hits and first match.
        """
        from simtk.openmm import app

        rdkit_wrapper = RDKitToolkitWrapper()
        box_path = get_data_file_path(
            "systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb")
        pdbfile = app.PDBFile(box_path)

        # Reference molecules are built from SMILES: ethanol, then cyclohexane.
        reference_smiles = ("CCO", "C1CCCCC1")
        molecules = [Molecule.from_smiles(smi) for smi in reference_smiles]
        topology = Topology.from_openmm(pdbfile.topology,
                                        unique_molecules=molecules)

        def find(smarts):
            # Every query in this test is routed through the same wrapper.
            return topology.chemical_environment_matches(
                smarts, toolkit_registry=rdkit_wrapper)

        # Tagged C-C-O substructure.
        cco_hits = find("[C:1]-[C:2]-[O:3]")
        assert len(cco_hits) == 143
        assert cco_hits[0].topology_atom_indices == (1728, 1729, 1730)

        # Same tagged atoms, but with every hydrogen spelled out.
        full_hits = find("[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]")
        # 143 * 12 (there are 12 possible hydrogen mappings)
        assert len(full_hits) == 1716
        assert full_hits[0].topology_atom_indices == (1728, 1729, 1730)

        # A substructure that isn't in the box must yield no matches.
        assert len(find("[C][C:1]-[C:2]-[O:3]")) == 0
Пример #2
0
    def serialise_system(self):
        """Parametrise the molecule with smirnoff99Frosst and write the OpenMM system as XML.

        Reads ``<self.molecule.name>.pdb`` from the current directory and writes
        the serialised system to ``serialised.xml``.
        """

        pdb_path = f'{self.molecule.name}.pdb'

        # The PDB supplies the OpenMM topology ...
        pdb_file = app.PDBFile(pdb_path)

        # ... while the connectivity comes from a SMILES string derived from
        # the same file through the RDKit helper.
        smiles = RDKit().get_smiles(pdb_path)
        molecule = Molecule.from_smiles(smiles)

        # Combine both into an openforcefield topology.
        off_topology = Topology.from_openmm(pdb_file.topology,
                                            unique_molecules=[molecule])

        # Apply the smirnoff99Frosst force field to obtain an OpenMM System.
        forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
        system = forcefield.create_openmm_system(off_topology)

        # Write the serialised system out.
        with open('serialised.xml', 'w+') as xml_out:
            xml_out.write(XmlSerializer.serializeSystem(system))
Пример #3
0
 def test_chemical_environments_matches_OE(self):
     """Exercise Topology.chemical_environment_matches with the OpenEye wrapper.

     A topology is built from the cyclohexane/ethanol packmol box and three
     SMARTS queries are checked for their number of hits and first match.
     """
     from simtk.openmm import app

     oe_wrapper = OpenEyeToolkitWrapper()
     box_path = get_data_file_path(
         'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
     pdbfile = app.PDBFile(box_path)

     # Reference molecules are read from mol2: ethanol, then cyclohexane.
     mol2_names = ('molecules/ethanol.mol2', 'molecules/cyclohexane.mol2')
     molecules = [
         Molecule.from_file(get_data_file_path(name)) for name in mol2_names
     ]
     topology = Topology.from_openmm(pdbfile.topology,
                                     unique_molecules=molecules)

     def find(smarts):
         # Every query in this test is routed through the same wrapper.
         return topology.chemical_environment_matches(
             smarts, toolkit_registry=oe_wrapper)

     def first_match_indices(hits):
         # Topology atom indices of the first match, as a tuple.
         return tuple(atom.topology_atom_index for atom in hits[0])

     # Substructure match.
     substructure_hits = find("[C:1]-[C:2]-[O:3]")
     assert len(substructure_hits) == 143
     assert first_match_indices(substructure_hits) == (1728, 1729, 1730)

     # Whole-molecule match.
     molecule_hits = find("[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]")
     # 143 * 12 (there are 12 possible hydrogen mappings)
     assert len(molecule_hits) == 1716
     assert first_match_indices(molecule_hits) == (1728, 1729, 1730)

     # A substructure that isn't in the box must yield no matches.
     assert len(find("[C][C:1]-[C:2]-[O:3]")) == 0
Пример #4
0
    def _execute(self, directory, available_resources):
        """Create an OpenMM system from a SMIRNOFF force field and store it as XML.

        The topology is taken from ``self.coordinate_file_path`` and matched
        against one molecule per component of ``self.substance``. The
        serialized system is written to ``<directory>/system.xml`` and that
        path is stored on ``self.system_path``.

        ``available_resources`` is not used by this method.

        Raises
        ------
        ValueError
            If the force field source is not a ``SmirnoffForceFieldSource`` or
            a component yields ``None`` instead of a molecule.
        RuntimeError
            If the force field returns ``None`` instead of a system.
        """

        from openforcefield.topology import Molecule, Topology

        pdb_file = app.PDBFile(self.coordinate_file_path)

        ff_source = ForceFieldSource.from_json(self.force_field_path)

        if not isinstance(ff_source, SmirnoffForceFieldSource):
            raise ValueError(
                "Only SMIRNOFF force fields are supported by this protocol.")

        force_field = ff_source.to_force_field()

        # Start from the known charged molecules when requested ...
        if self.apply_known_charges:
            charged_molecules = self._generate_known_charged_molecules()
        else:
            charged_molecules = []

        # ... and add any additional, user specified charged molecules.
        for mol2_path in self.charged_molecule_paths:
            charged_molecules.append(Molecule.from_file(mol2_path, "MOL2"))

        # One reference molecule per substance component.
        unique_molecules = []

        for component in self.substance.components:

            reference = Molecule.from_smiles(smiles=component.smiles)

            if reference is None:
                raise ValueError(
                    f"{component} could not be converted to a Molecule")

            unique_molecules.append(reference)

        topology = Topology.from_openmm(pdb_file.topology,
                                        unique_molecules=unique_molecules)

        # Only pass ``charge_from_molecules`` when there is something to pass.
        extra_arguments = {}

        if len(charged_molecules) > 0:
            extra_arguments["charge_from_molecules"] = charged_molecules

        system = force_field.create_openmm_system(topology, **extra_arguments)

        if system is None:

            raise RuntimeError(
                "Failed to create a system from the specified topology and molecules."
            )

        self.system_path = os.path.join(directory, "system.xml")

        with open(self.system_path, "w") as xml_file:
            xml_file.write(openmm.XmlSerializer.serialize(system))
Пример #5
0
    def test_to_from_openmm(self):
        """Round-trip an OpenFF Topology through OpenMM and compare it with the original."""
        from simtk.openmm.app import Aromatic

        # Source topology: one ethanol followed by two benzenes.
        ethanol = Molecule.from_smiles("CCO")
        benzene = Molecule.from_smiles("c1ccccc1")
        original = Topology.from_molecules(
            molecules=[ethanol, benzene, benzene])

        # OpenFF -> OpenMM.
        omm_topology = original.to_openmm()

        # Bond orders and aromatic bond types must survive the conversion.
        n_double_bonds = sum(
            bond.order == 2 for bond in omm_topology.bonds())
        n_aromatic_bonds = sum(
            bond.type is Aromatic for bond in omm_topology.bonds())
        assert n_double_bonds == 6
        assert n_aromatic_bonds == 12

        # Each molecule gets its own residue and its own chain.
        assert omm_topology.getNumResidues() == 3
        assert omm_topology.getNumChains() == 3

        # OpenMM -> OpenFF.
        round_trip = Topology.from_openmm(
            omm_topology, unique_molecules=[ethanol, benzene])

        # Same reference molecules, in the same order.
        assert (original.n_reference_molecules ==
                round_trip.n_reference_molecules)
        round_trip_references = list(round_trip.reference_molecules)
        for position, reference in enumerate(original.reference_molecules):
            assert reference == round_trip_references[position]

        # Same number of topology molecules.
        assert (original.n_topology_molecules ==
                round_trip.n_topology_molecules)

        # Same atoms, compared by atomic number.
        assert original.n_topology_atoms == round_trip.n_topology_atoms
        for atom_index, atom in enumerate(original.topology_atoms):
            assert (atom.atomic_number ==
                    round_trip.atom(atom_index).atomic_number)

        # Same bonds: element pairs, bond order and aromaticity.
        for bond_index, bond in enumerate(original.topology_bonds):
            other_bond = round_trip.bond(bond_index)
            elements = [a.atomic_number for a in bond.atoms]
            other_elements = [a.atomic_number for a in other_bond.atoms]
            assert elements == other_elements
            assert bond.bond_order == other_bond.bond_order
            assert bond.bond.is_aromatic == other_bond.bond.is_aromatic
Пример #6
0
def make_off_system(mol, ID):
    """Parameterize a drug molecule and return the resulting OpenMM system.

    ``mol`` is converted to SMILES via ``Chem.MolToSmiles`` to build the
    reference molecule; the topology is read from the aligned PDB file whose
    name is derived from ``ID``. The module-level ``ff`` force field is used
    for parameterization.
    """
    pdb_path = './processed_data/aligned_drugs/drug_' + str(ID) + '.pdb'
    drug_pdbfile = PDBFile(pdb_path)

    # Reference molecule for matching against the PDB topology.
    drug_smiles = Chem.MolToSmiles(mol)
    drug_mol = Molecule.from_smiles(drug_smiles)

    off_topology = Topology.from_openmm(openmm_topology=drug_pdbfile.topology,
                                        unique_molecules=[drug_mol])

    # The actual parameterizing step.
    return ff.create_openmm_system(off_topology)
Пример #7
0
    def test_from_openmm(self):
        """Build an openforcefield Topology from an OpenMM Topology plus its component molecules."""
        from simtk.openmm import app

        box_path = get_data_file_path(
            'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
        pdbfile = app.PDBFile(box_path)

        # Reference molecules: ethanol first, then cyclohexane.
        ethanol = create_ethanol()
        cyclohexane = create_cyclohexane()

        topology = Topology.from_openmm(
            pdbfile.topology, unique_molecules=[ethanol, cyclohexane])

        # Two distinct species make up the 239 molecules in the box.
        assert topology.n_reference_molecules == 2
        assert topology.n_topology_molecules == 239
Пример #8
0
    def test_from_openmm_missing_reference(self):
        """Topology.from_openmm must raise a ValueError when a unique molecule is missing."""
        from simtk.openmm import app

        box_path = get_data_file_path(
            'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
        pdbfile = app.PDBFile(box_path)

        # Cyclohexane is deliberately left out of the reference molecules.
        molecules = [create_ethanol()]

        expected_message = 'No match found for molecule C6H12'
        with pytest.raises(ValueError, match=expected_message):
            Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
Пример #9
0
 def test_from_openmm_duplicate_unique_mol(self):
     """A DuplicateUniqueMoleculeError must be raised when two indistinguishable unique mols are passed in."""
     from simtk.openmm import app

     box_path = get_data_file_path(
         'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
     pdbfile = app.PDBFile(box_path)

     # Ethanol is supplied twice, the second time from a reordered file.
     mol2_names = ('molecules/ethanol.mol2',
                   'molecules/ethanol_reordered.mol2',
                   'molecules/cyclohexane.mol2')
     molecules = [
         Molecule.from_file(get_data_file_path(name)) for name in mol2_names
     ]

     with self.assertRaises(DuplicateUniqueMoleculeError):
         Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
Пример #10
0
    def test_from_openmm_missing_conect(self):
        """
        Topology.from_openmm must raise a descriptive ValueError when the
        OpenMM Topology comes from a PDB that lacks CONECT records
        """
        from simtk.openmm import app

        pdb_path = get_data_file_path(
            'systems/test_systems/1_ethanol_no_conect.pdb')
        pdbfile = app.PDBFile(pdb_path)

        molecules = [Molecule.from_smiles('CCO')]

        # Passed to pytest.raises as its ``match`` pattern.
        expected_message = ('No match found for molecule C. This would be a '
                            'very unusual molecule to try and parameterize, '
                            'and it is likely that the data source it was '
                            'read from does not contain connectivity '
                            'information. If this molecule is coming from '
                            'PDB, please ensure that the file contains CONECT '
                            'records.')

        with pytest.raises(ValueError, match=expected_message):
            Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
Пример #11
0
    def execute(self, directory, available_resources):
        """Evaluate the reduced potentials needed for a finite-difference gradient.

        For every path in ``self.reference_force_field_paths`` a reduced system
        is built and evaluated over the trajectory, with the result stored in
        ``<directory>/reference_<index>.csv``. The target force field is then
        evaluated twice, perturbed by ``-self.perturbation_scale`` and
        ``+self.perturbation_scale``, with results stored in ``reverse.csv`` and
        ``forward.csv`` inside ``directory``.

        Parameters
        ----------
        directory
            Directory in which all output CSV files are written.
        available_resources
            Forwarded unchanged to ``self._evaluate_reduced_potential``.

        Returns
        -------
        The value of ``self._get_output_dictionary()`` on success, or a
        ``PropertyEstimatorException`` (returned, not raised) when the inputs
        are inconsistent or a force field source is not SMIRNOFF based.
        """

        import mdtraj

        from openforcefield.topology import Molecule, Topology

        logging.info(f'Calculating the reduced gradient potentials for {self.parameter_key}: {self._id}')

        # Using a subset of the force field requires exactly one reference force field ...
        if len(self.reference_force_field_paths) != 1 and self.use_subset_of_force_field:

            return PropertyEstimatorException(directory, 'A single reference force field must be '
                                                         'provided when calculating the reduced '
                                                         'potentials using a subset of the full force')

        # ... and a non-empty path to the statistics of the full force field.
        if len(self.reference_statistics_path) <= 0 and self.use_subset_of_force_field:

            return PropertyEstimatorException(directory, 'The path to the statistics evaluated using '
                                                         'the full force field must be provided.')

        with open(self.force_field_path) as file:
            target_force_field_source = ForceFieldSource.parse_json(file.read())

        if not isinstance(target_force_field_source, SmirnoffForceFieldSource):

            return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                         'this protocol.')

        target_force_field = target_force_field_source.to_force_field()

        # The coordinate file is passed as the second argument of the DCD loader.
        trajectory = mdtraj.load_dcd(self.trajectory_file_path,
                                     self.coordinate_file_path)

        # One reference molecule per substance component, used to build the
        # OpenFF topology from the OpenMM one.
        unique_molecules = []

        for component in self.substance.components:

            molecule = Molecule.from_smiles(smiles=component.smiles)
            unique_molecules.append(molecule)

        pdb_file = app.PDBFile(self.coordinate_file_path)
        topology = Topology.from_openmm(pdb_file.topology, unique_molecules=unique_molecules)

        # If we are using only a subset of the system object, load in the reference
        # statistics containing the full system energies to correct the output
        # forward and reverse potential energies.
        reference_statistics = None
        subset_energy_corrections = None

        if self.use_subset_of_force_field:
            reference_statistics = StatisticsArray.from_pandas_csv(self.reference_statistics_path)

        # Compute the reduced reference energy if any reference force field files
        # have been provided.
        self.reference_potential_paths = []

        for index, reference_force_field_path in enumerate(self.reference_force_field_paths):

            with open(reference_force_field_path) as file:
                reference_force_field_source = ForceFieldSource.parse_json(file.read())

            if not isinstance(reference_force_field_source, SmirnoffForceFieldSource):
                return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                             'this protocol.')

            reference_force_field = reference_force_field_source.to_force_field()
            # No perturbation is passed here; the returned parameter value is discarded.
            reference_system, _ = self._build_reduced_system(reference_force_field, topology)

            reference_potentials_path = path.join(directory, f'reference_{index}.csv')

            self._evaluate_reduced_potential(reference_system, trajectory,
                                             reference_potentials_path,
                                             available_resources)

            self.reference_potential_paths.append(reference_potentials_path)

            if reference_statistics is not None:

                # Read back the file just written, record the difference between
                # the full and subset potential energies, then overwrite the
                # file with the full potential energies.
                subset_energies = StatisticsArray.from_pandas_csv(reference_potentials_path)
                subset_energy_corrections = (reference_statistics[ObservableType.PotentialEnergy] -
                                             subset_energies[ObservableType.PotentialEnergy])

                subset_energies[ObservableType.PotentialEnergy] = reference_statistics[ObservableType.PotentialEnergy]
                subset_energies.to_pandas_csv(reference_potentials_path)

        # Build the slightly perturbed system.
        reverse_system, reverse_parameter_value = self._build_reduced_system(target_force_field,
                                                                             topology,
                                                                             -self.perturbation_scale)

        forward_system, forward_parameter_value = self._build_reduced_system(target_force_field,
                                                                             topology,
                                                                             self.perturbation_scale)

        self.reverse_parameter_value = openmm_quantity_to_pint(reverse_parameter_value)
        self.forward_parameter_value = openmm_quantity_to_pint(forward_parameter_value)

        # Calculate the reduced potentials.
        self.reverse_potentials_path = path.join(directory, 'reverse.csv')
        self.forward_potentials_path = path.join(directory, 'forward.csv')

        # The corrections are None unless a subset of the force field is used.
        self._evaluate_reduced_potential(reverse_system, trajectory, self.reverse_potentials_path,
                                         available_resources, subset_energy_corrections)
        self._evaluate_reduced_potential(forward_system, trajectory, self.forward_potentials_path,
                                         available_resources, subset_energy_corrections)

        logging.info(f'Finished calculating the reduced gradient potentials.')

        return self._get_output_dictionary()
def test_component_combination():
    """Check that a system rebuilt from its split components matches the original.

    There have been issues where later instances of a molecule ended up with
    zero charges. To catch that, the parameterized system is split into its
    components, recomposed, and compared with the original; the residue
    charges are also checked explicitly.
    """
    from simtk import openmm
    from .utils import compare_system_energies, get_packmol_pdb_file_path

    # Parameterize a cyclohexane-ethanol mixture with the AlkEthOH force field.
    offxml_path = utils.get_data_file_path(
        'test_forcefields/Frosst_AlkEthOH.offxml')
    forcefield = ForceField(offxml_path)
    pdbfile = openmm.app.PDBFile(
        get_packmol_pdb_file_path('cyclohexane_ethanol_0.4_0.6'))

    monomer_names = ('ethanol', 'cyclohexane')
    sdf_file_paths = [
        utils.get_data_file_path(
            os.path.join('systems', 'monomers', name + '.sdf'))
        for name in monomer_names
    ]
    molecules = [Molecule.from_file(sdf_path) for sdf_path in sdf_file_paths]

    topology = Topology.from_openmm(pdbfile.topology,
                                    unique_molecules=molecules)
    system = forcefield.create_openmm_system(topology)

    # Wrap topology, system and positions in a ParmEd Structure.
    structure = parmed.openmm.topsystem.load_topology(topology.to_openmm(),
                                                      system,
                                                      pdbfile.positions)

    # Split into (component, instances) pairs, keeping only the instance count.
    components = []
    counts = []
    for component, instances in structure.split():
        components.append(component)
        counts.append(len(instances))

    # Recompose: each component replicated by its count, in split order.
    new_structure = components[0] * counts[0]
    for component, count in zip(components[1:], counts[1:]):
        new_structure += component * count

    # Restore the original coordinates.
    new_structure.positions = structure.positions

    newsys = new_structure.createSystem(nonbondedMethod=openmm.app.NoCutoff,
                                        constraints=None,
                                        implicitSolvent=None)

    # Cross check energies of the original and the recomposed system.
    _groups0, _groups1, _energy0, _energy1 = compare_system_energies(
        pdbfile.topology,
        pdbfile.topology,
        system,
        newsys,
        pdbfile.positions,
        verbose=False)

    # The mixture must split into exactly two components.
    if len(counts) != 2:
        print("Error: Test system has incorrect number of components.")
        raise Exception(
            'Incorrect number of components in cyclohexane/ethanol test system.'
        )

    # No residue may have all of its atomic charges equal to zero.
    for resnr, residue in enumerate(structure.residues):
        total_abs_charge = sum([abs(atom.charge) for atom in residue.atoms])
        if total_abs_charge == 0:
            raise Exception(
                'Error: Residue %s in cyclohexane-ethanol test system has a charge of zero, which is incorrect.'
                % resnr)
Пример #13
0
    def _execute(self, directory, available_resources):
        """Assemble an OpenMM system for the coordinate file from per-component templates.

        Each distinct component of ``self.substance`` is parameterized once,
        inside ``<directory>/<index>``. Components whose SMILES is one of the
        listed water spellings are built with ``self._build_tip3p_system``
        instead. The templates are then appended to an initially empty system,
        once per molecule in the topology. The result is written to
        ``<directory>/system.xml`` and the path stored on ``self.system_path``.

        ``available_resources`` is not used by this method.
        """

        from openforcefield.topology import Molecule, Topology

        ff_source = ForceFieldSource.from_json(self.force_field_path)
        cutoff = pint_quantity_to_openmm(ff_source.cutoff)

        # The PDB provides the topology of the full system.
        pdb_file = app.PDBFile(self.coordinate_file_path)
        omm_topology = pdb_file.topology

        # One reference molecule per distinct SMILES, keyed by the SMILES the
        # toolkit itself emits.
        unique_molecules = {}

        for component in self.substance:
            reference = Molecule.from_smiles(component.smiles)
            unique_molecules[reference.to_smiles()] = reference

        # Build one system template per reference molecule.
        water_smiles = ("O", "[H]O[H]", "[H][O][H]")
        system_templates = {}

        for index, smiles in enumerate(unique_molecules):

            if smiles in water_smiles:

                template = self._build_tip3p_system(
                    cutoff, omm_topology.getUnitCellDimensions())

            else:

                component_directory = os.path.join(directory, str(index))
                os.makedirs(component_directory, exist_ok=True)

                # Parameterization runs inside the component's own directory.
                with temporarily_change_directory(component_directory):

                    template = self._parameterize_molecule(
                        unique_molecules[smiles], ff_source, cutoff)

            system_templates[smiles] = template

        # An OFF topology tells us which reference molecule each molecule is.
        topology = Topology.from_openmm(omm_topology,
                                        unique_molecules.values())

        # Grow the full system by appending one template per molecule.
        system = self._create_empty_system(cutoff)

        for topology_molecule in topology.topology_molecules:

            reference_smiles = topology_molecule.reference_molecule.to_smiles()
            template = system_templates[reference_smiles]

            # Reference-molecule particle index -> position within this
            # topology molecule's atom list.
            index_map = {
                topology_atom.atom.molecule_particle_index: position
                for position, topology_atom in enumerate(
                    topology_molecule.atoms)
            }

            self._append_system(system, template, index_map)

        box_vectors = omm_topology.getPeriodicBoxVectors()

        if box_vectors is not None:
            system.setDefaultPeriodicBoxVectors(*box_vectors)

        # Serialize the system object.
        self.system_path = os.path.join(directory, "system.xml")

        with open(self.system_path, "w") as xml_file:
            xml_file.write(openmm.XmlSerializer.serialize(system))
Пример #14
0
    def _execute(self, directory, available_resources):
        """Evaluate reduced potentials for a forward and a reverse parameter perturbation.

        The SMIRNOFF force field at ``self.force_field_path`` is evaluated over
        the trajectory twice, perturbed by ``-self.perturbation_scale`` and
        ``+self.perturbation_scale``. Results go to ``reverse.csv`` and
        ``forward.csv`` inside ``directory``. When
        ``self.use_subset_of_force_field`` is set, an unperturbed evaluation is
        written to ``subset.csv`` and its difference from the potential
        energies in ``self.statistics_path`` is passed to both evaluations.

        Raises
        ------
        ValueError
            If the force field source is not a ``SmirnoffForceFieldSource``.
        """

        import mdtraj
        from openforcefield.topology import Molecule, Topology

        with open(self.force_field_path) as ff_file:
            ff_source = ForceFieldSource.parse_json(ff_file.read())

        if not isinstance(ff_source, SmirnoffForceFieldSource):

            raise ValueError(
                "Only SMIRNOFF force fields are supported by this protocol.")

        # Load in the inputs
        force_field = ff_source.to_force_field()

        trajectory = mdtraj.load_dcd(self.trajectory_file_path,
                                     self.coordinate_file_path)

        # One reference molecule per substance component.
        unique_molecules = [
            Molecule.from_smiles(smiles=component.smiles)
            for component in self.substance.components
        ]

        pdb_file = app.PDBFile(self.coordinate_file_path)
        topology = Topology.from_openmm(pdb_file.topology,
                                        unique_molecules=unique_molecules)

        # Difference between the full and the reduced force field energies;
        # stays None unless only a subset of the force field is used.
        energy_corrections = None

        if self.use_subset_of_force_field:

            subset_system, _ = self._build_reduced_system(
                force_field, topology)

            subset_path = os.path.join(directory, "subset.csv")
            subset_potentials = self._evaluate_reduced_potential(
                subset_system, trajectory, subset_path, available_resources)

            full_statistics = StatisticsArray.from_pandas_csv(
                self.statistics_path)

            full_energies = full_statistics[ObservableType.PotentialEnergy]
            subset_energies = subset_potentials[ObservableType.PotentialEnergy]

            energy_corrections = full_energies - subset_energies

        # Build the slightly perturbed systems: reverse first, then forward.
        reverse_system, reverse_value = self._build_reduced_system(
            force_field, topology, -self.perturbation_scale)

        forward_system, forward_value = self._build_reduced_system(
            force_field, topology, self.perturbation_scale)

        self.reverse_parameter_value = openmm_quantity_to_pint(reverse_value)
        self.forward_parameter_value = openmm_quantity_to_pint(forward_value)

        # Calculate the reduced potentials.
        self.reverse_potentials_path = os.path.join(directory, "reverse.csv")
        self.forward_potentials_path = os.path.join(directory, "forward.csv")

        self._evaluate_reduced_potential(reverse_system, trajectory,
                                         self.reverse_potentials_path,
                                         available_resources,
                                         energy_corrections)
        self._evaluate_reduced_potential(forward_system, trajectory,
                                         self.forward_potentials_path,
                                         available_resources,
                                         energy_corrections)
Пример #15
0
    def execute(self, directory, available_resources):
        """Build an OpenMM system for the substance, one tleap run per component.

        An OpenFF topology is built from ``self.coordinate_file_path`` and one
        instance of each distinct molecule is picked from it. Non-water
        instances are written to ``initial.mol2``, passed to
        ``self._run_tleap`` and loaded from the resulting prmtop; water uses
        ``self._build_tip3p_system``. Each template is also written to
        ``<directory>/<index>/component.xml``. The full system is assembled by
        appending one template per topology molecule and is written to
        ``<directory>/system.xml`` (stored on ``self.system_path``).

        ``available_resources`` is not used by this method.

        Returns
        -------
        The value of ``self._get_output_dictionary()`` on success; a
        ``PropertyEstimatorException`` (returned, not raised) when the force
        field source is not a ``TLeapForceFieldSource``; or the error object
        reported by ``self._run_tleap``.
        """

        from openforcefield.topology import Molecule, Topology

        logging.info(
            f'Generating a system with tleap for {self.substance.identifier}: {self._id}'
        )

        with open(self.force_field_path) as file:
            force_field_source = ForceFieldSource.parse_json(file.read())

        if not isinstance(force_field_source, TLeapForceFieldSource):

            return PropertyEstimatorException(
                directory=directory,
                message='Only TLeap force field sources are supported by this '
                'protocol.')

        # Load in the systems coordinates / topology
        openmm_pdb_file = app.PDBFile(self.coordinate_file_path)

        # Create an OFF topology for better insight into the layout of the system topology.
        unique_molecules = [
            Molecule.from_smiles(component.smiles)
            for component in self.substance.components
        ]

        topology = Topology.from_openmm(openmm_pdb_file.topology,
                                        unique_molecules)

        # Find a unique instance of each topology molecule to get the correct
        # atom orderings. Later instances overwrite earlier ones, so the last
        # instance of each species is the one that is kept.
        topology_molecules = {}

        for topology_molecule in topology.topology_molecules:
            topology_molecules[topology_molecule.reference_molecule.to_smiles(
            )] = topology_molecule

        system_templates = {}

        cutoff = pint_quantity_to_openmm(force_field_source.cutoff)

        for index, (smiles, topology_molecule) in enumerate(
                topology_molecules.items()):

            component_directory = os.path.join(directory, str(index))

            # Always start from an empty component directory.
            if os.path.isdir(component_directory):
                shutil.rmtree(component_directory)

            os.makedirs(component_directory, exist_ok=True)

            if smiles != 'O' and smiles != '[H]O[H]':

                initial_mol2_name = 'initial.mol2'
                initial_mol2_path = os.path.join(component_directory,
                                                 initial_mol2_name)

                self._topology_molecule_to_mol2(topology_molecule,
                                                initial_mol2_path,
                                                self.charge_backend)
                prmtop_path, _, error = self._run_tleap(
                    force_field_source, initial_mol2_name, component_directory)

                if error is not None:
                    return error

                prmtop_file = openmm.app.AmberPrmtopFile(prmtop_path)

                component_system = prmtop_file.createSystem(
                    nonbondedMethod=app.PME,
                    nonbondedCutoff=cutoff,
                    constraints=app.HBonds,
                    rigidWater=True,
                    removeCMMotion=False)

                if openmm_pdb_file.topology.getPeriodicBoxVectors(
                ) is not None:
                    component_system.setDefaultPeriodicBoxVectors(
                        *openmm_pdb_file.topology.getPeriodicBoxVectors())
            else:

                component_system = self._build_tip3p_system(
                    topology_molecule, cutoff,
                    openmm_pdb_file.topology.getUnitCellDimensions())

            # Key the template by the SMILES of the molecule it was built from.
            # `index` enumerates `topology_molecules`, whose order and length
            # are not guaranteed to match `unique_molecules`, so indexing into
            # `unique_molecules` here could store the template under another
            # species. `smiles` is the reference molecule's SMILES, which is
            # exactly the key used for the lookup below.
            system_templates[smiles] = component_system

            with open(os.path.join(component_directory, 'component.xml'),
                      'w') as file:
                file.write(openmm.XmlSerializer.serialize(component_system))

        # Create the full system object from the component templates.
        system = None

        for topology_molecule in topology.topology_molecules:

            system_template = system_templates[
                topology_molecule.reference_molecule.to_smiles()]

            if system is None:

                # If no system has been set up yet, just use the first template.
                system = copy.deepcopy(system_template)
                continue

            # Append the component template to the full system.
            self._append_system(system, system_template)

        # Serialize the system object.
        system_xml = openmm.XmlSerializer.serialize(system)

        self.system_path = os.path.join(directory, 'system.xml')

        with open(self.system_path, 'w') as file:
            file.write(system_xml)

        logging.info(f'System generated: {self.id}')

        return self._get_output_dictionary()
Пример #16
0
    def execute(self, directory, available_resources):
        """Build an OpenMM system with a SMIRNOFF force field and save it.

        The PDB file at ``self.coordinate_file_path`` is loaded, an OpenFF
        topology is built for it from the components of ``self.substance``,
        and the force field read from ``self.force_field_path`` is applied.
        The serialized system is written to ``system.xml`` inside
        ``directory`` and its path is stored on ``self.system_path``.

        Parameters
        ----------
        directory: str
            The directory in which ``system.xml`` is written. It is also
            attached to any returned ``PropertyEstimatorException``.
        available_resources
            Not used by this method.

        Returns
        -------
        The value of ``self._get_output_dictionary()`` on success, otherwise
        a ``PropertyEstimatorException`` describing the failure (the
        exception object is returned, not raised).
        """

        from openforcefield.topology import Molecule, Topology

        logging.info('Generating topology: ' + self.id)

        pdb_file = app.PDBFile(self.coordinate_file_path)

        try:

            with open(self.force_field_path) as file:
                force_field_source = ForceFieldSource.parse_json(file.read())

        except Exception as e:

            return PropertyEstimatorException(
                directory=directory,
                message='{} could not load the ForceFieldSource: {}'.format(
                    self.id, e))

        if not isinstance(force_field_source, SmirnoffForceFieldSource):

            return PropertyEstimatorException(
                directory=directory,
                message='Only SMIRNOFF force fields are supported by this '
                'protocol.')

        force_field = force_field_source.to_force_field()

        unique_molecules = []
        charged_molecules = []

        if self.apply_known_charges:
            charged_molecules = self._generate_known_charged_molecules()

        # Load in any additional, user specified charged molecules.
        for charged_molecule_path in self.charged_molecule_paths:

            charged_molecule = Molecule.from_file(charged_molecule_path,
                                                  'MOL2')
            charged_molecules.append(charged_molecule)

        # Build one reference molecule per component of the substance.
        for component in self.substance.components:

            molecule = Molecule.from_smiles(smiles=component.smiles)

            if molecule is None:

                return PropertyEstimatorException(
                    directory=directory,
                    message='{} could not be converted to a Molecule'.format(
                        component))

            unique_molecules.append(molecule)

        topology = Topology.from_openmm(pdb_file.topology,
                                        unique_molecules=unique_molecules)

        # Only pass pre-charged molecules along when there are any.
        if charged_molecules:
            system = force_field.create_openmm_system(
                topology, charge_from_molecules=charged_molecules)
        else:
            system = force_field.create_openmm_system(topology)

        if system is None:

            # The trailing space keeps the two adjacent literals from being
            # joined into "theprovided".
            return PropertyEstimatorException(
                directory=directory,
                message='Failed to create a system from the '
                'provided topology and molecules')

        from simtk.openmm import XmlSerializer
        system_xml = XmlSerializer.serialize(system)

        self.system_path = os.path.join(directory, 'system.xml')

        with open(self.system_path, 'wb') as file:
            file.write(system_xml.encode('utf-8'))

        logging.info('Topology generated: ' + self.id)

        return self._get_output_dictionary()
Пример #17
0
    def execute(self, directory, available_resources):
        """Build an OpenMM system from LigParGen parameters and save it.

        A template system is created for every component of
        ``self.substance``: water (SMILES ``'O'`` or ``'[H]O[H]'``) is built
        with ``self._build_tip3p_system``, every other component is
        parameterized through ``self._parameterize_smiles``. The templates
        are then appended, molecule by molecule, in the order in which the
        molecules appear in the topology of ``self.coordinate_file_path``.
        After ``self._apply_opls_mixing_rules`` has been applied, the system
        is serialized to ``system.xml`` inside ``directory`` and its path is
        stored on ``self.system_path``.

        Parameters
        ----------
        directory: str
            The directory in which ``system.xml`` is written. It is also
            handed to ``self._parameterize_smiles`` and attached to any
            returned ``PropertyEstimatorException``.
        available_resources
            Not used by this method.

        Returns
        -------
        The value of ``self._get_output_dictionary()`` on success, otherwise
        a ``PropertyEstimatorException`` describing the failure (the
        exception object is returned, not raised).
        """

        import mdtraj
        from openforcefield.topology import Molecule, Topology

        logging.info(
            f'Generating a system with LigParGen for {self.substance.identifier}: {self._id}'
        )

        with open(self.force_field_path) as file:
            force_field_source = ForceFieldSource.parse_json(file.read())

        if not isinstance(force_field_source, LigParGenForceFieldSource):

            return PropertyEstimatorException(
                directory=directory,
                message=
                'Only LigParGen force field sources are supported by this '
                'protocol.')

        # Load in the systems coordinates / topology
        openmm_pdb_file = app.PDBFile(self.coordinate_file_path)

        # Create an OFF topology for better insight into the layout of the system topology.
        unique_molecules = [
            Molecule.from_smiles(component.smiles)
            for component in self.substance.components
        ]

        topology = Topology.from_openmm(openmm_pdb_file.topology,
                                        unique_molecules)

        # Create the template system objects for each component in the system,
        # keyed by the toolkit generated SMILES of the component.
        system_templates = {}

        cutoff = pint_quantity_to_openmm(force_field_source.cutoff)

        for index, component in enumerate(self.substance.components):

            component_smiles = unique_molecules[index].to_smiles()

            # Pick the first molecule in the topology which is an instance of
            # this component; it serves as the representative whose atom
            # ordering the template system follows.
            reference_topology_molecule = next(
                (topology_molecule
                 for topology_molecule in topology.topology_molecules
                 if topology_molecule.reference_molecule.to_smiles() ==
                 component_smiles), None)

            if reference_topology_molecule is None:
                # Use keywords like every other error return so that the
                # message cannot end up in the wrong parameter.
                return PropertyEstimatorException(
                    directory=directory,
                    message=
                    'A topology molecule could not be matched to its reference.'
                )

            # Create the force field template using the LigParGen server.
            if component.smiles not in ('O', '[H]O[H]'):

                force_field_path = self._parameterize_smiles(
                    component.smiles, force_field_source, directory)

                # Extract only the atoms of the representative molecule from
                # the full coordinate file.
                start_index = reference_topology_molecule.atom_start_topology_index
                end_index = start_index + reference_topology_molecule.n_atoms
                index_range = list(range(start_index, end_index))

                component_pdb_file = mdtraj.load_pdb(self.coordinate_file_path,
                                                     atom_indices=index_range)
                component_topology = component_pdb_file.topology.to_openmm()
                component_topology.setUnitCellDimensions(
                    openmm_pdb_file.topology.getUnitCellDimensions())

                # Create the system object.
                # noinspection PyTypeChecker
                force_field_template = app.ForceField(force_field_path)

                component_system = force_field_template.createSystem(
                    topology=component_topology,
                    nonbondedMethod=app.PME,
                    nonbondedCutoff=cutoff,
                    constraints=app.HBonds,
                    rigidWater=True,
                    removeCMMotion=False)
            else:

                component_system = self._build_tip3p_system(
                    reference_topology_molecule, cutoff,
                    openmm_pdb_file.topology.getUnitCellDimensions())

            system_templates[component_smiles] = component_system

        # Create the full system object from the component templates.
        system = None

        for topology_molecule in topology.topology_molecules:

            system_template = system_templates[
                topology_molecule.reference_molecule.to_smiles()]

            if system is None:

                # If no system has been set up yet, just use the first template.
                system = copy.deepcopy(system_template)
                continue

            # Append the component template to the full system.
            self._append_system(system, system_template)

        # Apply the OPLS mixing rules.
        self._apply_opls_mixing_rules(system)

        # Serialize the system object.
        system_xml = openmm.XmlSerializer.serialize(system)

        self.system_path = os.path.join(directory, 'system.xml')

        with open(self.system_path, 'wb') as file:
            file.write(system_xml.encode('utf-8'))

        logging.info(f'System generated: {self.id}')

        return self._get_output_dictionary()
Пример #18
0
def run_md(molecule, solvent_name="chloroform", confId=0):
    """
    Simulates ``molecule`` in a cubic, periodic box of solvent using OpenMM.

    The solute and solvent are packed into a box with PACKMOL, parameterized
    with the module-level ``forcefield``, briefly minimized and then
    propagated with a Langevin integrator.

    Details:
        - The ``Electrostatics`` handler of the module-level ``forcefield`` is
          switched to ``PME`` (this modifies the shared force field object)
        - Periodic box vectors are set on the packed topology
        - A Langevin integrator (friction 1/ps) regulates the temperature
        - Either the number of solvent molecules or the box side length is
          given; the other is derived from the solvent density and
          molecular weight

    Settings read from the module-level ``config`` mapping:
        - ``num`` (int) or ``length`` (number, in Å): box specification,
          ``num`` takes precedence when both are present
        - ``time_step`` (femtoseconds), ``temperature`` (kelvin)
        - ``num_steps``: number of MD steps to run
        - ``hdf5_freq``, ``data_freq``: reporting intervals

    Args:
        molecule (openforcefield.topology.Molecule): desired molecule, must already carry conformers
        solvent_name (str): either ``chloroform`` or ``benzene``, for now
        confId (int): index into ``molecule.conformers`` of the solute conformer to use

    Returns:
        Nothing, but ``data.csv`` and ``trj.hdf5`` are generated in the current
        directory. Temporary ``solute.pdb`` / ``solvent.pdb`` files are created
        there as PACKMOL input and removed again.

    Raises:
        ValueError: for an unknown solvent, or if neither ``num`` nor
            ``length`` is present in ``config``.
        AssertionError: if ``num`` / ``length`` has the wrong type or is not positive.
    """

    #### Load in the appropriate Molecule object
    off_solute = molecule.to_topology()
    omm_solute = off_solute.to_openmm()

    #### Build solvent Molecule object
    # NOTE(review): density and mw are presumably in g/mL and g/mol, as
    # implied by the 1.6606 conversion factor below -- confirm.
    solvent, density, mw = None, None, None
    if solvent_name == "chloroform":
        solvent = Molecule.from_smiles("C(Cl)(Cl)Cl")
        density = 1.49
        mw = 119.38
    elif solvent_name == "benzene":
        solvent = Molecule.from_smiles("c1ccccc1")
        density = 0.879
        mw = 78.11
    else:
        raise ValueError(f"Unknown solvent {solvent_name}!")

    solvent.generate_conformers()
    off_solvent = solvent.to_topology()
    omm_solvent = off_solvent.to_openmm()

    #### Calculate box side length
    num, length = None, None
    if "num" in config:
        num = config["num"]
        assert isinstance(num,
                          int), "Need an integer number of solvent molecules."
        assert num > 0, "Need a positive number of solvent molecules."
        length = (1.6606 * num * mw / density)**(
            1 / 3)  # 1.6606 = 10^24 (Å**3 per mL) divided by Avogadro's number
    elif "length" in config:
        length = config["length"]
        assert isinstance(length, (int, float)), "Need a numeric side length."
        assert length > 0, "Need a positive length."
        num = (length**3) * density / (mw * 1.6606)
        num = int(num)
    else:
        raise ValueError("Need ``length`` or ``num`` in config file!")

    logger.info(
        f"{num} solvent molecules in a cube with {length:.2f} Å sides.")

    #### Write solvent and solute to ``.pdb`` files for PACKMOL
    solute_pdb = "solute.pdb"
    solvent_pdb = "solvent.pdb"

    try:
        with open(solute_pdb, "w+") as f:
            openmm.app.pdbfile.PDBFile.writeFile(omm_solute,
                                                 molecule.conformers[confId],
                                                 f)

        with open(solvent_pdb, "w+") as f:
            openmm.app.pdbfile.PDBFile.writeFile(omm_solvent,
                                                 solvent.conformers[0], f)

        #### Use ``openmoltools`` Python wrapper for PACKMOL to fill the box appropriately
        mdt_trajectory = pack_box([solute_pdb, solvent_pdb], [1, num],
                                  box_size=length)
    finally:
        #### Clean up ``.pdb`` files
        # The files are only needed as PACKMOL input, so remove them here
        # even if writing or packing failed.
        for temporary_pdb in (solute_pdb, solvent_pdb):
            if os.path.exists(temporary_pdb):
                os.remove(temporary_pdb)

    #### Convert back to ``openforcefield``
    omm_topology = mdt_trajectory.top.to_openmm()
    length = length / 10  # the box was sized in Å, the box vectors are given in nanometers
    omm_topology.setPeriodicBoxVectors(
        ((length, 0, 0), (0, length, 0), (0, 0, length)))
    off_topology = Topology.from_openmm(omm_topology, [
        Molecule.from_topology(off_solute),
        Molecule.from_topology(off_solvent)
    ])

    logger.info(f"BOX VECTORS: {off_topology.box_vectors}")

    #### Set up the OpenMM system
    forcefield.get_parameter_handler('Electrostatics').method = 'PME'
    system = forcefield.create_openmm_system(off_topology)
    time_step = config["time_step"] * unit.femtoseconds
    temperature = config["temperature"] * unit.kelvin
    friction = 1 / unit.picosecond
    integrator = openmm.LangevinIntegrator(temperature, friction, time_step)

    #### Set up the simulation
    simulation = openmm.app.Simulation(omm_topology, system, integrator)
    logger.info("Simulation object created.")
    simulation.context.setPositions(mdt_trajectory.openmm_positions(0))
    logger.info("Positions loaded.")

    hdf5_reporter = mdt.reporters.HDF5Reporter('trj.hdf5', config["hdf5_freq"])
    state_data_reporter = openmm.app.StateDataReporter("data.csv",
                                                       config["data_freq"],
                                                       step=True,
                                                       potentialEnergy=True,
                                                       temperature=True,
                                                       density=True)
    simulation.reporters.append(hdf5_reporter)
    simulation.reporters.append(state_data_reporter)

    logger.info("Using Platform: " +
                simulation.context.getPlatform().getName())

    logger.info("Minimizing...")
    simulation.minimizeEnergy(maxIterations=25)

    logger.info("Running...")
    # Track both wall-clock and CPU time of the production run.
    w_start = time.time()
    p_start = time.process_time()
    simulation.step(config["num_steps"])
    w_end = time.time()
    p_end = time.process_time()
    logger.info(
        f"Elapsed time {w_end-w_start:.2f} s (CPU: {p_end-p_start:.2f} s)")
    logger.info("Done")
Пример #19
0
    def _get_residue_names_from_role(substances, coordinate_path, role):
        """Collect the residue names of every component with a given role.

        Parameters
        ----------
        substances: list of Substance
            The substances whose components should be inspected.
        coordinate_path: str
            The path to the PDB file which describes the topology
            of the system.
        role: Substance.ComponentRole
            The role which a component must have for its residue
            name to be reported.

        Returns
        -------
        set of str or str
            The residue names of the matching molecules, or the string
            ``'all'`` when `role` is ``Substance.ComponentRole.Undefined``.
        """

        from simtk.openmm import app
        from openforcefield.topology import Molecule, Topology

        # An undefined role selects everything.
        if role == Substance.ComponentRole.Undefined:
            return 'all'

        # One reference molecule per component, across all substances.
        reference_molecules = []

        for substance in substances:
            for component in substance.components:
                reference_molecules.append(
                    Molecule.from_smiles(component.smiles))

        pdb_topology = app.PDBFile(coordinate_path).topology
        off_topology = Topology.from_openmm(pdb_topology, reference_molecules)

        # Round-trip the smiles of the components with the requested role
        # through the toolkit, so that they can be compared directly with
        # the toolkit generated smiles of the topology molecules below.
        role_smiles = []

        for substance in substances:
            for component in substance.components:

                if component.role != role:
                    continue

                role_smiles.append(
                    Molecule.from_smiles(component.smiles).to_smiles())

        matched_names = set()
        pdb_atoms = list(pdb_topology.atoms())

        # Gather the residue name of each molecule with the requested role.
        for molecule in off_topology.topology_molecules:

            if molecule.reference_molecule.to_smiles() not in role_smiles:
                continue

            names_in_molecule = {
                pdb_atoms[atom.topology_atom_index].residue.name
                for atom in molecule.atoms
            }

            # All atoms of one molecule must share a single residue name.
            assert len(names_in_molecule) == 1
            matched_names |= names_in_molecule

        return matched_names
Пример #20
0
    def prepare(self, pbc=False, mmopts={}, **kwargs):
        """
        Set up everything the calculation needs except for the Simulation
        object, which is created later because it may depend on MD
        integrator parameters, thermostat, barostat etc.

        Adapted from openmmio.py's OpenMM.prepare(); here ForceField()
        comes from the OpenFF toolkit and the AMOEBA handling is left out.

        The keyword arguments read are 'offxml' (only when no self.FF is
        present), 'restrain_k', 'freeze_atoms' and 'nonbonded_cutoff'
        (the latter only triggers a warning).
        """
        self.pdb = PDBFile(self.abspdb)

        # Obtain the OpenFF ForceField object, either from the ForceBalance
        # force field or from the offxml files named in the keyword arguments.
        if not hasattr(self, 'FF'):
            self.offxml = listfiles(kwargs.get('offxml'), 'offxml', err=True)
            self.forcefield = OpenFF_ForceField(*self.offxml)
        else:
            self.offxml = [self.FF.offxml]
            self.forcefield = self.FF.openff_forcefield

        ## Load mol2 files for smirnoff topology
        reference_molecules = []
        for mol2_path in self.mol2:
            try:
                loaded_molecule = OffMolecule.from_file(mol2_path)
            except Exception as e:
                logger.error("Error when loading %s" % mol2_path)
                raise e
            reference_molecules.append(loaded_molecule)
        self.off_topology = OffTopology.from_openmm(
            self.pdb.topology, unique_molecules=reference_molecules)

        # used in create_simulation()
        self.mod = Modeller(self.pdb.topology, self.pdb.positions)

        ## OpenMM options for setting up the System (copied, so the
        ## caller's mapping is never modified).
        self.mmopts = dict(mmopts)

        ## Specify frozen atoms and restraint force constant
        if 'restrain_k' in kwargs:
            self.restrain_k = kwargs['restrain_k']
        if 'freeze_atoms' in kwargs:
            self.freeze_atoms = kwargs['freeze_atoms'][:]

        ## Set system options from ForceBalance force field options.
        created_parameter_files = False
        if hasattr(self, 'FF'):
            self.mmopts['rigidWater'] = self.FF.rigid_water
            if any(not os.path.exists(path) for path in self.FF.fnms):
                # The parameter files are missing: write them out so that the
                # engine can be prepared, and remember to delete them at the end.
                created_parameter_files = True
                self.FF.make(np.zeros(self.FF.np))

        ## Set system options from periodic boundary conditions.
        self.pbc = pbc
        ## print warning for 'nonbonded_cutoff' keywords
        if 'nonbonded_cutoff' in kwargs:
            logger.warning("nonbonded_cutoff keyword ignored because it's set in the offxml file\n")

        ## Generate OpenMM-compatible positions, one (positions, box) pair per frame
        self.xyz_omms = []
        for frame in range(len(self.mol)):
            coordinates = self.mol.xyzs[frame] * angstrom
            # No virtual particles are added here (no virtual sites yet).
            if self.pbc:
                # Obtain the periodic box
                box = self.mol.boxes[frame]
                if not (box.alpha == 90.0 and box.beta == 90.0 and box.gamma == 90.0):
                    logger.error('OpenMM cannot handle nonorthogonal boxes.\n')
                    raise RuntimeError
                periodic_box = np.diag([box.a, box.b, box.c]) * angstrom
            else:
                periodic_box = None
            # Finally append it to list.
            self.xyz_omms.append((coordinates, periodic_box))

        ## Build a topology and atom lists.
        pdb_topology = self.pdb.topology
        pdb_atoms = list(pdb_topology.atoms())
        # Not used further within this method.
        bond_indices = [(first.index, second.index) for first, second in list(pdb_topology.bonds())]

        self.AtomLists = defaultdict(list)

        # Atoms without an element are given zero mass.
        masses = []
        for atom in pdb_atoms:
            if atom.element is not None:
                masses.append(atom.element.mass.value_in_unit(dalton))
            else:
                masses.append(0)
        self.AtomLists['Mass'] = masses

        # Particles with a mass of at least 1.0 are tagged 'A', all others 'D'.
        particle_types = []
        for mass in self.AtomLists['Mass']:
            if mass >= 1.0:
                particle_types.append('A')
            else:
                particle_types.append('D')
        self.AtomLists['ParticleType'] = particle_types

        self.AtomLists['ResidueNumber'] = [atom.residue.index for atom in pdb_atoms]
        self.AtomMask = [particle_type == 'A' for particle_type in self.AtomLists['ParticleType']]
        self.realAtomIdxs = [index for index, is_real in enumerate(self.AtomMask) if is_real is True]

        # Remove the parameter files again if they were only created above.
        if hasattr(self, 'FF') and created_parameter_files:
            for path in self.FF.fnms:
                os.unlink(path)
Пример #21
0
def minimize(dat_file,
             lst_angle,
             pdb_dir,
             sdf_dir,
             coor_dir=None,
             xml_dir=None):
    """Run a restrained energy minimization for every entry and angle.

    For each name returned by ``read_data(dat_file)`` the matching PDB and
    SDF files are loaded, and for each angle in ``lst_angle`` a system is
    built with the ``openff-1.1.1.offxml`` force field, a torsion restraint
    from ``make_restrain_torsion`` is added and the energy is minimized.
    Each minimization starts from the structure produced by the previous
    angle (the PDB positions for the first angle).

    Parameters
    ----------
    dat_file
        Passed to ``read_data``, which yields the entry names and, per
        entry, the atoms handed to ``make_restrain_torsion``.
    lst_angle
        The angles to restrain to, in order; each must be convertible
        with ``float``.
    pdb_dir: str
        Directory holding the PDB files (named via ``pdb_format``).
    sdf_dir: str
        Directory holding the SDF files (named via ``sdf_format``).
    coor_dir: str, optional
        If given, each minimized structure is written there with
        ``write_xyz`` as ``<name>_<angle>.xyz``.
    xml_dir: str, optional
        If given, ``write_xml`` is called for each minimization with the
        path ``<name>_<angle>.xml`` in that directory.

    Returns
    -------
    list
        One list per entry, holding one ``[sum of all but the last value,
        last value]`` pair per angle, where the values are those returned
        by ``extract_energy``.

    Notes
    -----
    The index of every finished entry is appended to ``check.log`` in the
    current working directory.
    """
    # The simulation configuration
    time_step = 2 * unit.femtoseconds  # simulation timestep
    temperature = 300 * unit.kelvin  # simulation temperature
    friction = 1 / unit.picosecond  # collision rate
    minimize_tolerance = 1e-5 * unit.kilojoule / unit.mole
    minimize_iteration_step = 1000000
    forcefield = ForceField('openff-1.1.1.offxml')

    list_name, list_atoms = read_data(dat_file)
    list_energies = []
    for i, name in enumerate(list_name):
        pdbfile = PDBFile(pdb_dir + '/' + pdb_format.format(name))
        uni_mol = Molecule.from_file(sdf_dir + '/' + sdf_format.format(name))
        topo = pdbfile.topology

        list_energy = []

        # Each angle starts from the structure minimized for the previous one.
        previous_structure = pdbfile.getPositions()
        for angle in lst_angle:
            # A fresh system is needed per angle because the restraint force
            # is added to it.
            # NOTE(review): topo_ff looks loop-invariant and could probably
            # be built once per entry -- confirm create_openmm_system does
            # not modify it.
            topo_ff = Topology.from_openmm(topo, [uni_mol])
            system = forcefield.create_openmm_system(topo_ff)
            restrain_force = make_restrain_torsion(list_atoms[i], float(angle),
                                                   1e6)
            system.addForce(restrain_force)

            integrator = openmm.LangevinIntegrator(temperature, friction,
                                                   time_step)
            simulation = openmm.app.Simulation(topo, system, integrator)
            simulation.context.setPositions(previous_structure)
            simulation.minimizeEnergy(tolerance=minimize_tolerance,
                                      maxIterations=minimize_iteration_step)

            # Store the total of all but the last value next to the last one.
            energy_list = extract_energy(simulation)
            sum_energy = sum(energy_list[:-1], 0.0)
            list_energy.append([sum_energy, energy_list[-1]])

            previous_structure = simulation.context.getState(
                getPositions=True).getPositions()

            output_stem = str(name) + '_' + str(angle)
            if coor_dir is not None:
                write_xyz(previous_structure, topo,
                          coor_dir + '/' + output_stem + '.xyz')
            if xml_dir is not None:
                write_xml(topo, xml_dir + '/' + output_stem + '.xml')
        list_energies.append(list_energy)

        # Record progress; the context manager closes the file even if the
        # write fails.
        with open('check.log', 'a') as f:
            f.write(str(i) + '\n')

    return list_energies