def _generate_charges(self, molecule):
    """Generate a conformer and AM1-BCC partial charges for a molecule.

    The toolkit used for both steps is selected by ``self.charge_backend``.
    Nothing is returned; both steps are invoked as methods of ``molecule``.

    Parameters
    ----------
    molecule: openforcefield.topology.Molecule
        The molecule to assign charges to.

    Raises
    ------
    ValueError
        If ``self.charge_backend`` is neither the OpenEye nor the AmberTools
        backend.
    """
    backend = self.charge_backend
    known_backends = BuildTLeapSystem.ChargeBackend

    if backend == known_backends.OpenEye:

        from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

        charge_toolkit = OpenEyeToolkitWrapper()

    elif backend == known_backends.AmberTools:

        from openforcefield.utils.toolkits import (
            RDKitToolkitWrapper,
            AmberToolsToolkitWrapper,
            ToolkitRegistry,
        )

        # RDKit is listed ahead of AmberTools in the registry precedence.
        precedence = [RDKitToolkitWrapper, AmberToolsToolkitWrapper]
        charge_toolkit = ToolkitRegistry(toolkit_precedence=precedence)

    else:
        raise ValueError(f"Invalid toolkit specification.")

    # Conformers are generated before the AM1-BCC charge calculation is run.
    molecule.generate_conformers(toolkit_registry=charge_toolkit)
    molecule.compute_partial_charges_am1bcc(toolkit_registry=charge_toolkit)
def test_compute_partial_charges_net_charge(self):
    """Test AmberTools/RDKit partial charge assignment on a molecule with a net +1 charge.

    The registry under test is built from AmberToolsToolkitWrapper and
    RDKitToolkitWrapper. The generic ``compute_partial_charges`` entry point
    is expected to raise ``NotImplementedError``, so only the AM1-BCC charges
    are checked: they must sum to the +1 formal charge of the molecule.
    """
    toolkit_registry = ToolkitRegistry(
        toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])
    smiles = '[H]C([H])([H])[N+]([H])([H])[H]'
    molecule = Molecule.from_smiles(smiles, toolkit_registry=toolkit_registry)
    molecule.generate_conformers(toolkit_registry=toolkit_registry)

    # TODO: Once compute_partial_charges accepts a charge_model argument, test
    #       an invalid model name and each of 'gas', 'mul' and 'bcc' (and
    #       figure out why 'cm1' and 'cm2' fail), checking the net charge for
    #       every model. Until then the call is expected to raise, so any
    #       assertion placed after it inside the context manager never runs.
    with pytest.raises(NotImplementedError):
        molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

    # For now, I'm just testing AM1-BCC (will test more when the SMIRNOFF spec
    # for other charges is finalized)
    molecule.compute_partial_charges_am1bcc(toolkit_registry=toolkit_registry)
    charge_sum = sum(molecule._partial_charges, 0 * unit.elementary_charge)
    assert 0.999 * unit.elementary_charge < charge_sum < 1.001 * unit.elementary_charge
def test_compute_partial_charges(self):
    """Test AmberTools/RDKit partial charge assignment on a neutral molecule.

    The registry under test is built from AmberToolsToolkitWrapper and
    RDKitToolkitWrapper. The generic ``compute_partial_charges`` entry point
    is expected to raise ``NotImplementedError``, so only the AM1-BCC charges
    are checked: their sum must be zero to within 0.002 e.
    """
    toolkit_registry = ToolkitRegistry(
        toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])
    smiles = '[H]C([H])([H])C([H])([H])[H]'
    molecule = Molecule.from_smiles(smiles, toolkit_registry=toolkit_registry)
    molecule.generate_conformers(toolkit_registry=toolkit_registry)

    # TODO: Implementation of these tests is pending a decision on the API for
    #       our charge model. Once compute_partial_charges accepts a
    #       charge_model argument, test an invalid model name and each of
    #       'gas', 'mul' and 'bcc' (plus 'cm1', 'cm2'), checking the net
    #       charge for every model. Until then the call is expected to raise,
    #       so any assertion placed after it inside the context manager never
    #       runs.
    with pytest.raises(NotImplementedError):
        molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

    # For now, just test AM1-BCC while the SMIRNOFF spec for other charge
    # models gets worked out
    molecule.compute_partial_charges_am1bcc(toolkit_registry=toolkit_registry)
    charge_sum = sum(molecule._partial_charges, 0 * unit.elementary_charge)
    # Compare the magnitude: a one-sided bound would accept any negative sum.
    assert abs(charge_sum) < 0.002 * unit.elementary_charge
def test_register_rdkit(self):
    """Check registration, resolve() and call() on an RDKit-only ToolkitRegistry."""
    registry = ToolkitRegistry(
        toolkit_precedence=[RDKitToolkitWrapper],
        register_imported_toolkit_wrappers=False,
    )

    # Registration: the set of registered wrapper types is exactly {RDKit}
    registered_types = {type(toolkit) for toolkit in registry.registered_toolkits}
    assert registered_types == {RDKitToolkitWrapper}

    # resolve(): 'to_smiles' maps onto the first registered toolkit's method
    resolved_method = registry.resolve('to_smiles')
    assert resolved_method == registry.registered_toolkits[0].to_smiles

    # call(): a SMILES string survives a from_smiles -> to_smiles round trip
    smiles = '[H][C]([H])([H])[C]([H])([H])[H]'
    parsed_molecule = registry.call('from_smiles', smiles)
    round_tripped = registry.call('to_smiles', parsed_molecule)
    assert smiles == round_tripped
def make_registry(toolkit: str) -> ToolkitRegistry:
    """Build a ToolkitRegistry holding the single toolkit named by ``toolkit``.

    The name is matched case-insensitively against "openeye" and "rdkit";
    any other value raises ``UnsupportedToolkitError``. The wrapper class is
    imported only inside the branch that needs it.
    """
    requested = toolkit.lower()

    if requested == "openeye":
        from openforcefield.utils.toolkits import (
            OpenEyeToolkitWrapper as wrapper_class,
        )
    elif requested == "rdkit":
        from openforcefield.utils.toolkits import (
            RDKitToolkitWrapper as wrapper_class,
        )
    else:
        from openff.cli.utils.exceptions import UnsupportedToolkitError

        raise UnsupportedToolkitError(toolkit=toolkit)

    toolkit_registry = ToolkitRegistry(toolkit_precedence=[wrapper_class])

    # Checks later assume that this is length 1. This should be changed if
    # multiple toolkits (i.e. RDKit and AmberTools) are needed at once
    assert len(toolkit_registry.registered_toolkit_versions) == 1
    return toolkit_registry
def test_register_ambertools(self):
    """Check registration and resolve() on an AmberTools + RDKit ToolkitRegistry.

    The final ``call('from_smiles', ...)`` is only exercised; its result is
    not asserted on.
    """
    registry = ToolkitRegistry(
        toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper],
        register_imported_toolkit_wrappers=False,
    )

    # Registration: both wrapper types, and nothing else, are present
    registered_types = {type(toolkit) for toolkit in registry.registered_toolkits}
    assert registered_types == {AmberToolsToolkitWrapper, RDKitToolkitWrapper}

    # resolve(): 'compute_partial_charges' maps onto the first registered
    # toolkit's method. The first, discarded call only checks that resolving
    # does not raise.
    registry.resolve('compute_partial_charges')
    resolved_method = registry.resolve('compute_partial_charges')
    first_toolkit = registry.registered_toolkits[0]
    assert resolved_method == first_toolkit.compute_partial_charges

    # call(): RDKit is registered again before parsing a SMILES string
    registry.register_toolkit(RDKitToolkitWrapper)
    smiles = '[H]C([H])([H])C([H])([H])[H]'
    molecule = registry.call('from_smiles', smiles)
def get_conformer_energies(
    molecule: str,
    registry: ToolkitRegistry,
    forcefield: str,
    constrained: bool = False,
) -> List[Molecule]:
    """Minimize every conformer in a molecule file and record per-conformer data.

    Molecules are loaded from ``molecule`` through ``registry``; consecutive
    records that compare equal are merged into a single molecule carrying all
    of their conformers. Each conformer is then minimized, its coordinates
    are replaced in place by the minimized ones, and three lists (one entry
    per conformer) are stored in ``mol.properties`` under the keys
    "original conformer energies (kcal/mol)",
    "minimized conformer energies (kcal/mol)" and
    "RMSD of minimized conformers (angstrom)".

    Parameters
    ----------
    molecule
        Path of the file to load; the text after the last "." is used as the
        file format.
    registry
        Toolkit registry used to parse the file.
    forcefield
        Force field identifier handed to ``_get_forcefield``; also stored in
        ``mol.properties["minimized against: "]``.
    constrained
        Forwarded to ``_get_forcefield``.

    Returns
    -------
    list of Molecule
        The merged molecules, with updated conformers, partial charges and
        properties.

    Raises
    ------
    MoleculeParsingError
        If the toolkit returns no molecules for the file.
    """
    _enforce_dependency_version("openforcefield", "0.7.0")

    file_format = molecule.split(".")[-1]

    loaded_molecules = registry.call(
        "from_file",
        molecule,
        file_format=file_format,
    )

    # A single-record file may come back as a bare molecule rather than a list
    if not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # When failing to parse molecules the toolkit can return an empty list
    # instead of raising; report that the same way generate_conformers does
    # rather than failing on the index lookup below.
    if not loaded_molecules:
        from openff.cli.utils.exceptions import MoleculeParsingError

        raise MoleculeParsingError(toolkit_registry=registry, filename=molecule)

    # Merge runs of identical molecules into one molecule with many conformers
    mols = [loaded_molecules[0]]
    for mol in loaded_molecules[1:]:
        if mol == mols[-1]:
            for conformer in mol.conformers:
                mols[-1].add_conformer(conformer)
        else:
            # Keep the parsed molecule itself, not the file name it came from
            mols.append(mol)

    n_molecules = len(mols)
    n_conformers = sum(mol.n_conformers for mol in mols)
    print(
        f"{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers"
    )

    ff = _get_forcefield(forcefield, constrained)

    mols_with_charges = [mol for mol in mols if mol.partial_charges is not None]

    conformer_property_keys = [
        "original conformer energies (kcal/mol)",
        "minimized conformer energies (kcal/mol)",
        "RMSD of minimized conformers (angstrom)",
    ]

    # This is duplicated from generate_conformers
    minimized_mols = []
    for mol in mols:
        # Molecules that arrived with charges are offered as the charge source
        mol_with_charge = [mol] if mol in mols_with_charges else []
        simulation, partial_charges = _build_simulation(
            molecule=mol,
            forcefield=ff,
            mols_with_charge=mol_with_charge,
        )
        mol._partial_charges = partial_charges

        mol.properties["minimized against: "] = forcefield
        # One slot per conformer, filled in by the loop below
        for prop in conformer_property_keys:
            mol.properties[prop] = mol.n_conformers * [None]

        for i, conformer in enumerate(mol.conformers):
            # Record the energy and positions before minimization
            simulation.context.setPositions(conformer)
            pre_energy, pre_positions = _get_conformer_data(simulation)
            mol.properties["original conformer energies (kcal/mol)"][i] = pre_energy

            simulation = _minimize_conformer(simulation, conformer)
            min_energy, min_positions = _get_conformer_data(simulation)
            mol.properties["minimized conformer energies (kcal/mol)"][i] = min_energy

            # Overwrite the stored conformer with its minimized coordinates
            mol.conformers[i] = min_positions
            rms = _get_rms_two_conformers(mol, pre_positions, min_positions)
            mol.properties["RMSD of minimized conformers (angstrom)"][i] = rms

        minimized_mols.append(mol)

    return minimized_mols
def _topology_molecule_to_mol2(topology_molecule, file_name, charge_backend):
    """Converts an `openforcefield.topology.TopologyMolecule` into a mol2 file,
    generating a conformer and AM1BCC charges in the process.

    The conformer and charges are generated on a deep copy of
    `topology_molecule.reference_molecule`. A new `Molecule` is then built
    with its atoms and bonds in topology order, the first conformer and the
    partial charges of the copy are re-indexed onto it, and the result is
    written to `file_name`.

    .. todo :: This function uses non-public methods from the Open Force Field toolkit
               and should be refactored when public methods become available

    Parameters
    ----------
    topology_molecule: openforcefield.topology.TopologyMolecule
        The `TopologyMolecule` to write out as a mol2 file. The atom ordering in
        this mol2 will be consistent with the topology ordering.
    file_name: str
        The filename to write to.
    charge_backend: BuildTLeapSystem.ChargeBackend
        The backend to use for conformer generation and partial charge
        calculation.

    Raises
    ------
    ValueError
        If `charge_backend` is neither `ChargeBackend.OpenEye` nor
        `ChargeBackend.AmberTools`.
    """
    from openforcefield.topology import Molecule
    from simtk import unit as simtk_unit

    # Make a copy of the reference molecule so we can run conf gen / charge calc without modifying the original
    reference_molecule = copy.deepcopy(
        topology_molecule.reference_molecule)

    # Both branches run the same two steps (conformer generation, then
    # AM1BCC charges); they differ only in the toolkit object passed in.
    if charge_backend == BuildTLeapSystem.ChargeBackend.OpenEye:

        from openforcefield.utils.toolkits import OpenEyeToolkitWrapper
        toolkit_wrapper = OpenEyeToolkitWrapper()
        reference_molecule.generate_conformers(
            toolkit_registry=toolkit_wrapper)
        reference_molecule.compute_partial_charges_am1bcc(
            toolkit_registry=toolkit_wrapper)

    elif charge_backend == BuildTLeapSystem.ChargeBackend.AmberTools:

        from openforcefield.utils.toolkits import RDKitToolkitWrapper, AmberToolsToolkitWrapper, ToolkitRegistry
        # RDKit is listed ahead of AmberTools in the registry precedence.
        toolkit_wrapper = ToolkitRegistry(toolkit_precedence=[
            RDKitToolkitWrapper, AmberToolsToolkitWrapper
        ])
        reference_molecule.generate_conformers(
            toolkit_registry=toolkit_wrapper)
        reference_molecule.compute_partial_charges_am1bcc(
            toolkit_registry=toolkit_wrapper)

    else:
        raise ValueError(f'Invalid toolkit specification.')

    # Get access to the parent topology, so we can look up the topology atom indices later.
    topology = topology_molecule.topology

    # Make and populate a new openforcefield.topology.Molecule
    new_molecule = Molecule()
    new_molecule.name = reference_molecule.name

    # Add atoms to the new molecule in the correct order
    for topology_atom in topology_molecule.atoms:

        # Force the topology to cache the topology molecule start indices
        # (the return value is deliberately discarded; the bond loop below
        # reads `_atom_start_topology_index` afterwards).
        topology.atom(topology_atom.topology_atom_index)

        new_molecule.add_atom(topology_atom.atom.atomic_number,
                              topology_atom.atom.formal_charge,
                              topology_atom.atom.is_aromatic,
                              topology_atom.atom.stereochemistry,
                              topology_atom.atom.name)

    # Add bonds to the new molecule
    for topology_bond in topology_molecule.bonds:

        # This is a temporary workaround to figure out what the "local" atom index of
        # these atoms is. In other words it is the offset we need to apply to get the
        # index if this were the only molecule in the whole Topology. We need to apply
        # this offset because `new_molecule` begins its atom indexing at 0, not the
        # real topology atom index (which we do know).
        index_offset = topology_molecule._atom_start_topology_index

        # Convert the `.atoms` generator into a list so we can access it by index
        topology_atoms = list(topology_bond.atoms)

        new_molecule.add_bond(
            topology_atoms[0].topology_atom_index - index_offset,
            topology_atoms[1].topology_atom_index - index_offset,
            topology_bond.bond.bond_order,
            topology_bond.bond.is_aromatic,
            topology_bond.bond.stereochemistry,
        )

    # Transfer over existing conformers and partial charges, accounting for the
    # reference/topology indexing differences
    # (plain, unit-less arrays here; units are reattached further down).
    new_conformers = np.zeros((reference_molecule.n_atoms, 3))
    new_charges = np.zeros(reference_molecule.n_atoms)

    # Then iterate over the reference atoms, mapping their indices to the topology
    # molecule's indexing system
    for reference_atom_index in range(reference_molecule.n_atoms):
        # We don't need to apply the offset here, since _ref_to_top_index is
        # already "locally" indexed for this topology molecule
        local_top_index = topology_molecule._ref_to_top_index[
            reference_atom_index]

        # Only the first conformer of the reference copy is transferred.
        new_conformers[local_top_index, :] = reference_molecule.conformers[
            0][reference_atom_index].value_in_unit(simtk_unit.angstrom)
        new_charges[local_top_index] = reference_molecule.partial_charges[
            reference_atom_index].value_in_unit(
                simtk_unit.elementary_charge)

    # Reattach the units
    new_molecule.add_conformer(new_conformers * simtk_unit.angstrom)
    new_molecule.partial_charges = new_charges * simtk_unit.elementary_charge

    # Write the molecule
    new_molecule.to_file(file_name, file_format='mol2')
def generate_conformers(
    molecule: str,
    registry: ToolkitRegistry,
    forcefield: str,
    constrained: bool = False,
    prefix: Optional[str] = None,
) -> List[Molecule]:
    """Generate, minimize and sort conformers for the molecules in a file.

    Molecules are loaded from ``molecule`` through ``registry`` (retrying
    with undefined stereochemistry allowed if the first attempt raises
    ``UndefinedStereochemistryError``), named, and passed through
    ``_collapse_conformers``. If stereochemistry was ambiguous, each molecule
    is replaced by its enumerated stereoisomers when there are any. New
    conformers are then generated for every molecule (``n_conformers=100``,
    RMS cutoff 0.25 angstrom), each one is minimized, and one output molecule
    per conformer is collected.

    Parameters
    ----------
    molecule
        Path of the file to load; the text after the last "." is used as the
        file format.
    registry
        Toolkit registry used for parsing and conformer generation.
    forcefield
        Force field identifier handed to ``_get_forcefield`` and
        ``_add_metadata_to_mol``.
    constrained
        Forwarded to ``_get_forcefield``.
    prefix
        If given, used as the name of every loaded molecule; otherwise
        unnamed molecules are called "molecule". When the file holds more
        than one molecule, the record index is appended to the name.

    Returns
    -------
    list of Molecule
        One molecule per minimized conformer, ordered by ``_sort_mols``.

    Raises
    ------
    MoleculeParsingError
        If the toolkit returns no molecules for the file.
    """
    # Minimum supported toolkit version, written as a well-formed version
    # string (no trailing separator).
    _enforce_dependency_version("openforcefield", "0.7.1")

    ff = _get_forcefield(forcefield, constrained)

    file_format = molecule.split(".")[-1]

    # TODO: This may not preserve order of loading molecules in
    ambiguous_stereochemistry = False
    try:
        raw_mols = registry.call(
            "from_file",
            molecule,
            file_format=file_format,
        )
    except UndefinedStereochemistryError:
        ambiguous_stereochemistry = True
        raw_mols = registry.call(
            "from_file",
            molecule,
            file_format=file_format,
            allow_undefined_stereo=True,
        )

    # A single-record file may come back as a bare molecule rather than a
    # list; normalize so the code below can always iterate.
    if not isinstance(raw_mols, list):
        raw_mols = [raw_mols]

    # When failing to parse molecules (i.e. attempting to read MOL2 with
    # RDKit, which is not supported) the toolkit can return an empty
    # list instead of raising a specific exception
    if not raw_mols:
        from openff.cli.utils.exceptions import MoleculeParsingError

        raise MoleculeParsingError(toolkit_registry=registry, filename=molecule)

    mols = []
    for i, mol in enumerate(raw_mols):
        if prefix is not None:
            mol.name = prefix
        elif not mol.name:
            mol.name = "molecule"
        # Disambiguate names when the file holds several records
        if len(raw_mols) > 1:
            mol.name += str(i)
        mols.append(mol)

    mols = _collapse_conformers(mols)

    # TODO: How to handle names of different stereoisomers? Just act like they're different conformers?
    if ambiguous_stereochemistry:
        mols_with_unpacked_stereoisomers = []
        for mol in mols:
            # TODO: This is a brute-force approach, it would be better to check stereo
            #  without needing to call enumerate_stereoisomers
            stereoisomers = mol.enumerate_stereoisomers()
            if stereoisomers:
                for i, iso in enumerate(stereoisomers):
                    iso.name = mol.name + "_stereoisomer" + str(i)
                    mols_with_unpacked_stereoisomers.append(iso)
            else:
                mols_with_unpacked_stereoisomers.append(mol)
        mols = mols_with_unpacked_stereoisomers

    for mol in mols:
        # Copy the first input conformer (if any) before generating new ones,
        # so it can be added back afterwards
        existing_conf = None
        if mol.conformers is not None:
            existing_conf = deepcopy(mol.conformers[0])
        mol.generate_conformers(
            toolkit_registry=registry,
            n_conformers=100,
            rms_cutoff=0.25 * unit.angstrom,
        )
        if existing_conf is not None:
            mol.add_conformer(existing_conf)

    # TODO: What happens if some molecules in a multi-molecule file have charges, others don't?
    mols_with_charges = [mol for mol in mols if mol.partial_charges is not None]

    mols_out = []
    for mol in mols:
        # Molecules that arrived with charges are offered as the charge source
        mol_with_charge = [mol] if mol in mols_with_charges else []
        simulation, partial_charges = _build_simulation(
            molecule=mol,
            forcefield=ff,
            mols_with_charge=mol_with_charge,
        )
        mol._partial_charges = partial_charges

        # The conformer list is read once, before `mol` is rebound below, so
        # the loop still visits every conformer of the input molecule. Each
        # iteration contributes one molecule to the output.
        for conformer in mol.conformers:
            simulation = _minimize_conformer(simulation, conformer)
            energy, positions = _get_conformer_data(simulation)
            mol = _reconstruct_mol_from_conformer(mol, positions)
            _add_metadata_to_mol(mol, energy, registry, forcefield)
            mols_out.append(mol)

    mols_out = _sort_mols(mols_out)

    return mols_out