def test_match_conformers(self):
    """Check the index mapping produced by ``MoleculeStore._match_conformers``.

    Two stored conformers (atom separations of 2 and 4) are compared against
    three query conformers (separations of 4, 5 and 2, laid out along
    different axes).  The expected mapping pairs query conformer 0 with stored
    conformer 1 and query conformer 2 with stored conformer 0; query
    conformer 1 is expected to be absent from the mapping.
    """
    stored_geometries = (
        [[-1.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
        [[-2.0, 0.0, 0.0], [2.0, 0.0, 0.0]],
    )
    query_geometries = (
        [[0.0, -2.0, 0.0], [0.0, 2.0, 0.0]],
        [[0.0, -2.0, 0.0], [0.0, 3.0, 0.0]],
        [[0.0, 0.0, 0.0], [-2.0, 0.0, 0.0]],
    )

    stored_conformers = [
        DBConformerRecord(coordinates=numpy.array(geometry))
        for geometry in stored_geometries
    ]
    query_conformers = [
        ConformerRecord(
            coordinates=numpy.array(geometry),
            partial_charges=[],
            bond_orders=[],
        )
        for geometry in query_geometries
    ]

    matches = MoleculeStore._match_conformers(
        "[Cl:1][H:2]",
        db_conformers=stored_conformers,
        conformers=query_conformers,
    )

    assert matches == {0: 1, 2: 0}
def test_average_partial_charges(self):
    """Check that ``average_partial_charges`` averages per atom over conformers.

    Two conformers carry "am1" charges that differ by 0.2 on every atom, so
    the expected result is the element-wise midpoint, returned as a tuple
    with one entry per atom.
    """
    # (coordinates, am1 charges) for each conformer of the record.
    conformer_data = (
        (numpy.ones((4, 3)), [0.1, 0.2, 0.3, 0.4]),
        (numpy.zeros((4, 3)), [0.3, 0.4, 0.5, 0.6]),
    )

    record = MoleculeRecord(
        smiles="[C:1]([H:2])([H:3])([H:4])",
        conformers=[
            ConformerRecord(
                coordinates=coordinates,
                partial_charges=[PartialChargeSet(method="am1", values=charges)],
            )
            for coordinates, charges in conformer_data
        ],
    )

    average_charges = record.average_partial_charges("am1")

    assert isinstance(average_charges, tuple)
    assert len(average_charges) == 4
    assert numpy.allclose(average_charges, (0.2, 0.3, 0.4, 0.5))
def test_store_bond_order_data(self, tmp_path):
    """Check how a ``MoleculeStore`` handles repeated "am1" bond order data.

    Scenario exercised:

    1. Storing a first HCl record yields a store of length one.
    2. Storing a differently-mapped HCl record with the same ``arange``
       coordinates and the same WBO method is expected to raise
       ``RuntimeError``.
    3. Storing the differently-mapped record with all-zero coordinates is
       expected to succeed, leaving one molecule with two conformers.
    """

    def hcl_record(smiles, coordinates):
        # Single-conformer HCl record carrying one "am1" WBO set.
        return MoleculeRecord(
            smiles=smiles,
            conformers=[
                ConformerRecord(
                    coordinates=coordinates,
                    bond_orders=[
                        WibergBondOrderSet(method="am1", values=[(0, 1, 0.5)])
                    ],
                )
            ],
        )

    store = MoleculeStore(f"{tmp_path}.sqlite")

    store.store(hcl_record("[Cl:1][H:2]", numpy.arange(6).reshape((2, 3))))
    assert len(store) == 1

    duplicate_pattern = re.escape("am1 WBOs already stored for [Cl:1][H:2]")

    with pytest.raises(RuntimeError, match=duplicate_pattern):
        store.store(hcl_record("[Cl:2][H:1]", numpy.arange(6).reshape((2, 3))))

    store.store(hcl_record("[Cl:2][H:1]", numpy.zeros((2, 3))))

    assert len(store) == 1
    assert {*store.wbo_methods} == {"am1"}

    record = store.retrieve()[0]
    assert len(record.conformers) == 2
def test_reorder(self):
    """Check that ``MoleculeRecord.reorder`` permutes all per-atom data.

    A two-atom record mapped as ``[Cl:2][H:1]`` is reordered to
    ``[Cl:1][H:2]``.  The coordinates rows, the partial charges and the bond
    order atom indices are all expected to come back swapped.
    """
    original_coordinates = numpy.arange(6).reshape((2, 3))

    am1_charges = PartialChargeSet(method="am1", values=[0.5, 1.5])
    am1_bond_orders = WibergBondOrderSet(method="am1", values=[(0, 1, 0.2)])

    original_record = MoleculeRecord(
        smiles="[Cl:2][H:1]",
        conformers=[
            ConformerRecord(
                coordinates=original_coordinates,
                partial_charges=[am1_charges],
                bond_orders=[am1_bond_orders],
            )
        ],
    )

    reordered_record = original_record.reorder("[Cl:1][H:2]")
    assert reordered_record.smiles == "[Cl:1][H:2]"

    reordered_conformer = reordered_record.conformers[0]

    # Reversing the row order is what ``numpy.flipud`` does for a 2D array.
    expected_coordinates = original_coordinates[::-1]
    assert numpy.allclose(reordered_conformer.coordinates, expected_coordinates)

    reordered_charges = reordered_conformer.partial_charges[0].values
    assert numpy.allclose(reordered_charges, [1.5, 0.5])

    reordered_bond_orders = reordered_conformer.bond_orders[0].values
    assert numpy.allclose(reordered_bond_orders, [(1, 0, 0.2)])
def test_validate_coordinates(self, value, expected_raises):
    """Check coordinate validation on ``ConformerRecord``.

    Args:
        value: The candidate ``coordinates`` argument.
        expected_raises: A context manager describing the expected outcome
            (presumably ``pytest.raises(...)`` or a no-op context supplied by
            a parametrization that is not visible here).
    """
    with expected_raises:
        # The checks stay inside the context so they are skipped when
        # construction raises as expected.
        stored_coordinates = ConformerRecord(coordinates=value).coordinates

        assert isinstance(stored_coordinates, numpy.ndarray)
        assert stored_coordinates.flags.writeable is False
def test_bond_orders_by_method(self):
    """Check that ``bond_orders_by_method`` keys bond order values by method.

    A single "am1" set is expected to be exposed as a tuple of
    ``(index, index, value)`` tuples under the "am1" key.
    """
    am1_bond_orders = WibergBondOrderSet(method="am1", values=[(0, 1, 0.1)])

    record = ConformerRecord(
        coordinates=numpy.ones((2, 3)),
        bond_orders=[am1_bond_orders],
    )

    expected = {"am1": ((0, 1, 0.1),)}
    assert record.bond_orders_by_method == expected
def tmp_molecule_store(tmp_path) -> MoleculeStore:
    """Create a ``MoleculeStore`` pre-populated with three small records.

    The store is written to ``f"{tmp_path}.sqlite"`` and contains:

    * ``[Ar:1]`` with "am1" charges only,
    * ``[He:1]`` with "am1bcc" charges only,
    * ``[Cl:1][Cl:2]`` with "am1" and "am1bcc" charges plus "am1" bond orders.

    Args:
        tmp_path: Path prefix used to build the SQLite file name.

    Returns:
        The populated store.
    """
    origin = [[0.0, 0.0, 0.0]]

    argon = MoleculeRecord(
        smiles="[Ar:1]",
        conformers=[
            ConformerRecord(
                coordinates=numpy.array(origin),
                partial_charges=[PartialChargeSet(method="am1", values=[0.5])],
                bond_orders=[],
            )
        ],
    )

    helium = MoleculeRecord(
        smiles="[He:1]",
        conformers=[
            ConformerRecord(
                coordinates=numpy.array(origin),
                partial_charges=[PartialChargeSet(method="am1bcc", values=[-0.5])],
                bond_orders=[],
            )
        ],
    )

    chlorine = MoleculeRecord(
        smiles="[Cl:1][Cl:2]",
        conformers=[
            ConformerRecord(
                coordinates=numpy.array([[-1.0, 0.0, 0.0], [1.0, 0.0, 0.0]]),
                partial_charges=[
                    PartialChargeSet(method="am1", values=[0.5, -0.5]),
                    PartialChargeSet(method="am1bcc", values=[0.75, -0.75]),
                ],
                bond_orders=[WibergBondOrderSet(method="am1", values=[(0, 1, 1.2)])],
            )
        ],
    )

    store = MoleculeStore(f"{tmp_path}.sqlite")
    store.store(argon, helium, chlorine)

    return store
def test_validate_partial_charges(self, value, expected_raises):
    """Check partial charge validation on ``ConformerRecord``.

    Args:
        value: The candidate ``partial_charges`` argument.
        expected_raises: A context manager describing the expected outcome
            (presumably ``pytest.raises(...)`` or a no-op context supplied by
            a parametrization that is not visible here).
    """
    two_atom_coordinates = numpy.ones((2, 3))

    with expected_raises:
        # The checks stay inside the context so they are skipped when
        # construction raises as expected.
        record = ConformerRecord(
            coordinates=two_atom_coordinates,
            partial_charges=value,
        )

        stored_sets = record.partial_charges
        assert isinstance(stored_sets, tuple)
        assert len(stored_sets) == len(value)
def test_partial_charges_by_method(self):
    """Check that ``partial_charges_by_method`` keys charge values by method.

    Two charge sets ("am1" and "am1bcc") are stored on one conformer and are
    expected to be exposed as tuples under their respective method names.
    """
    am1_charges = PartialChargeSet(method="am1", values=[0.1, 0.2, 0.3, 0.4])
    am1bcc_charges = PartialChargeSet(method="am1bcc", values=[1.0, 2.0, 3.0, 4.0])

    record = ConformerRecord(
        coordinates=numpy.ones((4, 3)),
        partial_charges=[am1_charges, am1bcc_charges],
    )

    expected = {
        "am1": (0.1, 0.2, 0.3, 0.4),
        "am1bcc": (1.0, 2.0, 3.0, 4.0),
    }
    assert record.partial_charges_by_method == expected
def test_data_set_from_molecule_stores(tmpdir):
    """Check building a ``DGLMoleculeDataset`` from a one-molecule store.

    A single HCl record with "am1" charges and "am1" bond orders is stored,
    then turned into a data set.  The data set is expected to hold one entry
    whose labels contain "am1-charges" (one value per atom) and "am1-wbo"
    (one value per bond).

    Args:
        tmpdir: Directory in which the ``store.sqlite`` file is created.
    """
    hcl_conformer = ConformerRecord(
        coordinates=numpy.array([[-1.0, 0.0, 0.0], [1.0, 0.0, 0.0]]),
        partial_charges=[PartialChargeSet(method="am1", values=[0.1, -0.1])],
        bond_orders=[WibergBondOrderSet(method="am1", values=[(0, 1, 1.1)])],
    )

    store_path = os.path.join(tmpdir, "store.sqlite")
    molecule_store = MoleculeStore(store_path)
    molecule_store.store(
        MoleculeRecord(smiles="[Cl:1]-[H:2]", conformers=[hcl_conformer])
    )

    data_set = DGLMoleculeDataset.from_molecule_stores(
        molecule_store,
        "am1",
        "am1",
        [AtomConnectivity()],
        [BondIsInRing()],
    )

    assert len(data_set) == 1
    assert data_set.n_features == 4

    dgl_molecule, labels = data_set[0]

    assert isinstance(dgl_molecule, DGLMolecule)
    assert dgl_molecule.n_atoms == 2

    # Each label must be present and have the expected flat shape.
    expected_label_shapes = (("am1-charges", (2,)), ("am1-wbo", (1,)))

    for label_name, expected_shape in expected_label_shapes:
        assert label_name in labels
        assert labels[label_name].numpy().shape == expected_shape
def mock_data_store(self, tmpdir) -> str:
    """Write a one-molecule ``MoleculeStore`` to disk and return its path.

    The store holds a single ``[Cl:1][Cl:2]`` record with one conformer that
    carries "am1bcc" partial charges and "am1" bond orders.

    Args:
        tmpdir: Directory in which the ``store.sqlite`` file is created.

    Returns:
        The path of the SQLite file backing the store.
    """
    chlorine_conformer = ConformerRecord(
        coordinates=numpy.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]),
        partial_charges=[PartialChargeSet(method="am1bcc", values=[1.0, -1.0])],
        bond_orders=[WibergBondOrderSet(method="am1", values=[(0, 1, 1.0)])],
    )
    chlorine_record = MoleculeRecord(
        smiles="[Cl:1][Cl:2]",
        conformers=[chlorine_conformer],
    )

    store_path = os.path.join(tmpdir, "store.sqlite")
    MoleculeStore(store_path).store(chlorine_record)

    return store_path
def label_molecule(
    molecule: Union[str, "Molecule"],
    guess_stereochemistry: bool,
    partial_charge_methods: List[ChargeMethod],
    bond_order_methods: List[WBOMethod],
    n_conformers: int = 500,
    rms_cutoff: float = 0.05,
) -> MoleculeRecord:
    """Label a molecule with per-conformer partial charges and bond orders.

    Conformers are generated for the molecule, filtered with
    ``apply_elf_conformer_selection`` (ELF10 pruning provided by the OpenFF
    toolkit), and every surviving conformer is then labelled with each of the
    requested charge and bond order methods.

    Notes:
        A ``Molecule`` passed in is deep-copied first, so the caller's object
        is left untouched.

    Args:
        molecule: The molecule to label, or a SMILES pattern describing it.
        guess_stereochemistry: Forwarded to ``smiles_to_molecule`` when a
            SMILES pattern is provided; controls whether undefined
            stereochemistry of atoms / bonds should be guessed.
        partial_charge_methods: The partial charge methods to apply to each
            conformer.
        bond_order_methods: The bond order methods to apply to each conformer.
        n_conformers: The *maximum* number of conformers to generate before
            the ELF selection is applied.
        rms_cutoff: The RMS cutoff [Å] used when generating the conformers.

    Returns:
        A record holding the mapped, isomeric SMILES of the molecule and one
        conformer record (coordinates in Å, charges in elementary charge
        units, bond orders as ``(atom1_index, atom2_index, value)``) per
        selected conformer.
    """
    from simtk import unit

    if not isinstance(molecule, str):
        # Work on a private copy; the labelling below mutates the molecule.
        molecule = copy.deepcopy(molecule)
    else:
        molecule = smiles_to_molecule(
            molecule, guess_stereochemistry=guess_stereochemistry
        )

    # Generate a diverse set of ELF10 conformers
    molecule.generate_conformers(
        n_conformers=n_conformers, rms_cutoff=rms_cutoff * unit.angstrom
    )
    molecule.apply_elf_conformer_selection()

    def charge_set_for(conformer, charge_method):
        # Assign charges on this conformer, then read them back off the atoms.
        molecule.assign_partial_charges(
            _OPENFF_CHARGE_METHODS[charge_method], use_conformers=[conformer]
        )
        return PartialChargeSet(
            method=charge_method,
            values=[
                atom.partial_charge.value_in_unit(unit.elementary_charge)
                for atom in molecule.atoms
            ],
        )

    def bond_order_set_for(conformer, bond_order_method):
        # Assign bond orders on this conformer, then read them off the bonds.
        molecule.assign_fractional_bond_orders(
            _OPENFF_WBO_METHODS[bond_order_method], use_conformers=[conformer]
        )
        return WibergBondOrderSet(
            method=bond_order_method,
            values=[
                (bond.atom1_index, bond.atom2_index, bond.fractional_bond_order)
                for bond in molecule.bonds
            ],
        )

    conformer_records = []

    for conformer in molecule.conformers:
        # All charge methods are evaluated before any bond order method.
        charge_sets = [
            charge_set_for(conformer, charge_method)
            for charge_method in partial_charge_methods
        ]
        bond_order_sets = [
            bond_order_set_for(conformer, bond_order_method)
            for bond_order_method in bond_order_methods
        ]

        conformer_records.append(
            ConformerRecord(
                coordinates=conformer.value_in_unit(unit.angstrom),
                partial_charges=charge_sets,
                bond_orders=bond_order_sets,
            )
        )

    return MoleculeRecord(
        smiles=molecule.to_smiles(isomeric=True, mapped=True),
        conformers=conformer_records,
    )
def test_store_partial_charge_data(self, tmp_path):
    """Check how a ``MoleculeStore`` merges and rejects partial charge data.

    Scenario exercised:

    1. Storing an HCl record with "am1" charges yields a store of length one.
    2. Storing a differently-mapped HCl record with row-flipped coordinates
       and "am1bcc" charges is expected to be merged into the same molecule
       and conformer.
    3. Storing "am1bcc" charges again is expected to raise ``RuntimeError``
       and leave the store unchanged.
    """

    def hcl_record(smiles, coordinates, method, values):
        # Single-conformer HCl record carrying one partial charge set.
        return MoleculeRecord(
            smiles=smiles,
            conformers=[
                ConformerRecord(
                    coordinates=coordinates,
                    partial_charges=[PartialChargeSet(method=method, values=values)],
                )
            ],
        )

    def check_single_merged_entry():
        # One molecule, both charge methods, a single conformer.
        assert len(store) == 1
        assert {*store.charge_methods} == {"am1", "am1bcc"}

        record = store.retrieve()[0]
        assert len(record.conformers) == 1

    store = MoleculeStore(f"{tmp_path}.sqlite")

    store.store(
        hcl_record(
            "[Cl:1][H:2]", numpy.arange(6).reshape((2, 3)), "am1", [0.50, 1.50]
        )
    )
    assert len(store) == 1

    store.store(
        hcl_record(
            "[Cl:2][H:1]",
            numpy.flipud(numpy.arange(6).reshape((2, 3))),
            "am1bcc",
            [0.25, 0.75],
        )
    )
    check_single_merged_entry()

    duplicate_pattern = re.escape("am1bcc charges already stored for [Cl:1][H:2]")

    with pytest.raises(RuntimeError, match=duplicate_pattern):
        store.store(
            hcl_record(
                "[Cl:2][H:1]",
                numpy.arange(6).reshape((2, 3)),
                "am1bcc",
                [0.25, 0.75],
            )
        )

    check_single_merged_entry()
def test_validate_bond_orders(self, value, expected_raises):
    """Check bond order validation on ``ConformerRecord``.

    Args:
        value: The candidate ``bond_orders`` argument.
        expected_raises: A context manager describing the expected outcome
            (presumably ``pytest.raises(...)`` or a no-op context supplied by
            a parametrization that is not visible here).
    """
    two_atom_coordinates = numpy.ones((2, 3))

    with expected_raises:
        # The checks stay inside the context so they are skipped when
        # construction raises as expected.
        record = ConformerRecord(
            coordinates=two_atom_coordinates,
            bond_orders=value,
        )

        stored_sets = record.bond_orders
        assert isinstance(stored_sets, tuple)
        assert len(stored_sets) == len(value)