def test_optimized_only(self):
  """Checks the output when only the optimized geometry is requested.

  With initial geometries and all-bond-topology expansion both disabled,
  exactly one molecule is expected; its name, SMILES and two atom positions
  are verified.
  """
  molecules = list(
      smu_utils_lib.conformer_to_molecules(
          self.conformer,
          include_initial_geometries=False,
          include_optimized_geometry=True,
          include_all_bond_topologies=False))
  self.assertLen(molecules, 1)
  opt_mol = molecules[0]

  self.assertEqual(
      opt_mol.GetProp('_Name'),
      'SMU 618451001 bt=618451(0/2) geom=opt',
  )
  self.assertEqual(
      '[H]C(F)=C(OC([H])([H])[H])OC([H])([H])[H]',
      Chem.MolToSmiles(opt_mol, kekuleSmiles=True, isomericSmiles=False))

  # Two arbitrarily chosen atoms from the .dat file, with their coordinates
  # converted from bohr to angstroms: (atom index, element symbol, position).
  spot_checks = [
      (1, 'C', [0.540254, -3.465543, 3.456982]),
      (13, 'H', [2.135153, -1.817366, 0.226376]),
  ]
  for atom_idx, symbol, xyz in spot_checks:
    self.assertEqual(symbol, opt_mol.GetAtomWithIdx(atom_idx).GetSymbol())
    np.testing.assert_allclose(
        xyz,
        list(opt_mol.GetConformer().GetAtomPosition(atom_idx)),
        atol=1e-6)
def test_initial_only(self):
  """Checks the output when only the initial geometries are requested.

  Two molecules are expected (one per initial geometry); their names and the
  position of atom 1 in each are verified.
  """
  molecules = list(
      smu_utils_lib.conformer_to_molecules(
          self.conformer,
          include_initial_geometries=True,
          include_optimized_geometry=False,
          include_all_bond_topologies=False))
  self.assertLen(molecules, 2)

  actual_names = [mol.GetProp('_Name') for mol in molecules]
  self.assertEqual(actual_names, [
      'SMU 618451001 bt=618451(0/2) geom=init(0/2)',
      'SMU 618451001 bt=618451(0/2) geom=init(1/2)',
  ])

  # Atom 1 is an arbitrarily chosen atom from the .dat file, with coordinates
  # converted from bohr to angstroms. One expected position per molecule, in
  # output order.
  atom_idx = 1
  expected_positions = [
      [0.6643, -3.470301, 3.4766],
      [664.299998, -3470.300473, 3476.600215],
  ]
  for mol, xyz in zip(molecules, expected_positions):
    self.assertEqual('C', mol.GetAtomWithIdx(atom_idx).GetSymbol())
    np.testing.assert_allclose(
        xyz,
        list(mol.GetConformer().GetAtomPosition(atom_idx)),
        atol=1e-6)
def write(self, conformer):
  """Converts a Conformer to molecules and writes each one out.

  Initial and optimized geometries are included according to
  self.init_geometry and self.opt_geometry; all bond topologies are always
  requested.

  Args:
    conformer: dataset_pb2.Conformer
  """
  molecules = smu_utils_lib.conformer_to_molecules(
      conformer,
      include_initial_geometries=self.init_geometry,
      include_optimized_geometry=self.opt_geometry,
      include_all_bond_topologies=True)
  for molecule in molecules:
    self.writer.write(molecule)
def test_all_outputs(self):
  """Checks the output of conformer_to_molecules with default arguments.

  Verifies the number of molecules, their names in output order, and the
  SMILES of two of them.
  """
  molecules = list(smu_utils_lib.conformer_to_molecules(self.conformer))

  # 2 bond topologies * (1 opt geom + 2 init_geom)
  self.assertLen(molecules, 6)

  expected_names = [
      'SMU 618451001 bt=618451(0/2) geom=init(0/2)',
      'SMU 618451001 bt=618451(0/2) geom=init(1/2)',
      'SMU 618451001 bt=618451(0/2) geom=opt',
      'SMU 618451001 bt=99999(1/2) geom=init(0/2)',
      'SMU 618451001 bt=99999(1/2) geom=init(1/2)',
      'SMU 618451001 bt=99999(1/2) geom=opt'
  ]
  self.assertEqual([mol.GetProp('_Name') for mol in molecules],
                   expected_names)

  # Spot-check the SMILES of one molecule from each bond topology group.
  expected_smiles = '[H]C(F)=C(OC([H])([H])[H])OC([H])([H])[H]'
  for mol_idx in (0, 4):
    self.assertEqual(
        expected_smiles,
        Chem.MolToSmiles(
            molecules[mol_idx], kekuleSmiles=True, isomericSmiles=False))
db = smu_sqlite.SMUSQLite('20220104_standard.sqlite', 'r') # We will look at one conformer that illustrates the complexities of # converting a conformer to molecule(s). Not all conformers will have # this level of complexity. conformer = db.find_by_conformer_id(8240001) # This RDKit object will be used to write SDF files to stdout to illustrate # the example. writer = Chem.SDWriter(sys.stdout) # We'll start with the simplest case that always generates a single molecule. case0_mols = list( smu_utils_lib.conformer_to_molecules(conformer, include_initial_geometries=False, include_optimized_geometry=True, include_all_bond_topologies=False)) assert len(case0_mols) == 1 # Note the three "include" arguments above # * include_initial_geometries: means to generate molecules for all the # initial geometries. There will always be at least one initial # geometry, but there can be many. # * include_optimized_geometry: means to include output for the # (single) optimized geometry. # * include_all_bond_topologies: False means to use only the first # (i.e. best matching) bond topology that fits this geometry. True # means to generate separate RDKit molecules for each topology. print( 'A single molecule comes from asking for only the optimized geometry and only',