def test_convertToDataFrame(self):
    """Check the conversion of a SmallMolLib into a pandas DataFrame.

    Loads ``fda_drugs_light.sdf``, converts it with ``toDataFrame()`` and
    verifies the type of the result, its column names against
    ``SDF_FIELDS`` and the first field of row 99 against ``SDF_LOC_0_99``.
    """
    sdffile = os.path.join(self.dataDir, "fda_drugs_light.sdf")
    lib = SmallMolLib(sdffile)
    df = lib.toDataFrame()

    self.assertIsInstance(
        df,
        core.frame.DataFrame,
        msg="The SmallMolLib object was not correctly converted into pandas "
        "DataFrame",
    )

    cols = df.columns.tolist()
    ref_cols = SDF_FIELDS
    self.assertEqual(
        cols,
        ref_cols,
        msg="The fields in the SmallMolLib object was not the expected one",
    )

    # Purely positional (row, column) lookup. ``df.iloc[99][0]`` would index
    # the row Series with an integer key, a positional fallback that pandas
    # has deprecated.
    ligname_99 = df.iloc[99, 0]
    ref_ligname = SDF_LOC_0_99
    self.assertEqual(
        ligname_99,
        ref_ligname,
        msg="The ligand name found is not the expected one",
    )
def test_readSmiles(self):
    """Read the plain and the gzipped SMILES file; each must hold 100 entries."""
    for filename in ("fda_drugs_light.smi", "fda_drugs_light.smi.gz"):
        library = SmallMolLib(os.path.join(self.dataDir, filename))
        assert len(library) == 100
def test_appendSmallMolLib(self):
    """Append a library to a copy of itself and expect twice ``SDF_N_MOLS`` molecules."""
    source = os.path.join(self.dataDir, 'fda_drugs_light.sdf')

    merged = SmallMolLib(source)
    merged.appendSmallLib(SmallMolLib(source))

    self.assertEqual(
        merged.numMols,
        SDF_N_MOLS * 2,
        msg="The number of molecules in the SmallMolLib is not as expected."
        "The two sdf were not correctly merged. ",
    )
def test_loaSdffile(self):
    """Load the plain and the gzipped sdf file; each must yield ``SDF_N_MOLS`` molecules."""
    for filename in ('fda_drugs_light.sdf', 'fda_drugs_light.sdf.gz'):
        loaded = SmallMolLib(os.path.join(self.dataDir, filename)).numMols
        failure = 'Molecules not correctly loaded. Expected: {}; Now: {}'.format(
            SDF_N_MOLS, loaded)
        self.assertEqual(loaded, SDF_N_MOLS, failure)
def test_writeSmiles(self):
    """Write the sdf library as SMILES and compare it with the reference file.

    The first line of both files is dropped before the comparison.
    """
    from tempfile import TemporaryDirectory

    sdffile = os.path.join(self.dataDir, "fda_drugs_light.sdf")
    lib = SmallMolLib(sdffile)

    # A temporary directory guarantees the written file is removed again.
    # ``NamedTemporaryFile().name + ".smi"`` produced a path that nothing
    # ever cleaned up.
    with TemporaryDirectory() as tmpdir:
        tmpfile = os.path.join(tmpdir, "written.smi")
        lib.writeSmiles(tmpfile)
        with open(tmpfile, "r") as f:
            filelines = f.readlines()[1:]

    with open(os.path.join(self.dataDir, "fda_drugs_light.smi"), "r") as f:
        reflines = f.readlines()[1:]

    self.assertEqual(filelines, reflines)
def test_appendSmallMol(self):
    """Append one mol2 molecule to the sdf library and expect ``SDF_N_MOLS + 1`` molecules."""
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))
    extra = SmallMol(os.path.join(self.dataDir, 'benzamidine.mol2'))

    library.appendSmallMol(extra)

    self.assertEqual(
        library.numMols,
        SDF_N_MOLS + 1,
        msg="The number of molecules in the SmallMolLib is not as expected."
        "The mol2 were not correctly append. ",
    )
def test_getCommonStructure(self):
    """Compute the maximum common substructure of the library and check it.

    Verifies the type of the returned substructure, then the atom indexes
    reported for the first molecule and how many of them there are.
    """
    library = SmallMolLib(os.path.join(self.dataDir, "fda_drugs_light.sdf"))
    cms, cms_idxs, _ = getMaximumCommonSubstructure(
        library._mols, returnAtomIdxs=True)

    self.assertIsInstance(
        cms,
        rdkit.Chem.rdchem.Mol,
        msg="The object is not a rdkit Molecule object",
    )

    first_mol_idxs = cms_idxs[0]
    first_mol_natoms = len(first_mol_idxs)

    self.assertEqual(
        first_mol_idxs,
        CMS_AIDX_MOL0,
        msg="The atomidx of the CMS for the mol0 are not the expected ones",
    )
    self.assertEqual(
        first_mol_natoms,
        CMS_N_ATOMS,
        msg="The number of atom in the CMS for the mol0 are not the expected ones",
    )
def test_writeSdf(self):
    """Write the library to an sdf file and check it exists and loads again."""
    from tempfile import TemporaryDirectory

    sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
    lib = SmallMolLib(sdffile)

    # A temporary directory guarantees the written file is removed again.
    # ``NamedTemporaryFile().name + '.sdf'`` produced a path that nothing
    # ever cleaned up.
    with TemporaryDirectory() as tmpdir:
        sdfname = os.path.join(tmpdir, 'written.sdf')
        lib.writeSdf(sdfname)

        sdf_exists = os.path.isfile(sdfname)
        self.assertTrue(sdf_exists, msg="The sdf written was not found")

        sdf = SmallMolLib(sdfname)
        self.assertIsInstance(
            sdf,
            SmallMolLib,
            msg="The sdf written was not correctly loaded. Probably the previous "
            "writing went wrong",
        )
def test_loaSdffile(self):
    """Load the plain and the gzipped sdf file; each must yield ``SDF_N_MOLS`` molecules."""
    for filename in ("fda_drugs_light.sdf", "fda_drugs_light.sdf.gz"):
        library = SmallMolLib(os.path.join(self.dataDir, filename))
        n_mols = library.numMols
        self.assertEqual(
            n_mols,
            SDF_N_MOLS,
            f"Molecules not correctly loaded. Expected: {SDF_N_MOLS}; Now: {n_mols}",
        )
def test_cluster_shape(self):
    """Cluster the library by 'shape' and check the reported cluster count is an ``np.int64``."""
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))

    # Only the details dictionary is inspected; the first return value is ignored.
    _, details = cluster(library._mols, 'shape', returnDetails=True)

    self.assertIsInstance(
        details['numClusters'],
        np.int64,
        msg="None valid number of clusters",
    )
def areLigandsOptimized(sdf_file):
    """Check every ligand of an sdf file with ``isLigandOptimized``.

    Parameters
    ----------
    sdf_file : str
        Path handed to ``SmallMolLib``

    Returns
    -------
    all_optimized : bool
        True if no ligand failed the check
    not_optimized : list
        The ``ligname`` of each ligand that failed the check
    """
    from moleculekit.smallmol.smallmollib import SmallMolLib

    failed = []
    for entry in SmallMolLib(sdf_file):
        # The name is read before the conversion, as the check runs on the converted object
        name = entry.ligname
        if isLigandOptimized(entry.toMolecule()):
            continue
        failed.append(name)

    return not failed, failed
def test_restrained_embedding(self):
    """Run ``restrainedEmbed`` on the test molecule and compare all coordinates with the stored reference."""
    from moleculekit.smallmol.tools.restrainedembed import restrainedEmbed
    import numpy as np

    def data_path(name):
        return os.path.join(self.dataDir, name)

    # The positions file stores a single pickled object, unwrapped with .item()
    atomPos = np.load(data_path('restrain_positions.npy'), allow_pickle=True).item()
    refcoords = np.load(data_path('restrain_final_coords.npy'), allow_pickle=True)

    first_mol = SmallMolLib(data_path('restrain_molecule.sdf'))[0]
    restrainedEmbed(first_mol._mol, atomPos)

    assert np.allclose(refcoords, first_mol._coords)
def test_depict(self):
    """Depict the library to an svg file and to an IPython object.

    The written svg must have the same size in bytes as the reference
    ``sdf.svg`` and the ipython depiction must be an ``IPython`` SVG object.
    """
    import IPython
    from tempfile import TemporaryDirectory

    refimg = os.path.join(self.dataDir, 'sdf.svg')
    sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
    lib = SmallMolLib(sdffile)

    # A temporary directory guarantees the written image is removed again.
    # ``NamedTemporaryFile().name + '.svg'`` produced a path that nothing
    # ever cleaned up.
    with TemporaryDirectory() as tmpdir:
        img_name = os.path.join(tmpdir, 'depicted.svg')
        lib.depict(sketch=True, filename=img_name)
        sm_img_size = os.path.getsize(img_name)

    _img = lib.depict(sketch=True, ipython=True)
    refimg_size = os.path.getsize(refimg)

    self.assertIsInstance(
        _img,
        IPython.core.display.SVG,
        msg="The object is not an IPython image as expected",
    )
    self.assertEqual(
        sm_img_size,
        refimg_size,
        msg="The svg image does not have the same size of the reference",
    )
def areLigandsOptimized(sdf_file, max_check=None):
    """Check the ligands of an sdf file with ``isLigandOptimized``.

    Parameters
    ----------
    sdf_file : str
        Path handed to ``SmallMolLib``
    max_check : int or None
        If not None, iteration stops once this many ligands have been checked

    Returns
    -------
    all_optimized : bool
        True if no checked ligand failed the check
    not_optimized : list
        The ``ligname`` of each checked ligand that failed the check
    """
    from moleculekit.smallmol.smallmollib import SmallMolLib

    limited = max_check is not None
    failed = []
    for position, entry in enumerate(SmallMolLib(sdf_file)):
        if limited and position >= max_check:
            break
        # The name is read before the conversion, as the check runs on the converted object
        name = entry.ligname
        if isLigandOptimized(entry.toMolecule()):
            continue
        failed.append(name)

    return not failed, failed
def test_cluster_pathFingerprints(self):
    """Cluster the library by 'pathFingerprints' and compare cluster count and populations with the references."""
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))

    # Only the details dictionary is inspected; the first return value is ignored.
    _, details = cluster(library._mols, 'pathFingerprints', returnDetails=True)

    found_clusters = details['numClusters']
    found_populations = details['populations'].tolist()

    self.assertEqual(
        found_clusters,
        PATHFINGERPRINTS_N_CLUSTER,
        msg="The number of cluster identified are not as expected",
    )
    self.assertEqual(
        found_populations,
        PATHFINGERPRINTS_POPULATION_CLUSTER,
        msg="The population fo the cluster are not the expected one",
    )
def areLigandsDocked(prot_file, sdf_file, threshold=10, max_check=None):
    """Check the ligands of an sdf file against a protein with ``isLigandDocked``.

    Parameters
    ----------
    prot_file : str
        Path handed to ``Molecule`` to load the protein
    sdf_file : str
        Path handed to ``SmallMolLib`` to load the ligands
    threshold : int
        Value forwarded unchanged to ``isLigandDocked``
    max_check : int or None
        If not None, iteration stops once this many ligands have been checked

    Returns
    -------
    all_docked : bool
        True if no checked ligand failed the check
    not_docked : list
        The ``ligname`` of each checked ligand that failed the check
    """
    from moleculekit.smallmol.smallmollib import SmallMolLib
    from moleculekit.molecule import Molecule

    limited = max_check is not None
    failed = []
    protein = Molecule(prot_file)
    for position, entry in enumerate(SmallMolLib(sdf_file)):
        if limited and position >= max_check:
            break
        # The name is read before the conversion, as the check runs on the converted object
        name = entry.ligname
        if isLigandDocked(protein, entry.toMolecule(), threshold):
            continue
        failed.append(name)

    return not failed, failed
def test_restrained_embedding(self):
    """Run ``restrainedEmbed`` on the test molecule and compare the restrained atoms with the reference.

    Only the atoms listed as keys of the restraint positions are compared;
    their largest absolute coordinate difference must stay below 0.1.
    """
    from moleculekit.smallmol.tools.restrainedembed import restrainedEmbed
    import numpy as np

    def data_path(name):
        return os.path.join(self.dataDir, name)

    # The positions file stores a single pickled object, unwrapped with .item()
    atomPos = np.load(data_path("restrain_positions.npy"), allow_pickle=True).item()
    refcoords = np.load(data_path("restrain_final_coords.npy"), allow_pickle=True)

    first_mol = SmallMolLib(data_path("restrain_molecule.sdf"))[0]
    restrainedEmbed(first_mol._mol, atomPos)

    restrained = np.sort(list(atomPos.keys()))
    expected = refcoords[restrained].squeeze()
    obtained = first_mol._coords[restrained].squeeze()

    assert np.abs(expected - obtained).max() < 0.1
def test_cluster_mcs(self):
    """Cluster the library by 'mcs' and compare cluster count and populations with the references."""
    library = SmallMolLib(os.path.join(self.dataDir, "fda_drugs_light.sdf"))

    # Only the details dictionary is inspected; the first return value is ignored.
    _, details = cluster(library._mols, "mcs", returnDetails=True)

    found_clusters = details["numClusters"]
    found_populations = details["populations"].tolist()

    self.assertEqual(
        found_clusters,
        MCS_N_CLUSTER,
        msg="The number of cluster identified are not as expected",
    )
    self.assertEqual(
        found_populations,
        MCS_POPULATION_CLUSTER,
        msg="The population fo the cluster are not the expected one",
    )
def test_removeMols(self):
    """Remove molecules by index and check the library content changed.

    The names found at ``SDF_IDS_DELETE`` must first match
    ``SDF_MOLNAME_DELETE``; after ``removeMols`` the names found at the same
    indexes must differ from the ones seen before.
    """
    sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
    lib = SmallMolLib(sdffile)

    mols_ids = SDF_IDS_DELETE
    ref_mols_name = SDF_MOLNAME_DELETE
    mols_name = [s.ligname for s in lib.getMols(mols_ids)]
    self.assertListEqual(
        mols_name,
        ref_mols_name,
        msg="The molecules at the given indexes do not match with the "
        "expected",
    )

    lib.removeMols(mols_ids)
    mols_name_now = [s.ligname for s in lib.getMols(mols_ids)]

    # assertNotEqual reports both lists on failure, unlike assertFalse(a == b)
    self.assertNotEqual(
        mols_name_now,
        mols_name,
        msg="The molecules seem to not be deleted correctly",
    )
def getChemblSimilarLigandsBySmile(smi, threshold=85, returnSmiles=False):
    """
    Returns a SmallMolLib object of the ligands having a similarity with a smile of at least the specified
    threshold. The molecules are retrieved from Chembl. It is possible to also return the list of smiles.

    For each hit only the longest "."-separated fragment of its canonical smile is kept, and duplicated
    fragments are stored once. Hits without a canonical smile are skipped.

    Parameters
    ----------
    smi: str
        The smile
    threshold: int
        The threshold value to apply for the similarity search
    returnSmiles: bool
        If True, the list of smiles is returned as well

    Returns
    -------
    lib: moleculekit.smallmol.smallmollib.SmallMolLib
        The SmallMolLib object with one SmallMol per smile kept
    smiles: list
        The list of smiles kept. Only returned if returnSmiles is True

    Raises
    ------
    ImportError
        If the chembl_webresource_client package is not installed

    Example
    -------
    >>> _, smile = getChemblLigandByDrugName('ibuprofen', returnSmile=True)  # doctest: +SKIP
    >>> lib = getChemblSimilarLigandsBySmile(smile)  # doctest: +SKIP
    >>> lib.numMols  # doctest: +SKIP
    4
    >>> lib, smiles = getChemblSimilarLigandsBySmile(smile, returnSmiles=True)  # doctest: +SKIP
    >>> len(smiles)  # doctest: +SKIP
    4
    """
    from moleculekit.smallmol.smallmol import SmallMol
    from moleculekit.smallmol.smallmollib import SmallMolLib

    try:
        from chembl_webresource_client.new_client import new_client
    except ImportError as err:
        raise ImportError(
            "You need to install the chembl_webresource package to use this function. Try using `conda install "
            "-c chembl chembl_webresource_client`.") from err

    similarity = new_client.similarity
    results = similarity.filter(smiles=smi, similarity=threshold).only(
        ["molecule_structures"])
    results = results.all()

    smi_list = []
    for entry in results:
        # Guard against hits that carry no structure information, which
        # would otherwise fail on subscripting None
        structures = entry["molecule_structures"] or {}
        hit_smi = structures.get("canonical_smiles")
        if not hit_smi:
            continue

        # Keep the longest fragment (the first one on ties)
        fragment = max(hit_smi.split("."), key=len)
        if fragment not in smi_list:
            smi_list.append(fragment)

    lib = SmallMolLib()
    for fragment in smi_list:
        lib.appendSmallMol(SmallMol(fragment))

    if returnSmiles:
        return lib, smi_list

    return lib
rep = '{}: {}'.format(name, len(self.reps.replist)) else: rep = '{}: {}'.format(name, field) return rep rep = 'SmallMol with {} atoms and {} conformers'.format(self.numAtoms, self.numFrames) for p in sorted(self._atom_fields): if p.startswith('_'): continue rep += '\n' rep += 'Atom field - {}'.format(p) for j in sorted(self.__dict__.keys() - list(SmallMol._atom_fields)): if j[0] == '_': continue rep += '\n' rep += formatstr(j, self.__dict__[j]) return rep if __name__ == '__main__': import doctest import os from moleculekit.home import home from moleculekit.smallmol.smallmollib import SmallMolLib lib = SmallMolLib(os.path.join(home(dataDir='test-smallmol'), 'fda_drugs_light.sdf')) sm = SmallMol(os.path.join(home(dataDir='test-smallmol'), 'benzamidine.mol2')) doctest.testmod(extraglobs={'lib': lib.copy(), 'sm': sm.copy()})
rep = f"{name}: {len(self.reps.replist)}" else: rep = f"{name}: {field}" return rep rep = f"SmallMol with {self.numAtoms} atoms and {self.numFrames} conformers" for p in sorted(self._atom_fields): if p.startswith("_"): continue rep += "\n" rep += f"Atom field - {p}" for j in sorted(self.__dict__.keys() - list(SmallMol._atom_fields)): if j[0] == "_": continue rep += "\n" rep += formatstr(j, self.__dict__[j]) return rep if __name__ == "__main__": import doctest from moleculekit.home import home from moleculekit.smallmol.smallmollib import SmallMolLib lib = SmallMolLib( os.path.join(home(dataDir="test-smallmol"), "fda_drugs_light.sdf")) sm = SmallMol( os.path.join(home(dataDir="test-smallmol"), "benzamidine.mol2")) doctest.testmod(extraglobs={"lib": lib.copy(), "sm": sm.copy()})