def test_getAtoms(self):
    """Check ``SmallMol.get`` for element, neighbors and bond types.

    The molecule is built from ``SMILE_SMI`` and the queried values are
    compared against the ``PHENOL_*`` reference constants. In every failure
    message "Expected" is the reference value and "Now" is the value read
    from the molecule.
    """
    sm = SmallMol(SMILE_SMI)

    element_idx_1 = sm.get("element", "idx 1")[0]
    neighbors_element_O = sm.get("neighbors", "element O")[0]
    btypes_element_O = sm.get("bondtype", "element O", convertType=False)[0]

    self.assertEqual(
        element_idx_1,
        PHENOL_ELEMENT_IDX_1,
        "Element of the first atom does not correspond. "
        f"Expected: {PHENOL_ELEMENT_IDX_1}; Now: {element_idx_1}",
    )
    self.assertListEqual(
        neighbors_element_O,
        PHENOL_ELEMENT_NEIGHBORS_OX,
        "Neighbors atoms of the oxygen atom do not correspond. "
        f"Expected: {PHENOL_ELEMENT_NEIGHBORS_OX}; Now: {neighbors_element_O}",
    )
    self.assertListEqual(
        btypes_element_O,
        PHENOL_BTYPES_OX,
        "Bondtypes of the oxygen atom do not correspond. "
        f"Expected: {PHENOL_BTYPES_OX}; Now: {btypes_element_O}",
    )
def test_convertToMolecule(self):
    """Convert a SmallMol to a Molecule and require ``mol_equal`` to hold.

    The SmallMol is loaded from ``self.benzamidine_mol2`` and converted with
    ``formalcharges=False``; the ``serial`` and ``name`` fields are excluded
    from the comparison.
    """
    from moleculekit.molecule import mol_equal

    small = SmallMol(self.benzamidine_mol2)
    converted = small.toMolecule(formalcharges=False)

    same = mol_equal(
        small,
        converted,
        exceptFields=["serial", "name"],
        _logger=False,
    )
    assert same
def test_isChiral(self):
    """Compare the details returned by ``SmallMol.isChiral`` with ``CHIRAL_DETAILS``.

    The molecule is built from ``CHIRAL_SMI``. Only the details list is
    checked; the boolean returned alongside it is not asserted here.
    """
    sm = SmallMol(CHIRAL_SMI)
    _, details = sm.isChiral(returnDetails=True)

    self.assertListEqual(
        details,
        CHIRAL_DETAILS,
        'chiral atom does not match. '
        'Expected: {}; Now: {}'.format(CHIRAL_DETAILS, details),
    )
def test_generateConformers(self):
    """Check that generating conformers increases the number of frames.

    The molecule is loaded from ``self.benzamidine_mol2``; ten conformers
    are requested with ``append=False`` and the resulting ``numFrames`` must
    be strictly greater than the count before generation.
    """
    sm = SmallMol(self.benzamidine_mol2)
    initial_frames = sm.numFrames

    sm.generateConformers(num_confs=10, append=False)
    generated_frames = sm.numFrames

    # assertGreater is strict, so the message states "more", not "at least".
    self.assertGreater(
        generated_frames,
        initial_frames,
        'The generation of conformers should provide more conformers than '
        'were initially present. '
        'Initial: {}; Now: {}'.format(initial_frames, generated_frames),
    )
def test_isChiral(self):
    """Compare the details returned by ``SmallMol.isChiral`` with ``CHIRAL_DETAILS``.

    The molecule is built from ``CHIRAL_SMI``. Only the details list is
    checked; the boolean returned alongside it is not asserted here.
    """
    sm = SmallMol(CHIRAL_SMI)
    _, details = sm.isChiral(returnDetails=True)

    self.assertListEqual(
        details,
        CHIRAL_DETAILS,
        f"chiral atom does not match. Expected: {CHIRAL_DETAILS}; Now: {details}",
    )
def test_align(self):
    """Align a SmallMol onto a rotated Molecule built from the same structure.

    The Molecule is obtained with ``toMolecule()``, rotated by 3.14 about
    the ``[1, 0, 0]`` axis, and used as the target of ``SmallMol.align``.
    """
    import numpy as np
    from moleculekit.util import rotationMatrix

    sm = SmallMol(self.benzamidine_mol2)
    rotated = sm.toMolecule()
    rotated.rotateBy(rotationMatrix([1, 0, 0], 3.14))
    sm.align(rotated)

    # Absolute values of the coordinates are compared because the molecule
    # is symmetrical (original author's note).
    # NOTE(review): as written, the assertion only requires the largest
    # difference to be truthy (non-zero); it does not bound how far the two
    # coordinate sets are apart. Confirm whether a tolerance check was meant.
    delta = np.abs(sm._coords) - np.abs(rotated.coords)
    assert delta.max()
def test_convertToMolecule(self):
    """Convert a SmallMol to a Molecule and require ``mol_equal`` to hold.

    The SmallMol is loaded from ``self.benzamidine_mol2`` and converted with
    the default ``toMolecule()`` arguments; the ``serial`` and ``name``
    fields are excluded from the comparison.
    """
    from moleculekit.molecule import mol_equal

    small = SmallMol(self.benzamidine_mol2)
    converted = small.toMolecule()

    same = mol_equal(
        small,
        converted,
        exceptFields=["serial", "name"],
        _logger=False,
    )
    assert same
def test_foundBond(self):
    """Exercise ``SmallMol.foundBondBetween`` on the ``FOUNDBOND_SMI`` molecule.

    Three queries are made: atom 0 against any nitrogen, and atoms 0/1 with
    bond type 1 and with bond type 2. Only the last one is expected to find
    a bond.
    """
    sm = SmallMol(FOUNDBOND_SMI)
    first, second = 'idx 0', 'idx 1'

    bonded_to_nitrogen = sm.foundBondBetween(first, 'element N')
    bonded_single = sm.foundBondBetween(first, second, bondtype=1)
    # This result is unpacked into two values, unlike the two calls above.
    bonded_double, _ = sm.foundBondBetween(first, second, bondtype=2)

    self.assertFalse(
        bonded_to_nitrogen,
        'Bond between atom 0 and any nitrogens should not be present',
    )
    self.assertFalse(
        bonded_single,
        'Bond between atom 0 1 should not be single',
    )
    self.assertTrue(
        bonded_double,
        'Bond between atom 0 1 should be double',
    )
def test_convertFromMolecule(self):
    """Build SmallMol objects from Molecule objects and compare them.

    Two cases are covered: a plain conversion of ``self.benzamidine_mol2``
    and a ``force_reading=True`` conversion of ``self.indole_mol2``. The
    second comparison additionally ignores ``atomtype`` and ``charge``.
    """
    from moleculekit.molecule import mol_equal

    benzamidine = Molecule(self.benzamidine_mol2)
    sm_plain = SmallMol(benzamidine)
    assert mol_equal(
        sm_plain,
        benzamidine,
        exceptFields=['serial', 'name'],
        _logger=False,
    )

    indole = Molecule(self.indole_mol2)
    sm_forced = SmallMol(indole, force_reading=True)
    # Force reading writes to sdf which loses atomtypes and charges
    assert mol_equal(
        sm_forced,
        indole,
        exceptFields=['serial', 'name', 'atomtype', 'charge'],
        _logger=False,
    )
def test_getAtoms(self):
    """Check ``SmallMol.get`` for element, neighbors and bond types.

    The molecule is built from ``SMILE_SMI`` and the queried values are
    compared against the ``PHENOL_*`` reference constants. In every failure
    message "Expected" is the reference value and "Now" is the value read
    from the molecule.
    """
    sm = SmallMol(SMILE_SMI)

    element_idx_1 = sm.get('element', 'idx 1')[0]
    neighbors_element_O = sm.get('neighbors', 'element O')[0]
    btypes_element_O = sm.get('bondtype', 'element O', convertType=False)[0]

    self.assertEqual(
        element_idx_1,
        PHENOL_ELEMENT_IDX_1,
        'Element of the first atom does not correspond. '
        'Expected: {}; Now: {}'.format(PHENOL_ELEMENT_IDX_1, element_idx_1),
    )
    self.assertListEqual(
        neighbors_element_O,
        PHENOL_ELEMENT_NEIGHBORS_OX,
        'Neighbors atoms of the oxygen atom do not correspond. '
        'Expected: {}; Now: {}'.format(PHENOL_ELEMENT_NEIGHBORS_OX,
                                       neighbors_element_O),
    )
    self.assertListEqual(
        btypes_element_O,
        PHENOL_BTYPES_OX,
        'Bondtypes of the oxygen atom do not correspond. '
        'Expected: {}; Now: {}'.format(PHENOL_BTYPES_OX, btypes_element_O),
    )
def calcDatasetVoxel(self, protPath, ligPath, number, altProtPath, altLigPath):
    """Build one combined protein/ligand voxel datapoint.

    The ligand is loaded from ``ligPath`` and voxelized in a box of size
    ``self.boxsize`` centred on the mean of its coordinates. If that fails
    for any reason, the same is attempted with ``altLigPath``. The protein
    voxels are obtained from ``self.calcProtVoxel`` using the same centre.

    Parameters
    ----------
    protPath, number, altProtPath
        Forwarded unchanged to ``self.calcProtVoxel``.
    ligPath
        Ligand file tried first. It is also printed to stdout.
    altLigPath
        Ligand file used when ``ligPath`` cannot be processed.

    Returns
    -------
    tuple of six numpy arrays
        ``(dataset, c, feature_protein, feature_ligand,
        feature_protein_shaped, feature_ligand_shaped)`` where ``dataset``
        holds the single datapoint obtained by concatenating the reshaped
        protein and ligand features along axis 3 and moving that axis to the
        front, and ``c`` is the second value returned by
        ``self.calcProtVoxel`` (presumably the voxel centres; confirm there).
    """

    def _voxelize_ligand(path):
        # Load a ligand and voxelize it around the mean of its coordinates.
        sm = SmallMol(path, force_reading=True, fixHs=False)
        coords = sm.get('coords')
        centre = [
            np.mean(coords[:, 0]),
            np.mean(coords[:, 1]),
            np.mean(coords[:, 2]),
        ]
        feats, _centers, nvox = voxeldescriptors.getVoxelDescriptors(
            sm, center=centre, boxsize=self.boxsize)
        return centre, feats, nvox

    print(ligPath)
    try:
        (x, y, z), fs, ns = _voxelize_ligand(ligPath)
    except Exception:
        # If the normal file is broken, fall back to the alternative format.
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        (x, y, z), fs, ns = _voxelize_ligand(altLigPath)

    f, c, n = self.calcProtVoxel(x, y, z, protPath, number, altProtPath)

    feature_protein_shaped = f.reshape(n[0], n[1], n[2], f.shape[1])
    feature_ligand_shaped = fs.reshape(ns[0], ns[1], ns[2], fs.shape[1])

    # Stack protein and ligand channels, then move the channel axis first.
    datapoint = np.concatenate(
        (feature_protein_shaped, feature_ligand_shaped),
        axis=3).transpose([3, 0, 1, 2])

    return (
        np.array([datapoint]),
        np.array(c),
        np.array(f),
        np.array(fs),
        np.array(feature_protein_shaped),
        np.array(feature_ligand_shaped),
    )
def detectChiralCenters(mol, atom_types=None):
    """
    Detect chiral centers

    Parameters
    ----------
    mol: Molecule
        Molecule to detect chiral centers. It must have exactly one frame.
    atom_types: optional
        If not None, assigned to the ``atomtype`` attribute of a copy of
        ``mol`` before the copy is converted for RDKit.

    Return
    ------
    results: List of tuples
        List of chiral centers, where the chiral centers are tuples made of
        an atom index and a label ('S', 'R', '?').

    Raises
    ------
    TypeError
        If ``mol`` is not a ``Molecule`` instance.
    ValueError
        If ``mol`` does not have exactly one frame.

    Examples
    --------
    >>> from parameterize.home import home
    >>> from moleculekit.molecule import Molecule
    >>> molFile = os.path.join(home('test-param'), 'H2O2.mol2')
    >>> mol = Molecule(molFile)
    >>> detectChiralCenters(mol)
    []
    >>> molFile = os.path.join(home('test-param'), 'fluorchlorcyclopronol.mol2')
    >>> mol = Molecule(molFile)
    >>> detectChiralCenters(mol)
    [(0, 'R'), (2, 'S'), (4, 'R')]
    >>> molFile = os.path.join(home('test-param'), 'fluorchlorcyclopronol.mol2')
    >>> mol = Molecule(molFile)
    >>> detectChiralCenters(mol, atom_types=mol.atomtype)
    [(0, 'R'), (2, 'S'), (4, 'R')]
    """
    from moleculekit.molecule import Molecule
    from moleculekit.smallmol.smallmol import SmallMol
    from rdkit.Chem import AssignAtomChiralTagsFromStructure, FindMolChiralCenters

    if not isinstance(mol, Molecule):
        raise TypeError(f'"mol" has to be instance of {Molecule}')
    n_frames = mol.numFrames
    if n_frames != 1:
        raise ValueError(f'"mol" can have just one frame, but it has {n_frames}')

    # Work on a copy so the caller's molecule is left untouched. Atom types
    # are set because otherwise rdkit refuses to read some MOL2 files.
    working = mol.copy()
    if atom_types is not None:
        working.atomtype = atom_types

    # Detect chiral centers and assign their labels
    small = SmallMol(
        working,
        fixHs=False,
        removeHs=False,
        verbose=False,
        force_reading=True,
    )
    rdkit_mol = small._mol
    AssignAtomChiralTagsFromStructure(rdkit_mol)
    return FindMolChiralCenters(rdkit_mol, includeUnassigned=True)
def test_loadSmile(self):
    """Load ``SMILE_SMI`` into a SmallMol and compare ``numAtoms`` with ``SMILE_N_ATOMS``."""
    sm = SmallMol(SMILE_SMI)
    loaded = sm.numAtoms

    failure = 'Atoms not correctly loaded. Expected: {}; Now: {}'.format(
        SMILE_N_ATOMS, loaded)
    self.assertEqual(loaded, SMILE_N_ATOMS, failure)
def test_loadPdbfile(self):
    """Load ``ligand.pdb`` from ``self.dataDir`` and compare ``numAtoms`` with ``LIGAND_N_ATOMS``."""
    sm = SmallMol(os.path.join(self.dataDir, 'ligand.pdb'))
    loaded = sm.numAtoms

    failure = 'Atoms not correctly loaded. Expected: {}; Now: {}'.format(
        LIGAND_N_ATOMS, loaded)
    self.assertEqual(loaded, LIGAND_N_ATOMS, failure)
def sdfReader(file, removeHs, fixHs, sanitize, isgzip=False):
    """Read every molecule of an SDF file into a list of SmallMol objects.

    Parameters
    ----------
    file : str
        Path of the SDF file (gzip-compressed when ``isgzip`` is True).
    removeHs : bool
        Passed to both ``Chem.SDMolSupplier`` and ``SmallMol``.
    fixHs : bool
        Passed to ``SmallMol``.
    sanitize : bool
        Passed to ``Chem.SDMolSupplier``.
    isgzip : bool
        If True, the file is first decompressed into a temporary ``.sdf``
        file, which is then read.

    Returns
    -------
    mols : list
        The SmallMol objects that could be built. Entries the supplier
        yields as None and entries for which ``SmallMol`` raises are skipped
        and counted; the count is logged at the end.
    """
    from tqdm import tqdm
    from moleculekit.util import tempname

    if isgzip:
        with gzip.open(file, "rb") as f:
            # SDMolSupplier does not support file handles, need to write temp file
            file = tempname(suffix=".sdf")
            with open(file, "wb") as fout:
                fout.write(f.read())

    supplier = Chem.SDMolSupplier(file, removeHs=removeHs, sanitize=sanitize)
    mols = []
    countfailed = 0
    for mol in tqdm(supplier):
        if mol is None:
            countfailed += 1
            continue
        try:
            mols.append(SmallMol(mol, removeHs=removeHs, fixHs=fixHs))
        except Exception:
            # Resolve the name for THIS molecule only; without the fallback an
            # unnamed molecule would raise NameError here (or report the name
            # of a previously failed molecule).
            if mol.HasProp("_Name"):
                name_part = f" with name {mol.GetProp('_Name')}"
            else:
                name_part = ""
            countfailed += 1
            logger.warning(
                f"Failed to load molecule{name_part}. Skipping to next molecule."
            )
    if countfailed:
        logger.info(f"Failed to load {countfailed}/{len(supplier)} molecules")
    return mols
def smiReader(file, removeHs, fixHs, isgzip=False):
    """Read a SMILES file into a list of SmallMol objects.

    The first line of the file is always skipped. Every following non-blank
    line is split on whitespace: the first token is taken as the SMILES
    string and the remainder, if any, as the molecule name (used only in
    log messages).

    Parameters
    ----------
    file : str or file object
        A path, or an already opened file object. A path is opened (through
        ``gzip`` when ``isgzip`` is True) and the function calls itself with
        the resulting handle.
    removeHs : bool
        Passed to ``SmallMol``.
    fixHs : bool
        Passed to ``SmallMol``.
    isgzip : bool
        Whether ``file`` is the path of a gzip-compressed file.

    Returns
    -------
    mols : list
        The SmallMol objects that could be built. Lines for which
        ``SmallMol`` raises are logged as warnings and skipped.
    """
    from tqdm import tqdm

    if isgzip:
        with gzip.open(file, "rb") as f:
            return smiReader(f, removeHs=removeHs, fixHs=fixHs, isgzip=False)

    if isinstance(file, str):
        with open(file, "r") as f:
            return smiReader(f, removeHs, fixHs)

    # Gzip handles opened in "rb" mode yield bytes that must be decoded.
    decode = isinstance(file, gzip.GzipFile)

    lines = file.readlines()[1:]
    mols = []
    for line in tqdm(lines):
        if decode:
            line = line.decode("utf-8")

        fields = line.strip().split(maxsplit=1)
        if not fields:
            # Blank line (e.g. trailing newline at end of file): nothing to load.
            continue
        smi = fields[0]
        # A line holding only a SMILES string has no name; fall back to the
        # SMILES itself so the warning below still identifies the entry.
        name = fields[1] if len(fields) > 1 else smi

        try:
            mols.append(SmallMol(smi, removeHs=removeHs, fixHs=fixHs))
        except Exception as e:
            logger.warning(
                "Failed to load molecule with name {} with error {}. Skipping to next molecule."
                .format(name, e))
    return mols
def sdfReader(file, removeHs, fixHs, sanitize, isgzip=False):
    """Read every molecule of an SDF file into a list of SmallMol objects.

    Parameters
    ----------
    file : str
        Path of the SDF file (gzip-compressed when ``isgzip`` is True).
    removeHs : bool
        Passed to both ``Chem.SDMolSupplier`` and ``SmallMol``.
    fixHs : bool
        Passed to ``SmallMol``.
    sanitize : bool
        Passed to ``Chem.SDMolSupplier``.
    isgzip : bool
        If True, the file is first decompressed into a temporary ``.sdf``
        file, which is then read.

    Returns
    -------
    mols : list
        The SmallMol objects that could be built. Entries the supplier
        yields as None and entries for which ``SmallMol`` raises are skipped
        and counted; the count is logged at the end.
    """
    from tqdm import tqdm
    from moleculekit.util import tempname

    if isgzip:
        with gzip.open(file, 'rb') as f:
            # SDMolSupplier does not support file handles, need to write temp file
            file = tempname(suffix='.sdf')
            with open(file, 'wb') as fout:
                fout.write(f.read())

    supplier = Chem.SDMolSupplier(file, removeHs=removeHs, sanitize=sanitize)
    mols = []
    countfailed = 0
    for mol in tqdm(supplier):
        if mol is None:
            countfailed += 1
            continue
        try:
            mols.append(SmallMol(mol, removeHs=removeHs, fixHs=fixHs))
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed while iterating a large file.
            # The name is resolved for THIS molecule only; without the
            # fallback an unnamed molecule would raise NameError here (or
            # report the name of a previously failed molecule).
            if mol.HasProp('_Name'):
                name_part = ' with name {}'.format(mol.GetProp('_Name'))
            else:
                name_part = ''
            countfailed += 1
            logger.warning(
                'Failed to load molecule{}. Skipping to next molecule.'.format(
                    name_part))
    if countfailed:
        logger.info('Failed to load {}/{} molecules'.format(
            countfailed, len(supplier)))
    return mols
def filter_tautomers(tautomers, scores, threshold=2):
    """Keep the tautomers whose score is within ``threshold`` of the best one.

    Parameters
    ----------
    tautomers: list
        The tautomers identified. Items may be SmallMol objects (their
        ``_mol`` attribute is used) or any other molecule object, which is
        used as is.
    scores: list
        The score of each tautomer, in the same order as ``tautomers``.
        NOTE: this list is sorted in place, in descending order, as a side
        effect of the call.
    threshold: number
        Maximum allowed difference from the highest score.

    Returns
    -------
    t_filtered: list
        SmallMol objects built from the retained tautomers, ordered from the
        highest to the lowest score.
    """
    rdmols = [
        mol._mol if isinstance(mol, SmallMol) else mol for mol in tautomers
    ]

    # sorted() and list.sort() are both stable, so after the two calls below
    # the i-th ranked tautomer still lines up with the i-th score.
    ranked = [
        rdmol for _, rdmol in sorted(
            zip(scores, rdmols), key=lambda pair: pair[0], reverse=True)
    ]
    scores.sort(reverse=True)

    if not scores:
        return []

    # After the descending sort the best score is the first element; compute
    # the cutoff once instead of calling max() for every tautomer.
    cutoff = scores[0] - threshold
    kept = [rdmol for rdmol, score in zip(ranked, scores) if score >= cutoff]

    return [SmallMol(rdmol) for rdmol in kept]
def test_foundBond(self):
    """Exercise ``SmallMol.foundBondBetween`` on the ``FOUNDBOND_SMI`` molecule.

    Three queries are made: atom 0 against any nitrogen, and atoms 0/1 with
    bond type 1 and with bond type 2. Only the last one is expected to find
    a bond.
    """
    sm = SmallMol(FOUNDBOND_SMI)
    first, second = "idx 0", "idx 1"

    bonded_to_nitrogen = sm.foundBondBetween(first, "element N")
    bonded_single = sm.foundBondBetween(first, second, bondtype=1)
    # This result is unpacked into two values, unlike the two calls above.
    bonded_double, _ = sm.foundBondBetween(first, second, bondtype=2)

    self.assertFalse(
        bonded_to_nitrogen,
        "Bond between atom 0 and any nitrogens should not be present",
    )
    self.assertFalse(
        bonded_single,
        "Bond between atom 0 1 should not be single",
    )
    self.assertTrue(
        bonded_double,
        "Bond between atom 0 1 should be double",
    )
def __call__(self, lig_pdb):
    """
    Voxelize a ligand and combine it with the protein voxels as configured.

    :param lig_pdb: ligand given as a PDB block; it is parsed with
        ``AllChem.MolFromPDBBlock`` (sanitized, hydrogens kept).
    :return: array selected by ``self.obs_config.quantity``: the reshaped
        ligand voxels plus ``self.prot_vox_t`` for ``'all'`` (only when
        ``combine`` is ``'add'``), the reshaped ligand voxels for
        ``'ligand'``, or ``self.prot_vox_t`` for ``'protein'``. The result
        is passed through ``np.concatenate`` as a single-item list.
    :raises ValueError: for any other ``quantity``/``combine`` combination.
    """
    cfg = self.obs_config

    rdmol = AllChem.MolFromPDBBlock(lig_pdb, sanitize=True, removeHs=False)
    slig = SmallMol(rdmol)

    # The voxel centres are returned as well but are not used here.
    lig_vox, _lig_centers, lig_N = getVoxelDescriptors(
        slig,
        buffer=0,
        voxelsize=cfg.voxelsize,
        boxsize=cfg.boxsize,
        center=cfg.centers,
        method=cfg.method,
        validitychecks=cfg.validity_check,
    )

    quantity = cfg.quantity
    if quantity == 'all' and cfg.combine == 'add':
        grid = self.reshape(lig_vox, lig_N) + self.prot_vox_t
    elif quantity == 'ligand':
        grid = self.reshape(lig_vox, lig_N)
    elif quantity == 'protein':
        grid = self.prot_vox_t
    else:
        raise ValueError("quantity not good")

    return np.concatenate([grid], axis=1)
def fitGasteigerCharges(mol, atom_types=None):
    """
    Fit Gasteiger atomic charges

    Parameters
    ----------
    mol: Molecule
        Molecule to fit the charges. It must have exactly one frame.
    atom_types: optional
        If not None, assigned to the ``atomtype`` attribute of a copy of
        ``mol`` before the copy is converted for RDKit.

    Return
    ------
    results: Molecule
        Copy of the molecule with the charges set

    Raises
    ------
    TypeError
        If ``mol`` is not a ``Molecule`` instance.
    ValueError
        If ``mol`` does not have exactly one frame.

    Examples
    --------
    >>> from parameterize.home import home
    >>> from moleculekit.molecule import Molecule
    >>> molFile = os.path.join(home('test-charge'), 'H2O.mol2')
    >>> mol = Molecule(molFile)
    >>> mol.charge[:] = 0
    >>> new_mol = fitGasteigerCharges(mol)
    >>> assert new_mol is not mol
    >>> new_mol.charge # doctest: +ELLIPSIS
    array([-0.411509...,  0.205754...,  0.205754...], dtype=float32)
    >>> new_mol = fitGasteigerCharges(mol, atom_types=mol.atomtype)
    >>> assert new_mol is not mol
    >>> new_mol.charge # doctest: +ELLIPSIS
    array([-0.411509...,  0.205754...,  0.205754...], dtype=float32)
    """
    from moleculekit.smallmol.smallmol import SmallMol
    from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges

    if not isinstance(mol, Molecule):
        raise TypeError(f'"mol" has to be instance of {Molecule}')
    n_frames = mol.numFrames
    if n_frames != 1:
        raise ValueError(f'"mol" can have just one frame, but it has {n_frames}')

    # Work on a copy for the RDKit conversion. Atom types are set because
    # otherwise rdkit refuses to read a MOL2 file.
    working = mol.copy()
    if atom_types is not None:
        working.atomtype = atom_types

    # Compute and store Gasteiger charges
    small = SmallMol(
        working,
        fixHs=False,
        removeHs=False,
        verbose=False,
        force_reading=True,
    )
    rdkit_mol = small._mol
    ComputeGasteigerCharges(rdkit_mol, throwOnParamFailure=True)

    # The charges go onto a fresh copy of the ORIGINAL molecule, so any
    # ``atom_types`` override above does not leak into the result.
    charged = mol.copy()
    charges = []
    for atom in rdkit_mol.GetAtoms():
        charges.append(atom.GetDoubleProp("_GasteigerCharge"))
    charged.charge[:] = charges
    return charged
def test_loadPdbfile(self):
    """Load ``ligand.pdb`` from ``self.dataDir`` and compare ``numAtoms`` with ``LIGAND_N_ATOMS``."""
    sm = SmallMol(os.path.join(self.dataDir, "ligand.pdb"))
    n_atoms = sm.numAtoms

    failure = f"Atoms not correctly loaded. Expected: {LIGAND_N_ATOMS}; Now: {n_atoms}"
    self.assertEqual(n_atoms, LIGAND_N_ATOMS, failure)
def test_loadMol2file(self):
    """Load ``self.benzamidine_mol2`` and compare ``numAtoms`` with ``BENZAMIDINE_N_ATOMS``."""
    loaded = SmallMol(self.benzamidine_mol2).numAtoms

    failure = "Atoms not correctly loaded. Expected: {}; Now: {}".format(
        BENZAMIDINE_N_ATOMS, loaded
    )
    self.assertEqual(loaded, BENZAMIDINE_N_ATOMS, failure)
def test_loadSmile(self):
    """Load ``SMILE_SMI`` into a SmallMol and compare ``numAtoms`` with ``SMILE_N_ATOMS``."""
    sm = SmallMol(SMILE_SMI)
    n_atoms = sm.numAtoms

    failure = f"Atoms not correctly loaded. Expected: {SMILE_N_ATOMS}; Now: {n_atoms}"
    self.assertEqual(n_atoms, SMILE_N_ATOMS, failure)
def test_convertFromMolecule(self):
    """Build a SmallMol from a Molecule and require ``mol_equal`` to hold.

    The Molecule is loaded from ``self.benzamidine_mol2``; the ``serial``
    and ``name`` fields are excluded from the comparison.
    """
    from moleculekit.molecule import mol_equal

    source = Molecule(self.benzamidine_mol2)
    small = SmallMol(source)

    same = mol_equal(
        small,
        source,
        exceptFields=['serial', 'name'],
        _logger=False,
    )
    assert same
def test_copy(self):
    """Check that ``SmallMol.copy`` preserves the coordinates.

    Run twice: once with the default constructor arguments and once with
    ``removeHs=True, fixHs=False``, to ensure no hydrogens are added by the
    copy method.
    """
    constructor_options = (
        {},
        # Ensure no hydrogens are added in the copy method
        {"removeHs": True, "fixHs": False},
    )
    for options in constructor_options:
        sm = SmallMol(self.benzamidine_mol2, **options)
        duplicate = sm.copy()
        original_coords = sm.get("coords")
        duplicate_coords = duplicate.get("coords")
        assert np.array_equal(original_coords, duplicate_coords)
def test_removeGenerateConformer(self):
    """Generate conformers, then drop one frame and finally all frames.

    The molecule is built from ``SMILE_SMI``. ``dropFrames([0])`` must
    reduce ``numFrames`` by exactly one and ``dropFrames()`` with no
    argument must leave zero frames.
    """
    sm = SmallMol(SMILE_SMI)
    sm.generateConformers(num_confs=10, append=False)

    frames_generated = sm.numFrames
    sm.dropFrames([0])
    frames_after_one_drop = sm.numFrames
    sm.dropFrames()
    frames_after_full_drop = sm.numFrames

    self.assertEqual(
        frames_after_one_drop,
        frames_generated - 1,
        "The number of conformations after the deletion was not reduced of exactly one unit",
    )
    self.assertEqual(
        frames_after_full_drop,
        0,
        "The number of conformations after the deletion was not reduced to 0",
    )
def test_ledipasvir(self):
    """Compare voxel descriptors of ``ledipasvir.mol2`` with stored reference data.

    The descriptors are computed with ``buffer=1, version=2`` and checked
    against the three arrays stored in ``ledipasvir_voxres.npy`` (both files
    live in ``self.testf``).
    """
    from moleculekit.smallmol.smallmol import SmallMol

    mol2_path = os.path.join(self.testf, 'ledipasvir.mol2')
    reference_path = os.path.join(self.testf, 'ledipasvir_voxres.npy')

    sm = SmallMol(mol2_path)
    features, centers, nvoxels = getVoxelDescriptors(sm, buffer=1, version=2)

    # NOTE(review): allow_pickle=True unpickles the file contents; this is
    # presumably a trusted fixture shipped with the tests.
    reference = np.load(reference_path, allow_pickle=True)
    reffeatures, refcenters, refnvoxels = reference

    assert np.allclose(features, reffeatures)
    assert np.array_equal(centers, refcenters)
    assert np.array_equal(nvoxels, refnvoxels)
def test_getBonds(self):
    """Compare the bond atoms and bond types of benzamidine with the references.

    ``_bonds`` and ``_bondtype`` of the SmallMol loaded from
    ``self.benzamidine_mol2`` are converted to lists and compared with
    ``BENZAMIDINE_BOND_ATOMS`` and ``BENZAMIDINE_BONDTYPES``.
    """
    sm = SmallMol(self.benzamidine_mol2)

    bond_atoms = sm._bonds.tolist()
    self.assertListEqual(
        bond_atoms,
        BENZAMIDINE_BOND_ATOMS,
        msg="The atoms in bonds are not the same of the reference",
    )

    bond_types = sm._bondtype.tolist()
    self.assertListEqual(
        bond_types,
        BENZAMIDINE_BONDTYPES,
        msg="The bonds type are not the same of the reference",
    )
def getRCSBLigandByLigname(ligname, returnMol2=False):
    """
    Returns a SmallMol object of a ligand by its three letter ligand name.

    The ideal-geometry SDF of the ligand is retrieved from RCSB, written to
    a temporary file and converted to mol2 with Open Babel. It is also
    possible to return the mol2 filename.

    Parameters
    ----------
    ligname: str
        The three letter ligand name
    returnMol2: bool
        If True, the mol2 filename is returned

    Returns
    -------
    sm: moleculekit.smallmol.smallmol.SmallMol
        The SmallMol object
    mol2filename: str
        The mol2 filename (only when ``returnMol2`` is True)

    Example
    -------
    >>> from moleculekit.molecule import Molecule
    >>> mol = Molecule('4eiy')
    >>> np.unique(mol.get('resname', 'not protein and not water'))
    array(['CLR', 'NA', 'OLA', 'OLB', 'OLC', 'PEG', 'ZMA'], dtype=object)
    >>> sm = getRCSBLigandByLigname('ZMA')  # doctest: +ELLIPSIS
    SmallMol module...
    >>> sm.numAtoms
    40
    >>> sm, mol2filename = getRCSBLigandByLigname('ZMA', returnMol2=True)
    >>> mol2filename  # doctest: +ELLIPSIS
    '/tmp/tmp....mol2'
    """
    from moleculekit.support import string_to_tempfile
    from moleculekit.smallmol.smallmol import SmallMol
    from moleculekit.rcsb import _getRCSBtext
    from moleculekit.tools.obabel_tools import openbabelConvert

    # Download the SDF text of the ligand.
    url = f"https://files.rcsb.org/ligands/view/{ligname}_ideal.sdf"
    raw = _getRCSBtext(url)
    sdf_text = raw.decode("ascii")

    # SDF text -> temporary file -> mol2, which is what SmallMol reads here.
    sdf_path = string_to_tempfile(sdf_text, "sdf")
    mol2_path = openbabelConvert(sdf_path, "sdf", "mol2")
    sm = SmallMol(mol2_path)

    return (sm, mol2_path) if returnMol2 else sm