def calc(smi, name):
    """Compute a row of physicochemical and structural descriptors for a molecule.

    Args:
        smi: SMILES string, parsed with ``Chem.MolFromSmiles``.
        name: Identifier returned as the first element of the row and used
            in the diagnostics written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms,
        max_ring_size, n_chiral_centers, fcsp3_bm)`` where floats are rounded
        to 2 or 3 decimals, or ``None`` when the SMILES cannot be parsed or
        any descriptor calculation raises. Both failures are reported on
        stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline-terminated so consecutive diagnostics do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)

        # The scaffold feeds both fmf and fcsp3_bm, so build it only once.
        scaffold = GetScaffoldForMol(m)
        hac = m.GetNumHeavyAtoms()
        # fmf: share of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0

        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # default=() makes an acyclic molecule report a ring size of 0.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)

        return (
            name, hba, hbd, hba + hbd, nrings, rtb,
            round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
            round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused,
            n_unique_hba_hbd_atoms, max_ring_size, n_chiral_centers,
            round(fcsp3_bm, 3),
        )
    except Exception:
        # Best effort: skip the molecule, but let KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
def get_MCSs(test_mols, known_mols, nns_indices=None, murcko_scaff=False):
    """Find, for every query molecule, the known molecule sharing the largest MCS.

    Args:
        test_mols: Query molecules.
        known_mols: Reference molecules; converted to a NumPy array so each
            entry of ``nns_indices`` can index into it.
        nns_indices: One index collection per query molecule selecting the
            candidates from ``known_mols``. When ``None``, every known
            molecule is a candidate for every query.
        murcko_scaff: When true, the callable handed to ``get_mcs`` maps a
            molecule to ``GetScaffoldForMol(mol)``; otherwise it is the
            identity.

    Returns:
        Four parallel lists: the best ``get_mcs`` result per query, the
        substructure match of that MCS in the query molecule, the selected
        known molecule, and the match of the MCS in that known molecule.
    """
    if nns_indices is None:
        nns_indices = [np.arange(len(known_mols))] * len(test_mols)

    if murcko_scaff:
        def transform(mol):
            return GetScaffoldForMol(mol)
    else:
        def transform(mol):
            return mol

    known_mols = np.array(known_mols)

    MCSs = []
    MCS_matches = []
    NN_mols = []
    NN_MCS_matches = []

    for query_mol, candidate_idx in zip(test_mols, nns_indices):
        candidates = known_mols[candidate_idx]
        results = [get_mcs(query_mol, cand, transform) for cand in tqdm(candidates)]

        # Rank candidates by MCS atom count; argmax keeps the first maximum.
        best = np.argmax([res.numAtoms for res in results])
        best_mol = candidates[best]
        best_mcs = results[best]

        pattern = MolFromSmarts(best_mcs.smartsString)
        best_mol_match = best_mol.GetSubstructMatch(pattern)
        query_match = query_mol.GetSubstructMatch(pattern)

        MCSs.append(best_mcs)
        MCS_matches.append(query_match)
        NN_mols.append(best_mol)
        NN_MCS_matches.append(best_mol_match)

    return MCSs, MCS_matches, NN_mols, NN_MCS_matches
def test2MurckoScaffold(self): # Test the functionality on a larger test set for testMol in self.testMolecules2: mol = Chem.MolFromSmiles(testMol.smiles) calcScaffold = Chem.MolToSmiles(GetScaffoldForMol(mol)) actualScaffold = Chem.MolToSmiles(Chem.MolFromSmiles(testMol.scaffold)) self.assertEqual(calcScaffold, actualScaffold)
def test_ReferenceImplementation(self): # Check that the C++ implementation is equivalent to the Python reference implementation for testMol in self.testMolecules: mol = Chem.MolFromSmiles(testMol.smiles) calcScaffold1 = Chem.MolToSmiles(GetScaffoldForMol(mol)) calcScaffold2 = Chem.MolToSmiles(_pyGetScaffoldForMol(mol)) self.assertEqual(calcScaffold1, calcScaffold2)
def calc(smi, name):
    """Compute a row of physicochemical descriptors for a molecule.

    Args:
        smi: SMILES string, parsed with ``Chem.MolFromSmiles``.
        name: Identifier returned as the first element of the row and used
            in the diagnostics written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused)`` where floats are rounded to 2 or
        3 decimals, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises. Both failures are reported on stderr.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline-terminated so consecutive diagnostics do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)

        hac = m.GetNumHeavyAtoms()
        # fmf: share of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac if hac else 0

        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)

        return (
            name, hba, hbd, hba + hbd, nrings, rtb,
            round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
            round(csp3, 3), round(fmf, 3), round(qed, 3), hac, nrings_fused,
        )
    except Exception:
        # Best effort: skip the molecule, but let KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
def calc(smi, name):
    """Return ``(name, scaffold_smiles)`` for one molecule.

    Args:
        smi: SMILES string, parsed with ``Chem.MolFromSmiles``.
        name: Identifier returned alongside the scaffold and used in the
            diagnostic written to stderr.

    Returns:
        A tuple of ``name`` and the non-isomeric SMILES of
        ``GetScaffoldForMol`` applied to the parsed molecule, or ``None``
        (after a message on stderr) when the parsed molecule is falsy.
    """
    m = Chem.MolFromSmiles(smi)
    if not m:
        # Newline-terminated so consecutive diagnostics do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    scaff = Chem.MolToSmiles(GetScaffoldForMol(m), isomericSmiles=False)
    return name, scaff
def find_boundary_bonds(mol, scaffold_atom_indices=None):
    """Return the bonds joining a scaffold atom to an atom outside the scaffold.

    Args:
        mol: Molecule providing ``GetAtomWithIdx`` and ``GetSubstructMatch``.
        scaffold_atom_indices: Atom indices of ``mol`` that make up the
            scaffold. When falsy (``None`` or empty), they are taken from the
            substructure match of ``GetScaffoldForMol(mol)`` in ``mol``.

    Returns:
        A list of bond objects, ordered by scaffold atom and then by that
        atom's bonds, whose other end is not a scaffold atom.
    """
    if not scaffold_atom_indices:
        scaffold = GetScaffoldForMol(mol)
        scaffold_atom_indices = mol.GetSubstructMatch(scaffold)

    # Set lookup keeps the per-bond membership test constant-time; iteration
    # still follows the original index sequence so the result order is kept.
    scaffold_atoms = set(scaffold_atom_indices)
    return [
        bond
        for atom_idx in scaffold_atom_indices
        for bond in mol.GetAtomWithIdx(atom_idx).GetBonds()
        if bond.GetOtherAtomIdx(atom_idx) not in scaffold_atoms
    ]
def test_scheme_1():
    """SCHEME 1: Murcko scaffold construction that keeps exocyclic double
    bonds and double bonds directly attached to the linker ("exolinker
    double bonds").
    """
    # Flucloxacillin
    flucloxacillin = 'CC1=C(C(=NO1)C2=C(C=CC=C2Cl)F)C(=O)NC3C4N(C3=O)C(C(S4)(C)C)C(=O)O'
    expected = canon('O=C(NC1C(=O)N2CCSC12)c1conc1-c1ccccc1')

    parsed = Chem.MolFromSmiles(flucloxacillin)
    scaffold = Scaffold(GetScaffoldForMol(parsed))

    assert scaffold.smiles == expected
def murcko_scaffold(smiles, generic, isomeric):
    """Return the SMILES of the Murcko scaffold of ``smiles``.

    Args:
        smiles: SMILES string passed to ``MolFromSmiles``.
        generic: Must be a ``bool``; when true the scaffold is additionally
            passed through ``MakeScaffoldGeneric``.
        isomeric: Must be a ``bool``; forwarded as ``isomericSmiles`` to
            ``MolToSmiles``.

    Returns:
        The canonical SMILES produced by ``MolToSmiles`` for the scaffold.

    Raises:
        AssertionError: If ``generic`` or ``isomeric`` is not a ``bool``.
    """
    # Function-scope import, resolved before the arguments are validated.
    from rdkit.Chem.Scaffolds import MurckoScaffold

    assert isinstance(generic, bool)
    assert isinstance(isomeric, bool)

    scaffold = MurckoScaffold.GetScaffoldForMol(MolFromSmiles(smiles))
    result = MurckoScaffold.MakeScaffoldGeneric(scaffold) if generic else scaffold
    return MolToSmiles(result, canonical=True, isomericSmiles=isomeric)
def calc(smi, name):
    """Compute a row of basic physicochemical descriptors for a molecule.

    Args:
        smi: SMILES string, parsed with ``Chem.MolFromSmiles``.
        name: Identifier returned as the first element of the row and used
            in the diagnostic written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf)`` where floats are rounded to 2 or 3 decimals, or ``None``
        (after a message on stderr) when the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline-terminated so consecutive diagnostics do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)

    hac = m.GetNumAtoms(onlyHeavy=True)
    # fmf: share of heavy atoms that belong to the scaffold. A molecule with
    # no heavy atoms gets 0 instead of raising ZeroDivisionError.
    fmf = GetScaffoldForMol(m).GetNumAtoms(onlyHeavy=True) / hac if hac else 0

    return (
        name, hba, hbd, hba + hbd, nrings, rtb,
        round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
        round(csp3, 3), round(fmf, 3),
    )
def calcFingerprints(smiles, preproc=True):
    """Return the Morgan fingerprint bits and generic scaffold for a SMILES.

    Results are memoised in the module-level dictionaries ``global_mol_dict``
    (``preproc=True``) and ``global_mol_dict_no_preproc`` (``preproc=False``).
    A cached ``None`` marks a SMILES that previously failed.

    Args:
        smiles: SMILES string, parsed with ``Chem.MolFromSmiles``.
        preproc: When true, the parsed molecule is passed through
            ``preprocessMolecule`` before the fingerprint is computed.

    Returns:
        The bit list (2048 ints, Morgan radius 2) and the SMILES of the
        generic Murcko scaffold. A cache hit returns the stored two-element
        list; a fresh computation returns a tuple with the same two items.

    Raises:
        PreprocessViolation: If the molecule is falsy after parsing and
            optional preprocessing, or was cached as failed.
    """
    cache = global_mol_dict if preproc else global_mol_dict_no_preproc

    # Guard only the lookup so a KeyError raised elsewhere is not mistaken
    # for a cache miss.
    try:
        precalculated = cache[smiles]
    except KeyError:
        pass
    else:
        if precalculated is None:
            raise PreprocessViolation(' Molecule preprocessing violation')
        return precalculated

    m1 = Chem.MolFromSmiles(smiles)
    if preproc:
        m1 = preprocessMolecule(m1)
    if not m1:
        # The failure has always been recorded in global_mol_dict; keep that
        # for compatibility.
        global_mol_dict[smiles] = None
        if not preproc:
            # Also record it in the cache this call consults, so a repeated
            # preproc=False call does not re-parse the same SMILES.
            global_mol_dict_no_preproc[smiles] = None
        raise PreprocessViolation(' Molecule preprocessing violation')

    scaf = Chem.MolToSmiles(MakeScaffoldGeneric(GetScaffoldForMol(m1)))
    fp = AllChem.GetMorganFingerprintAsBitVect(m1, 2, nBits=2048)
    bitstring = [int(bit) for bit in fp.ToBitString()]

    cache[smiles] = [bitstring, scaf]
    return bitstring, scaf
def getScaffold(mol):
    """Return the SMILES of the Murcko scaffold of ``mol``, or ``None`` on error.

    Args:
        mol: Molecule passed to ``GetScaffoldForMol``.

    Returns:
        The string produced by ``Chem.MolToSmiles`` for the scaffold, or
        ``None`` if scaffold extraction or SMILES generation raises.
    """
    try:
        return Chem.MolToSmiles(GetScaffoldForMol(mol))
    except Exception:
        # Best effort: report failure as None, but let KeyboardInterrupt and
        # SystemExit propagate.
        return None