def testOrderBug2(self):
    """Check that the 2D pharmacophore signature ignores atom ordering.

    Each probe SMILES is fingerprinted, written back out with
    ``Chem.MolToSmiles``, re-parsed and fingerprinted again; both
    signatures must be identical.  The molecule is then passed ten times
    through ``Randomize.RandomizeMol`` and every resulting copy must yield
    the same signature as the original.  On a mismatch, verbose generation
    is switched on and both molecules are dumped before the assertion
    fails.

    Uses ``self.factory`` (presumably prepared by the test fixture).
    """
    from rdkit.Chem import Randomize
    from rdkit import DataStructs

    probes = ['Oc1nc(Oc2ncccc2)ccc1']
    for smi in probes:
        m1 = Chem.MolFromSmiles(smi)
        sig1 = Generate.Gen2DFingerprint(m1, self.factory)

        # Round trip through the SMILES writer.
        csmi = Chem.MolToSmiles(m1)
        m2 = Chem.MolFromSmiles(csmi)
        sig2 = Generate.Gen2DFingerprint(m2, self.factory)
        self.assertTrue(list(sig1.GetOnBits()) == list(sig2.GetOnBits()),
                        '%s %s' % (smi, csmi))
        self.assertEqual(DataStructs.DiceSimilarity(sig1, sig2), 1.0)
        self.assertEqual(sig1, sig2)

        for _ in range(10):
            m2 = Randomize.RandomizeMol(m1)
            sig2 = Generate.Gen2DFingerprint(m2, self.factory)
            if sig2 != sig1:
                # Regenerate both signatures verbosely to help diagnose the
                # difference.  Note: the module-level flag is left switched on.
                Generate._verbose = True
                print('----------------')
                sig1 = Generate.Gen2DFingerprint(m1, self.factory)
                print('----------------')
                sig2 = Generate.Gen2DFingerprint(m2, self.factory)
                print('----------------')
                print(Chem.MolToMolBlock(m1))
                print('----------------')
                print(Chem.MolToMolBlock(m2))
                print('----------------')
                s1 = set(sig1.GetOnBits())
                s2 = set(sig2.GetOnBits())
                print(s1.difference(s2))
            self.assertEqual(sig1, sig2)
def compute_anti_fp(mols_smiles, sig_fac, antifp_old=None):
    """
    Computes an anti-fingerprint from the given molecules.

    It is possible to specify an existing anti-fingerprint for an update. In
    this case, the returned fingerprint will be the result of doing a bitwise
    :samp:`or` between the old fingerprint and the one generated from the
    supplied structures.

    :param mols_smiles: SMILES of the molecules to generate the
        anti-fingerprint from
    :param sig_fac: RDKit's signature factory used in the 2D pharmacophore
        fingerprint computation
    :param antifp_old: an old anti-fingerprint to update; ``None`` (the
        default) means "start from scratch"
    :return: new or updated anti-fingerprint; ``antifp_old`` itself when
        ``mols_smiles`` is empty
    """
    antifp_new = antifp_old
    for smiles in mols_smiles:
        fp = Generate.Gen2DFingerprint(Chem.MolFromSmiles(smiles), sig_fac)
        # Test against None explicitly: "no fingerprint yet" must not be
        # confused with a fingerprint object that happens to be falsy.
        antifp_new = fp if antifp_new is None else antifp_new | fp
    return antifp_new
def pharmacophore(mol, target):
    """Return the Tanimoto similarity of two 2D pharmacophore fingerprints.

    Both arguments are standardized in place (``.standardize()``), converted
    to strings and have three nitro / N-oxide spellings rewritten to their
    charge-separated form before being parsed as SMILES.  Fingerprints use
    2- and 3-point pharmacophores with distance bins (0, 2), (2, 5), (5, 8).

    :param mol: object offering ``standardize()`` whose ``str()`` is a SMILES
    :param target: same kind of object as ``mol``
    :return: Tanimoto similarity, or ``-100`` when either SMILES cannot be
        parsed
    """
    print('mol/target', mol, target)
    mol.standardize()
    target.standardize()

    # (pattern, replacement) pairs, applied in this order to both strings.
    rewrites = (
        ('N(=O)O', '[N+](=O)[O-]'),
        ('N(O)=O', '[N+]([O-])=O'),
        ('n(O)', '[n+]([O-])'),
    )
    mol = str(mol)
    target = str(target)
    for pattern, replacement in rewrites:
        mol = mol.replace(pattern, replacement)
        target = target.replace(pattern, replacement)

    featfactory = load_factory()
    sigfactory = SigFactory(featfactory, minPointCount=2, maxPointCount=3,
                            trianglePruneBins=False)
    sigfactory.SetBins([(0, 2), (2, 5), (5, 8)])
    sigfactory.Init()

    mol1 = Chem.MolFromSmiles(mol)
    mol2 = Chem.MolFromSmiles(target)
    if not (mol1 and mol2):
        # The counter starts at zero on every call, so it always reports 1.
        failures = 0 + 1
        print('ошибка', failures, mol)
        return -100

    fp1 = Generate.Gen2DFingerprint(mol1, sigfactory)
    fp2 = Generate.Gen2DFingerprint(mol2, sigfactory)
    return DataStructs.TanimotoSimilarity(fp1, fp2)
def testOrderBug(self):
    """The first two records of ``orderBug.sdf`` must share one signature."""
    sd_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                           'orderBug.sdf')
    supplier = Chem.SDMolSupplier(sd_path)
    first = next(supplier)
    second = next(supplier)
    first_sig = Generate.Gen2DFingerprint(first, self.factory)
    second_sig = Generate.Gen2DFingerprint(second, self.factory)
    self.assertEqual(first_sig, second_sig)
def testOrderBug(self):
    """The first two records of ``orderBug.sdf`` must share one signature.

    Reads two molecules from the SD file shipped under
    ``Chem/Pharm2D/test_data`` and compares their 2D pharmacophore
    fingerprints generated with ``self.factory``.
    """
    sdFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                          'orderBug.sdf')
    suppl = Chem.SDMolSupplier(sdFile)
    # The next() builtin works on both Python 2.6+ and Python 3 iterators.
    m1 = next(suppl)
    m2 = next(suppl)
    sig1 = Generate.Gen2DFingerprint(m1, self.factory)
    sig2 = Generate.Gen2DFingerprint(m2, self.factory)
    self.assertEqual(sig1, sig2)
def calc_phore_descs(mols, significant_bits=None, testing=False):
    """Compute Gobbi 2D pharmacophore fingerprints or a bit-indicator matrix.

    :param mols: iterable of RDKit molecules
    :param significant_bits: optional sequence of bit indices; when given, a
        0/1 matrix with one row per molecule and one column per listed bit is
        produced instead of the raw fingerprints
    :param testing: with ``significant_bits`` set, return two summary strings
        instead of printing them and returning the matrix
    :return: list of fingerprints when ``significant_bits`` is ``None``;
        otherwise a ``numpy`` array of shape
        ``(n_mols, len(significant_bits))``, or the tuple of summary strings
        when ``testing`` is true
    """
    fp_holding = [
        Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory) for mol in mols
    ]
    if significant_bits is None:
        return fp_holding

    # Size from the collected fingerprints so one-shot iterables work too.
    phore_descs = np.zeros((len(fp_holding), len(significant_bits)))
    for mol_num, fp in enumerate(fp_holding):
        # Materialise the on-bits once per molecule instead of once per
        # (molecule, bit) pair.
        on_bits = set(fp.GetOnBits())
        for bit_num, bit in enumerate(significant_bits):
            if bit in on_bits:
                phore_descs[mol_num, bit_num] = 1

    if testing:
        return ("significant_bits: %d" % len(significant_bits),
                "fp_descriptors: %s" % str(phore_descs.shape))
    print("significant_bits:", len(significant_bits))
    print("fp_descriptors:", phore_descs.shape)
    return phore_descs
def make_fingerprints(data, length=512, verbose=False):
    """Build the fingerprint wrappers and apply each of them to ``data``.

    :param data: passed unchanged to every wrapper's ``apply_fp``
    :param length: bit count handed to the Morgan, Avalon and RDKit
        generators (the other generators do not receive it)
    :param verbose: print the name of each fingerprint before applying it
    :return: list of ``fingerprint`` wrapper objects, in definition order
    """
    # (generator callable, display name) in the order they are applied.
    specs = [
        (Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
         "Torsion "),
        (lambda x: GetMorganFingerprintAsBitVect(x, 2, nBits=length),
         "Morgan"),
        (FingerprintMol, "Estate (1995)"),
        (lambda x: GetAvalonFP(x, nBits=length), "Avalon bit based (2006)"),
        (lambda x: np.append(GetAvalonFP(x, nBits=length),
                             Descriptors.MolWt(x)),
         "Avalon+mol. weight"),
        (lambda x: GetErGFingerprint(x), "ErG fingerprint (2006)"),
        (lambda x: RDKFingerprint(x, fpSize=length), "RDKit fingerprint"),
        (lambda x: MACCSkeys.GenMACCSKeys(x), "MACCS fingerprint"),
        (lambda x: get_fingerprint(x, fp_type='pubchem'), "PubChem"),
        # An "FP4" entry via get_fingerprint(x, fp_type='FP4') is disabled.
        (lambda x: Generate.Gen2DFingerprint(
            x, Gobbi_Pharm2D.factory, dMat=Chem.Get3DDistanceMatrix(x)),
         "3D pharmacophore"),
    ]
    fp_list = [fingerprint(generator, label) for generator, label in specs]

    for wrapper in fp_list:
        if verbose:
            print("doing", wrapper.name)
        wrapper.apply_fp(data)
    return fp_list
def GetPharmacoPFPs(mol,
                    bins=[(i, i + 1) for i in range(20)],
                    minPointCount=2,
                    maxPointCount=2,
                    return_bitInfo=False):
    '''
    Compute a 2D pharmacophore fingerprint as a boolean numpy array.

    Note: maxPointCount=3 is slow; the defaults
    (bins = [(i, i + 1) for i in range(20)], maxPointCount=2) are intended
    for large-scale computation.

    :param mol: RDKit molecule
    :param bins: distance bins passed to ``SigFactory.SetBins`` (the default
        list is only read here, never modified by this function)
    :param minPointCount: minimum number of pharmacophore points
    :param maxPointCount: maximum number of pharmacophore points
    :param return_bitInfo: also return one bit description per position
    :return: boolean array, or ``(array, descriptions)`` when
        ``return_bitInfo`` is true
    '''
    MysigFactory = SigFactory(featFactory,
                              trianglePruneBins=False,
                              minPointCount=minPointCount,
                              maxPointCount=maxPointCount)
    MysigFactory.SetBins(bins)
    MysigFactory.Init()

    res = Generate.Gen2DFingerprint(mol, MysigFactory)
    # The builtin bool replaces the np.bool alias, which NumPy has removed.
    arr = np.array(list(res)).astype(bool)

    if return_bitInfo:
        description = [
            MysigFactory.GetBitDescription(i) for i in range(len(res))
        ]
        return arr, description
    return arr
def calcfp(self, fptype="rdkit", opt=None):
    """Calculate a molecular fingerprint.

    Optional parameters:
       fptype -- the fingerprint type (default is "rdkit"), matched
                 case-insensitively. See the fps variable for a list of
                 available fingerprint types.
       opt -- a dictionary of options for fingerprints. Currently only used
              for radius and bitInfo in Morgan fingerprints.

    Returns a Fingerprint wrapper, except for "atompairs" and "torsions",
    whose int-vector results are returned unwrapped.

    Raises ValueError for an unrecognised fingerprint type.
    """
    if opt is None:
        opt = {}
    fptype = fptype.lower()
    if fptype == "rdkit":
        fp = Fingerprint(Chem.RDKFingerprint(self.Mol))
    elif fptype == "layered":
        fp = Fingerprint(Chem.LayeredFingerprint(self.Mol))
    elif fptype == "maccs":
        fp = Fingerprint(Chem.MACCSkeys.GenMACCSKeys(self.Mol))
    elif fptype == "atompairs":
        # Going to leave as-is. See Atom Pairs documentation.
        fp = Chem.AtomPairs.Pairs.GetAtomPairFingerprintAsIntVect(self.Mol)
    elif fptype == "torsions":
        # Going to leave as-is.
        fp = Chem.AtomPairs.Torsions.GetTopologicalTorsionFingerprintAsIntVect(self.Mol)
    elif fptype == "morgan":
        info = opt.get('bitInfo', None)
        radius = opt.get('radius', 4)
        fp = Fingerprint(Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(
            self.Mol, radius, bitInfo=info))
    elif fptype == "pharm2d":
        fp = Fingerprint(Generate.Gen2DFingerprint(self.Mol, Gobbi_Pharm2D.factory))
    else:
        # Call-style raise is valid on both Python 2 and Python 3.
        raise ValueError("%s is not a recognised RDKit Fingerprint type" % fptype)
    return fp
def fingerprints_from_mols(cls, mols):
    """Fold 2D pharmacophore fingerprints into a dense 4096-column matrix.

    Every non-zero element ``(index, value)`` of a molecule's fingerprint is
    stored at column ``index % 4096`` of that molecule's row.  When several
    indices fold onto the same column, the value written last wins.

    :param mols: sequence of RDKit molecules
    :return: ``numpy`` array of shape ``(len(mols), 4096)``
    """
    fps = [Generate.Gen2DFingerprint(mol, factory) for mol in mols]
    size = 4096
    X = np.zeros((len(mols), size))
    # Each ``row`` is a view into X, so writes land in the matrix itself.
    for row, fp in zip(X, fps):
        for bit, value in fp.GetNonzeroElements().items():
            row[bit % size] = value
    return X
def test8MultiPointMatches(self):
    """Check on-bits for two aromatic aldehydes with 2-3 point signatures.

    The factory is reconfigured with bins (1, 3), (3, 7), (7, 10).  Both
    signatures must have length 990; benzaldehyde sets exactly bit 3 and
    the long-chain analogue sets no bits.
    """
    factory = self.factory
    factory.SetBins([(1, 3), (3, 7), (7, 10)])
    factory.minPointCount = 2
    factory.maxPointCount = 3
    factory.Init()

    mol = Chem.MolFromSmiles('O=Cc1ccccc1')
    sig = Generate.Gen2DFingerprint(mol, factory)
    self.assertEqual(len(sig), 990)
    bs = tuple(sig.GetOnBits())
    self.assertEqual(bs, (3, ))

    mol = Chem.MolFromSmiles('O=CCCCCCCCCc1ccccc1')
    sig = Generate.Gen2DFingerprint(mol, factory)
    self.assertEqual(len(sig), 990)
    bs = tuple(sig.GetOnBits())
    self.assertEqual(bs, ())
def test5SimpleSig(self):
    """Check on-bits of simple bit signatures for two carbonyl compounds.

    The factory is reconfigured with bins (1, 3), (3, 7), (7, 10) and 2-3
    point pharmacophores.  Both signatures must have length 990; the
    dialdehyde sets bit 1 and the trialdehyde sets bits 1, 2 and 67.
    """
    factory = self.factory
    factory.SetBins([(1, 3), (3, 7), (7, 10)])
    factory.minPointCount = 2
    factory.maxPointCount = 3
    factory.Init()

    mol = Chem.MolFromSmiles('O=CCC=O')
    sig = Generate.Gen2DFingerprint(mol, factory)
    self.assertEqual(len(sig), 990)
    bs = tuple(sig.GetOnBits())
    self.assertEqual(bs, (1, ))

    mol = Chem.MolFromSmiles('O=CC(CC=O)CCC=O')
    sig = Generate.Gen2DFingerprint(mol, factory)
    self.assertEqual(len(sig), 990)
    bs = tuple(sig.GetOnBits())
    self.assertEqual(bs, (1, 2, 67))
def BuildPharm2DFP(mol):
    """Return the 2D pharmacophore fingerprint of ``mol``.

    Uses the module-level ``sigFactory``.  If fingerprint generation raises
    ``IndexError``, the molecule's SMILES is printed with a ``FAIL:`` prefix
    and the exception is re-raised.
    """
    from rdkit.Chem.Pharm2D import Generate

    try:
        return Generate.Gen2DFingerprint(mol, sigFactory)
    except IndexError:
        print('FAIL:', Chem.MolToSmiles(mol, True))
        raise
def testBitInfo(self):
    """The ``bitInfo`` dictionary must mirror the signature's on-bits.

    Also pins the expected bit ids for 'OCC=CC(=O)O' and the entries
    recorded for bits 28878 and 157.
    """
    mol = Chem.MolFromSmiles('OCC=CC(=O)O')
    bit_info = {}
    sig = Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory,
                                    bitInfo=bit_info)
    recorded_bits = sorted(bit_info)

    self.assertEqual(sig.GetNumOnBits(), len(bit_info))
    self.assertEqual(list(sig.GetOnBits()), recorded_bits)
    self.assertEqual(recorded_bits,
                     [23, 30, 150, 154, 157, 185, 28878, 30184])
    self.assertEqual(sorted(bit_info[28878]), [[(0, ), (5, ), (6, )]])
    self.assertEqual(sorted(bit_info[157]),
                     [[(0, ), (6, )], [(5, ), (0, )]])
def tanimoto(self, mol):
    """Return the Tanimoto similarity between ``mol`` and the stored query.

    The fingerprint of ``mol`` is generated with ``self.sigFactory`` inside a
    one-second ``Timeout`` context and compared with ``self.query_fp``.

    :param mol: RDKit molecule
    :return: the similarity, or ``0`` if ``TimeoutError`` is raised (the
        molecule's non-isomeric SMILES is then logged at debug level)
    """
    try:
        with Timeout(seconds=1):
            fp = Generate.Gen2DFingerprint(mol, self.sigFactory)
            return DataStructs.TanimotoSimilarity(fp, self.query_fp)
    except TimeoutError:
        # The message needs a %s placeholder for the SMILES argument;
        # without it the logging module reports a formatting error.
        logging.debug("SMILES Pharmacophore timeout: %s",
                      Chem.MolToSmiles(mol, isomericSmiles=False))
        return 0
def test2Sigs(self):
    """Pin signature length (39972) and on-bits for three small probes."""
    expectations = [
        ('O=CCC=O', (149, )),
        ('OCCC=O', (149, 156)),
        ('OCCC(=O)O', (22, 29, 149, 154, 156, 184, 28822, 30134)),
    ]
    for smiles, expected_bits in expectations:
        mol = Chem.MolFromSmiles(smiles)
        sig = Generate.Gen2DFingerprint(mol, self.factory)
        self.assertEqual(len(sig), 39972)
        on_bits = tuple(sig.GetOnBits())
        self.assertEqual(len(on_bits), len(expected_bits))
        self.assertEqual(on_bits, expected_bits)
def test9BondOrderSigs(self):
    """Signatures must change once ``includeBondOrder`` is enabled.

    With bins (1, 4), (4, 7), (7, 10) and 2-3 point pharmacophores, the
    probe '[O-]CCC(=O)' sets bit 1; after ``includeBondOrder`` is switched
    on (on the same factory object) it sets bit 0.  Length stays 990.
    """
    shared_factory = self.factory
    shared_factory.SetBins([(1, 4), (4, 7), (7, 10)])
    shared_factory.minPointCount = 2
    shared_factory.maxPointCount = 3
    shared_factory.Init()

    probe = Chem.MolFromSmiles('[O-]CCC(=O)')

    # Without bond orders.
    plain_sig = Generate.Gen2DFingerprint(probe, self.factory)
    self.assertEqual(len(plain_sig), 990)
    self.assertEqual(tuple(plain_sig.GetOnBits()), (1, ))

    # With bond orders.
    self.factory.includeBondOrder = True
    ordered_sig = Generate.Gen2DFingerprint(probe, self.factory)
    self.assertEqual(len(ordered_sig), 990)
    self.assertEqual(tuple(ordered_sig.GetOnBits()), (0, ))
def test2Bug28(self):
    """Every on-bit of the probe's signature must map back to atoms.

    Rebins the shared ``Gobbi_Pharm2D.factory`` (a module-level object, so
    the new bins stay in effect after this test), fingerprints the probe
    and checks that ``Matcher.GetAtomsMatchingBit`` finds at least one
    atom match for each bit that is set.
    """
    # Raw string: the SMILES contains a backslash bond marker ("\C") that
    # would otherwise be an invalid escape sequence in a normal literal.
    smi = r'Cc([s]1)nnc1SCC(\CS2)=C(/C([O-])=O)N3C(=O)[C@H]([C@@H]23)NC(=O)C[n]4cnnn4'
    mol = Chem.MolFromSmiles(smi)
    factory = Gobbi_Pharm2D.factory
    factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)])
    sig = Generate.Gen2DFingerprint(mol, factory)
    onBits = sig.GetOnBits()
    for bit in onBits:
        atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1)
        self.assertTrue(len(atoms))
def genmol_sdf(ms):
    """Fingerprint every readable molecule of an SD file.

    The module-level ``options`` selects the fingerprint: Gobbi 2D
    pharmacophore when ``options.gobbifp`` is truthy, otherwise a Morgan bit
    vector using ``options.radius`` and ``options.bits``.

    :param ms: path handed to ``Chem.SDMolSupplier``
    :return: list of fingerprints; records that fail to parse (``None``) or
        fail to fingerprint are skipped
    """
    suppl = Chem.SDMolSupplier(ms)
    ret = []
    for m in suppl:
        if m is None:
            continue
        try:
            if not options.gobbifp:
                ret.append(AllChem.GetMorganFingerprintAsBitVect(
                    m, options.radius, nBits=options.bits))
            else:
                ret.append(Generate.Gen2DFingerprint(m, Gobbi_Pharm2D.factory))
        except Exception:
            # Best effort: skip molecules that cannot be fingerprinted, but
            # let KeyboardInterrupt / SystemExit propagate.
            continue
    return ret
def numpy_pp_fps(mols):
    """Compute 2D pharmacophore fingerprints and return them as numpy arrays.

    A bit-based (``useCounts=False``) signature factory with 2- and 3-point
    pharmacophores and distance bins (0, 2), (2, 4), (4, 6), (6, 8),
    (8, 100) is built on every call.  Falsy entries of ``mols`` are skipped.

    :param mols: {list} list of molecules (RDKit mols)
    :return: result of ``_rdk2numpy`` applied to the list of fingerprints
    """
    # NOTE(review): BuildFeatureFactory is called without a feature
    # definition file here - confirm this is accepted by the RDKit version
    # in use.
    feature_factory = ChemicalFeatures.BuildFeatureFactory()
    signature_factory = SigFactory(feature_factory, useCounts=False,
                                   minPointCount=2, maxPointCount=3)
    signature_factory.SetBins([(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)])
    signature_factory.Init()

    fingerprints = []
    for mol in mols:
        if mol:
            fingerprints.append(
                Generate.Gen2DFingerprint(mol, signature_factory))
    return _rdk2numpy(fingerprints)
def _ph_rdkit(mols_tup):
    """Return a one-row DataFrame of pharmacophore bits for one molecule.

    :param mols_tup: 4-tuple ``(mol, name, act, <ignored>)``
    :return: DataFrame indexed by ``name`` with one column per fingerprint
        bit (1 where the bit is on, 0 elsewhere) plus ``mol_id`` and ``act``
    """
    mol, name, act, _ = mols_tup
    fingerprint_bits = Generate.Gen2DFingerprint(mol, sigFactory)

    frame = pd.DataFrame(columns=range(fingerprint_bits.GetNumBits()))
    # The assignments below create the row on first write; their order is
    # kept as-is because it determines how pandas builds the row.
    for on_bit in fingerprint_bits.GetOnBits():
        frame.loc[name, on_bit] = 1
    frame.loc[name, 'mol_id'] = name
    frame.loc[name, 'act'] = act
    return frame.fillna(0)
def fingerprint(mol, fp_type="DL"):
    """Return a fingerprint of ``mol`` for the requested type.

    Recognised ``fp_type`` values: "DL", "circular" (Morgan, radius 3,
    1024 bits), "MACCS", "torsions" and "pharm" (Gobbi 2D pharmacophore).
    Any other value yields ``None``.
    """
    if fp_type == "DL":
        return FingerprintMols.FingerprintMol(mol)
    if fp_type == "circular":
        return AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=1024)
    if fp_type == "MACCS":
        return MACCSkeys.GenMACCSKeys(mol)
    if fp_type == "torsions":
        # NOTE(review): this branch calls an atom-pair fingerprint function
        # despite the "torsions" label - confirm that is intended.
        return Pairs.GetAtomPairFingerprintAsBitVect(mol)
    if fp_type == "pharm":
        return Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory)
    return None
def _one_cats(mol):
    """Calculate the scaled CATS pharmacophore descriptor for one molecule.

    The fingerprint produced with the factory from ``get_cats_factory`` is
    converted to a numpy array; each consecutive block of 10 values (21
    blocks, 210 values) is divided by that block's sum.  Blocks whose sum is
    zero are left as zeros.

    :param mol: {RDKit molecule} molecule to calculate the descriptor for
    :return: {numpy.ndarray} float32 descriptor vector
    """
    factory = get_cats_factory()
    arr = np.zeros((1,))
    fp = Generate.Gen2DFingerprint(mol, factory)
    ConvertToNumpyArray(fp, arr)

    # One total per block of 10, repeated so it lines up with ``arr``.
    block_totals = [sum(arr[start:start + 10]) for start in range(0, 210, 10)]
    scale = np.repeat(block_totals, 10)

    scaled = np.divide(arr, scale, out=np.zeros_like(arr), where=scale != 0)
    return scaled.astype('float32')
def similarityMeasure(fps, neg, mol2):
    """Score ``mol2`` against a positive and a negative fingerprint.

    :param fps: positive reference fingerprint
    :param neg: negative reference fingerprint
    :param mol2: RDKit molecule, fingerprinted with the module-level
        ``sigFactory``
    :return: ``(positive similarity, positive minus negative similarity)``,
        both based on Tanimoto similarity
    """
    candidate_fp = Generate.Gen2DFingerprint(mol2, sigFactory)
    tanimoto = DataStructs.TanimotoSimilarity
    similarityPos = DataStructs.FingerprintSimilarity(fps, candidate_fp,
                                                      metric=tanimoto)
    similarityNeg = DataStructs.FingerprintSimilarity(neg, candidate_fp,
                                                      metric=tanimoto)
    # A debug print for similarityPos >= 0.75 was disabled in the original.
    return similarityPos, similarityPos - similarityNeg
def get_gobbi_similarity(correct_ligand, mol_to_fix, type_fp='normal', use_features=False):
    """Compute the Gobbi pharmacophore Tanimoto similarity of two ligands.

    Bond orders of both ligands are assigned from a hard-coded SMILES
    template, then each is fingerprinted with ``Gobbi_Pharm2D.factory`` using
    its own 3D distance matrix.  The similarity is printed and returned.

    :param correct_ligand: reference ligand molecule
    :param mol_to_fix: ligand molecule to compare with the reference
    :param type_fp: currently unused
    :param use_features: currently unused
    :return: Tanimoto similarity of the two fingerprints (earlier versions
        only printed it and returned ``None``)
    """
    # Template used to restore bond orders on both ligands.
    ref = Chem.MolFromSmiles(
        'C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O')
    mol1 = AllChem.AssignBondOrdersFromTemplate(ref, correct_ligand)
    mol2 = AllChem.AssignBondOrdersFromTemplate(ref, mol_to_fix)

    factory = Gobbi_Pharm2D.factory
    fp1 = Generate.Gen2DFingerprint(mol1, factory,
                                    dMat=Chem.Get3DDistanceMatrix(mol1))
    fp2 = Generate.Gen2DFingerprint(mol2, factory,
                                    dMat=Chem.Get3DDistanceMatrix(mol2))

    # Tanimoto similarity
    tani = DataStructs.TanimotoSimilarity(fp1, fp2)
    print('GOBBI similarity is ------> ', tani)
    return tani
def test6SimpleSigCounts(self):
    """Check count-based signatures for two carbonyl compounds.

    The factory is reconfigured with bins (1, 3), (3, 7), (7, 10), 2-3 point
    pharmacophores and ``useCounts = True``.  Both signatures must have
    length 990; the expected non-zero elements are {1: 1} for the
    dialdehyde and {1: 2, 2: 1, 67: 1} for the trialdehyde.
    """
    factory = self.factory
    factory.SetBins([(1, 3), (3, 7), (7, 10)])
    factory.minPointCount = 2
    factory.maxPointCount = 3
    factory.useCounts = True
    factory.Init()

    mol = Chem.MolFromSmiles('O=CCC=O')
    sig = Generate.Gen2DFingerprint(mol, factory)
    self.assertEqual(sig.GetLength(), 990)
    # items() works on both Python 2 and 3.
    cs = tuple(sig.GetNonzeroElements().items())
    self.assertEqual(cs, ((1, 1), ))

    mol = Chem.MolFromSmiles('O=CC(CC=O)CCC=O')
    sig = Generate.Gen2DFingerprint(mol, factory)
    self.assertEqual(sig.GetLength(), 990)
    # Sort the (bit, count) pairs by bit id so the comparison does not
    # depend on dictionary ordering.
    cs = sorted(sig.GetNonzeroElements().items())
    self.assertEqual(tuple(cs), ((1, 2), (2, 1), (67, 1)))
def test7SimpleSigSkip(self):
    """Check that skipping the 'Acceptor' feature shrinks the signature.

    With bins (1, 3), (3, 7), (7, 10), 2-3 point pharmacophores and
    ``skipFeats = 'Acceptor'``, the signature of 'O=CCC=O' must have length
    570 and no bits set.
    """
    factory = self.factory
    factory.SetBins([(1, 3), (3, 7), (7, 10)])
    factory.minPointCount = 2
    factory.maxPointCount = 3
    factory.skipFeats = 'Acceptor'
    factory.Init()

    mol = Chem.MolFromSmiles('O=CCC=O')
    sig = Generate.Gen2DFingerprint(mol, factory)
    self.assertEqual(len(sig), 570)
    bs = tuple(sig.GetOnBits())
    self.assertEqual(bs, ())
def get_distance_func(name):
    """Return the representation builder and distance function for ``name``.

    :param name: ``'RDK/T'`` (RDKit fingerprint) or ``'GOBI/T'`` (Gobbi 2D
        pharmacophore fingerprint)
    :return: ``(make_representation, distf)``; ``make_representation`` takes
        an object with a ``mol`` attribute and returns its fingerprint,
        ``distf(x, y)`` is ``1.0 - DataStructs.FingerprintSimilarity(x, y)``
    :raises ValueError: for an unknown ``name`` (``ValueError`` is a subclass
        of the ``Exception`` raised previously)
    """
    if name == 'RDK/T':
        make_representation = lambda chem: Chem.RDKFingerprint(chem.mol)
    elif name == 'GOBI/T':
        make_representation = lambda chem: Generate.Gen2DFingerprint(
            chem.mol, Gobbi_Pharm2D.factory)
    else:
        # Report the argument actually received; the former message read an
        # undefined ``job`` name and failed with NameError instead.
        raise ValueError('Unknown similarity measure: %s' % name)

    distf = lambda x, y: 1.0 - DataStructs.FingerprintSimilarity(x, y)
    return (make_representation, distf)
def _cats_corr(mols, q):
    """Compute scaled CATS descriptors for ``mols`` and put them on ``q``.

    Private worker intended for use with multiprocessing.  For every
    molecule the fingerprint is converted to a numpy array and each block of
    10 values (21 blocks) is divided by the block's sum; zero-sum blocks stay
    zero.  Nothing is returned: the float32 result of shape
    ``(len(mols), 210)`` is delivered through ``q.put``.

    :param mols: {list/array} molecules (RDKit mol) to calculate the
        descriptor for
    :param q: {queue} multiprocessing queue instance
    """
    factory = get_cats_factory()
    block_starts = range(0, 210, 10)

    descriptors = []
    for mol in mols:
        arr = np.zeros((1,))
        ConvertToNumpyArray(Generate.Gen2DFingerprint(mol, factory), arr)
        # One total per block of 10, repeated so it lines up with ``arr``.
        totals = [sum(arr[start:start + 10]) for start in block_starts]
        scale = np.repeat(totals, 10)
        descriptors.append(
            np.divide(arr, scale, out=np.zeros_like(arr), where=scale != 0))

    stacked = np.array(descriptors).reshape((len(mols), 210))
    q.put(stacked.astype('float32'))
def test3Roundtrip(self):
    """Longer-running Bug 28 test over the first NCI SMILES records.

    Rebins the shared ``Gobbi_Pharm2D.factory``, fingerprints the first 20
    lines of ``NCI/first_5K.smi`` and requires every on-bit to be matched
    back to atoms by ``Matcher.GetAtomsMatchingBit``.
    """
    n_to_do = 20
    smi_path = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
    with open(smi_path, 'r') as in_file:
        lines = in_file.readlines()[:n_to_do]

    factory = Gobbi_Pharm2D.factory
    factory.SetBins([(2, 3), (3, 4), (4, 5), (5, 8), (8, 100)])

    for line in lines:
        # The SMILES is the text before the first tab.
        smi = line.partition('\t')[0]
        mol = Chem.MolFromSmiles(smi)
        sig = Generate.Gen2DFingerprint(mol, factory)
        for bit in sig.GetOnBits():
            atoms = Matcher.GetAtomsMatchingBit(factory, bit, mol, justOne=1)
            assert len(atoms), f'bit {bit} failed to match for smi {smi}'