def testCountBounds(self):
    """Check that custom ``countBounds`` alter count-simulated fingerprints.

    For every generator family a fingerprint built with the default count
    bounds is compared with one built with ``countBounds=(1, 8, 16, 32)``;
    the two must not have the same number of on bits.
    """
    mol = Chem.MolFromSmiles(
        'COc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1')
    custom_bounds = (1, 8, 16, 32)

    # Checked in this order: RDKit, topological torsion, Morgan, atom pair.
    factory_names = (
        'GetRDKitFPGenerator',
        'GetTopologicalTorsionGenerator',
        'GetMorganGenerator',
        'GetAtomPairGenerator',
    )
    for factory_name in factory_names:
        make_generator = getattr(rdFingerprintGenerator, factory_name)
        default_fp = make_generator(
            fpSize=2048, countSimulation=True).GetFingerprint(mol)
        bounded_fp = make_generator(
            fpSize=2048, countSimulation=True,
            countBounds=custom_bounds).GetFingerprint(mol)
        self.assertNotEqual(default_fp.GetNumOnBits(),
                            bounded_fp.GetNumOnBits())
def make_fp_generator(fp_type, settings):
    """Create an RDKit fingerprint generator for the requested family.

    Args:
        fp_type: One of ``'morgan'``, ``'atom_pair'``, ``'rdkit'`` or
            ``'topological'``. The historical misspelling ``'toplogical'``
            is still accepted so existing callers keep working.
        settings: Mapping of keyword arguments forwarded to the generator
            factory. Entries override the defaults below. ``None`` or an
            empty mapping means "defaults only".

    Returns:
        The generator object, or ``False`` when ``fp_type`` is not recognised.

    Every family except ``'rdkit'`` defaults to ``includeChirality=True``.
    """
    # (accepted names, factory attribute on rdFingerprintGenerator,
    #  whether includeChirality=True is the default for that family)
    families = (
        (('morgan',), 'GetMorganGenerator', True),
        (('atom_pair',), 'GetAtomPairGenerator', True),
        (('rdkit',), 'GetRDKitFPGenerator', False),
        (('toplogical', 'topological'),
         'GetTopologicalTorsionGenerator', True),
    )

    for names, factory_name, chiral_by_default in families:
        # Tuple membership compares with ==, so an unhashable fp_type still
        # just falls through to the "unknown" result instead of raising.
        if fp_type in names:
            arguments = {'includeChirality': True} if chiral_by_default else {}
            # Caller-supplied settings win over the defaults.
            arguments.update(settings or {})
            factory = getattr(rdFingerprintGenerator, factory_name)
            return factory(**arguments)

    # Unknown family: keep the original sentinel callers may test against.
    return False
def testAdditionalOutput(self):
    """Check that AdditionalOutput only fills the slots that were allocated.

    Three fresh ``AdditionalOutput`` objects are used with an atom-pair
    generator on ethanol ('CCO'); each allocates exactly one of atom counts,
    atom-to-bits or the bit-info map, and the non-allocated getters must
    return ``None``.
    """
    mol = Chem.MolFromSmiles('CCO')
    generator = rdFingerprintGenerator.GetAtomPairGenerator()

    def collect(allocator_name):
        # Fresh output object with a single allocation, filled by one
        # fingerprint calculation.
        output = rdFingerprintGenerator.AdditionalOutput()
        getattr(output, allocator_name)()
        generator.GetFingerprint(mol, additionalOutput=output)
        return output

    # Only atom counts allocated.
    counts_out = collect('AllocateAtomCounts')
    self.assertEqual(counts_out.GetAtomCounts(), (2, 2, 2))
    self.assertIsNone(counts_out.GetAtomToBits())
    self.assertIsNone(counts_out.GetBitInfoMap())
    self.assertIsNone(counts_out.GetBitPaths())

    # Only atom-to-bits allocated.
    atom_bits_out = collect('AllocateAtomToBits')
    self.assertIsNone(atom_bits_out.GetAtomCounts())
    self.assertEqual(atom_bits_out.GetAtomToBits(),
                     ((351, 479), (351, 399), (479, 399)))
    self.assertIsNone(atom_bits_out.GetBitInfoMap())
    self.assertIsNone(atom_bits_out.GetBitPaths())

    # Only the bit-info map allocated.
    bit_info_out = collect('AllocateBitInfoMap')
    self.assertIsNone(bit_info_out.GetAtomCounts())
    self.assertIsNone(bit_info_out.GetAtomToBits())
    expected_bit_info = {
        351: ((0, 1), ),
        399: ((1, 2), ),
        479: ((0, 2), ),
    }
    self.assertEqual(bit_info_out.GetBitInfoMap(), expected_bit_info)
    self.assertIsNone(bit_info_out.GetBitPaths())
def get_atom_pair(mols):
    """Return atom-pair fingerprints of ``mols`` as an integer CSR matrix.

    Each molecule's fingerprint (default generator settings) is converted to
    a NumPy array; the arrays are stacked and the stack is stored as a
    ``scipy.sparse.csr_matrix`` cast to ``'int'``.
    """
    generator = rdFingerprintGenerator.GetAtomPairGenerator()
    rows = [np.array(generator.GetFingerprint(mol)) for mol in mols]
    stacked = np.array(rows)
    return sparse.csr_matrix(stacked).astype('int')
def generate_fingeprints(smiles):
    """Compute Base64-encoded fingerprints for one SMILES string.

    Returns a dict with the keys ``morgan_fp``, ``rdkit_fp``, ``atompair_fp``
    and ``tt_fp``. If anything raises while parsing or fingerprinting, the
    exception is printed and an empty dict is returned instead.
    """
    # Load these here so they're only needed on the worker machines.
    from rdkit import Chem
    from rdkit.Chem import rdFingerprintGenerator

    # NOTE: add any new fingerprints to fingerprint_columns.
    # TODO: Feature Morgan, Layered, MACCS, Pattern and E-state fingerprints.
    column_factories = (
        ('morgan_fp', 'GetMorganGenerator'),  # Morgan
        ('rdkit_fp', 'GetRDKitFPGenerator'),  # RDKit
        ('atompair_fp', 'GetAtomPairGenerator'),  # Atom pairs
        ('tt_fp', 'GetTopologicalTorsionGenerator'),  # Topological Torsion
    )

    encoded = {}
    try:
        mol = Chem.MolFromSmiles(smiles)
        for column, factory_name in column_factories:
            generator = getattr(rdFingerprintGenerator, factory_name)()
            encoded[column] = generator.GetFingerprint(mol).ToBase64()
    except Exception as e:
        # Best effort: one bad molecule must not abort the whole batch.
        print(f'Exception {e} processing {smiles}')
        return {}

    return encoded
def testAtomPairGenerator(self):
    """Exercise the atom-pair generator on propane ('CCC').

    Covers the four output flavours with default settings, an explicit
    atom-invariants generator, the min/max distance limits, disabling count
    simulation, and the effect of ``includeChirality`` on the produced keys.
    """
    mol = Chem.MolFromSmiles('CCC')

    def sparse_count_size(generator):
        # Number of distinct non-zero entries in the sparse count fingerprint.
        return len(generator.GetSparseCountFingerprint(mol).GetNonzeroElements())

    # Default settings: all four fingerprint flavours.
    default_gen = rdFingerprintGenerator.GetAtomPairGenerator()
    self.assertEqual(sparse_count_size(default_gen), 2)
    self.assertEqual(
        len(default_gen.GetCountFingerprint(mol).GetNonzeroElements()), 2)
    self.assertEqual(default_gen.GetSparseFingerprint(mol).GetNumOnBits(), 3)
    self.assertEqual(default_gen.GetFingerprint(mol).GetNumOnBits(), 3)

    # Explicit atom-pair atom-invariants generator.
    with_invariants = rdFingerprintGenerator.GetAtomPairGenerator(
        atomInvariantsGenerator=rdFingerprintGenerator.GetAtomPairAtomInvGen())
    self.assertEqual(sparse_count_size(with_invariants), 2)

    # Distance limits.
    min_two = rdFingerprintGenerator.GetAtomPairGenerator(minDistance=2)
    self.assertEqual(sparse_count_size(min_two), 1)

    max_one = rdFingerprintGenerator.GetAtomPairGenerator(maxDistance=1)
    self.assertEqual(sparse_count_size(max_one), 1)

    # Count simulation switched off.
    no_simulation = rdFingerprintGenerator.GetAtomPairGenerator(
        useCountSimulation=False)
    self.assertEqual(no_simulation.GetSparseFingerprint(mol).GetNumOnBits(), 2)

    # Chirality on versus off must give different key sets.
    plain_inv = rdFingerprintGenerator.GetAtomPairAtomInvGen(
        includeChirality=False)
    chiral_inv = rdFingerprintGenerator.GetAtomPairAtomInvGen(
        includeChirality=True)
    plain_gen = rdFingerprintGenerator.GetAtomPairGenerator(
        includeChirality=False, atomInvariantsGenerator=plain_inv)
    chiral_gen = rdFingerprintGenerator.GetAtomPairGenerator(
        includeChirality=True, atomInvariantsGenerator=chiral_inv)

    plain_elements = plain_gen.GetSparseCountFingerprint(
        mol).GetNonzeroElements()
    chiral_elements = chiral_gen.GetSparseCountFingerprint(
        mol).GetNonzeroElements()
    self.assertNotEqual(plain_elements.keys(), chiral_elements.keys())
def calculate_fingerprint(mol, method='morgan'):
    """Compute one 64-bit fingerprint per atom of ``mol``.

    Args:
        mol: Project molecule object; it is converted with ``to_rdkit_Mol``
            and must expose an ``atoms`` sequence.
        method: Fingerprint family: ``'rdkit'``, ``'morgan'``,
            ``'topological-torsion'`` or ``'atom-pairs'``.

    Returns:
        ``np.array`` built from the per-atom fingerprint arrays, in atom
        index order.

    Raises:
        KeyError: If ``method`` is not one of the supported names.
    """
    rdmol = to_rdkit_Mol(mol)
    # NOTE(review): presumably the property cache and ring info are refreshed
    # because the converted molecule is not sanitized - confirm against
    # to_rdkit_Mol.
    rdmol.UpdatePropertyCache(strict=False)
    Chem.GetSymmSSSR(rdmol)

    factory_names = {
        'rdkit': 'GetRDKitFPGenerator',
        'morgan': 'GetMorganGenerator',
        'topological-torsion': 'GetTopologicalTorsionGenerator',
        'atom-pairs': 'GetAtomPairGenerator',
    }
    # Build only the requested generator instead of all four on every call.
    make_generator = getattr(rdFingerprintGenerator, factory_names[method])
    generator = make_generator(fpSize=64)

    rep = []
    for atom_index in range(len(mol.atoms)):
        # Restrict the fingerprint to features starting from this atom.
        atomic_fp = generator.GetFingerprint(rdmol, fromAtoms=[atom_index])
        # Placeholder array that ConvertToNumpyArray fills in place.
        arr = np.zeros((1, ))
        DataStructs.ConvertToNumpyArray(atomic_fp, arr)
        rep.append(arr)
    return np.array(rep)
def fingerprint_atompair(fpSize=2048, count=False):
    """Build an atom pair fingerprint function (list of int).

    Args:
        fpSize: Size of the generated fingerprint (defaults to 2048).
        count: With the default of False the returned function produces
            fingerprint bits (0 or 1); with True it produces the count of
            each fingerprint value.

    Returns:
        The fingerprint function, with ``__name__`` set to a string that
        records the ``fpSize`` and ``count`` it was built with.
    """
    generator = rdFingerprintGenerator.GetAtomPairGenerator(fpSize=fpSize)
    wrap = _fingerprint_fn_count if count else _fingerprint_fn_bits
    fingerprint_fn = wrap(generator)
    # Encode the configuration in the name so differently configured
    # functions can be told apart.
    fingerprint_fn.__name__ = (
        f'fingerprint_atompair(fpSize={fpSize},count={count})')
    return fingerprint_fn
def testBulk(self):
    """Check the bulk fingerprint helpers against per-molecule generators.

    For each bulk function (sparse count, sparse bit, folded count) and each
    fingerprint family (atom pair, Morgan radius 2, RDKit, topological
    torsion), the bulk result for three molecules must equal what the
    matching generator returns molecule by molecule, and must have length 3.
    """
    mols = [Chem.MolFromSmiles(smiles)
            for smiles in ('CCC', 'OCCCCC', 'CCCCC')]

    # (bulk function name, matching per-molecule generator method name)
    flavours = (
        ('GetSparseCountFPs', 'GetSparseCountFingerprint'),
        ('GetSparseFPs', 'GetSparseFingerprint'),
        ('GetCountFPs', 'GetCountFingerprint'),
    )
    # (fingerprint type constant name, factory for the reference generator)
    families = (
        ('AtomPairFP',
         lambda: rdFingerprintGenerator.GetAtomPairGenerator()),
        ('MorganFP',
         lambda: rdFingerprintGenerator.GetMorganGenerator(2)),
        ('RDKitFP',
         lambda: rdFingerprintGenerator.GetRDKitFPGenerator()),
        ('TopologicalTorsionFP',
         lambda: rdFingerprintGenerator.GetTopologicalTorsionGenerator()),
    )

    for bulk_name, single_name in flavours:
        for fp_type_name, make_generator in families:
            generator = make_generator()
            results = getattr(rdFingerprintGenerator, bulk_name)(
                mols, getattr(rdFingerprintGenerator, fp_type_name))
            compute_single = getattr(generator, single_name)
            for index, mol in enumerate(mols):
                self.assertEqual(results[index], compute_single(mol))
            self.assertEqual(len(results), 3)