示例#1
0
 def testCountBounds(self):
     """Check that explicit ``countBounds`` change the number of on bits.

     For each generator family, a 2048-bit count-simulated fingerprint built
     without ``countBounds`` is compared with one built with
     ``countBounds=(1, 8, 16, 32)``; their on-bit counts must differ.
     """
     mol = Chem.MolFromSmiles(
         'COc1ccc(CCNC(=O)c2ccccc2C(=O)NCCc2ccc(OC)cc2)cc1')
     shared_kwargs = dict(fpSize=2048, countSimulation=True)
     # Factories are resolved by name inside the loop so each one is looked
     # up right before it is exercised, in this order.
     factory_names = (
         'GetRDKitFPGenerator',
         'GetTopologicalTorsionGenerator',
         'GetMorganGenerator',
         'GetAtomPairGenerator',
     )
     for factory_name in factory_names:
         factory = getattr(rdFingerprintGenerator, factory_name)
         plain_fp = factory(**shared_kwargs).GetFingerprint(mol)
         bounded_fp = factory(
             countBounds=(1, 8, 16, 32), **shared_kwargs).GetFingerprint(mol)
         self.assertNotEqual(plain_fp.GetNumOnBits(),
                             bounded_fp.GetNumOnBits())
def make_fp_generator(fp_type, settings):
    """Create an RDKit fingerprint generator for the requested family.

    Args:
        fp_type: One of ``'morgan'``, ``'atom_pair'``, ``'rdkit'`` or
            ``'topological'``. The historical misspelling ``'toplogical'``
            is still accepted so existing callers keep working.
        settings: Mapping of keyword arguments forwarded to the generator
            factory. Entries override the per-family defaults below.
            ``None`` is treated as "no extra settings".

    Returns:
        The generator object, or ``False`` when ``fp_type`` is not one of
        the supported names.
    """
    chiral_defaults = {'includeChirality': True}
    # fp_type -> (factory attribute on rdFingerprintGenerator, default kwargs).
    # Factories are referenced by name so that an unsupported fp_type can be
    # rejected without touching the RDKit module at all.
    recipes = {
        'morgan': ('GetMorganGenerator', chiral_defaults),
        'atom_pair': ('GetAtomPairGenerator', chiral_defaults),
        'rdkit': ('GetRDKitFPGenerator', {}),
        'topological': ('GetTopologicalTorsionGenerator', chiral_defaults),
        # Legacy spelling kept for backward compatibility.
        'toplogical': ('GetTopologicalTorsionGenerator', chiral_defaults),
    }

    # Non-string (possibly unhashable) values can never match a family name.
    if not isinstance(fp_type, str) or fp_type not in recipes:
        return False

    factory_name, defaults = recipes[fp_type]
    arguments = dict(defaults)
    if settings is not None:
        # Caller-supplied settings take precedence over the defaults.
        arguments.update(settings)

    return getattr(rdFingerprintGenerator, factory_name)(**arguments)
 def _get_morgan(self):
     """Return Morgan fingerprints of ``self.df['mols']`` as a sparse matrix.

     Each molecule is fingerprinted with a Morgan generator built from its
     default arguments; the stacked result is returned as an integer
     ``scipy.sparse.csr_matrix``.
     """
     generator = rdFingerprintGenerator.GetMorganGenerator()
     rows = [
         np.array(generator.GetFingerprint(mol)) for mol in self.df['mols']
     ]
     dense = np.array(rows)
     return sparse.csr_matrix(dense).astype('int')
def get_morgan_features(mols):
    """Return feature-invariant Morgan fingerprints of ``mols`` as a sparse matrix.

    The Morgan generator is configured with the atom-invariants generator
    returned by ``GetMorganFeatureAtomInvGen()``. The stacked fingerprints
    are returned as an integer ``scipy.sparse.csr_matrix``.
    """
    feature_invariants = rdFingerprintGenerator.GetMorganFeatureAtomInvGen()
    generator = rdFingerprintGenerator.GetMorganGenerator(
        atomInvariantsGenerator=feature_invariants)
    rows = [np.array(generator.GetFingerprint(mol)) for mol in mols]
    dense = np.array(rows)
    return sparse.csr_matrix(dense).astype('int')
示例#5
0
def generate_fingeprints(smiles):
    """Compute base64-encoded fingerprints for one SMILES string.

    Args:
        smiles: SMILES string of the molecule to fingerprint.

    Returns:
        A dict with the keys ``'morgan_fp'``, ``'rdkit_fp'``,
        ``'atompair_fp'`` and ``'tt_fp'``, each holding the ``ToBase64()``
        encoding of the corresponding fingerprint. If anything raises while
        parsing or fingerprinting, a message is printed and an empty dict is
        returned instead.
    """
    # Load these here so they're only needed on the worker machines.
    from rdkit import Chem
    from rdkit.Chem import rdFingerprintGenerator

    # Output column -> generator factory name, in output order.
    # NOTE: add any new fingerprints to fingerprint_columns.
    # TODO: Feature Morgan, Layered, MACCS, Pattern and E-state fingerprints
    # are not generated yet.
    factory_by_column = {
        'morgan_fp': 'GetMorganGenerator',
        'rdkit_fp': 'GetRDKitFPGenerator',
        'atompair_fp': 'GetAtomPairGenerator',
        'tt_fp': 'GetTopologicalTorsionGenerator',
    }

    try:
        mol = Chem.MolFromSmiles(smiles)
        # Factory lookup stays inside the try block so that a missing factory
        # is reported through the same best-effort path as any other failure.
        return {
            column: getattr(rdFingerprintGenerator, factory_name)()
            .GetFingerprint(mol).ToBase64()
            for column, factory_name in factory_by_column.items()
        }
    except Exception as exc:
        print(f'Exception {exc} processing {smiles}')
        return {}
示例#6
0
  def testMorganGenerator(self):
    """Exercise Morgan generators with different invariant generators.

    Checks the number of nonzero elements in sparse count fingerprints of
    'CCCC(=O)O' for three generator configurations, then checks how
    ``useBondTypes`` affects two unsanitized SMILES variants, both via the
    generator keyword and via an explicit bond-invariants generator.
    """
    mol = Chem.MolFromSmiles('CCCC(=O)O')
    gen = rdFingerprintGenerator.GetMorganGenerator(3)
    self.assertEqual(len(gen.GetSparseCountFingerprint(mol).GetNonzeroElements()), 14)

    # (atom-invariants factory name, expected number of nonzero elements)
    invariant_cases = (
      ('GetMorganAtomInvGen', 14),
      ('GetMorganFeatureAtomInvGen', 13),
    )
    for invgen_name, expected in invariant_cases:
      atom_invgen = getattr(rdFingerprintGenerator, invgen_name)()
      gen = rdFingerprintGenerator.GetMorganGenerator(radius=3,
                                                      atomInvariantsGenerator=atom_invgen)
      nonzero = gen.GetSparseCountFingerprint(mol).GetNonzeroElements()
      self.assertEqual(len(nonzero), expected)

    variants = [
      Chem.MolFromSmiles(smi, sanitize=False) for smi in ('C1=CC=CN=N1', 'C1C=CC=NN=1')
    ]
    for variant in variants:
      # Parsed without sanitization, so set up properties and ring info by hand.
      variant.UpdatePropertyCache()
      Chem.GetSymmSSSR(variant)
    first, second = variants

    with_types = rdFingerprintGenerator.GetMorganGenerator(radius=2, useBondTypes=True)
    self.assertNotEqual(with_types.GetSparseCountFingerprint(first),
                        with_types.GetSparseCountFingerprint(second))
    without_types = rdFingerprintGenerator.GetMorganGenerator(radius=2, useBondTypes=False)
    self.assertEqual(without_types.GetSparseCountFingerprint(first),
                     without_types.GetSparseCountFingerprint(second))

    # An explicit bond-invariants generator with useBondTypes=False must
    # reproduce the fingerprints of the useBondTypes=False generator.
    bond_invgen = rdFingerprintGenerator.GetMorganBondInvGen(useBondTypes=False)
    via_invgen = rdFingerprintGenerator.GetMorganGenerator(radius=2,
                                                           bondInvariantsGenerator=bond_invgen)
    for variant in variants:
      self.assertEqual(without_types.GetSparseCountFingerprint(variant),
                       via_invgen.GetSparseCountFingerprint(variant))
示例#7
0
def write_fingerprints(sesh):
    """Let the user pick a fingerprint type and compute it for ``sesh.df['mols']``.

    Renders a select box and, once 'Morgan' is chosen and the button is
    pressed, appends one Morgan fingerprint per molecule to ``sesh.fp`` while
    updating a progress bar, then replaces ``sesh.fp`` with a NumPy array of
    the collected fingerprints.

    Args:
        sesh: Session object exposing ``df`` (with a ``'mols'`` column) and
            an appendable ``fp`` attribute.
    """
    option = st.selectbox('What fingerprint?', (
        'choose one',
        'Morgan',
    ))

    # 'MACCS' is accepted here although the select box above does not offer it.
    if option not in ['Morgan', 'MACCS']:
        return
    if not st.button('Generate fingerprints'):
        return

    pbar = st.progress(0)
    # NOTE(review): 512 is passed positionally, which binds to the first
    # parameter of GetMorganGenerator (the radius), not to fpSize. If a
    # 512-bit fingerprint was intended this should be fpSize=512 - confirm
    # before changing, since it alters the fingerprint length downstream.
    gen_mo = rdFingerprintGenerator.GetMorganGenerator(512)
    total = len(sesh.df)  # loop-invariant, so computed once
    for count, mol in enumerate(sesh.df['mols']):
        sesh.fp.append(gen_mo.GetFingerprint(mol))
        pbar.progress(int((count + 1) / total * 100))
    sesh.fp = np.array(sesh.fp)
示例#8
0
文件: utils.py 项目: ajr15/Torina
def calculate_fingerprint(mol, method='morgan'):
    """Compute one fingerprint per atom of ``mol``.

    The molecule is converted with ``to_rdkit_Mol``, its property cache and
    ring information are initialised, and a fingerprint restricted to each
    atom index (``fromAtoms=[i]``) is generated with a 64-bit generator.

    Args:
        mol: Project molecule object; must expose ``atoms`` and be accepted
            by ``to_rdkit_Mol``.
        method: Fingerprint family, one of ``'rdkit'``, ``'morgan'``,
            ``'topological-torsion'`` or ``'atom-pairs'``.

    Returns:
        ``np.ndarray`` stacking the per-atom fingerprint arrays, one row per
        entry of ``mol.atoms`` (presumably shape ``(n_atoms, 64)`` - confirm).

    Raises:
        KeyError: If ``method`` is not one of the supported names.
    """
    rdmol = to_rdkit_Mol(mol)
    rdmol.UpdatePropertyCache(strict=False)
    Chem.GetSymmSSSR(rdmol)

    factories = {
        'rdkit': rdFingerprintGenerator.GetRDKitFPGenerator,
        'morgan': rdFingerprintGenerator.GetMorganGenerator,
        'topological-torsion': rdFingerprintGenerator.GetTopologicalTorsionGenerator,
        'atom-pairs': rdFingerprintGenerator.GetAtomPairGenerator,
    }
    # Build only the generator that was asked for instead of all four.
    generator = factories[method](fpSize=64)

    rep = []
    for atom_index in range(len(mol.atoms)):
        atomic_fp = generator.GetFingerprint(rdmol, fromAtoms=[atom_index])
        # Placeholder array that ConvertToNumpyArray fills with the bits.
        arr = np.zeros((1, ))
        DataStructs.ConvertToNumpyArray(atomic_fp, arr)
        rep.append(arr)
    return np.array(rep)
示例#9
0
 def generate_fingerprints_and_create_list(self):
     """Score the predicted molecules and store the scores on ``self.predicted``.

     For every molecule in ``self.predicted['molecules']`` this computes the
     highest Tanimoto similarity to any molecule in
     ``self.true_pos['molecules']``, the synthetic-accessibility score, the
     QED score and logP, and records whether it matches the BRENK filter
     catalog (stored in ``self.brenk``). The four score lists are added to
     ``self.predicted`` as the columns ``'similarities'``, ``'sa_score'``,
     ``'qeds'`` and ``'logp'``.

     NOTE(review): ``shortlist_mask`` is built at the end but is not used or
     returned within this block - the remainder may live elsewhere.
     """
     # Morgan fingerprints (2048 bits, radius 2) of predicted and known ligands.
     gen_mo = rdFingerprintGenerator.GetMorganGenerator(fpSize=2048,
                                                        radius=2)
     predicted_fps = [
         gen_mo.GetFingerprint(mol) for mol in self.predicted['molecules']
     ]
     true_fps = [
         gen_mo.GetFingerprint(mol) for mol in self.true_pos['molecules']
     ]
     # For each predicted fingerprint keep the largest Tanimoto similarity to
     # any known ligand. (`count` is unused; `mol` is a fingerprint here.)
     similarities = list()
     for count, mol in enumerate(predicted_fps):
         tanimoto_values = ([
             DataStructs.TanimotoSimilarity(mol, i) for i in true_fps
         ])
         index_of_highest = np.argmax(tanimoto_values)
         similarities.append(tanimoto_values[index_of_highest])
     # Synthetic-accessibility score; module code is in:
     # https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
     sa_score = [
         sascorer.calculateScore(i)
         for i in list(self.predicted['molecules'])
     ]
     # List holding the QED drug-likeness score.
     # Reference: https://doi.org/10.1038/nchem.1243
     qeds = [qed(mol) for mol in self.predicted['molecules']]
     # List holding logP.
     logp = [Descriptors.MolLogP(m) for m in self.predicted['molecules']]
     # Filter catalog usage instructions: https://github.com/rdkit/rdkit/pull/536
     params = FilterCatalogParams()
     params.AddCatalog(FilterCatalogParams.FilterCatalogs.BRENK)
     catalog = FilterCatalog(params)
     # Boolean array: True where the molecule matches a BRENK catalog entry.
     self.brenk = np.array(
         [catalog.HasMatch(m) for m in self.predicted['molecules']])
     # Add these lists as columns to the 'predicted' pd.DataFrame.
     self.predicted['similarities'] = similarities
     self.predicted['sa_score'] = sa_score
     self.predicted['qeds'] = qeds
     self.predicted['logp'] = logp
     # NOTE(review): looks like a leftover debugging print - confirm.
     print(self.predicted['logp'] < 6)
     # Shortlist criteria: similarity < 0.2, SA score < 4, QED > 0.25,
     # logP < 6, and no BRENK match.
     shortlist_mask = ((self.predicted['similarities'] < 0.2) &
                       (self.predicted['sa_score'] < 4) &
                       (self.predicted['qeds'] > 0.25) &
                       (self.predicted['logp'] < 6) & (~self.brenk))
def fingerprint_morgan(radius, fpSize=2048, count=False):
    """Build a Morgan fingerprint function of the specified size.

    Args:
        radius: The number of iterations to grow the fingerprint.
        fpSize: Size of the generated fingerprint (defaults to 2048).
        count: When False (the default) the returned function is built with
            ``_fingerprint_fn_bits`` (fingerprint bits, 0 or 1); when True it
            is built with ``_fingerprint_fn_count`` (count of each
            fingerprint value).

    Returns:
        The fingerprint function, with ``__name__`` set to a string that
        records the radius, fpSize and count arguments.
    """
    morgan_generator = rdFingerprintGenerator.GetMorganGenerator(
        radius=radius, fpSize=fpSize)

    wrap = _fingerprint_fn_count if count else _fingerprint_fn_bits
    fingerprint_fn = wrap(morgan_generator)

    fingerprint_fn.__name__ = (f'fingerprint_morgan(radius={radius},'
                               f'fpSize={fpSize},count={count})')
    return fingerprint_fn
    def __init__(self, radius, fpSize, IC50function, molFile):
        """Load molecules from a SMILES file and rank their BRICS bond types.

        Args:
            radius: Radius passed to the Morgan fingerprint generator.
            fpSize: Fingerprint size passed to the Morgan generator.
            IC50function: Callable stored as ``self.getIC50``.
            molFile: Path of a text file with one SMILES string per line.

        Sets ``self.fpgen``, ``self.getIC50``, ``self.molFile``, ``self.ms``
        (fragment parents of the parsed molecules) and ``self.bondsToKeep``
        (BRICS bond labels ordered by descending frequency).
        """
        self.fpgen = rdFingerprintGenerator.GetMorganGenerator(
            radius=radius, fpSize=fpSize)
        self.getIC50 = IC50function
        self.molFile = molFile

        # Read the SMILES file; one entry per newline-separated line.
        with open(self.molFile) as f:
            smiles_lines = f.read().split("\n")

        # Only the first 1000 lines are parsed into RDKit molecules.
        parsed = [Chem.MolFromSmiles(smi) for smi in smiles_lines[:1000]]
        self.ms = [rdMolStandardize.FragmentParent(mol) for mol in parsed]

        # Count how often each BRICS bond label pair occurs across molecules.
        label_counts = Counter(
            labels
            for mol in self.ms
            for _atom_ids, labels in BRICS.FindBRICSBonds(mol))
        ranked = sorted(
            [(n, labels) for labels, n in label_counts.items()], reverse=True)

        # NOTE(review): an earlier comment here described this step as keeping
        # the top 10 bonds, but no truncation is applied - every label pair is
        # kept, most frequent first. Confirm which behaviour is intended.
        self.bondsToKeep = [labels for _n, labels in ranked]
示例#12
0
    def testBulk(self):
        """Bulk fingerprint functions must agree with per-molecule generators.

        For each output kind (sparse count, sparse, count) and each
        fingerprint family (atom pair, Morgan, RDKit, topological torsion),
        the module-level bulk function is called on three molecules and each
        result is compared with the fingerprint produced by the matching
        generator method; the result length must be 3.
        """
        mols = [Chem.MolFromSmiles(smi) for smi in ('CCC', 'OCCCCC', 'CCCCC')]

        # (bulk function name, matching single-molecule generator method)
        output_kinds = (
            ('GetSparseCountFPs', 'GetSparseCountFingerprint'),
            ('GetSparseFPs', 'GetSparseFingerprint'),
            ('GetCountFPs', 'GetCountFingerprint'),
        )
        # (generator factory name, positional args, fingerprint-type attribute)
        families = (
            ('GetAtomPairGenerator', (), 'AtomPairFP'),
            ('GetMorganGenerator', (2, ), 'MorganFP'),
            ('GetRDKitFPGenerator', (), 'RDKitFP'),
            ('GetTopologicalTorsionGenerator', (), 'TopologicalTorsionFP'),
        )

        # Everything is resolved by name inside the loops so that lookups
        # happen in the same order as the checks themselves.
        for bulk_name, single_name in output_kinds:
            for factory_name, factory_args, fp_type_name in families:
                gen = getattr(rdFingerprintGenerator,
                              factory_name)(*factory_args)
                results = getattr(rdFingerprintGenerator, bulk_name)(
                    mols, getattr(rdFingerprintGenerator, fp_type_name))
                for idx, mol in enumerate(mols):
                    self.assertEqual(results[idx],
                                     getattr(gen, single_name)(mol))
                self.assertEqual(len(results), 3)
示例#13
0
 def testMorganGenerator(self):
     """Sparse count Morgan fingerprint of 'CCCCC' has 7 nonzero elements.

     The generator is created with the single positional argument 3.
     """
     mol = Chem.MolFromSmiles('CCCCC')
     generator = rdFingerprintGenerator.GetMorganGenerator(3)
     nonzero = generator.GetSparseCountFingerprint(mol).GetNonzeroElements()
     self.assertEqual(len(nonzero), 7)
示例#14
0
def fingerprint_molecules(mols):
    """Return a Morgan fingerprint for every molecule in ``mols``.

    Args:
        mols: Iterable of RDKit molecules.

    Returns:
        List with one fingerprint per molecule, in input order, produced by
        a Morgan generator built with its default arguments.
    """
    mols = list(mols)
    if not mols:
        # Nothing to fingerprint, so no generator is needed.
        return []
    # One generator is shared by all molecules instead of constructing a
    # fresh one per molecule.
    generator = rdFingerprintGenerator.GetMorganGenerator()
    return [generator.GetFingerprint(mol) for mol in mols]