Exemplo n.º 1
0
 def testRootedAtomPairs(self):
     """Atom pairs rooted at atom 0 never outnumber the unrooted pairs.

     Builds the atom-pair fingerprint of 'Oc1ccccc1' twice, once for the
     whole molecule and once restricted with ``fromAtoms=(0, )``, and
     checks that every count in the rooted fingerprint is less than or
     equal to the count stored under the same key in the unrooted one.
     """
     mol = Chem.MolFromSmiles('Oc1ccccc1')
     allPairs = rdMD.GetAtomPairFingerprint(mol).GetNonzeroElements()
     rootedPairs = rdMD.GetAtomPairFingerprint(
         mol, fromAtoms=(0, )).GetNonzeroElements()
     # Indexing allPairs directly also fails the test (KeyError) if the
     # rooted fingerprint contains a pair the full fingerprint lacks.
     for pairCode, rootedCount in rootedPairs.items():
         self.assertTrue(rootedCount <= allPairs[pairCode])
Exemplo n.º 2
0
    def testAtomPairs(self):
        """Check the number of distinct atom pairs found for 'CCC'.

        The default settings and ``minLength=1, maxLength=2`` must both
        give two nonzero elements; ``minLength=1, maxLength=1`` must give
        exactly one.
        """
        mol = Chem.MolFromSmiles('CCC')
        # (keyword arguments for GetAtomPairFingerprint, expected count)
        cases = (
            ({}, 2),
            ({'minLength': 1, 'maxLength': 2}, 2),
            ({'minLength': 1, 'maxLength': 1}, 1),
        )
        for kwargs, expected in cases:
            pairs = rdMD.GetAtomPairFingerprint(mol, **kwargs)
            self.assertEqual(len(pairs.GetNonzeroElements()), expected)
Exemplo n.º 3
0
    def testAtomPairTypesChirality(self):
        """Check how ``includeChirality`` affects atom codes and pair codes.

        Three molecules are used: 'CC(F)Cl' (no stereo tag on atom 1) and
        the two tagged forms 'C[C@@H](F)Cl' and 'C[C@H](F)Cl'.
        """
        smiles = ("CC(F)Cl", "C[C@@H](F)Cl", "C[C@H](F)Cl")
        mols = [Chem.MolFromSmiles(smi) for smi in smiles]

        def centerCode(mol, **kwargs):
            # Atom code of atom 1, the atom carrying the stereo tag.
            return rdMD.GetAtomPairAtomCode(mol.GetAtomWithIdx(1), **kwargs)

        plainCodes = [centerCode(mol) for mol in mols]
        chiralCodes = [centerCode(mol, includeChirality=True) for mol in mols]

        # Without chirality all three centers get the same atom code.
        self.assertEqual(plainCodes[0], plainCodes[1])
        self.assertEqual(plainCodes[0], plainCodes[2])
        # For the untagged molecule the chirality flag changes nothing.
        self.assertEqual(chiralCodes[0], plainCodes[0])
        # With chirality the three codes are pairwise distinct.
        self.assertNotEqual(chiralCodes[0], chiralCodes[1])
        self.assertNotEqual(chiralCodes[0], chiralCodes[2])
        self.assertNotEqual(chiralCodes[1], chiralCodes[2])

        for mol in mols:
            plainPairs = rdMD.GetAtomPairFingerprint(mol).GetNonzeroElements()
            chiralPairs = rdMD.GetAtomPairFingerprint(
                mol, includeChirality=True).GetNonzeroElements()
            first = mol.GetAtomWithIdx(0)
            second = mol.GetAtomWithIdx(1)

            # The non-chiral code for the (0, 1) pair at distance 1 is
            # present in the non-chiral fingerprint.
            plainCode = rdMD.GetAtomPairCode(
                rdMD.GetAtomPairAtomCode(first),
                rdMD.GetAtomPairAtomCode(second), 1)
            self.assertTrue(plainCode in plainPairs)

            # The chirality-aware code for the same pair appears only in
            # the chirality-aware fingerprint.
            chiralCode = rdMD.GetAtomPairCode(
                rdMD.GetAtomPairAtomCode(first, includeChirality=True),
                rdMD.GetAtomPairAtomCode(second, includeChirality=True),
                1,
                includeChirality=True)
            self.assertFalse(chiralCode in plainPairs)
            self.assertTrue(chiralCode in chiralPairs)
Exemplo n.º 4
0
def GetAtomPairFingerprintAsBitVect(mol):
    """ Return the atom-pair fingerprint of a molecule as a SparseBitVect.

    Only the presence of each atom pair is recorded. This differs from
    the standard atom-pair definition, which keeps the number of times
    each pair occurs.

    **Arguments**:

      - mol: a molecule

    **Returns**: a SparseBitVect of length ``fpLen`` with one bit set for
    every atom-pair code that occurs in the molecule

    >>> m = Chem.MolFromSmiles('CCC')
    >>> v = [ pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(1),1),
    ...       pyScorePair(m.GetAtomWithIdx(0),m.GetAtomWithIdx(2),2),
    ...     ]
    >>> v.sort()
    >>> fp = GetAtomPairFingerprintAsBitVect(m)
    >>> list(fp.GetOnBits())==v
    True

    """
    bitVect = DataStructs.SparseBitVect(fpLen)
    pairCounts = rdMolDescriptors.GetAtomPairFingerprint(
        mol).GetNonzeroElements()
    # Only the keys (pair codes) matter here; the counts are discarded.
    for pairCode in pairCounts:
        bitVect.SetBit(pairCode)
    return bitVect
Exemplo n.º 5
0
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints for every molecule in Mols.

    MinLength, MaxLength and UseChirality are read from
    OptionsInfo["FingerprintsParams"]["AtomPairs"]. When
    OptionsInfo["GenerateBitVectFingerints"] is true, hashed bit-vector
    fingerprints with 2048 bits and 4 bits per entry are produced;
    otherwise the unhashed atom-pair fingerprints are produced.

    Returns a list with one fingerprint per molecule, in input order.
    """

    MiscUtil.PrintInfo("\nGenerating AtomPairs fingerprints...")

    AtomPairsParams = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments common to both fingerprint flavours.
    SharedArgs = {
        "minLength": AtomPairsParams["MinLength"],
        "maxLength": AtomPairsParams["MaxLength"],
        "includeChirality": AtomPairsParams["UseChirality"],
    }

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate IntSparseIntVect fingerprints...
        return [
            rdMolDescriptors.GetAtomPairFingerprint(Mol, **SharedArgs)
            for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=2048, nBitsPerEntry=4, **SharedArgs)
        for Mol in Mols
    ]
Exemplo n.º 6
0
 def testPairValues(self):
   """Compare freshly computed atom-pair fingerprints with stored ones.

   Each entry of testD pairs a SMILES string with a base64-encoded
   serialized IntSparseIntVect. The fingerprint computed from the SMILES
   must have Dice similarity 1.0 to the stored fingerprint and must
   compare equal to it.
   """
   import base64
   testD=(('CCCO',b'AQAAAAQAAAAAAIAABgAAACGECAABAAAAIoQIAAEAAABBhAgAAQAAACNEGAABAAAAQUQYAAEAAABC\nRBgAAQAAAA==\n'),
          ('CNc1ccco1',b'AQAAAAQAAAAAAIAAEAAAACOECgABAAAAJIQKAAIAAABBhQoAAgAAAEKFCgABAAAAIsQKAAEAAABB\nxQoAAQAAAELFCgACAAAAIYQQAAEAAABChRAAAQAAAEOFEAACAAAAYYUQAAEAAAAjhBoAAQAAAEGF\nGgABAAAAQoUaAAIAAABhhRoAAQAAAEKIGgABAAAA\n'),
          )
   for smi,txt in testD:
     # base64.decodestring() was removed in Python 3.9. b64decode() is
     # available on every Python version and, in its default non-validating
     # mode, skips the newlines embedded in the stored text.
     pkl = base64.b64decode(txt)
     fp = rdMD.GetAtomPairFingerprint(Chem.MolFromSmiles(smi))
     fp2 = DataStructs.IntSparseIntVect(pkl)
     self.assertEqual(DataStructs.DiceSimilarity(fp,fp2),1.0)
     self.assertEqual(fp,fp2)
Exemplo n.º 7
0
  def testAtomPairOptions(self):
    """Check the effect of ``atomInvariants`` on four fingerprint types.

    For 'c1ccccc1' and 'c1ccccn1', and for each of the atom-pair,
    hashed atom-pair bit vector, topological-torsion and hashed
    topological-torsion bit vector fingerprints:

      - the default fingerprints differ;
      - giving every atom the same invariant in both molecules makes
        the fingerprints equal;
      - giving the two molecules different invariants makes them
        differ again.
    """
    benzene = Chem.MolFromSmiles('c1ccccc1')
    pyridine = Chem.MolFromSmiles('c1ccccn1')
    ones = [1]*6
    twos = [2]*6

    fpFuncs = (
      rdMD.GetAtomPairFingerprint,
      rdMD.GetHashedAtomPairFingerprintAsBitVect,
      rdMD.GetTopologicalTorsionFingerprint,
      rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect,
    )
    for fpFunc in fpFuncs:
      self.assertNotEqual(fpFunc(benzene), fpFunc(pyridine))

      self.assertEqual(fpFunc(benzene, atomInvariants=ones),
                       fpFunc(pyridine, atomInvariants=ones))

      self.assertNotEqual(fpFunc(benzene, atomInvariants=ones),
                          fpFunc(pyridine, atomInvariants=twos))
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints for every molecule in Mols.

    All settings are read from OptionsInfo. When
    OptionsInfo["SpecifiedFingerprintsType"] matches "BitVect" (case
    insensitive), hashed bit-vector fingerprints are produced using the
    configured FPSize and BitsPerHash; otherwise the unhashed atom-pair
    fingerprints are produced.

    Returns a list with one fingerprint per molecule, in input order.
    """

    FingerprintsType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo("\nGenerating AtomPairs %s fingerprints..." % FingerprintsType)

    AtomPairsParams = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments common to both fingerprint flavours.
    SharedArgs = {
        "minLength": AtomPairsParams["MinLength"],
        "maxLength": AtomPairsParams["MaxLength"],
        "includeChirality": AtomPairsParams["UseChirality"],
    }
    FPSize = AtomPairsParams["FPSize"]
    BitsPerHash = AtomPairsParams["BitsPerHash"]

    if not re.match("^BitVect$", FingerprintsType, re.I):
        # Generate IntSparseIntVect fingerprints...
        return [
            rdMolDescriptors.GetAtomPairFingerprint(Mol, **SharedArgs)
            for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash))
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=FPSize, nBitsPerEntry=BitsPerHash, **SharedArgs)
        for Mol in Mols
    ]
Exemplo n.º 9
0
  and fingerprint function.

  Parameters:
    probeMol -- the probe molecule
    fpFunction -- the fingerprint function
    predictionFunction -- the prediction function of the ML model
    kwargs -- additional arguments for drawing
  """
  weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction)
  weights, maxWeight = GetStandardizedWeights(weights)
  fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs)
  return fig, maxWeight
  

# Atom-pair fingerprint generators keyed by fingerprint type. Every entry
# takes the same six positional arguments (molecule, number of bits,
# minimum path length, maximum path length, bits per entry, atoms to
# ignore) so callers can dispatch on the type name alone; an entry simply
# ignores the arguments its underlying function does not take.
apDict = {
    'normal': lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(
        m, minLength=minl, maxLength=maxl, ignoreAtoms=ia),
    'hashed': lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprint(
        m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia),
    'bv': lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprintAsBitVect(
        m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe,
        ignoreAtoms=ia),
}

# usage:   lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry)
def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4):
  """
  Calculates the atom pairs fingerprint with the torsions of atomId removed.

  Parameters:
    mol -- the molecule of interest
    atomId -- the atom to remove the pairs for (if -1, no pair is removed)
    fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv')
    nBits -- the size of the bit vector (only for fpType='bv')
    minLength -- the minimum path length for an atom pair
    maxLength -- the maxmimum path length for an atom pair
Exemplo n.º 10
0
 def calculateMol(self, m, smiles, internalParsing=False):
     """Return the atom-pair fingerprint of molecule ``m`` as a list.

     The path-length limits come from ``self.minPathLen`` and
     ``self.maxPathLen`` and the size from ``self.nbits``. ``smiles`` and
     ``internalParsing`` are accepted but not used by this method.
     """
     # NOTE(review): nBits is forwarded to GetAtomPairFingerprint as-is;
     # confirm that the function bound to `rd` accepts that keyword.
     fingerprint = rd.GetAtomPairFingerprint(
         m,
         minLength=self.minPathLen,
         maxLength=self.maxPathLen,
         nBits=self.nbits)
     return list(fingerprint)
Exemplo n.º 11
0
# Raw values of `graph`.
# NOTE(review): presumably a pandas object defined earlier in the script; confirm.
embed_graph = graph.values

# Molecular fingerprints, one entry per SMILES string.
# Background on the RDKit fingerprint types:
# https://www.rdkit.org/UGM/2012/Landrum_RDKit_UGM.Fingerprints.Final.pptx.pdf
finger_mqn = []
# finger_morgan stays empty in this section: its append below is commented out.
finger_morgan = []
finger_maccs = []
finger_ap = []

for i in smiles:
    # Parse the SMILES string into a molecule object.
    mol = AllChem.MolFromSmiles(i)

    # Each descriptor/fingerprint is converted to a numpy array before storing.
    finger_mqn.append(np.array(Descriptors.MQNs_(mol)))
    finger_maccs.append(np.array(Descriptors.GetMACCSKeysFingerprint((mol))))
    # Morgan fingerprints are currently disabled:
    #finger_morgan.append(np.array(Descriptors.GetMorganFingerprint((mol))))
    finger_ap.append(np.array(Descriptors.GetAtomPairFingerprint((mol))))

###
# `names` and `data` are parallel: names[k] is the label of data[k] and is
# used to build the output file name of the PCA plot below.
names = 'vec_spec,vec_smiles,embed_fn,finger_mqn,finger_maccs,finger_ap,embed_graph'.split(
    ',')
data = [
    vec_spec, vec_smiles, embed_fn, finger_mqn, finger_maccs, finger_ap,
    embed_graph
]
# Index into `names` for the data set currently being plotted.
# NOTE(review): the increment is not visible in this section; confirm it exists.
counter = 0
for i in data:
    try:
        res = do_pca(i)
        plt.scatter(res[0], res[1], label=counter, alpha=.4)
        plt.savefig('figs/pca_%s.pdf' % names[counter])
        plt.clf()