Пример #1
0
    def computeFP(self, typeFP):
        """Compute RDKit fingerprints for the cleaned SMILES and store them.

        The molecule is parsed from ``self.smiclean`` and kept in ``self.mol``;
        the resulting fingerprints are stored in ``self.FP`` as a dict keyed by
        fingerprint name.

        :param typeFP: "Mol", "MACCS", "pairs", "Torsion" or "Morgan" to compute
                       a single fingerprint, or "All" to compute every one of
                       them. Any other value yields an empty dict.
        :return: 0 on success; 1 when ``smiclean`` has not been set on the
                 instance or when it cannot be parsed into a molecule (a
                 message is appended to ``self.log`` in both cases).
        """
        # Check the precondition before paying for the RDKit imports.
        if "smiclean" not in self.__dict__:
            self.log = self.log + "No smiles prepared\n"
            return 1

        from rdkit.Chem.Fingerprints import FingerprintMols
        from rdkit.Chem import MACCSkeys
        from rdkit.Chem.AtomPairs import Pairs, Torsions
        from rdkit.Chem import AllChem

        self.mol = Chem.MolFromSmiles(self.smiclean)
        # MolFromSmiles signals a parse failure by returning None; without this
        # guard the fingerprint calls below would raise on the None molecule.
        if self.mol is None:
            self.log = self.log + "Cannot parse smiles: " + str(self.smiclean) + "\n"
            return 1

        dFP = {}
        if typeFP == "Mol" or typeFP == "All":
            dFP["Mol"] = FingerprintMols.FingerprintMol(self.mol)
        if typeFP == "MACCS" or typeFP == "All":
            dFP["MACCS"] = MACCSkeys.GenMACCSKeys(self.mol)
        if typeFP == "pairs" or typeFP == "All":
            dFP["pairs"] = Pairs.GetAtomPairFingerprint(self.mol)
        if typeFP == "Torsion" or typeFP == "All":
            dFP["Torsion"] = Torsions.GetTopologicalTorsionFingerprint(
                self.mol)
        if typeFP == "Morgan" or typeFP == "All":
            # Radius 2 for the Morgan (circular) fingerprint.
            dFP["Morgan"] = AllChem.GetMorganFingerprint(self.mol, 2)

        self.FP = dFP
        return 0
Пример #2
0
 def testGetTopologicalTorsionFingerprintAsIds(self):
   """Check that the three topological-torsion entry points agree.

   The sparse fingerprint, the flat id list and the IntVect form are all
   computed for the same six-membered ring and compared against the same
   expected torsion ids and counts.
   """
   piperidine = Chem.MolFromSmiles('C1CCCCN1')
   expected_counts = {4437590049: 2, 8732557345: 2, 4445978657: 2}
   expected_ids = [4437590049, 4437590049, 4445978657, 4445978657, 8732557345, 8732557345]

   sparse_fp = Torsions.GetTopologicalTorsionFingerprint(piperidine)
   self.assertEqual(sparse_fp.GetNonzeroElements(), expected_counts)

   # The "AsIds" variant returns one id per occurrence, so sort before comparing.
   torsion_ids = Torsions.GetTopologicalTorsionFingerprintAsIds(piperidine)
   self.assertEqual(sorted(torsion_ids), expected_ids)

   int_vect_fp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(piperidine)
   self.assertEqual(int_vect_fp.GetNonzeroElements(), expected_counts)
Пример #3
0
def getCountInfo(m, fpType):
    """Return the non-zero fingerprint elements of molecule ``m``.

    :param m: molecule handed to the RDKit fingerprint functions.
    :param fpType: selects the fingerprint:
                   'AtomPair' (exact spelling) or 'atom' (any case) for atom pairs,
                   'morgan' or 'circular' (any case) for Morgan with radius 2,
                   'Topological' (exact spelling) or 'topo' (any case) for
                   topological torsions.
    :return: the mapping from ``GetNonzeroElements()``; for topological
             torsions the keys are converted with ``int()`` first.
             ``None`` when ``fpType`` matches none of the names above.
    """
    kind = fpType.lower()

    if fpType == 'AtomPair' or kind == 'atom':
        return Pairs.GetAtomPairFingerprint(m).GetNonzeroElements()

    if kind in ('morgan', 'circular'):
        return AllChem.GetMorganFingerprint(m, 2).GetNonzeroElements()

    if fpType == 'Topological' or kind == 'topo':
        torsion_counts = Torsions.GetTopologicalTorsionFingerprint(m).GetNonzeroElements()
        # Rebuild the mapping with keys coerced through int().
        return {int(key): count for key, count in torsion_counts.items()}

    return None
Пример #4
0
def Fingerprints(mols, fingerprint):
    """Compute the fingerprint called ``fingerprint`` for every molecule in ``mols``.

    The name is looked up, in order, in the module-level collections
    ``indigofps``, ``rdkitfps``, ``rdkitnonbitfps`` and ``rdkitestatefps``
    (defined outside this function).

    :param mols: iterable of molecules; Indigo fingerprints call
                 ``mol.fingerprint(fingerprint)`` on each, the others pass each
                 molecule to the matching RDKit function.
    :param fingerprint: fingerprint name.
    :return: list with one fingerprint per molecule, or ``None`` when the name
             is not handled by any group.
    """
    # Indigo fingerprints are produced by the molecule objects themselves.
    if fingerprint in indigofps:
        return [mol.fingerprint(fingerprint) for mol in mols]

    # The tables hold lambdas so that a generator module is only touched when
    # its fingerprint is actually requested.

    # RDKit fingerprints
    bit_vector_makers = {
        "atompair": lambda mol: Pairs.GetAtomPairFingerprintAsBitVect(mol),
        "avalon": lambda mol: pyAvalonTools.GetAvalonFP(mol),
        "daylight": lambda mol: Chem.RDKFingerprint(mol, fpSize=2048),
        "maccs": lambda mol: MACCSkeys.GenMACCSKeys(mol),
        "morgan": lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024),
        "pharm2d": lambda mol: Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory),
        "topological": lambda mol: FingerprintMols.FingerprintMol(mol),
    }
    if fingerprint in rdkitfps and fingerprint in bit_vector_makers:
        make_fp = bit_vector_makers[fingerprint]
        return [make_fp(mol) for mol in mols]

    # RDKit non-bit (integer or float) fingerprints
    non_bit_makers = {
        "sheridan": lambda mol: Sheridan.GetBPFingerprint(mol),
        "topotorsion": lambda mol: Torsions.GetTopologicalTorsionFingerprint(mol),
    }
    if fingerprint in rdkitnonbitfps and fingerprint in non_bit_makers:
        make_fp = non_bit_makers[fingerprint]
        return [make_fp(mol) for mol in mols]

    # E-state fingerprints: the two variants are elements 0 and 1 of the
    # result returned by Fingerprinter.FingerprintMol.
    estate_makers = {
        "estate1": lambda mol: Fingerprinter.FingerprintMol(mol)[0],
        "estate2": lambda mol: Fingerprinter.FingerprintMol(mol)[1],
    }
    if fingerprint in rdkitestatefps and fingerprint in estate_makers:
        make_fp = estate_makers[fingerprint]
        return [make_fp(mol) for mol in mols]

    # unknown fingerprint
    return None
Пример #5
0
    def computeFP(self, typeFP):
        """Compute the requested fingerprints of ``self.mol`` into ``self.d_FP``.

        :param typeFP: "Mol", "MACCS", "pairs", "Torsion" or "Morgan" for a
                       single fingerprint, "All" for every one of them. Any
                       other value stores an empty dict.

        When the instance has no ``mol`` attribute, a message is appended to
        ``self.log``, ``self.err`` is set to 1 and ``self.d_FP`` is left
        untouched. Nothing is returned in either case.
        """
        if "mol" not in self.__dict__:
            self.log = self.log + "No smiles prepared\n"
            self.err = 1
            return

        # (name, builder) pairs in the order the fingerprints are stored.
        # Builders are lambdas so only the requested ones are evaluated.
        builders = (
            ("Mol", lambda: FingerprintMols.FingerprintMol(self.mol)),
            ("MACCS", lambda: MACCSkeys.GenMACCSKeys(self.mol)),
            ("pairs", lambda: Pairs.GetAtomPairFingerprint(self.mol)),
            ("Torsion", lambda: Torsions.GetTopologicalTorsionFingerprint(self.mol)),
            ("Morgan", lambda: AllChem.GetMorganFingerprint(self.mol, 2)),
        )

        self.d_FP = {
            name: build()
            for name, build in builders
            if typeFP == name or typeFP == "All"
        }
Пример #6
0
def CalculateTopologicalTorsionFingerprint(
        mol: Chem.Mol,
        rtype: str = 'countstring',
        bits: int = 2048) -> Tuple[str, dict, Any]:
    """Calculate Topological Torsion fingerprints.

    :param mol: molecule passed to RDKit's topological-torsion fingerprinter
    :param rtype: Type of output, may either be:
                  countstring (default), returns a ';'-separated string of
                  ``bits`` integer counts
                  rdkit, return the native rdkit DataStructs
                  dict, for a dict of counts keyed by 'TopolTorsions_<id>'
                  Any other value is treated like countstring.
    :param bits: Number of folded bits (ignored if rtype != 'countstring')
    :return: exactly one of the three forms above, selected by ``rtype``.
             NOTE(review): the return annotation describes a tuple, which
             does not match this; it is left unchanged here.
    """
    res = Torsions.GetTopologicalTorsionFingerprint(mol)
    if rtype == 'rdkit':
        return res
    counts = res.GetNonzeroElements()
    if rtype == 'dict':
        return {f'TopolTorsions_{k}': v for k, v in counts.items()}
    # Fold the sparse torsion ids into ``bits`` buckets, summing the counts of
    # ids that collide after the modulo.
    folded = [0] * bits
    for k, v in counts.items():
        folded[k % bits] += v
    # str.join only accepts strings, so every count is converted explicitly.
    return ';'.join(str(count) for count in folded)
Пример #7
0
def CalculateTopologicalTorsionFingerprint(mol):
    """
    #################################################################
    Calculate Topological Torsion Fingerprints

    Usage:

        result=CalculateTopologicalTorsionFingerprint(mol)

        Input: mol is a molecule object.

        Output: result is a 3-tuple:

            1. the value of GetLength() on the fingerprint,

            2. the dict from GetNonzeroElements() on the fingerprint,

            3. the fingerprint object itself, as returned by
               Torsions.GetTopologicalTorsionFingerprint (the original
               documentation describes it as the DataStructs used for
               calculating similarity).
    #################################################################
    """
    fingerprint = Torsions.GetTopologicalTorsionFingerprint(mol)
    length = fingerprint.GetLength()
    nonzero_elements = fingerprint.GetNonzeroElements()
    return length, nonzero_elements, fingerprint
def calculate_similarity_vector(smile_pair):
    """
    Build a similarity vector for a pair of SMILES strings.

    Both SMILES are parsed into molecules, several kinds of fingerprints are
    computed for each, and every fingerprint pair is compared with
    get_similarity_all or get_similarity_subset (helpers defined elsewhere in
    this module). The results are concatenated in a fixed order.

    :param smile_pair: two-item sequence ``(smile1, smile2)``.
    :return: list holding, in order, the similarity values for: RDK
             fingerprint, FingerprintMols fingerprint, MACCS keys, atom-pair
             bit vector, topological torsions, Morgan (radius 2) and Morgan
             (radius 2, useFeatures=True).
    """
    from rdkit.Chem import MACCSkeys
    from rdkit.Chem import rdMolDescriptors
    from rdkit.Chem.AtomPairs import Pairs
    from rdkit.Chem.AtomPairs import Torsions
    from rdkit.Chem.Fingerprints import FingerprintMols

    first_smiles, second_smiles = smile_pair
    first_mol = Chem.MolFromSmiles(first_smiles)
    second_mol = Chem.MolFromSmiles(second_smiles)

    # (fingerprint function, comparison helper) in output order.
    # LayeredFingerprint and PatternFingerprint comparisons were disabled in
    # the original code and are intentionally not part of this table.
    comparisons = (
        # RDK topological fingerprint for a molecule
        (Chem.RDKFingerprint, get_similarity_all),
        # Topological fingerprints via FingerprintMols
        # http://www.rdkit.org/docs/GettingStartedInPython.html#topological-fingerprints
        (FingerprintMols.FingerprintMol, get_similarity_all),
        # MACCS keys
        # http://www.rdkit.org/docs/GettingStartedInPython.html#maccs-keys
        (MACCSkeys.GenMACCSKeys, get_similarity_all),
        # Atom pairs (bit-vector form) and topological torsions
        # http://www.rdkit.org/docs/GettingStartedInPython.html#atom-pairs-and-topological-torsions
        (Pairs.GetAtomPairFingerprintAsBitVect, get_similarity_all),
        (Torsions.GetTopologicalTorsionFingerprint, get_similarity_subset),
        # Morgan (circular) fingerprints with radius 2, without and with features
        # http://www.rdkit.org/docs/GettingStartedInPython.html#morgan-fingerprints-circular-fingerprints
        (lambda mol: rdMolDescriptors.GetMorganFingerprint(mol, 2),
         get_similarity_subset),
        (lambda mol: rdMolDescriptors.GetMorganFingerprint(mol, 2, useFeatures=True),
         get_similarity_subset),
    )

    molecule_similarity = []
    for make_fingerprint, compare in comparisons:
        first_fp = make_fingerprint(first_mol)
        second_fp = make_fingerprint(second_mol)
        molecule_similarity.extend(compare(first_fp, second_fp))

    return molecule_similarity