def computeFP(self, typeFP):
    """Compute RDKit fingerprints for the cleaned SMILES and store them in ``self.FP``.

    The molecule is parsed from ``self.smiclean`` and kept in ``self.mol``.

    :param typeFP: which fingerprint to compute: ``"Mol"``, ``"MACCS"``,
        ``"pairs"``, ``"Torsion"``, ``"Morgan"``, or ``"All"`` for every one
        of them. Any other value yields an empty ``self.FP``.
    :return: ``0`` on success; ``1`` when no cleaned SMILES is available or
        when the SMILES cannot be parsed (a message is appended to
        ``self.log`` in both cases and ``self.FP`` is left untouched).
    """
    if "smiclean" not in self.__dict__:
        self.log = self.log + "No smiles prepared\n"
        return 1

    # Imported only once a SMILES is known to exist, so the cheap failure
    # path above does not depend on the fingerprint modules being loadable.
    from rdkit import Chem
    from rdkit.Chem import AllChem
    from rdkit.Chem import MACCSkeys
    from rdkit.Chem.AtomPairs import Pairs, Torsions
    from rdkit.Chem.Fingerprints import FingerprintMols

    self.mol = Chem.MolFromSmiles(self.smiclean)
    if self.mol is None:
        # MolFromSmiles signals a parse failure by returning None; report it
        # through the log / return code instead of crashing in the
        # fingerprint calls below.
        self.log = self.log + "Invalid smiles: " + str(self.smiclean) + "\n"
        return 1

    dFP = {}
    if typeFP == "Mol" or typeFP == "All":
        dFP["Mol"] = FingerprintMols.FingerprintMol(self.mol)
    if typeFP == "MACCS" or typeFP == "All":
        dFP["MACCS"] = MACCSkeys.GenMACCSKeys(self.mol)
    if typeFP == "pairs" or typeFP == "All":
        dFP["pairs"] = Pairs.GetAtomPairFingerprint(self.mol)
    if typeFP == "Torsion" or typeFP == "All":
        dFP["Torsion"] = Torsions.GetTopologicalTorsionFingerprint(self.mol)
    if typeFP == "Morgan" or typeFP == "All":
        # Radius 2 Morgan (circular) fingerprint.
        dFP["Morgan"] = AllChem.GetMorganFingerprint(self.mol, 2)

    self.FP = dFP
    return 0
def testGetTopologicalTorsionFingerprintAsIds(self):
    """Check that the three topological-torsion entry points agree for piperidine.

    The sparse fingerprint, the flat id list and the int-vector form must all
    describe the same three torsion ids, each occurring twice.
    """
    piperidine = Chem.MolFromSmiles('C1CCCCN1')
    expected_counts = {4437590049: 2, 8732557345: 2, 4445978657: 2}
    # The id list repeats every torsion id once per occurrence.
    expected_ids = sorted(
        torsion_id
        for torsion_id, count in expected_counts.items()
        for _ in range(count)
    )

    sparse_fp = Torsions.GetTopologicalTorsionFingerprint(piperidine)
    self.assertEqual(sparse_fp.GetNonzeroElements(), expected_counts)

    id_list = Torsions.GetTopologicalTorsionFingerprintAsIds(piperidine)
    self.assertEqual(sorted(id_list), expected_ids)

    int_vect = Torsions.GetTopologicalTorsionFingerprintAsIntVect(piperidine)
    self.assertEqual(int_vect.GetNonzeroElements(), expected_counts)
def getCountInfo(m, fpType):
    """Return the non-zero counts of a count-based fingerprint of molecule ``m``.

    Recognised ``fpType`` values:

    * ``'AtomPair'`` (exact spelling) or ``'atom'`` in any case: atom pairs
    * ``'morgan'`` / ``'circular'`` in any case: Morgan fingerprint, radius 2
    * ``'Topological'`` (exact spelling) or ``'topo'`` in any case:
      topological torsions, with keys converted through ``int``

    Returns ``None`` for any other ``fpType``.
    """
    lowered = fpType.lower()

    if fpType == 'AtomPair' or lowered == 'atom':
        return Pairs.GetAtomPairFingerprint(m).GetNonzeroElements()

    if lowered in ('morgan', 'circular'):
        return AllChem.GetMorganFingerprint(m, 2).GetNonzeroElements()

    if fpType == 'Topological' or lowered == 'topo':
        raw_counts = Torsions.GetTopologicalTorsionFingerprint(m).GetNonzeroElements()
        # Normalise the keys to plain Python ints.
        return {int(key): raw_counts[key] for key in raw_counts}

    return None
def Fingerprints(mols, fingerprint):
    """Compute the fingerprint named ``fingerprint`` for every molecule in ``mols``.

    The name is looked up, in order, in the module-level groups
    ``indigofps``, ``rdkitfps``, ``rdkitnonbitfps`` and ``rdkitestatefps``.
    Returns a list with one fingerprint per molecule, or ``None`` when the
    name is not handled by any group.
    """
    # Indigo molecules compute their own fingerprints.
    if fingerprint in indigofps:
        return [mol.fingerprint(fingerprint) for mol in mols]

    # Builders are wrapped in lambdas so the toolkit names they use are only
    # resolved when a fingerprint is actually computed.

    # RDKit bit-vector fingerprints
    if fingerprint in rdkitfps:
        bit_builders = {
            "atompair": lambda m: Pairs.GetAtomPairFingerprintAsBitVect(m),
            "avalon": lambda m: pyAvalonTools.GetAvalonFP(m),
            "daylight": lambda m: Chem.RDKFingerprint(m, fpSize=2048),
            "maccs": lambda m: MACCSkeys.GenMACCSKeys(m),
            "morgan": lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=1024),
            "pharm2d": lambda m: Generate.Gen2DFingerprint(m, Gobbi_Pharm2D.factory),
            "topological": lambda m: FingerprintMols.FingerprintMol(m),
        }
        build = bit_builders.get(fingerprint)
        if build is not None:
            return [build(mol) for mol in mols]

    # RDKit non-bit (integer or float) fingerprints
    if fingerprint in rdkitnonbitfps:
        nonbit_builders = {
            "sheridan": lambda m: Sheridan.GetBPFingerprint(m),
            "topotorsion": lambda m: Torsions.GetTopologicalTorsionFingerprint(m),
        }
        build = nonbit_builders.get(fingerprint)
        if build is not None:
            return [build(mol) for mol in mols]

    # E-state fingerprints: element 0 or 1 of the Fingerprinter result
    if fingerprint in rdkitestatefps:
        estate_builders = {
            "estate1": lambda m: Fingerprinter.FingerprintMol(m)[0],
            "estate2": lambda m: Fingerprinter.FingerprintMol(m)[1],
        }
        build = estate_builders.get(fingerprint)
        if build is not None:
            return [build(mol) for mol in mols]

    # unknown fingerprint
    return None
def computeFP(self, typeFP):
    """Compute the requested fingerprint(s) of ``self.mol`` into ``self.d_FP``.

    ``typeFP`` selects one of ``"Mol"``, ``"MACCS"``, ``"pairs"``,
    ``"Torsion"``, ``"Morgan"``, or ``"All"`` for every fingerprint. When the
    instance has no ``mol`` attribute, a message is appended to ``self.log``,
    ``self.err`` is set to 1 and nothing is computed.
    """
    if "mol" not in self.__dict__:
        self.log = self.log + "No smiles prepared\n"
        self.err = 1
        return

    # (result key, builder) pairs, in the order the results are stored.
    builders = (
        ("Mol", lambda: FingerprintMols.FingerprintMol(self.mol)),
        ("MACCS", lambda: MACCSkeys.GenMACCSKeys(self.mol)),
        ("pairs", lambda: Pairs.GetAtomPairFingerprint(self.mol)),
        ("Torsion", lambda: Torsions.GetTopologicalTorsionFingerprint(self.mol)),
        ("Morgan", lambda: AllChem.GetMorganFingerprint(self.mol, 2)),
    )

    computed = {}
    for key, build in builders:
        if typeFP == key or typeFP == "All":
            computed[key] = build()
    self.d_FP = computed
def CalculateTopologicalTorsionFingerprint(
        mol: Chem.Mol, rtype: str = 'countstring', bits: int = 2048) -> Tuple[str, dict, Any]:
    """Calculate Topological Torsion fingerprints.

    :param mol: molecule to fingerprint
    :param rtype: Type of output, may either be:
                  countstring (default), returns the folded counts as a
                                         semicolon-separated string of
                                         ``bits`` integers
                  rdkit, returns the native rdkit DataStructs object
                  dict, returns a dict mapping ``TopolTorsions_<id>`` to the
                        count of each torsion that is present
                  Any other value is treated like countstring.
    :param bits: Number of folded bits (ignored if rtype is 'rdkit' or 'dict')
    :return: exactly one of the three forms above, selected by ``rtype``
    """
    res = Torsions.GetTopologicalTorsionFingerprint(mol)
    if rtype == 'rdkit':
        return res

    counts = res.GetNonzeroElements()
    if rtype == 'dict':
        return {f'TopolTorsions_{k}': v for k, v in counts.items()}

    # Fold the sparse torsion ids into a fixed-length vector of counts.
    # An integer dtype keeps the counts printable as "0;2;..." rather than
    # as floats.
    folded = np.zeros(bits, dtype=int)
    for k, v in counts.items():
        folded[k % bits] += v
    # str.join only accepts strings, so each count is converted explicitly.
    return ';'.join(map(str, folded.tolist()))
def CalculateTopologicalTorsionFingerprint(mol):
    """
    #################################################################
    Calculate Topological Torsion Fingerprints

    Usage:

        result=CalculateTopologicalTorsionFingerprint(mol)

        Input: mol is a molecule object.

        Output: result is a 3-tuple:

            (1) the length reported by the fingerprint (``GetLength()``);

            (2) a dict of the fingerprint's non-zero elements
                (``GetNonzeroElements()``);

            (3) the fingerprint object itself, usable for similarity
                calculations.
    #################################################################
    """
    torsion_fp = Torsions.GetTopologicalTorsionFingerprint(mol)
    fp_length = torsion_fp.GetLength()
    nonzero_elements = torsion_fp.GetNonzeroElements()
    return fp_length, nonzero_elements, torsion_fp
def calculate_similarity_vector(smile_pair):
    """
    Build a similarity vector for a pair of SMILES strings.

    Both molecules are fingerprinted with several fingerprinters in turn; each
    pair of fingerprints is scored with ``get_similarity_all`` or
    ``get_similarity_subset`` and the scores are concatenated in a fixed order:
    RDK topological, FingerprintMols topological, MACCS keys, atom pairs
    (bit vector), topological torsions, Morgan (radius 2), and Morgan
    (radius 2, feature-based).

    ``smile_pair`` is a two-item sequence ``(smile1, smile2)``.
    """
    first_smiles, second_smiles = smile_pair
    first_mol = Chem.MolFromSmiles(first_smiles)
    second_mol = Chem.MolFromSmiles(second_smiles)

    from rdkit.Chem import MACCSkeys
    from rdkit.Chem import rdMolDescriptors
    from rdkit.Chem.AtomPairs import Pairs
    from rdkit.Chem.AtomPairs import Torsions
    from rdkit.Chem.Fingerprints import FingerprintMols

    # (fingerprinter, scorer) stages, in output order.
    stages = (
        # RDK topological fingerprint for a molecule
        (Chem.RDKFingerprint, get_similarity_all),
        # Topological fingerprints via FingerprintMols
        # http://www.rdkit.org/docs/GettingStartedInPython.html#topological-fingerprints
        (FingerprintMols.FingerprintMol, get_similarity_all),
        # SMARTS-based implementation of the 166 public MACCS keys
        # http://www.rdkit.org/docs/GettingStartedInPython.html#maccs-keys
        (MACCSkeys.GenMACCSKeys, get_similarity_all),
        # Atom pairs (bit-vector form) and topological torsions
        # http://www.rdkit.org/docs/GettingStartedInPython.html#atom-pairs-and-topological-torsions
        (Pairs.GetAtomPairFingerprintAsBitVect, get_similarity_all),
        (Torsions.GetTopologicalTorsionFingerprint, get_similarity_subset),
        # Morgan (circular) fingerprints, radius 2: atom-based, then feature-based
        # http://www.rdkit.org/docs/GettingStartedInPython.html#morgan-fingerprints-circular-fingerprints
        (lambda mol: rdMolDescriptors.GetMorganFingerprint(mol, 2),
         get_similarity_subset),
        (lambda mol: rdMolDescriptors.GetMorganFingerprint(mol, 2, useFeatures=True),
         get_similarity_subset),
    )

    similarity_vector = []
    for make_fingerprint, score in stages:
        first_fp = make_fingerprint(first_mol)
        second_fp = make_fingerprint(second_mol)
        similarity_vector.extend(score(first_fp, second_fp))
    return similarity_vector