def lipinski_rule(mol):
    """Evaluate four Lipinski-style threshold checks for ``mol``.

    Returns a list of four booleans, in this order:

    1. ``Lipinski.NHOHCount(mol) <= 5``
    2. ``Lipinski.NOCount(mol) <= 10``
    3. ``Descriptors.ExactMolWt(mol) <= 500``
    4. predicted logP ``<= 5``, where the prediction comes from running a
       freshly constructed ``LogP('logP')`` model on the molecule's hashed
       atom-pair bit-vector fingerprint.
    """
    atom_pair_fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    nhoh_ok = Lipinski.NHOHCount(mol) <= 5
    no_ok = Lipinski.NOCount(mol) <= 10
    weight_ok = Descriptors.ExactMolWt(mol) <= 500
    # The model is instantiated on every call, exactly once per molecule.
    logp_ok = LogP('logP').run(atom_pair_fp) <= 5

    return [nhoh_ok, no_ok, weight_ok, logp_ok]
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints.

    Reads MinLength, MaxLength and UseChirality from
    OptionsInfo["FingerprintsParams"]["AtomPairs"] and returns one
    fingerprint per molecule in Mols: hashed bit vectors (2048 bits, 4 bits
    per entry) when OptionsInfo["GenerateBitVectFingerints"] is truthy,
    otherwise the fingerprints returned by GetAtomPairFingerprint.
    """

    MiscUtil.PrintInfo("\nGenerating AtomPairs fingerprints...")

    AtomPairsParams = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments shared by both fingerprint flavours.
    CommonArgs = {
        "minLength": AtomPairsParams["MinLength"],
        "maxLength": AtomPairsParams["MaxLength"],
        "includeChirality": AtomPairsParams["UseChirality"],
    }

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate IntSparseIntVect fingerprints...
        return [
            rdMolDescriptors.GetAtomPairFingerprint(Mol, **CommonArgs)
            for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    FPSize = 2048
    BitsPerHash = 4
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=FPSize, nBitsPerEntry=BitsPerHash, **CommonArgs)
        for Mol in Mols
    ]
def __init__(self, fp_type, fp_bits=2048):
    """
    Register the available fingerprint functions and select one of them.

    Each entry of ``self.fp_dict`` is a two-element list
    ``[fingerprint_function, number_of_bits]``; the entry chosen by
    ``fp_type`` is stored in ``self.fp_function``.

    :param fp_type: fingerprint type; one of 'morgan2', 'morgan3', 'ap'
        or 'rdk5'
    :param fp_bits: number of fingerprint bits
    :raises SystemExit: with exit status 1 when ``fp_type`` is not a
        registered fingerprint type
    """
    self.fp_type = fp_type
    self.fp_dict = {
        'morgan2': [
            lambda m: rdmd.GetMorganFingerprintAsBitVect(m, 2, nBits=fp_bits),
            fp_bits,
        ],
        'morgan3': [
            lambda m: rdmd.GetMorganFingerprintAsBitVect(m, 3, nBits=fp_bits),
            fp_bits,
        ],
        'ap': [
            lambda m: rdmd.GetHashedAtomPairFingerprintAsBitVect(
                m, nBits=fp_bits),
            fp_bits,
        ],
        'rdk5': [
            lambda m: Chem.RDKFingerprint(
                m, maxPath=5, fpSize=fp_bits, nBitsPerHash=2),
            fp_bits,
        ],
    }

    if fp_type in self.fp_dict:
        self.fp_function = self.fp_dict[fp_type]
    else:
        print("invalid fingerprint type: %s" % fp_type)
        # An unknown fingerprint type is an error, so report a non-zero
        # exit status (the original exited with 0, i.e. "success").
        sys.exit(1)
def pka_similarities(smile, mol_set, n):
    """Score ``smile`` against every entry of ``mol_set`` by Dice similarity.

    The SMILES string is parsed and converted to a hashed atom-pair
    bit-vector fingerprint. For each entry of ``mol_set`` a pair
    ``[DiceSimilarity(query, entry[2]), entry[1]]`` is built; the pairs are
    sorted in ascending order, the first ``n`` are kept, and the result is
    returned as a flattened NumPy array.

    NOTE(review): the ascending sort means the ``n`` lowest-scoring pairs
    are the ones kept -- confirm this is the intended selection.
    """
    query_fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        Chem.MolFromSmiles(smile))
    scored_pairs = sorted(
        [DataStructs.DiceSimilarity(query_fp, entry[2]), entry[1]]
        for entry in mol_set)
    return np.asarray(scored_pairs[:n]).flatten()
def get_atompairs(molecule, length=512):
    """Return the hashed atom-pair bit-vector fingerprint of ``molecule``.

    ``length`` is passed as ``nBits``. Any exception raised while computing
    the fingerprint is printed, followed by a line ``"error"`` plus the
    string form of ``molecule``, and ``np.nan`` is returned instead.
    """
    try:
        return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            molecule, nBits=length)
    except Exception as err:
        # Best effort: report the failure and fall through to the NaN marker.
        print(err)
        print("error" + str(molecule))
    return np.nan
def testHashedAtomPairs(self):
    """Check hashed atom-pair fingerprints for benzene and pyridine.

    Benzene's fingerprint built with the positional arguments ``(2048,)``
    must equal the one built with ``(2048, 1, 3)``, and must have a Dice
    similarity strictly between 0 and 1 with the ``(2048, 1, 2)`` variant
    and with pyridine's fingerprint. The bit-vector fingerprints of the two
    molecules must likewise be partially similar.
    """

    def assert_partial_overlap(fp_a, fp_b):
        # Dice similarity has to lie strictly inside (0, 1).
        score = DataStructs.DiceSimilarity(fp_a, fp_b)
        self.assertTrue(0.0 < score < 1.0)

    benzene = Chem.MolFromSmiles('c1ccccc1')
    benzene_fp = rdMD.GetHashedAtomPairFingerprint(benzene, 2048)
    self.assertTrue(
        benzene_fp == rdMD.GetHashedAtomPairFingerprint(benzene, 2048, 1, 3))
    assert_partial_overlap(
        benzene_fp, rdMD.GetHashedAtomPairFingerprint(benzene, 2048, 1, 2))

    pyridine = Chem.MolFromSmiles('c1ccccn1')
    assert_partial_overlap(
        benzene_fp, rdMD.GetHashedAtomPairFingerprint(pyridine, 2048))

    assert_partial_overlap(
        rdMD.GetHashedAtomPairFingerprintAsBitVect(benzene, 2048),
        rdMD.GetHashedAtomPairFingerprintAsBitVect(pyridine, 2048))
def featurize(self, x):
    """Return the hashed atom-pair bit vector of ``x`` as a list.

    ``x`` is parsed with ``Chem.MolFromSmiles`` when ``self.input_type`` is
    ``'smiles'``, or when it is ``'any'`` and ``x`` is not already a
    ``Chem.rdchem.Mol``. The fingerprint uses ``self.n_bits`` bits.

    Raises:
        ValueError: if parsing was attempted and returned ``None``.
    """
    must_parse = self.input_type == 'smiles' or (
        self.input_type == 'any' and not isinstance(x, Chem.rdchem.Mol))
    if must_parse:
        raw_input = x
        x = Chem.MolFromSmiles(raw_input)
        if x is None:
            raise ValueError(
                'can not convert Mol from SMILES %s' % raw_input)

    bit_vector = rdMol.GetHashedAtomPairFingerprintAsBitVect(
        x, nBits=self.n_bits)
    return list(bit_vector)
def _encode(smi: str, fingerprint: str, radius: int, length: int) -> T_comp:
    """Encode a SMILES string as the requested fingerprint.

    Kept as a plain function (rather than a closure or lambda) so that it
    can be pickled for parallel processing.

    Parameters
    ----------
    smi : str
        the SMILES string of the molecule to encode
    fingerprint : str
        the type of fingerprint to generate; one of 'morgan', 'pair',
        'rdkit', 'maccs' or 'map4'
    radius : int
        the radius of the fingerprint
    length : int
        the length of the fingerprint

    Returns
    -------
    T_comp
        the compressed feature representation of the molecule

    Raises
    ------
    NotImplementedError
        if ``fingerprint`` is not one of the recognized types
    """
    mol = Chem.MolFromSmiles(smi)

    if fingerprint == 'morgan':
        encoded = rdmd.GetMorganFingerprintAsBitVect(
            mol, radius=radius, nBits=length, useChirality=True)
    elif fingerprint == 'pair':
        encoded = rdmd.GetHashedAtomPairFingerprintAsBitVect(
            mol, minLength=1, maxLength=1 + radius, nBits=length)
    elif fingerprint == 'rdkit':
        # NOTE(review): confirm that the module bound to `rdmd` actually
        # exposes RDKFingerprint.
        encoded = rdmd.RDKFingerprint(
            mol, minPath=1, maxPath=1 + radius, fpSize=length)
    elif fingerprint == 'maccs':
        encoded = rdmd.GetMACCSKeysFingerprint(mol)
    elif fingerprint == 'map4':
        calculator = map4.MAP4Calculator(
            dimensions=length, radius=radius, is_folded=True)
        encoded = calculator.calculate(mol)
    else:
        raise NotImplementedError(
            f'Unrecognized fingerprint: "{fingerprint}"')

    return encoded
def predict(self, mol, selected_descriptors):
    """Predict the requested properties of ``mol``.

    ``selected_descriptors`` is iterated once; the recognized names are
    'logP', 'sol', 'mp', 'pka' and 'mol_wt'. Requests cascade: 'sol' also
    computes (and reports) 'logP', and 'mp' also computes (and reports)
    'logP' and 'sol'. Unrecognized names are ignored.

    Returns a dict mapping each computed property name to its value.
    """
    want_logp = want_sol = want_mp = want_pka = want_wt = False
    for name in selected_descriptors:
        if name == 'logP':
            want_logp = True
        elif name == 'sol':
            want_logp = want_sol = True
        elif name == 'mp':
            want_logp = want_sol = want_mp = True
        elif name == 'pka':
            want_pka = True
        elif name == 'mol_wt':
            want_wt = True

    # The atom-pair fingerprint is computed unconditionally.
    fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)
    results = {}

    if want_logp:
        logP = self.logP_model.run(fp)
        results['logP'] = logP

    if want_sol:
        # want_sol always implies want_logp, so logP is defined here.
        logP_sol = self.logP_solubility_model.run(logP)
        atom_pair_sol = self.atom_pair_sol_model.run(fp)
        combined_sol = self.combined_model.run(
            mol, logP, logP_sol, atom_pair_sol)
        results['sol'] = logs_to_mg_ml(combined_sol, mol)

    if want_mp:
        # want_mp always implies want_sol, so combined_sol is defined here.
        results['mp'] = self.melting_point_model.run(combined_sol, logP)

    if want_pka:
        avalon = GetAvalonFP(mol)
        maacs = MACCSkeys.GenMACCSKeys(mol)
        results['pka'] = self.pKa_model.run(avalon + maacs + fp)

    if want_wt:
        results['mol_wt'] = rdMolDescriptors.CalcExactMolWt(mol)

    return results
def featurize(self, x):
    """Return the atom-pair fingerprint of ``x`` in the configured form.

    ``x`` is parsed with ``Chem.MolFromSmiles`` when ``self.input_type`` is
    ``'smiles'``, or when it is ``'any'`` and ``x`` is not already a
    ``Chem.rdchem.Mol``.

    When ``self.counting`` is truthy the hashed atom-pair fingerprint
    (``nBits=self.n_bits``) is handed to ``count_fp`` with
    ``dim=self.n_bits``; otherwise the bit-vector fingerprint
    (``nBits=self.n_bits``, ``nBitsPerEntry=self.bit_per_entry``) is
    returned as a list.

    Raises:
        ValueError: if parsing was attempted and returned ``None``.
    """
    must_parse = self.input_type == 'smiles' or (
        self.input_type == 'any' and not isinstance(x, Chem.rdchem.Mol))
    if must_parse:
        raw_input = x
        x = Chem.MolFromSmiles(raw_input)
        if x is None:
            raise ValueError('cannot convert Mol from SMILES %s' % raw_input)

    if not self.counting:
        bit_vector = rdMol.GetHashedAtomPairFingerprintAsBitVect(
            x, nBits=self.n_bits, nBitsPerEntry=self.bit_per_entry)
        return list(bit_vector)

    hashed_fp = rdMol.GetHashedAtomPairFingerprint(x, nBits=self.n_bits)
    return count_fp(hashed_fp, dim=self.n_bits)
def testAtomPairOptions(self):
    """Check the ``atomInvariants`` option on four fingerprint functions.

    For each function, benzene and pyridine must give different
    fingerprints by default, equal fingerprints when both use the
    invariants ``[1]*6``, and different fingerprints when one uses
    ``[1]*6`` and the other ``[2]*6``.
    """
    m1 = Chem.MolFromSmiles('c1ccccc1')
    m2 = Chem.MolFromSmiles('c1ccccn1')

    function_names = (
        'GetAtomPairFingerprint',
        'GetHashedAtomPairFingerprintAsBitVect',
        'GetTopologicalTorsionFingerprint',
        'GetHashedTopologicalTorsionFingerprintAsBitVect',
    )
    for function_name in function_names:
        # Looked up lazily so a missing function only fails at its own turn.
        make_fp = getattr(rdMD, function_name)

        self.assertNotEqual(make_fp(m1), make_fp(m2))

        self.assertEqual(
            make_fp(m1, atomInvariants=[1] * 6),
            make_fp(m2, atomInvariants=[1] * 6))

        self.assertNotEqual(
            make_fp(m1, atomInvariants=[1] * 6),
            make_fp(m2, atomInvariants=[2] * 6))
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints.

    Reads MinLength, MaxLength, UseChirality, FPSize and BitsPerHash from
    OptionsInfo["FingerprintsParams"]["AtomPairs"]. Returns one fingerprint
    per molecule in Mols: hashed bit vectors when
    OptionsInfo["SpecifiedFingerprintsType"] matches "BitVect"
    (case-insensitive), otherwise the fingerprints returned by
    GetAtomPairFingerprint.
    """

    FingerprintsType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo("\nGenerating AtomPairs %s fingerprints..." % FingerprintsType)

    AtomPairsParams = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments shared by both fingerprint flavours.
    CommonArgs = {
        "minLength": AtomPairsParams["MinLength"],
        "maxLength": AtomPairsParams["MaxLength"],
        "includeChirality": AtomPairsParams["UseChirality"],
    }
    # Both size options are looked up regardless of the fingerprint type.
    FPSize = AtomPairsParams["FPSize"]
    BitsPerHash = AtomPairsParams["BitsPerHash"]

    if not re.match("^BitVect$", OptionsInfo["SpecifiedFingerprintsType"], re.I):
        # Generate IntSparseIntVect fingerprints...
        return [rdMolDescriptors.GetAtomPairFingerprint(Mol, **CommonArgs) for Mol in Mols]

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash))
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=FPSize, nBitsPerEntry=BitsPerHash, **CommonArgs)
        for Mol in Mols
    ]
def __init__(self, fp_type_list, num_fp_bits=1024):
    """Register the available featurizers and select those requested.

    ``self.fp_dict`` maps each supported type ('descriptors', 'morgan2',
    'morgan3', 'ap', 'rdk5') to a two-element list
    ``[function, number_of_bits]``, with -1 used as the size for
    'descriptors'. For every name in ``fp_type_list`` the matching entry is
    appended to ``self.fp_function_list`` and the corresponding column
    names are appended to ``self.fp_names``.

    An unknown type prints a message and exits with status 1.
    """
    self.num_fp_bits = num_fp_bits
    self.fp_function_list = []
    self.fp_type_list = fp_type_list
    self.fp_dict = {}
    self.des_names = [entry[0] for entry in Descriptors._descList]
    des_calculator = MoleculeDescriptors.MolecularDescriptorCalculator(
        self.des_names)

    bits = self.num_fp_bits
    # The lambdas read self.num_fp_bits when called; the stored size is
    # the value at construction time.
    self.fp_dict.update({
        'descriptors': [lambda m: des_calculator.CalcDescriptors(m), -1],
        'morgan2': [
            lambda m: rdmd.GetMorganFingerprintAsBitVect(
                m, 2, nBits=self.num_fp_bits),
            bits,
        ],
        'morgan3': [
            lambda m: rdmd.GetMorganFingerprintAsBitVect(
                m, 3, nBits=self.num_fp_bits),
            bits,
        ],
        'ap': [
            lambda m: rdmd.GetHashedAtomPairFingerprintAsBitVect(
                m, nBits=self.num_fp_bits),
            bits,
        ],
        'rdk5': [
            lambda m: Chem.RDKFingerprint(
                m, maxPath=5, fpSize=self.num_fp_bits, nBitsPerHash=2),
            bits,
        ],
    })

    self.fp_names = []
    for fp_type in fp_type_list:
        if not self.fp_dict.get(fp_type):
            print("invalid fingerprint type: %s" % fp_type)
            sys.exit(1)

        self.fp_function_list.append(self.fp_dict[fp_type])
        if fp_type == "descriptors":
            self.fp_names.extend(self.des_names)
        else:
            self.fp_names.extend(self.get_names(self.num_fp_bits))
from chemical_models import AtomPairSolubility, LogP, LogPSolubility

# Build the feature matrix X and target vector Y for the combined
# solubility model from the AqSol data file. Each data line holds a SMILES
# string and a solubility value separated by a single space.

logP_model = LogP('logP')
logP_solubility_model = LogPSolubility('logS_logP')
atom_pair_sol_model = AtomPairSolubility('water_solubility')

# SMARTS matching any aromatic atom; compiled once rather than per line.
_AROMATIC_ATOM = Chem.MolFromSmarts('[a]')

X = []
Y = []
# The context manager guarantees the data file is closed after reading.
with open('data/water_solubility/aqsol.txt', 'r') as data:
    for line in data:
        split = line.split(' ')
        if len(split) < 2:
            # Blank or malformed line: nothing to parse.
            continue

        mol = Chem.MolFromSmiles(split[0])
        fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            mol)
        logP = logP_model.run(fingerprint)
        logP_sol = logP_solubility_model.run(logP)
        atom_pair_sol = atom_pair_sol_model.run(fingerprint)

        # Additional ESOL empirical model to increase accuracy
        mw = Descriptors.ExactMolWt(mol)
        rb = rdMolDescriptors.CalcNumRotatableBonds(mol)
        # Fraction of heavy atoms that are aromatic.
        ap = len(mol.GetSubstructMatches(_AROMATIC_ATOM)) / mol.GetNumHeavyAtoms()
        esol = 0.16 - 0.63 * logP - 0.0062 * mw + 0.066 * rb - 0.74 * ap

        X.append([logP_sol, atom_pair_sol, esol])
        # float() ignores surrounding whitespace, so the trailing newline
        # needs no slicing; slicing would drop a digit from a final line
        # that has no newline.
        Y.append(float(split[1]))
# Usage: python3 run_model.py model_name.pkl scaler_name.pkl SMILE
#
# Loads a pickled model and a pickled scaler, computes the hashed atom-pair
# bit-vector fingerprint of the given SMILES string, scales it and prints
# the model's prediction.
import pickle
import sys

import numpy as np
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors

if len(sys.argv) < 4:
    sys.exit(
        "Usage: python3 run_model.py model_name.pkl scaler_name.pkl SMILE")

# NOTE: pickle.load can execute arbitrary code contained in the file; only
# pass model/scaler files that come from a trusted source.
with open(sys.argv[1], 'rb') as model_file:
    model = pickle.load(model_file)
with open(sys.argv[2], 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

mol = Chem.MolFromSmiles(sys.argv[3])
if mol is None:
    # MolFromSmiles returns None for strings it cannot parse.
    sys.exit("Could not parse SMILES: %s" % sys.argv[3])

compound = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)
# The scaler receives a single sample, hence the (1, -1) reshape.
print(model.predict(scaler.transform(np.asarray(compound).reshape(1, -1))))
# Simple benzodiazepine classfication model based on similarity # to other benzodiazepines from rdkit import Chem from rdkit.Chem import rdMolDescriptors from rdkit import DataStructs from sklearn.model_selection import train_test_split data = open('data/benzodiazepine_activator/total_smiles.txt', 'r') molecules = [] for line in data.readlines(): compound = Chem.MolFromSmiles(line[:-1]) molecules.append( (rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(compound), compound)) def model(mol, active_mols): similarities = [] for active_mol in active_mols: similarities.append(DataStructs.DiceSimilarity(mol[0], active_mol[0])) return max(similarities) train_mols, test_mols = train_test_split(molecules, test_size=0.1, random_state=1) for test_mol in test_mols:
# Populate fpdict with one fingerprint per type, all computed from the same
# input and (except MACCS) sized by nbits.
# NOTE(review): `smiles` is passed straight to RDKit fingerprint functions;
# presumably it already holds a Mol object despite its name -- confirm.

# Morgan fingerprints with radius 2 and 3.
fpdict['ecfp4'] = AllChem.GetMorganFingerprintAsBitVect(smiles, 2, nBits=nbits)
fpdict['ecfp6'] = AllChem.GetMorganFingerprintAsBitVect(smiles, 3, nBits=nbits)
# Morgan fingerprints with useFeatures=True, radius 1, 2 and 3.
fpdict['fcfp2'] = AllChem.GetMorganFingerprintAsBitVect(smiles, 1, useFeatures=True, nBits=nbits)
fpdict['fcfp4'] = AllChem.GetMorganFingerprintAsBitVect(smiles, 2, useFeatures=True, nBits=nbits)
fpdict['fcfp6'] = AllChem.GetMorganFingerprintAsBitVect(smiles, 3, useFeatures=True, nBits=nbits)
# MACCS keys (no nbits argument is passed).
fpdict['maccs'] = MACCSkeys.GenMACCSKeys(smiles)
# Hashed atom-pair and topological-torsion bit vectors.
fpdict['ap'] = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
    smiles, nBits=nbits)
fpdict[
    'tt'] = rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
        smiles, nBits=nbits)
# RDKit fingerprints with maxPath 5, 6 and 7, two bits per hash.
fpdict['rdk5'] = Chem.RDKFingerprint(smiles, maxPath=5, fpSize=nbits, nBitsPerHash=2)
fpdict['rdk6'] = Chem.RDKFingerprint(smiles, maxPath=6, fpSize=nbits, nBitsPerHash=2)
fpdict['rdk7'] = Chem.RDKFingerprint(smiles, maxPath=7, fpSize=nbits, nBitsPerHash=2)
# Creates a png of a list of SMILES
# Sorts the molecules by taking first molecule and then arranging the rest
# from most to least similar to that molecule
from rdkit.Chem import Draw
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import rdMolDescriptors

molecules = []
# The context manager guarantees the SMILES file is closed after reading.
with open('data/benzodiazepine_activator/total_smiles.txt', 'r') as data:
    for line in data:
        # strip() removes the newline without truncating a final line that
        # has no trailing newline (slicing off the last character would).
        smiles = line.strip()
        if not smiles:
            continue
        mol = Chem.MolFromSmiles(smiles)
        combined = (rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol),
                    mol)
        molecules.append(combined)

# Higher Dice similarity means more similar, so sort in descending order to
# get the documented "most to least similar" arrangement. The reference
# molecule itself (the first one) is not part of the sorted list.
sorted_mols = sorted(
    molecules[1:],
    key=lambda x: DataStructs.DiceSimilarity(molecules[0][0], x[0]),
    reverse=True)

img = Draw.MolsToGridImage([x[1] for x in sorted_mols],
                           molsPerRow=10,
                           subImgSize=(200, 200))
img.save('molecules.png')
import rdkit from rdkit.Chem import AllChem from rdkit.Chem import MACCSkeys from rdkit.Chem import rdMolDescriptors import sys smi = Chem.SmilesMolSupplier(sys.argv[1], delimiter=',', titleLine=True) fps = [ AllChem.GetMorganFingerprintAsBitVect(x, 2, useBondTypes=False, nBits=1024) for x in smi ] ### ECFP4 fps2 = [ AllChem.GetMorganFingerprintAsBitVect(x, 1, useBondTypes=False, nBits=1024) for x in smi ] ### ECFP2 maccs = [MACCSkeys.GenMACCSKeys(x) for x in smi] dl = [rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(x) for x in smi] print("D1,D2,ECFP4,ECFP2,MACCS,DL,AVG,Weighted") seen = [] for i in range(len(fps)): d1 = smi[i].GetProp('_Name') for ii in range(len(fps)): d2 = smi[ii].GetProp('_Name') dist = DataStructs.FingerprintSimilarity(fps[i], fps[ii]) dist2 = DataStructs.FingerprintSimilarity(fps2[i], fps2[ii]) distMACCS = DataStructs.FingerprintSimilarity(maccs[i], maccs[ii]) distDL = DataStructs.FingerprintSimilarity(dl[i], dl[ii]) weightedavg = dist * .3 + dist2 * .3 + distDL * .3 + distMACCS * .1 avg = (dist + dist2 + distDL + distMACCS) / 4 print( str(d1) + "," + str(d2) + "," + str(dist) + "," + str(dist2) + "," + str(distMACCS) + "," + str(distDL) + "," + str(avg) + "," +
# Fingerprint name -> function taking a molecule and returning its
# fingerprint. The lambdas look up nbits / longbits when they are called.
fpFunc_dict = {
    # Morgan fingerprints, radius 0-3
    'ecfp0': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 0, nBits=nbits),
    'ecfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, nBits=nbits),
    'ecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=nbits),
    'ecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=nbits),
    # Morgan fingerprints with useFeatures=True
    'fcfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, useFeatures=True, nBits=nbits),
    'fcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=nbits),
    'fcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=nbits),
    # "Long" variants sized by longbits
    'lecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=longbits),
    'lecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=longbits),
    'lfcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=longbits),
    'lfcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=longbits),
    'maccs': lambda m: MACCSkeys.GenMACCSKeys(m),
    # Hashed atom-pair and topological-torsion bit vectors
    'hashap': lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(m, nBits=nbits),
    'hashtt': lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=nbits),
    # Avalon fingerprints
    'avalon': lambda m: fpAvalon.GetAvalonFP(m, nbits),
    'laval': lambda m: fpAvalon.GetAvalonFP(m, longbits),
    # RDKit fingerprints with maxPath 5, 6 and 7
    'rdk5': lambda m: Chem.RDKFingerprint(m, maxPath=5, fpSize=nbits, nBitsPerHash=2),
    'rdk6': lambda m: Chem.RDKFingerprint(m, maxPath=6, fpSize=nbits, nBitsPerHash=2),
    'rdk7': lambda m: Chem.RDKFingerprint(m, maxPath=7, fpSize=nbits, nBitsPerHash=2),
    # Features produced by helpers defined elsewhere in the file
    'tpatf': lambda m: get_tpatf(m),
    'rdkDes': lambda m: calc.CalcDescriptors(m),
}

# Names of the entries above that are sized by longbits.
long_fps = {'laval', 'lecfp4', 'lecfp6', 'lfcfp4', 'lfcfp6'}

# Fingerprint names selected for generation.
fps_to_generate = [
    'fcfp4',
    'rdkDes',
    'tpatf',
    'rdk5',
    'hashap',
    'avalon',
    'laval',
    'rdk7',
]

ModFileName_LoadedModel_dict = dict()
Parameters: probeMol -- the probe molecule fpFunction -- the fingerprint function predictionFunction -- the prediction function of the ML model kwargs -- additional arguments for drawing """ weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction) weights, maxWeight = GetStandardizedWeights(weights) fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) return fig, maxWeight apDict = {} apDict['normal'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(m, minLength=minl, maxLength=maxl, ignoreAtoms=ia) apDict['hashed'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprint(m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia) apDict['bv'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprintAsBitVect(m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe, ignoreAtoms=ia) # usage: lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry) def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4): """ Calculates the atom pairs fingerprint with the torsions of atomId removed. Parameters: mol -- the molecule of interest atomId -- the atom to remove the pairs for (if -1, no pair is removed) fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maxmimum path length for an atom pair nBitsPerEntry -- the number of bits available for each pair """
fpdict["lecfp6"] = lambda m: AllChem.GetMorganFingerprintAsBitVect( m, 3, nBits=longbits ) fpdict["lfcfp4"] = lambda m: AllChem.GetMorganFingerprintAsBitVect( m, 2, useFeatures=True, nBits=longbits ) fpdict["lfcfp6"] = lambda m: AllChem.GetMorganFingerprintAsBitVect( m, 3, useFeatures=True, nBits=longbits ) fpdict["maccs"] = lambda m: MACCSkeys.GenMACCSKeys(m) fpdict["ap"] = lambda m: Pairs.GetAtomPairFingerprint(m) fpdict["tt"] = lambda m: Torsions.GetTopologicalTorsionFingerprintAsIntVect(m) fpdict[ "hashap" ] = lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( m, nBits=nbits ) fpdict[ "hashap_cas_length" ] = lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( m, nBits=n_cas_bits ) fpdict[ "hashtt" ] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( m, nBits=nbits ) fpdict[ "hashtt_cas_length" ] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( m, nBits=n_cas_bits
"""
Predict the pKa of an acid from a SMILES string.

The SMILES string is read from the first command-line argument. Prints both
the similarity model prediction and the fingerprint model prediction.
"""
from chemical_models import AcidSimilarity, AcidpKa
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors, MACCSkeys
from rdkit.Avalon.pyAvalonTools import GetAvalonFP
import sys

# Load models
sim_model = AcidSimilarity('acid_sim')
fp_model = AcidpKa('pKa_acid')

mol = Chem.MolFromSmiles(sys.argv[1])

# Set of acids required for similarity model: one
# [SMILES, value, atom-pair fingerprint] entry per data line.
acids = []
# The context manager guarantees the data file is closed after reading.
with open('data/pKa/formatted_acidic.txt', 'r') as acid_data:
    # Read acids from file
    for line in acid_data:
        split = line.split(' ')
        if len(split) < 2:
            # Blank or malformed line: nothing to parse.
            continue
        acids.append([
            split[0],
            # float() ignores surrounding whitespace, so the trailing
            # newline needs no slicing; slicing would drop a digit from a
            # final line that has no newline.
            float(split[1]),
            rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
                Chem.MolFromSmiles(split[0])),
        ])

# Run the models and print results
print("Similarity based model: " + str(sim_model.run(sys.argv[1], acids)))
# The fingerprint model takes the Avalon, MACCS and atom-pair bit vectors
# joined with `+`.
print("Fingerprint based model: " + str(
    fp_model.run(
        GetAvalonFP(mol) + MACCSkeys.GenMACCSKeys(mol) +
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol))))
def pair_fingerprinter(mol):
    """Return the hashed atom-pair bit vector of ``mol`` via ``_fp_to_bytes``.

    The fingerprint size and the path-length bounds come from the
    ``fpSize``, ``minLength`` and ``maxLength`` names of the enclosing
    scope.
    """
    options = dict(nBits=fpSize, minLength=minLength, maxLength=maxLength)
    bit_vector = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        mol, **options)
    return _fp_to_bytes(bit_vector)
import numpy, cPickle from rdkit import Chem, DataStructs from rdkit.Chem import rdMolDescriptors as rdmd # global variables num_act = 1528 num_dcy = 293606 num_rep = 50 num_percent = 0.1 # fingerprint dictionary fp_dict = {} fp_dict['morgan2'] = lambda m: rdmd.GetMorganFingerprintAsBitVect( m, 2, nBits=1024) fp_dict['ap'] = lambda m: rdmd.GetHashedAtomPairFingerprintAsBitVect( m, nBits=2048) fp_dict['rdk5'] = lambda m: Chem.RDKFingerprint( m, maxPath=5, fpSize=2048, nBitsPerHash=2) def getNumpyFP(smiles, fpname, fptype): m = Chem.MolFromSmiles(smiles) if m is not None: # calculate fingerprint fp = fp_dict[fpname](m) # convert to numpy array if fptype == 'bool': arr = numpy.zeros((1, ), numpy.bool) elif fptype == 'float': arr = numpy.zeros((1, ), numpy.float32) else: