def build_deltaFP(reactions):
    """Build perturbation ("delta") fingerprints for pairs of ligands.

    Parameters:
      reactions -- iterable of lists; each list holds, in order, the
                   perturbation name, the reaction SMILES, the SMILES of
                   ligand A and the SMILES of ligand B.

    Returns:
      A list of rows.  The first row is the header.  Every following row
      is the input list extended with the Dice similarity of the two
      ligands (as a string) and the element-wise difference
      ``fingerprint(B) - fingerprint(A)``.

    Raises:
      ValueError -- if a ligand SMILES cannot be converted to a molecule.

    NOTE(review): the header names a single "Perturbation Fingerprint
    (256 bits)" column while each data row carries one value per bit;
    the header is left untouched because callers may rely on it.
    """
    PerturbationFingerprints = [[
        "Perturbation",
        "Reaction_SMILES",
        "ligandA_SMILES",
        "ligandB_SMILES",
        "Member_Similarity (Dice)",
        "Perturbation Fingerprint (256 bits)",
    ]]

    for reaction_members in reactions:
        pert = str(reaction_members[0])

        # Take a mol object from each member.  MolFromSmiles returns None
        # (it does not raise) on unparsable input, so fail here with a
        # clear message instead of deep inside the fingerprint call.
        members = []
        for smiles in (reaction_members[2], reaction_members[3]):
            member = Chem.MolFromSmiles(smiles)
            if member is None:
                raise ValueError(
                    'cannot convert Mol from SMILES %s (perturbation %s)'
                    % (smiles, pert))
            members.append(member)

        # 256-entry hashed atom-pair count vectors, restricted to atom
        # pairs separated by path lengths 1 to 3.
        FP1, FP2 = [
            rdMolDescriptors.GetHashedAtomPairFingerprint(
                member, nBits=256, minLength=1, maxLength=3)
            for member in members
        ]

        similarity = DataStructs.DiceSimilarity(FP1, FP2)

        # Subtract to obtain the reaction FP (= deltaFP).
        deltaFP = np.array(list(FP2)) - np.array(list(FP1))

        # Join all the data together into one row and append to output.
        result = reaction_members + [str(similarity)] + deltaFP.tolist()
        PerturbationFingerprints.append(result)

        print(pert + ":")
        print(reaction_members[1])
        print("##########")

    return PerturbationFingerprints
def calculateMol(self, m, smiles, internalParsing=False):
    """Return the hashed atom-pair counts of ``m`` as a list.

    Parameters:
      m               -- the molecule handed to the fingerprint function
      smiles          -- forwarded unchanged to ``clip`` with every count
      internalParsing -- accepted but not used by this method

    The fingerprint is computed with ``self.minPathLen``,
    ``self.maxPathLen`` and ``self.nbits``.  Each entry is passed through
    ``clip(value, smiles)``, a helper defined outside this block
    (presumably it bounds the count -- confirm against its definition).
    """
    fingerprint = rd.GetHashedAtomPairFingerprint(
        m,
        minLength=self.minPathLen,
        maxLength=self.maxPathLen,
        nBits=self.nbits,
    )
    return [clip(value, smiles) for value in fingerprint]
def AtomPairFingerprint(molecule_smile):
    """Append the hashed atom-pair fingerprint of a SMILES to ``atompair``.

    Parameters:
      molecule_smile -- SMILES string of the molecule

    The fingerprint is computed with RDKit's default settings and every
    entry, in index order, is appended to ``atompair``.  ``atompair`` is
    not created here: it must already exist in an enclosing scope and
    provide ``append``.  Nothing is returned.

    Raises:
      ValueError -- if the SMILES cannot be converted to a molecule.
    """
    ms = Chem.MolFromSmiles(molecule_smile)
    if ms is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(
            'cannot convert Mol from SMILES %s' % (molecule_smile,))

    desc = rdMolDescriptors.GetHashedAtomPairFingerprint(ms)

    # Walk every index explicitly so entries are appended one by one,
    # exactly as many as the fingerprint reports via GetLength().
    for x in range(int(desc.GetLength())):
        atompair.append(desc[x])
def build_deltaFP(reactions, n_bits=256):
    """Build perturbation ("delta") fingerprints from reaction SMILES.

    Parameters:
      reactions -- iterable of lists; item 0 is the perturbation name and
                   item 1 the reaction SMILES ``"<member1>>><member2>"``.
                   The remaining items are copied to the output row as-is.
      n_bits    -- size of the hashed atom-pair fingerprints and therefore
                   the number of ``pfp<i>`` columns (default 256).

    Returns:
      A list of rows.  The first row is the header (five descriptive
      columns followed by ``pfp0`` .. ``pfp<n_bits-1>``).  Every following
      row is the input list extended with the Dice similarity of the two
      members (as a string) and the element-wise difference
      ``fingerprint(member2) - fingerprint(member1)``.

    Raises:
      ValueError -- if a reaction SMILES has no ``>>`` separator or if one
                    of its members cannot be converted to a molecule.
    """
    print("Building FPs and writing to CSV..")

    header = [
        "Perturbation",
        "Reaction_SMILES",
        "fullmember1",
        "fullmember2",
        "Member_Similarity (Dice)",
    ]
    # One column per fingerprint position; driven by the same n_bits that
    # sizes the fingerprints so header and rows cannot drift apart.
    header.extend("pfp" + str(bit) for bit in range(n_bits))
    PerturbationFingerprints = [header]

    for reaction_members in reactions:
        pert = str(reaction_members[0])

        # Deconstruct the reaction SMILES back into its two members.
        head, sep, tail = reaction_members[1].partition(">>")
        if not sep:
            # Without the separator the second member would be the empty
            # string and the row would be silently meaningless.
            raise ValueError(
                'reaction SMILES %s has no ">>" separator (perturbation %s)'
                % (reaction_members[1], pert))

        fingerprints = []
        for smiles in (head, tail):
            # sanitize=False skips RDKit's sanitization step (the original
            # author used it to retain hydrogens and tolerate valence
            # discrepancies).  A syntactically invalid SMILES still
            # yields None.
            member = Chem.MolFromSmiles(smiles, sanitize=False)
            if member is None:
                raise ValueError(
                    'cannot convert Mol from SMILES %s (perturbation %s)'
                    % (smiles, pert))
            # Unsanitized molecules need their property cache filled in
            # before fingerprinting; strict=False tolerates odd valences.
            member.UpdatePropertyCache(strict=False)
            fingerprints.append(
                rdMolDescriptors.GetHashedAtomPairFingerprint(member, n_bits))
        FP1, FP2 = fingerprints

        similarity = DataStructs.DiceSimilarity(FP1, FP2)

        # Subtract to obtain the reaction FP (= deltaFP).
        deltaFP = np.array(list(FP2)) - np.array(list(FP1))

        # Join all the data together into one row and append to output.
        result = reaction_members + [str(similarity)] + deltaFP.tolist()
        PerturbationFingerprints.append(result)

    return PerturbationFingerprints
def testHashedAtomPairs(self):
    """Exercise the hashed atom-pair fingerprints on benzene and pyridine.

    Covers the count-vector form (default and explicit path-length
    limits) and the bit-vector form, using Dice similarity to check that
    related fingerprints overlap partially but not completely.
    """
    n_bits = 2048
    benzene = Chem.MolFromSmiles('c1ccccc1')
    pyridine = Chem.MolFromSmiles('c1ccccn1')

    def check_partial_overlap(first, second):
        # Similarity must lie strictly between "nothing shared" and
        # "identical".
        dice = DataStructs.DiceSimilarity(first, second)
        self.assertTrue(0.0 < dice < 1.0)

    # Count vectors: for benzene, limiting path lengths to 1..3 is
    # expected to give the same fingerprint as the default limits.
    benzene_counts = rdMD.GetHashedAtomPairFingerprint(benzene, n_bits)
    limited_to_3 = rdMD.GetHashedAtomPairFingerprint(benzene, n_bits, 1, 3)
    self.assertTrue(benzene_counts == limited_to_3)

    # Cutting the maximum path length to 2 drops some pairs.
    limited_to_2 = rdMD.GetHashedAtomPairFingerprint(benzene, n_bits, 1, 2)
    check_partial_overlap(benzene_counts, limited_to_2)

    # A different molecule shares some, but not all, atom pairs.
    pyridine_counts = rdMD.GetHashedAtomPairFingerprint(pyridine, n_bits)
    check_partial_overlap(benzene_counts, pyridine_counts)

    # Same comparison using the bit-vector variant.
    benzene_bits = rdMD.GetHashedAtomPairFingerprintAsBitVect(benzene, n_bits)
    pyridine_bits = rdMD.GetHashedAtomPairFingerprintAsBitVect(pyridine, n_bits)
    check_partial_overlap(benzene_bits, pyridine_bits)
def featurize(self, x):
    """Turn one molecule into a hashed atom-pair fingerprint.

    Parameters:
      x -- the input; it is parsed as SMILES when ``self.input_type`` is
           ``'smiles'``, or when it is ``'any'`` and ``x`` is not already
           an RDKit ``Mol``.  For every other ``input_type`` value ``x``
           is used as given.

    Returns:
      If ``self.counting`` is truthy, the result of ``count_fp`` applied
      to the count fingerprint (``count_fp`` is defined outside this
      block).  Otherwise the bit-vector fingerprint converted to a list.
      Both use ``self.n_bits``; the bit-vector form also uses
      ``self.bit_per_entry``.

    Raises:
      ValueError -- if SMILES parsing was required and failed.
    """
    mode = self.input_type
    needs_parsing = mode == 'smiles' or (
        mode == 'any' and not isinstance(x, Chem.rdchem.Mol))

    if needs_parsing:
        smiles = x
        x = Chem.MolFromSmiles(smiles)
        if x is None:
            raise ValueError('cannot convert Mol from SMILES %s' % smiles)

    if not self.counting:
        bit_vect = rdMol.GetHashedAtomPairFingerprintAsBitVect(
            x, nBits=self.n_bits, nBitsPerEntry=self.bit_per_entry)
        return list(bit_vect)

    return count_fp(
        rdMol.GetHashedAtomPairFingerprint(x, nBits=self.n_bits),
        dim=self.n_bits,
    )
Parameters: probeMol -- the probe molecule fpFunction -- the fingerprint function predictionFunction -- the prediction function of the ML model kwargs -- additional arguments for drawing """ weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction) weights, maxWeight = GetStandardizedWeights(weights) fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs) return fig, maxWeight apDict = {} apDict['normal'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(m, minLength=minl, maxLength=maxl, ignoreAtoms=ia) apDict['hashed'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprint(m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia) apDict['bv'] = lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprintAsBitVect(m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe, ignoreAtoms=ia) # usage: lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry) def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4): """ Calculates the atom pairs fingerprint with the torsions of atomId removed. Parameters: mol -- the molecule of interest atomId -- the atom to remove the pairs for (if -1, no pair is removed) fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maxmimum path length for an atom pair nBitsPerEntry -- the number of bits available for each pair
def calculateMol(self, m, smiles, internalParsing=False):
    """Return the hashed atom-pair fingerprint of ``m`` via ``clip_sparse``.

    Parameters:
      m               -- the molecule handed to the fingerprint function
      smiles          -- accepted but not used by this method
      internalParsing -- accepted but not used by this method

    The fingerprint is computed with ``self.minPathLen``,
    ``self.maxPathLen`` and ``self.nbits``, then handed together with
    ``self.nbits`` to ``clip_sparse``, a helper defined outside this
    block; its result is returned unchanged.
    """
    pair_counts = rd.GetHashedAtomPairFingerprint(
        m,
        minLength=self.minPathLen,
        maxLength=self.maxPathLen,
        nBits=self.nbits,
    )
    return clip_sparse(pair_counts, self.nbits)