Exemplo n.º 1
0
def build_deltaFP(reactions):
    """Build perturbation (delta) fingerprints for a collection of ligand pairs.

    For every entry, both ligand SMILES are parsed, a 256-position hashed
    atom-pair fingerprint is computed for each (path lengths limited by the
    positional ``1, 3`` arguments), the Dice similarity of the two
    fingerprints is taken, and the element-wise difference ``FP2 - FP1`` is
    used as the perturbation fingerprint.

    Parameters:
        reactions: iterable of sequences whose items 0..3 are the
            perturbation name, the reaction SMILES, the SMILES of ligand A
            and the SMILES of ligand B.

    Returns:
        A list of rows. The first row is the header; every following row is
        the input entry followed by the Dice similarity (as a string) and
        the individual delta fingerprint values.

    Raises:
        ValueError: if either ligand SMILES cannot be parsed.
    """
    # NOTE(review): the header carries a single label for the fingerprint,
    # while each data row appends one value per fingerprint position.
    header = [
        "Perturbation",
        "Reaction_SMILES",
        "ligandA_SMILES",
        "ligandB_SMILES",
        "Member_Similarity (Dice)",
        "Perturbation Fingerprint (256 bits)",
    ]
    PerturbationFingerprints = [header]

    for reaction_members in reactions:
        # Take a mol object from each member. MolFromSmiles returns None on
        # a parse failure, so fail early with a message naming the input.
        ligand_mols = []
        for smiles in (reaction_members[2], reaction_members[3]):
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                raise ValueError(
                    "cannot parse SMILES %r for perturbation %s"
                    % (smiles, reaction_members[0])
                )
            ligand_mols.append(mol)
        member1, member2 = ligand_mols

        # Hashed atom-pair count fingerprint of 256 positions per member.
        FP1 = rdMolDescriptors.GetHashedAtomPairFingerprint(member1, 256, 1, 3)
        FP2 = rdMolDescriptors.GetHashedAtomPairFingerprint(member2, 256, 1, 3)
        similarity = DataStructs.DiceSimilarity(FP1, FP2)

        # Subtract to obtain the reaction FP (= deltaFP).
        deltaFP = np.array(list(FP2)) - np.array(list(FP1))

        # Join all the data into one flat row; list() lets tuple entries work.
        result = list(reaction_members) + [str(similarity)] + deltaFP.tolist()
        PerturbationFingerprints.append(result)

        # Progress output, one block per processed entry.
        print(str(reaction_members[0]) + ":")
        print(reaction_members[1])
        print("##########")
    return PerturbationFingerprints
Exemplo n.º 2
0
 def calculateMol(self, m, smiles, internalParsing=False):
     """Return the clipped hashed atom-pair counts of molecule ``m``.

     The fingerprint is computed with this object's ``minPathLen``,
     ``maxPathLen`` and ``nbits`` settings, expanded into a plain list, and
     every entry is passed through ``clip`` together with ``smiles``.

     Parameters:
         m: the molecule handed to the fingerprint function.
         smiles: forwarded unchanged as the second argument of ``clip``
             (presumably used for reporting -- confirm against ``clip``).
         internalParsing: not used by this method.

     Returns:
         A list with one clipped value per fingerprint position.
     """
     fingerprint = rd.GetHashedAtomPairFingerprint(
         m,
         minLength=self.minPathLen,
         maxLength=self.maxPathLen,
         nBits=self.nbits,
     )
     raw_counts = list(fingerprint)
     return [clip(value, smiles) for value in raw_counts]
Exemplo n.º 3
0
def AtomPairFingerprint(molecule_smile):
    """Append the hashed atom-pair fingerprint of a SMILES to ``atompair``.

    The SMILES is parsed, its hashed atom-pair fingerprint is computed with
    the library's default settings, and the value at every position of the
    fingerprint is appended, in order, to ``atompair``. ``atompair`` is not
    defined in this function (presumably a module-level list -- confirm).
    Nothing is returned.

    Parameters:
        molecule_smile: SMILES string of the molecule.

    Raises:
        ValueError: if ``molecule_smile`` cannot be parsed.
    """
    ms = Chem.MolFromSmiles(molecule_smile)
    # MolFromSmiles signals a parse failure by returning None; report it
    # here instead of failing inside the fingerprint call.
    if ms is None:
        raise ValueError("cannot parse SMILES %r" % (molecule_smile,))

    desc = rdMolDescriptors.GetHashedAtomPairFingerprint(ms)
    for position in range(int(desc.GetLength())):
        atompair.append(desc[position])
Exemplo n.º 4
0
def build_deltaFP(reactions):
    """Build perturbation (delta) fingerprints from reaction SMILES.

    Each reaction SMILES is split at ``">>"`` into its two members. Both are
    parsed without sanitization, a 256-position hashed atom-pair fingerprint
    is computed for each, their Dice similarity is taken, and the
    element-wise difference ``FP2 - FP1`` is used as the perturbation
    fingerprint. Despite the progress message, this function only builds and
    returns the rows; it does not write any file itself.

    Parameters:
        reactions: iterable of sequences whose item 0 is the perturbation
            name and item 1 the reaction SMILES. The header suggests four
            items per entry (name, reaction SMILES, member 1, member 2) --
            TODO confirm with the caller.

    Returns:
        A list of rows. The first row is the header (five descriptive
        columns followed by ``pfp0`` .. ``pfp255``); every following row is
        the input entry followed by the Dice similarity (as a string) and
        the delta fingerprint values.

    Raises:
        ValueError: if either side of a reaction SMILES cannot be parsed.
    """
    print("Building FPs and writing to CSV..")
    n_bits = 256  # fingerprint length; also the number of pfp* columns

    header = [
        "Perturbation",
        "Reaction_SMILES",
        "fullmember1",
        "fullmember2",
        "Member_Similarity (Dice)",
    ]
    header += ["pfp" + str(index) for index in range(n_bits)]
    PerturbationFingerprints = [header]

    for reaction_members in reactions:
        # Deconstruct the reaction SMILES back into its members.
        # NOTE(review): without a ">>" separator, partition leaves `tail`
        # empty; confirm how such entries should be treated.
        head, _, tail = reaction_members[1].partition(">>")

        # Take a mol object from each member; sanitization is skipped to
        # retain hydrogens and override valency discrepancies. Parsing can
        # still return None, so fail early with a message naming the input.
        members = []
        for smiles in (head, tail):
            mol = Chem.MolFromSmiles(smiles, sanitize=False)
            if mol is None:
                raise ValueError(
                    "cannot parse SMILES %r for perturbation %s"
                    % (smiles, reaction_members[0])
                )
            members.append(mol)
        member1, member2 = members
        member1.UpdatePropertyCache(strict=False)
        member2.UpdatePropertyCache(strict=False)

        # Hashed atom-pair count fingerprint for each member.
        FP1 = rdMolDescriptors.GetHashedAtomPairFingerprint(member1, n_bits)
        FP2 = rdMolDescriptors.GetHashedAtomPairFingerprint(member2, n_bits)
        similarity = DataStructs.DiceSimilarity(FP1, FP2)

        # Subtract to obtain the reaction FP (= deltaFP).
        deltaFP = np.array(list(FP2)) - np.array(list(FP1))

        # Join all the data into one flat row and append it to the output.
        result = reaction_members + [str(similarity)] + deltaFP.tolist()
        PerturbationFingerprints.append(result)

    return PerturbationFingerprints
Exemplo n.º 5
0
    def testHashedAtomPairs(self):
        """Exercise hashed atom-pair fingerprints and their Dice similarity.

        Covers the count-vector variant (default versus explicit length
        arguments, and two different molecules) and the bit-vector variant.
        """
        benzene = Chem.MolFromSmiles('c1ccccc1')
        reference_fp = rdMD.GetHashedAtomPairFingerprint(benzene, 2048)

        # Passing 1 and 3 after the size is expected to give the same
        # fingerprint as the defaults for this molecule.
        other_fp = rdMD.GetHashedAtomPairFingerprint(benzene, 2048, 1, 3)
        self.assertTrue(reference_fp == other_fp)

        # Narrowing the last argument to 2 must change the fingerprint, but
        # not make it completely dissimilar.
        other_fp = rdMD.GetHashedAtomPairFingerprint(benzene, 2048, 1, 2)
        dice = DataStructs.DiceSimilarity(reference_fp, other_fp)
        self.assertTrue(0.0 < dice < 1.0)

        # A different molecule is partially similar to the reference.
        pyridine = Chem.MolFromSmiles('c1ccccn1')
        other_fp = rdMD.GetHashedAtomPairFingerprint(pyridine, 2048)
        dice = DataStructs.DiceSimilarity(reference_fp, other_fp)
        self.assertTrue(0.0 < dice < 1.0)

        # Same comparison using the bit-vector flavour of the fingerprint.
        benzene = Chem.MolFromSmiles('c1ccccc1')
        reference_fp = rdMD.GetHashedAtomPairFingerprintAsBitVect(benzene, 2048)
        pyridine = Chem.MolFromSmiles('c1ccccn1')
        other_fp = rdMD.GetHashedAtomPairFingerprintAsBitVect(pyridine, 2048)
        dice = DataStructs.DiceSimilarity(reference_fp, other_fp)
        self.assertTrue(0.0 < dice < 1.0)
Exemplo n.º 6
0
 def featurize(self, x):
     """Turn ``x`` into a hashed atom-pair fingerprint.

     When ``self.input_type`` is ``'smiles'``, ``x`` is always parsed as a
     SMILES string. When it is ``'any'``, ``x`` is parsed only if it is not
     already an RDKit ``Mol``. For any other ``input_type`` ``x`` is used
     as given.

     Returns:
         If ``self.counting`` is truthy, the result of ``count_fp`` applied
         to the count fingerprint of size ``self.n_bits``; otherwise the
         bit-vector fingerprint expanded into a list.

     Raises:
         ValueError: if SMILES parsing yields ``None``.
     """
     mode = self.input_type
     must_parse = mode == 'smiles' or (
         mode == 'any' and not isinstance(x, Chem.rdchem.Mol))
     if must_parse:
         # Keep the raw input around for the error message.
         smiles = x
         x = Chem.MolFromSmiles(smiles)
         if x is None:
             raise ValueError('cannot convert Mol from SMILES %s' % smiles)

     if not self.counting:
         bit_vect = rdMol.GetHashedAtomPairFingerprintAsBitVect(
             x, nBits=self.n_bits, nBitsPerEntry=self.bit_per_entry)
         return list(bit_vect)

     count_vect = rdMol.GetHashedAtomPairFingerprint(x, nBits=self.n_bits)
     return count_fp(count_vect, dim=self.n_bits)
Exemplo n.º 7
0
  Parameters:
    probeMol -- the probe molecule
    fpFunction -- the fingerprint function
    predictionFunction -- the prediction function of the ML model
    kwargs -- additional arguments for drawing
  """
  weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction)
  weights, maxWeight = GetStandardizedWeights(weights)
  fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs)
  return fig, maxWeight
  

# Dispatch table from atom-pair fingerprint type to the function computing it.
# Every entry takes the same six positional arguments
# (m, bits, minl, maxl, bpe, ia) so callers can treat them uniformly:
#   'normal' ignores bits and bpe, 'hashed' ignores bpe, 'bv' uses all of them.
apDict = {
    'normal': lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(
        m, minLength=minl, maxLength=maxl, ignoreAtoms=ia),
    'hashed': lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprint(
        m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia),
    'bv': lambda m, bits, minl, maxl, bpe, ia: rdMD.GetHashedAtomPairFingerprintAsBitVect(
        m, nBits=bits, minLength=minl, maxLength=maxl, nBitsPerEntry=bpe,
        ignoreAtoms=ia),
}

# usage:   lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry)
def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4):
  """
  Calculates the atom pairs fingerprint with the pairs involving atomId removed.

  Parameters:
    mol -- the molecule of interest
    atomId -- the atom to remove the pairs for (if -1, no pair is removed)
    fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv')
    nBits -- the size of the bit vector (only for fpType='hashed' or 'bv')
    minLength -- the minimum path length for an atom pair
    maxLength -- the maximum path length for an atom pair
    nBitsPerEntry -- the number of bits available for each pair
Exemplo n.º 8
0
 def calculateMol(self, m, smiles, internalParsing=False):
     """Return the hashed atom-pair fingerprint of ``m`` after ``clip_sparse``.

     The fingerprint is computed with this object's ``minPathLen``,
     ``maxPathLen`` and ``nbits`` settings and handed to ``clip_sparse``
     together with ``nbits``.

     Parameters:
         m: the molecule handed to the fingerprint function.
         smiles: not used by this method.
         internalParsing: not used by this method.

     Returns:
         Whatever ``clip_sparse`` produces for the fingerprint.
     """
     fingerprint = rd.GetHashedAtomPairFingerprint(
         m,
         minLength=self.minPathLen,
         maxLength=self.maxPathLen,
         nBits=self.nbits,
     )
     return clip_sparse(fingerprint, self.nbits)