Exemplo n.º 1
0
def lipinski_rule(mol):
    """Evaluate four Lipinski-style threshold checks for ``mol``.

    Returns a list with the results of, in order:
    ``NHOHCount <= 5``, ``NOCount <= 10``, ``ExactMolWt <= 500`` and
    ``LogP('logP').run(fingerprint) <= 5``, where the fingerprint is the
    hashed atom-pair bit vector of ``mol``.
    """
    # NOTE(review): LogP('logP') is constructed on every call; presumably it
    # loads a trained model -- confirm whether that is cheap enough here.
    atom_pair_bits = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        mol)
    donors_ok = Lipinski.NHOHCount(mol) <= 5
    acceptors_ok = Lipinski.NOCount(mol) <= 10
    weight_ok = Descriptors.ExactMolWt(mol) <= 500
    logp_ok = LogP('logP').run(atom_pair_bits) <= 5
    return [donors_ok, acceptors_ok, weight_ok, logp_ok]
Exemplo n.º 2
0
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints for each molecule in Mols.

    Path lengths and the chirality flag are read from
    OptionsInfo["FingerprintsParams"]["AtomPairs"]. When
    OptionsInfo["GenerateBitVectFingerints"] is true, hashed bit-vector
    fingerprints (2048 bits, 4 bits per entry) are produced; otherwise the
    unhashed atom-pair fingerprints are returned.

    Returns a list with one fingerprint per molecule, in input order.
    """

    MiscUtil.PrintInfo("\nGenerating AtomPairs fingerprints...")

    Params = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments shared by both fingerprint flavours.
    CommonArgs = {
        "minLength": Params["MinLength"],
        "maxLength": Params["MaxLength"],
        "includeChirality": Params["UseChirality"],
    }

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate IntSparseIntVect fingerprints...
        return [
            rdMolDescriptors.GetAtomPairFingerprint(Mol, **CommonArgs)
            for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    FPSize = 2048
    BitsPerHash = 4
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=FPSize, nBitsPerEntry=BitsPerHash, **CommonArgs)
        for Mol in Mols
    ]
Exemplo n.º 3
0
 def __init__(self, fp_type, fp_bits=2048):
     """
     Build the table of supported fingerprint generators and select one.

     Each ``fp_dict`` entry is a two-item list: a callable that takes a
     molecule and returns its fingerprint, and the fingerprint length in bits.

     :param fp_type: fingerprint type; one of the ``fp_dict`` keys
         ('morgan2', 'morgan3', 'ap', 'rdk5')
     :param fp_bits: number of fingerprint bits
     :raises SystemExit: with status 1 when ``fp_type`` is not supported
     """
     self.fp_type = fp_type
     self.fp_dict = {}
     self.fp_dict['morgan2'] = [
         lambda m: rdmd.GetMorganFingerprintAsBitVect(m, 2, nBits=fp_bits),
         fp_bits
     ]
     self.fp_dict['morgan3'] = [
         lambda m: rdmd.GetMorganFingerprintAsBitVect(m, 3, nBits=fp_bits),
         fp_bits
     ]
     self.fp_dict['ap'] = [
         lambda m: rdmd.GetHashedAtomPairFingerprintAsBitVect(
             m, nBits=fp_bits), fp_bits
     ]
     self.fp_dict['rdk5'] = [
         lambda m: Chem.RDKFingerprint(
             m, maxPath=5, fpSize=fp_bits, nBitsPerHash=2), fp_bits
     ]
     if fp_type not in self.fp_dict:
         # An unsupported type is an error: report it on stderr and exit with
         # a non-zero status so calling scripts can detect the failure.
         sys.stderr.write("invalid fingerprint type: %s\n" % fp_type)
         sys.exit(1)
     self.fp_function = self.fp_dict[fp_type]
Exemplo n.º 4
0
def pka_similarities(smile, mol_set, n):
    """Return a flat array of ``[similarity, value]`` pairs for ``smile``.

    The query SMILES is converted to a hashed atom-pair bit vector and
    compared (Dice similarity) with the fingerprint stored at index 2 of
    every entry of ``mol_set``; the entry's index-1 item is carried along.
    The pairs are sorted in ascending order and the first ``n`` are
    flattened into a 1-D numpy array.
    """
    # NOTE(review): ascending sort + [:n] keeps the n *lowest* similarity
    # rows -- confirm this is what the downstream model expects.
    query_fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        Chem.MolFromSmiles(smile))
    scored = [[DataStructs.DiceSimilarity(query_fp, entry[2]), entry[1]]
              for entry in mol_set]
    scored.sort()
    return np.asarray(scored[:n]).flatten()
def get_atompairs(molecule, length=512):
    """Return the hashed atom-pair bit vector of ``molecule``.

    ``length`` is passed as ``nBits``. If the fingerprint call raises, the
    exception and the offending molecule are printed and ``np.nan`` is
    returned instead (best-effort behaviour).
    """
    try:
        return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            molecule, nBits=length)
    except Exception as e:
        # Deliberate catch-all: report the failure and fall back to NaN.
        print(e)
        print("error" + str(molecule))
        return np.nan
Exemplo n.º 6
0
    def testHashedAtomPairs(self):
        """Check hashed atom-pair fingerprints (count and bit-vector forms).

        Benzene fingerprints with the default and an explicit (1, 3) length
        window must be equal; narrowing the window to (1, 2), or switching to
        pyridine, must give a Dice similarity strictly between 0 and 1.
        """
        mol = Chem.MolFromSmiles('c1ccccc1')
        reference = rdMD.GetHashedAtomPairFingerprint(mol, 2048)
        other = rdMD.GetHashedAtomPairFingerprint(mol, 2048, 1, 3)
        self.assertTrue(reference == other)
        other = rdMD.GetHashedAtomPairFingerprint(mol, 2048, 1, 2)
        dice = DataStructs.DiceSimilarity(reference, other)
        self.assertTrue(0.0 < dice < 1.0)

        # Count-based fingerprints: benzene vs. pyridine.
        mol = Chem.MolFromSmiles('c1ccccn1')
        other = rdMD.GetHashedAtomPairFingerprint(mol, 2048)
        dice = DataStructs.DiceSimilarity(reference, other)
        self.assertTrue(0.0 < dice < 1.0)

        # Bit-vector fingerprints: benzene vs. pyridine.
        mol = Chem.MolFromSmiles('c1ccccc1')
        reference = rdMD.GetHashedAtomPairFingerprintAsBitVect(mol, 2048)
        mol = Chem.MolFromSmiles('c1ccccn1')
        other = rdMD.GetHashedAtomPairFingerprintAsBitVect(mol, 2048)
        dice = DataStructs.DiceSimilarity(reference, other)
        self.assertTrue(0.0 < dice < 1.0)
Exemplo n.º 7
0
 def featurize(self, x):
     """Return the hashed atom-pair bit vector of ``x`` as a list.

     With ``input_type == 'smiles'`` the input is always parsed as SMILES;
     with ``input_type == 'any'`` it is parsed only when it is not already a
     ``Chem.rdchem.Mol``. A ``ValueError`` is raised when parsing fails.
     The fingerprint length is ``self.n_bits``.
     """
     must_parse = self.input_type == 'smiles' or (
         self.input_type == 'any' and not isinstance(x, Chem.rdchem.Mol))
     if must_parse:
         smiles = x
         x = Chem.MolFromSmiles(smiles)
         if x is None:
             raise ValueError('can not convert Mol from SMILES %s' % smiles)
     bit_vect = rdMol.GetHashedAtomPairFingerprintAsBitVect(
         x, nBits=self.n_bits)
     return list(bit_vect)
Exemplo n.º 8
0
    def _encode(smi: str, fingerprint: str, radius: int,
                length: int) -> T_comp:
        """Encode a SMILES string as the requested fingerprint.

        Fingerprint functions must be wrapped in a static function
        so that they may be pickled for parallel processing.

        Parameters
        ----------
        smi : str
            the SMILES string of the molecule to encode
        fingerprint : str
            the type of fingerprint to generate; one of 'morgan', 'pair',
            'rdkit', 'maccs' or 'map4'
        radius : int
            the radius of the fingerprint (for 'pair' and 'rdkit' the
            maximum path length is ``1 + radius``)
        length : int
            the length of the fingerprint

        Returns
        -------
        T_comp
            the compressed feature representation of the molecule

        Raises
        ------
        NotImplementedError
            if ``fingerprint`` is not one of the supported names
        """
        mol = Chem.MolFromSmiles(smi)
        if fingerprint == 'morgan':
            return rdmd.GetMorganFingerprintAsBitVect(mol,
                                                      radius=radius,
                                                      nBits=length,
                                                      useChirality=True)

        if fingerprint == 'pair':
            return rdmd.GetHashedAtomPairFingerprintAsBitVect(
                mol, minLength=1, maxLength=1 + radius, nBits=length)

        if fingerprint == 'rdkit':
            # RDKFingerprint is exposed on rdkit.Chem (rdmolops), not on
            # rdMolDescriptors, so it must be reached through ``Chem``.
            return Chem.RDKFingerprint(mol,
                                       minPath=1,
                                       maxPath=1 + radius,
                                       fpSize=length)

        if fingerprint == 'maccs':
            return rdmd.GetMACCSKeysFingerprint(mol)

        if fingerprint == 'map4':
            return map4.MAP4Calculator(dimensions=length,
                                       radius=radius,
                                       is_folded=True).calculate(mol)

        raise NotImplementedError(f'Unrecognized fingerprint: "{fingerprint}"')
Exemplo n.º 9
0
    def predict(self, mol, selected_descriptors):
        """Compute the requested properties of ``mol``.

        ``selected_descriptors`` may contain 'logP', 'sol', 'mp', 'pka' and
        'mol_wt'; other entries are ignored. Only the requested keys appear
        in the returned dict, except that 'sol' also yields 'logP' and 'mp'
        also yields 'logP' and 'sol', because those values are intermediate
        inputs of the later models.
        """
        requested = list(selected_descriptors)
        # Later models consume the outputs of earlier ones, so a request
        # for 'mp' implies 'sol', and 'sol' implies 'logP'.
        want_mp = 'mp' in requested
        want_sol = want_mp or 'sol' in requested
        want_logp = want_sol or 'logP' in requested
        want_pka = 'pka' in requested
        want_weight = 'mol_wt' in requested

        results = {}
        fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

        if want_logp:
            logP = self.logP_model.run(fp)
            results['logP'] = logP

        if want_sol:
            logP_sol = self.logP_solubility_model.run(logP)
            atom_pair_sol = self.atom_pair_sol_model.run(fp)
            combined_sol = self.combined_model.run(mol, logP,
                                                   logP_sol, atom_pair_sol)
            results['sol'] = logs_to_mg_ml(combined_sol, mol)

        if want_mp:
            results['mp'] = self.melting_point_model.run(combined_sol, logP)

        if want_pka:
            # The pKa model takes the three fingerprints joined with ``+``.
            avalon_fp = GetAvalonFP(mol)
            maccs_fp = MACCSkeys.GenMACCSKeys(mol)
            results['pka'] = self.pKa_model.run(avalon_fp + maccs_fp + fp)

        if want_weight:
            results['mol_wt'] = rdMolDescriptors.CalcExactMolWt(mol)

        return results
Exemplo n.º 10
0
 def featurize(self, x):
     """Return the hashed atom-pair fingerprint of ``x``.

     With ``input_type == 'smiles'`` the input is always parsed as SMILES;
     with ``input_type == 'any'`` it is parsed only when it is not already a
     ``Chem.rdchem.Mol``. A ``ValueError`` is raised when parsing fails.

     When ``self.counting`` is true the count fingerprint is passed through
     ``count_fp``; otherwise the bit vector (``self.bit_per_entry`` bits per
     entry) is returned as a list. Both use ``self.n_bits`` as the size.
     """
     must_parse = self.input_type == 'smiles' or (
         self.input_type == 'any' and not isinstance(x, Chem.rdchem.Mol))
     if must_parse:
         smiles = x
         x = Chem.MolFromSmiles(smiles)
         if x is None:
             raise ValueError('cannot convert Mol from SMILES %s' % smiles)

     if not self.counting:
         bit_vect = rdMol.GetHashedAtomPairFingerprintAsBitVect(
             x, nBits=self.n_bits, nBitsPerEntry=self.bit_per_entry)
         return list(bit_vect)

     counts = rdMol.GetHashedAtomPairFingerprint(x, nBits=self.n_bits)
     return count_fp(counts, dim=self.n_bits)
Exemplo n.º 11
0
  def testAtomPairOptions(self):
    """Check the ``atomInvariants`` option of four fingerprint generators.

    For each generator, benzene and pyridine must differ by default, become
    equal when both use the same constant invariants, and differ again when
    the two molecules use different constant invariants.
    """
    benzene = Chem.MolFromSmiles('c1ccccc1')
    pyridine = Chem.MolFromSmiles('c1ccccn1')

    generator_names = (
      'GetAtomPairFingerprint',
      'GetHashedAtomPairFingerprintAsBitVect',
      'GetTopologicalTorsionFingerprint',
      'GetHashedTopologicalTorsionFingerprintAsBitVect',
    )

    for generator_name in generator_names:
      # Looked up inside the loop so a missing generator only fails when
      # its own checks are reached.
      make_fp = getattr(rdMD, generator_name)

      self.assertNotEqual(make_fp(benzene), make_fp(pyridine))

      self.assertEqual(make_fp(benzene, atomInvariants=[1]*6),
                       make_fp(pyridine, atomInvariants=[1]*6))

      self.assertNotEqual(make_fp(benzene, atomInvariants=[1]*6),
                          make_fp(pyridine, atomInvariants=[2]*6))
Exemplo n.º 12
0
def GenerateAtomPairsFingerprints(Mols):
    """Generate AtomPairs fingerprints for each molecule in Mols.

    All parameters are read from
    OptionsInfo["FingerprintsParams"]["AtomPairs"]. Hashed bit-vector
    fingerprints are generated when OptionsInfo["SpecifiedFingerprintsType"]
    matches "BitVect" (case-insensitive); otherwise the unhashed atom-pair
    fingerprints are returned.

    Returns a list with one fingerprint per molecule, in input order.
    """

    FPType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo("\nGenerating AtomPairs %s fingerprints..." % FPType)

    Params = OptionsInfo["FingerprintsParams"]["AtomPairs"]
    # Keyword arguments shared by both fingerprint flavours.
    CommonArgs = {
        "minLength": Params["MinLength"],
        "maxLength": Params["MaxLength"],
        "includeChirality": Params["UseChirality"],
    }
    FPSize = Params["FPSize"]
    BitsPerHash = Params["BitsPerHash"]

    if not re.match("^BitVect$", OptionsInfo["SpecifiedFingerprintsType"], re.I):
        # Generate IntSparseIntVect fingerprints...
        return [rdMolDescriptors.GetAtomPairFingerprint(Mol, **CommonArgs)
                for Mol in Mols]

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash))
    return [
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            Mol, nBits=FPSize, nBitsPerEntry=BitsPerHash, **CommonArgs)
        for Mol in Mols
    ]
Exemplo n.º 13
0
 def __init__(self, fp_type_list, num_fp_bits=1024):
     """
     Register the available feature generators and select the requested ones.

     Each ``fp_dict`` entry is a two-item list: a callable taking a molecule,
     and the feature length (``-1`` for the descriptor set).

     :param fp_type_list: fingerprint types to use, in order; each must be a
         key of ``fp_dict``, otherwise the process exits with status 1
     :param num_fp_bits: number of bits for the bit-vector fingerprints
     """
     self.num_fp_bits = num_fp_bits
     self.fp_function_list = []
     self.fp_type_list = fp_type_list
     self.des_names = [entry[0] for entry in Descriptors._descList]
     des_calculator = MoleculeDescriptors.MolecularDescriptorCalculator(
         self.des_names)
     # The lambdas read self.num_fp_bits when they are called.
     self.fp_dict = {
         'descriptors': [
             lambda m: des_calculator.CalcDescriptors(m), -1
         ],
         'morgan2': [
             lambda m: rdmd.GetMorganFingerprintAsBitVect(
                 m, 2, nBits=self.num_fp_bits), self.num_fp_bits
         ],
         'morgan3': [
             lambda m: rdmd.GetMorganFingerprintAsBitVect(
                 m, 3, nBits=self.num_fp_bits), self.num_fp_bits
         ],
         'ap': [
             lambda m: rdmd.GetHashedAtomPairFingerprintAsBitVect(
                 m, nBits=self.num_fp_bits), self.num_fp_bits
         ],
         'rdk5': [
             lambda m: Chem.RDKFingerprint(
                 m, maxPath=5, fpSize=self.num_fp_bits, nBitsPerHash=2),
             self.num_fp_bits
         ],
     }
     self.fp_names = []
     for requested in fp_type_list:
         entry = self.fp_dict.get(requested)
         if not entry:
             print("invalid fingerprint type: %s" % requested)
             sys.exit(1)
         self.fp_function_list.append(entry)
         if requested == "descriptors":
             self.fp_names += self.des_names
         else:
             self.fp_names += self.get_names(self.num_fp_bits)
Exemplo n.º 14
0
from chemical_models import AtomPairSolubility, LogP, LogPSolubility

# Sub-models whose outputs become the feature columns of X.
logP_model = LogP('logP')
logP_solubility_model = LogPSolubility('logS_logP')
atom_pair_sol_model = AtomPairSolubility('water_solubility')

# SMARTS pattern for aromatic atoms, compiled once instead of per molecule.
aromatic_atom = Chem.MolFromSmarts('[a]')

# X: one [logP_sol, atom_pair_sol, esol] row per molecule.
# Y: the value from the second column of the data file.
X = []
Y = []

# Each line is "<SMILES> <value>"; the context manager closes the file even
# if a line fails to parse.
with open('data/water_solubility/aqsol.txt', 'r') as data:
    for line in data:
        if not line.strip():
            # Skip blank lines (e.g. a trailing empty line).
            continue
        split = line.split(' ')

        mol = Chem.MolFromSmiles(split[0])
        fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            mol)

        logP = logP_model.run(fingerprint)
        logP_sol = logP_solubility_model.run(logP)
        atom_pair_sol = atom_pair_sol_model.run(fingerprint)

        # Additional ESOL empirical model to increase accuracy
        mw = Descriptors.ExactMolWt(mol)
        rb = rdMolDescriptors.CalcNumRotatableBonds(mol)
        ap = len(mol.GetSubstructMatches(
            aromatic_atom)) / mol.GetNumHeavyAtoms()
        esol = 0.16 - 0.63 * logP - 0.0062 * mw + 0.066 * rb - 0.74 * ap

        X.append([logP_sol, atom_pair_sol, esol])
        # float() ignores surrounding whitespace, so the trailing newline
        # needs no slicing; slicing dropped a digit whenever the field was
        # not newline-terminated.
        Y.append(float(split[1]))
Exemplo n.º 15
0
# Usage: python3 run_model.py model_name.pkl scaler_name.pkl SMILE
import pickle, sys
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors

# SECURITY: pickle.load executes arbitrary code from the file; only pass
# model/scaler files that come from a trusted source.
with open(sys.argv[1], 'rb') as model_file:
    model = pickle.load(model_file)
with open(sys.argv[2], 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

molecule = Chem.MolFromSmiles(sys.argv[3])
if molecule is None:
    # MolFromSmiles returns None for unparsable input; stop with a clear
    # message instead of failing inside the fingerprint call.
    sys.exit("invalid SMILES: %s" % sys.argv[3])

# Featurize as a hashed atom-pair bit vector, scale it as a single-row
# matrix and print the model's prediction.
compound = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(molecule)
print(model.predict(scaler.transform(np.asarray(compound).reshape(1, -1))))
Exemplo n.º 16
0
# Simple benzodiazepine classfication model based on similarity
#   to other benzodiazepines
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit import DataStructs
from sklearn.model_selection import train_test_split

# (fingerprint, molecule) tuples, one per non-blank line of the data file.
molecules = []

with open('data/benzodiazepine_activator/total_smiles.txt', 'r') as data:
    for line in data:
        # strip() removes the line terminator without eating the last
        # character of a final line that has no trailing newline.
        smiles = line.strip()
        if not smiles:
            continue
        compound = Chem.MolFromSmiles(smiles)
        molecules.append(
            (rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(compound),
             compound))


def model(mol, active_mols):
    """Return the highest Dice similarity between ``mol`` and any active.

    ``mol`` and every entry of ``active_mols`` are indexed at ``[0]`` to get
    the fingerprint to compare. Raises ``ValueError`` when ``active_mols``
    is empty.
    """
    return max([
        DataStructs.DiceSimilarity(mol[0], candidate[0])
        for candidate in active_mols
    ])


# Hold out 10% of the (fingerprint, molecule) tuples for testing; the fixed
# random_state keeps the split reproducible between runs.
train_mols, test_mols = train_test_split(molecules,
                                         test_size=0.1,
                                         random_state=1)

for test_mol in test_mols:
Exemplo n.º 17
0
# Compute every fingerprint for the current molecule and store each one in
# fpdict under its short name. The dict literal is evaluated top to bottom,
# so the fingerprints are computed and inserted in the listed order.
# NOTE(review): the variable named `smiles` is passed straight to the
# fingerprint functions; presumably it holds an RDKit Mol -- confirm.
fpdict.update({
    'ecfp4': AllChem.GetMorganFingerprintAsBitVect(smiles, 2, nBits=nbits),
    'ecfp6': AllChem.GetMorganFingerprintAsBitVect(smiles, 3, nBits=nbits),
    'fcfp2': AllChem.GetMorganFingerprintAsBitVect(
        smiles, 1, useFeatures=True, nBits=nbits),
    'fcfp4': AllChem.GetMorganFingerprintAsBitVect(
        smiles, 2, useFeatures=True, nBits=nbits),
    'fcfp6': AllChem.GetMorganFingerprintAsBitVect(
        smiles, 3, useFeatures=True, nBits=nbits),
    'maccs': MACCSkeys.GenMACCSKeys(smiles),
    'ap': rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        smiles, nBits=nbits),
    'tt': rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
        smiles, nBits=nbits),
    'rdk5': Chem.RDKFingerprint(
        smiles, maxPath=5, fpSize=nbits, nBitsPerHash=2),
    'rdk6': Chem.RDKFingerprint(
        smiles, maxPath=6, fpSize=nbits, nBitsPerHash=2),
    'rdk7': Chem.RDKFingerprint(
        smiles, maxPath=7, fpSize=nbits, nBitsPerHash=2),
})
Exemplo n.º 18
0
# Creates a png of a list of SMILES
# Sorts the molecules by taking first molecule and then arranging the rest
#   from most to least similar to that molecule
from rdkit.Chem import Draw
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import rdMolDescriptors

# (fingerprint, molecule) tuples, one per non-blank line of the data file.
molecules = []

with open('data/benzodiazepine_activator/total_smiles.txt', 'r') as data:
    for line in data:
        # strip() removes the line terminator without eating the last
        # character of a final line that has no trailing newline.
        smiles = line.strip()
        if not smiles:
            continue
        mol = Chem.MolFromSmiles(smiles)
        combined = (
            rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol), mol)
        molecules.append(combined)

# The first molecule is the reference; the remaining ones are ordered from
# most to least similar to it, hence reverse=True (sorted() is ascending by
# default, which would give least-to-most).
reference_fp = molecules[0][0]
sorted_mols = sorted(
    molecules[1:],
    key=lambda x: DataStructs.DiceSimilarity(reference_fp, x[0]),
    reverse=True)

img = Draw.MolsToGridImage([x[1] for x in sorted_mols],
                           molsPerRow=10,
                           subImgSize=(200, 200))
img.save('molecules.png')
import rdkit
from rdkit.Chem import AllChem
from rdkit.Chem import MACCSkeys
from rdkit.Chem import rdMolDescriptors
import sys
# Read molecules from the comma-delimited file named by the first command
# line argument; the first line of that file is treated as a title line.
smi = Chem.SmilesMolSupplier(sys.argv[1], delimiter=',', titleLine=True)
# Morgan bit vectors (radius 2, 1024 bits, bond types ignored), one per
# molecule of the supplier.
fps = [
    AllChem.GetMorganFingerprintAsBitVect(x, 2, useBondTypes=False, nBits=1024)
    for x in smi
]  ### ECFP4
# Same as above with radius 1.
fps2 = [
    AllChem.GetMorganFingerprintAsBitVect(x, 1, useBondTypes=False, nBits=1024)
    for x in smi
]  ### ECFP2
# MACCS keys per molecule.
maccs = [MACCSkeys.GenMACCSKeys(x) for x in smi]
# Hashed atom-pair bit vectors (default size) per molecule; these feed the
# "DL" column of the output.
dl = [rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(x) for x in smi]
# CSV header for the pairwise rows printed by the loop that follows.
print("D1,D2,ECFP4,ECFP2,MACCS,DL,AVG,Weighted")
# NOTE(review): `seen` is not used in the visible part of this script.
seen = []
for i in range(len(fps)):
    d1 = smi[i].GetProp('_Name')
    for ii in range(len(fps)):
        d2 = smi[ii].GetProp('_Name')
        dist = DataStructs.FingerprintSimilarity(fps[i], fps[ii])
        dist2 = DataStructs.FingerprintSimilarity(fps2[i], fps2[ii])
        distMACCS = DataStructs.FingerprintSimilarity(maccs[i], maccs[ii])
        distDL = DataStructs.FingerprintSimilarity(dl[i], dl[ii])
        weightedavg = dist * .3 + dist2 * .3 + distDL * .3 + distMACCS * .1
        avg = (dist + dist2 + distDL + distMACCS) / 4
        print(
            str(d1) + "," + str(d2) + "," + str(dist) + "," + str(dist2) +
            "," + str(distMACCS) + "," + str(distDL) + "," + str(avg) + "," +
Exemplo n.º 20
0
# Fingerprint name -> callable taking a molecule. The module-level names
# (nbits, longbits, calc, get_tpatf, ...) are resolved when a callable is
# invoked, not when this table is built.
fpFunc_dict = {
    'ecfp0': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 0, nBits=nbits),
    'ecfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, nBits=nbits),
    'ecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=nbits),
    'ecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=nbits),
    'fcfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 1, useFeatures=True, nBits=nbits),
    'fcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 2, useFeatures=True, nBits=nbits),
    'fcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 3, useFeatures=True, nBits=nbits),
    'lecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 2, nBits=longbits),
    'lecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 3, nBits=longbits),
    'lfcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 2, useFeatures=True, nBits=longbits),
    'lfcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 3, useFeatures=True, nBits=longbits),
    'maccs': lambda m: MACCSkeys.GenMACCSKeys(m),
    'hashap': lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        m, nBits=nbits),
    'hashtt': lambda m: (
        rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            m, nBits=nbits)),
    'avalon': lambda m: fpAvalon.GetAvalonFP(m, nbits),
    'laval': lambda m: fpAvalon.GetAvalonFP(m, longbits),
    'rdk5': lambda m: Chem.RDKFingerprint(
        m, maxPath=5, fpSize=nbits, nBitsPerHash=2),
    'rdk6': lambda m: Chem.RDKFingerprint(
        m, maxPath=6, fpSize=nbits, nBitsPerHash=2),
    'rdk7': lambda m: Chem.RDKFingerprint(
        m, maxPath=7, fpSize=nbits, nBitsPerHash=2),
    'tpatf': lambda m: get_tpatf(m),
    'rdkDes': lambda m: calc.CalcDescriptors(m),
}

# Names of the fingerprints built with `longbits` instead of `nbits`.
long_fps = {'laval', 'lecfp4', 'lecfp6', 'lfcfp4', 'lfcfp6'}
fps_to_generate = [
    'fcfp4', 'rdkDes', 'tpatf', 'rdk5', 'hashap', 'avalon', 'laval', 'rdk7',
]

ModFileName_LoadedModel_dict = {}

Exemplo n.º 21
0
  Parameters:
    probeMol -- the probe molecule
    fpFunction -- the fingerprint function
    predictionFunction -- the prediction function of the ML model
    kwargs -- additional arguments for drawing
  """
  weights = GetAtomicWeightsForModel(probeMol, fpFunction, predictionFunction)
  weights, maxWeight = GetStandardizedWeights(weights)
  fig = GetSimilarityMapFromWeights(probeMol, weights, **kwargs)
  return fig, maxWeight
  

# Atom-pair fingerprint flavour -> generator. Every callable takes the same
# six arguments (m, bits, minl, maxl, bpe, ia) so they are interchangeable;
# each flavour forwards only the arguments its RDKit function accepts.
apDict = {
    'normal': lambda m, bits, minl, maxl, bpe, ia: rdMD.GetAtomPairFingerprint(
        m, minLength=minl, maxLength=maxl, ignoreAtoms=ia),
    'hashed': lambda m, bits, minl, maxl, bpe, ia: (
        rdMD.GetHashedAtomPairFingerprint(
            m, nBits=bits, minLength=minl, maxLength=maxl, ignoreAtoms=ia)),
    'bv': lambda m, bits, minl, maxl, bpe, ia: (
        rdMD.GetHashedAtomPairFingerprintAsBitVect(
            m, nBits=bits, minLength=minl, maxLength=maxl,
            nBitsPerEntry=bpe, ignoreAtoms=ia)),
}

# usage:   lambda m,i: GetAPFingerprint(m, i, fpType, nBits, minLength, maxLength, nBitsPerEntry)
def GetAPFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, minLength=1, maxLength=30, nBitsPerEntry=4):
  """
  Calculates the atom pairs fingerprint with the torsions of atomId removed.

  Parameters:
    mol -- the molecule of interest
    atomId -- the atom to remove the pairs for (if -1, no pair is removed)
    fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv')
    nBits -- the size of the bit vector (only for fpType='bv')
    minLength -- the minimum path length for an atom pair
    maxLength -- the maxmimum path length for an atom pair
    nBitsPerEntry -- the number of bits available for each pair
  """
# Register further fingerprint generators in fpdict. Each value is a
# callable taking a molecule; nbits / longbits / n_cas_bits are looked up
# when the callable runs, not when it is registered.
fpdict.update({
    "lecfp6": lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 3, nBits=longbits),
    "lfcfp4": lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 2, useFeatures=True, nBits=longbits),
    "lfcfp6": lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 3, useFeatures=True, nBits=longbits),
    "maccs": lambda m: MACCSkeys.GenMACCSKeys(m),
    "ap": lambda m: Pairs.GetAtomPairFingerprint(m),
    "tt": lambda m: Torsions.GetTopologicalTorsionFingerprintAsIntVect(m),
    "hashap": lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        m, nBits=nbits),
    "hashap_cas_length": lambda m: (
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            m, nBits=n_cas_bits)),
    "hashtt": lambda m: (
        rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            m, nBits=nbits)),
})
fpdict[
    "hashtt_cas_length"
] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
    m, nBits=n_cas_bits
Exemplo n.º 23
0
"""
Predict the pKa of an acid from SMILES string
Returns both the fingerprint model prediction and similarity model predictions
"""
from chemical_models import AcidSimilarity, AcidpKa
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors, MACCSkeys
from rdkit.Avalon.pyAvalonTools import GetAvalonFP
import sys

# Load models
sim_model = AcidSimilarity('acid_sim')
fp_model = AcidpKa('pKa_acid')

mol = Chem.MolFromSmiles(sys.argv[1])
if mol is None:
    # MolFromSmiles returns None for unparsable input; stop with a clear
    # message instead of failing inside a fingerprint call later.
    sys.exit("invalid SMILES: %s" % sys.argv[1])

# Set of acids required for similarity model: one
# [SMILES, value, atom-pair fingerprint] entry per data line.
acids = []

# Read acids from file; the context manager closes it even on a parse error.
with open('data/pKa/formatted_acidic.txt', 'r') as acid_data:
    for line in acid_data:
        if not line.strip():
            # Skip blank lines (e.g. a trailing empty line).
            continue
        split = line.split(' ')
        # float() ignores surrounding whitespace, so the trailing newline
        # needs no slicing; slicing dropped a digit whenever the field was
        # not newline-terminated.
        acids.append([split[0], float(split[1]),
                      rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
                          Chem.MolFromSmiles(split[0]))])

# Run the models and print results
print("Similarity based model: " + str(sim_model.run(sys.argv[1], acids)))
print("Fingerprint based model: " + str(fp_model.run(GetAvalonFP(mol) + MACCSkeys.GenMACCSKeys(mol) +
                                                     rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol))))
Exemplo n.º 24
0
 def pair_fingerprinter(mol):
     """Return the hashed atom-pair bit vector of ``mol`` via _fp_to_bytes.

     ``fpSize``, ``minLength`` and ``maxLength`` come from the enclosing
     scope.
     """
     return _fp_to_bytes(
         rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
             mol, nBits=fpSize, minLength=minLength, maxLength=maxLength))
Exemplo n.º 25
0
import numpy, cPickle
from rdkit import Chem, DataStructs
from rdkit.Chem import rdMolDescriptors as rdmd

# global variables
num_act, num_dcy = 1528, 293606

num_rep, num_percent = 50, 0.1

# fingerprint dictionary: name -> callable taking a molecule
fp_dict = {
    'morgan2': lambda m: rdmd.GetMorganFingerprintAsBitVect(m, 2, nBits=1024),
    'ap': lambda m: rdmd.GetHashedAtomPairFingerprintAsBitVect(m, nBits=2048),
    'rdk5': lambda m: Chem.RDKFingerprint(
        m, maxPath=5, fpSize=2048, nBitsPerHash=2),
}


def getNumpyFP(smiles, fpname, fptype):
    m = Chem.MolFromSmiles(smiles)
    if m is not None:
        # calculate fingerprint
        fp = fp_dict[fpname](m)
        # convert to numpy array
        if fptype == 'bool':
            arr = numpy.zeros((1, ), numpy.bool)
        elif fptype == 'float':
            arr = numpy.zeros((1, ), numpy.float32)
        else: