示例#1
0
    def compute_tsne(self):
        """Embed the molecules of ``self.Database2`` in 2D with t-SNE.

        Atom-pair bit-vector fingerprints are built from the ``SMILES``
        column, expanded into a full symmetric similarity matrix and turned
        into distances (``1 - similarity``). That distance matrix is what is
        passed to ``TSNE.fit_transform``.

        The result (columns ``PC1``/``PC2`` plus ``LIBRARY``, ``SMILES`` and
        ``NAME``) is stored in ``self.tsne_result``, indexed by ``TIPO``,
        which is a copy of ``LIBRARY``. Nothing is returned.
        """
        database = self.Database2
        molecules = [Chem.MolFromSmiles(s) for s in list(database["SMILES"])]
        fingerprints = [
            Pairs.GetAtomPairFingerprintAsBitVect(m) for m in molecules
        ]
        # NOTE(review): GetTanimotoSimMat is assumed to return the strict
        # lower triangle in the same order np.tril_indices enumerates it.
        lower_triangle = GetTanimotoSimMat(fingerprints)

        count = len(fingerprints)
        similarity = np.ones([count, count])  # diagonal stays at 1.0
        below = np.tril_indices(n=count, m=count, k=-1)
        above = np.triu_indices(n=count, m=count, k=1)
        similarity[below] = lower_triangle
        # Mirror the lower triangle into the upper one.
        similarity[above] = similarity.T[above]
        distance = 1 - similarity

        embedder = TSNE(n_components=2,
                        init='pca',
                        random_state=1992,
                        angle=0.3,
                        perplexity=self.perplexity)
        coordinates = embedder.fit_transform(distance)

        frame = pd.DataFrame(data=coordinates, columns=["PC1", "PC2"])
        # list(...) drops the source index so rows are matched by position.
        library_labels = list(database.LIBRARY)
        frame["LIBRARY"] = library_labels
        frame["TIPO"] = list(database.LIBRARY)
        frame["SMILES"] = list(database.SMILES)
        frame["NAME"] = list(database.NAME)
        self.tsne_result = frame.set_index('TIPO')
示例#2
0
    def compute_pca(self):
        """Project the molecules of ``self.Database2`` to 2D with PCA.

        Atom-pair bit-vector fingerprints are built from the ``SMILES``
        column and expanded into a full symmetric similarity matrix, which
        is then fitted and transformed with a two-component whitened PCA.

        Side effects:
            ``self.pca_result`` -- the projection indexed by ``TIPO``
            (a copy of ``LIBRARY``).
            ``self.a`` / ``self.b`` -- explained variance of the first and
            second component, in percent, rounded to two decimals.

        Returns:
            The projection as a DataFrame with columns ``PC1``, ``PC2``,
            ``LIBRARY``, ``TIPO``, ``SMILES`` and ``NAME`` (``TIPO`` is
            still a regular column in the returned frame).
        """
        Database = self.Database2
        smiles = list(Database.SMILES)
        smi = [Chem.MolFromSmiles(x) for x in smiles]
        fps = [Pairs.GetAtomPairFingerprintAsBitVect(x) for x in smi]
        # NOTE(review): GetTanimotoSimMat is assumed to return the strict
        # lower triangle in the same order np.tril_indices enumerates it.
        tanimoto_sim_mat_lower_triangle = GetTanimotoSimMat(fps)
        n_mol = len(fps)
        similarity_matrix = np.ones([n_mol, n_mol])  # diagonal stays at 1.0
        i_lower = np.tril_indices(n=n_mol, m=n_mol, k=-1)
        i_upper = np.triu_indices(n=n_mol, m=n_mol, k=1)
        similarity_matrix[i_lower] = tanimoto_sim_mat_lower_triangle
        # Mirror the lower triangle into the upper one.
        similarity_matrix[i_upper] = similarity_matrix.T[i_upper]

        sklearn_pca = sklearn.decomposition.PCA(n_components=2,
                                                svd_solver="full",
                                                whiten=True)
        sklearn_pca.fit(similarity_matrix)
        pca_result = pd.DataFrame(sklearn_pca.transform(similarity_matrix),
                                  columns=['PC1', 'PC2'])
        # Assign plain lists, not Series: a Series is aligned on its index
        # labels, so a Database2 whose index is not 0..n-1 (for example
        # after filtering) would produce NaN or misplaced metadata rows.
        pca_result["LIBRARY"] = list(Database.LIBRARY)
        pca_result["TIPO"] = list(Database.LIBRARY)
        pca_result["SMILES"] = list(Database.SMILES)
        pca_result["NAME"] = list(Database.NAME)
        self.pca_result = pca_result.set_index('TIPO')

        variance = list(sklearn_pca.explained_variance_ratio_)
        self.a = round(variance[0] * 100, 2)
        self.b = round(variance[1] * 100, 2)

        return pca_result
示例#3
0
def atom_pairs():
    """Print a short walkthrough of RDKit atom-pair fingerprints.

    Using three hard-coded example molecules, this prints in order:
    the non-zero elements of the last count fingerprint, the explanation
    of pair score 558115, the Dice similarity of the first two count
    fingerprints, and the Dice similarity of the first two bit-vector
    fingerprints. Nothing is returned.
    """
    example_smiles = ('C1CCC1OCC', 'CC(C)OCC', 'CCOCC')
    molecules = [Chem.MolFromSmiles(smiles) for smiles in example_smiles]

    # Count-based fingerprints: each element carries an occurrence count.
    count_fps = [Pairs.GetAtomPairFingerprint(mol) for mol in molecules]
    nonzero_counts = count_fps[-1].GetNonzeroElements()
    print(nonzero_counts)

    # Human-readable explanation of one element id.
    print(Pairs.ExplainPairScore(558115))

    # Dice similarity on the count fingerprints.
    first_count, second_count = count_fps[0], count_fps[1]
    print(DataStructs.DiceSimilarity(first_count, second_count))

    # Same comparison on the bit-vector form (presence only, no counts).
    bit_fps = [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in molecules]
    first_bits, second_bits = bit_fps[0], bit_fps[1]
    print(DataStructs.DiceSimilarity(first_bits, second_bits))
示例#4
0
def fingerprint_smile(smile, fp_type):
    """Fingerprint the molecule parsed from ``get_murcko_smile(smile)``.

    :param smile: SMILES string handed to ``get_murcko_smile``
    :param fp_type: ``"atom-pair"`` or ``"maccs"``; any other value selects
        a 1024-bit Morgan bit vector
    :return: the fingerprint object produced by the selected RDKit call
    """
    scaffold_mol = Chem.MolFromSmiles(get_murcko_smile(smile))

    if fp_type == "atom-pair":
        return Pairs.GetAtomPairFingerprintAsBitVect(scaffold_mol)
    if fp_type == "maccs":
        return MACCSkeys.GenMACCSKeys(scaffold_mol)

    # NOTE(review): `radius` is neither a parameter nor a local; it is
    # resolved from the enclosing/module scope at call time -- confirm it
    # is defined there.
    return AllChem.GetMorganFingerprintAsBitVect(scaffold_mol,
                                                 radius,
                                                 nBits=1024)
def fingerprint(mol, fp_type="DL"):
    """Return the fingerprint of ``mol`` selected by ``fp_type``.

    Supported values are ``"DL"``, ``"circular"`` (Morgan, radius 3,
    1024 bits), ``"MACCS"``, ``"torsions"`` and ``"pharm"``. Any other
    value yields ``None``.
    """
    # Builders are wrapped in lambdas so only the selected one is evaluated.
    builders = (
        ("DL", lambda: FingerprintMols.FingerprintMol(mol)),
        ("circular",
         lambda: AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=1024)),
        ("MACCS", lambda: MACCSkeys.GenMACCSKeys(mol)),
        # NOTE(review): "torsions" maps to the atom-pair bit vector, not a
        # topological-torsion fingerprint -- confirm this is intended.
        ("torsions", lambda: Pairs.GetAtomPairFingerprintAsBitVect(mol)),
        ("pharm",
         lambda: Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory)),
    )
    for name, build in builders:
        if fp_type == name:
            return build()
    return None
示例#6
0
def atom_fp(Library):
    """Pairwise atom-pair similarities for a 20% random sample of a library.

    The global ``random`` generator is seeded with 43, then
    ``round(len(Library) * .2)`` SMILES are sampled from ``Library``.
    Every pair of sampled molecules is compared with
    ``DataStructs.FingerprintSimilarity``.

    :param Library: sequence of SMILES strings
    :return: ``(sim, y)`` where ``sim`` is the ascending list of pairwise
        similarities and ``y`` is ``np.arange(1, len(sim) + 1) / len(sim)``
    """
    random.seed(43)
    sample_size = round(len(Library) * .2)
    sampled_smiles = random.sample(Library, sample_size)

    molecules = [Chem.MolFromSmiles(smiles) for smiles in sampled_smiles]
    fingerprints = [
        Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in molecules
    ]

    sim = sorted(
        DataStructs.FingerprintSimilarity(second, first)
        for first, second in it.combinations(fingerprints, 2)
    )
    total = len(sim)
    y = np.arange(1, total + 1) / total
    return sim, y
示例#7
0
 def calculate_atom_pair_fp(molecular_df, col):
     """
     Calculates the atom pair bit-vector fingerprint for every row.

     Each value of ``col`` is parsed with ``Chem.MolFromSmiles``. Rows for
     which parsing or fingerprinting raises get the placeholder string
     ``'N/A'`` so the new column always has one entry per row.

     :param molecular_df: pandas data frame containing molecules; it is
         modified in place (column ``atom_pair_fp`` is added/overwritten)
     :param col: name of the column holding the SMILES strings
     :return: the same data frame object, with the ``atom_pair_fp`` column
     """
     fps = []
     for _, row in molecular_df.iterrows():
         try:
             mol = Chem.MolFromSmiles(row[col])
             fp = Pairs.GetAtomPairFingerprintAsBitVect(mol)
         except Exception:
             # Best-effort per row, but unlike a bare ``except`` this lets
             # KeyboardInterrupt / SystemExit propagate.
             fp = 'N/A'
         fps.append(fp)
     molecular_df['atom_pair_fp'] = fps
     return molecular_df
示例#8
0
def Fingerprints(mols, fingerprint):
    """Compute the fingerprint called ``fingerprint`` for every molecule.

    The name is checked against the module-level collections ``indigofps``,
    ``rdkitfps``, ``rdkitnonbitfps`` and ``rdkitestatefps``, in that order.
    A name that belongs to a family but has no implementation there falls
    through to the next family.

    :param mols: iterable of molecules (Indigo molecules for Indigo
        fingerprints, RDKit molecules otherwise)
    :param fingerprint: fingerprint name
    :return: list with one fingerprint per molecule, or ``None`` when the
        name is not recognised
    """

    def _compute(table):
        # List of fingerprints for the first matching name, else None.
        for name, per_mol in table:
            if fingerprint == name:
                return [per_mol(mol) for mol in mols]
        return None

    # Indigo fingerprints are produced by the molecule object itself.
    if fingerprint in indigofps:
        return [mol.fingerprint(fingerprint) for mol in mols]

    # RDKit bit-vector fingerprints.
    if fingerprint in rdkitfps:
        result = _compute((
            ("atompair",
             lambda mol: Pairs.GetAtomPairFingerprintAsBitVect(mol)),
            ("avalon", lambda mol: pyAvalonTools.GetAvalonFP(mol)),
            ("daylight", lambda mol: Chem.RDKFingerprint(mol, fpSize=2048)),
            ("maccs", lambda mol: MACCSkeys.GenMACCSKeys(mol)),
            ("morgan",
             lambda mol: AllChem.GetMorganFingerprintAsBitVect(
                 mol, 2, nBits=1024)),
            ("pharm2d",
             lambda mol: Generate.Gen2DFingerprint(
                 mol, Gobbi_Pharm2D.factory)),
            ("topological", lambda mol: FingerprintMols.FingerprintMol(mol)),
        ))
        if result is not None:
            return result

    # RDKit non-bit (integer or float) fingerprints.
    if fingerprint in rdkitnonbitfps:
        result = _compute((
            ("sheridan", lambda mol: Sheridan.GetBPFingerprint(mol)),
            ("topotorsion",
             lambda mol: Torsions.GetTopologicalTorsionFingerprint(mol)),
        ))
        if result is not None:
            return result

    # E-state fingerprints: element 0 or 1 of the fingerprinter's result.
    if fingerprint in rdkitestatefps:
        result = _compute((
            ("estate1", lambda mol: Fingerprinter.FingerprintMol(mol)[0]),
            ("estate2", lambda mol: Fingerprinter.FingerprintMol(mol)[1]),
        ))
        if result is not None:
            return result

    # Unknown fingerprint.
    return None
def atom_pairs_fp(SMILES, Library):
    """Pairwise atom-pair similarity distribution and summary statistics.

    Every pair of molecules parsed from ``SMILES`` is compared with
    ``DataStructs.FingerprintSimilarity`` on their atom-pair bit vectors.

    :param SMILES: iterable of SMILES strings
    :param Library: label stored (as ``str``) in the ``Library`` column
    :return: dict with keys
        ``"sim"`` -- ascending list of pairwise similarities,
        ``"y"`` -- ``np.arange(1, len(sim) + 1) / len(sim)`` (y axis of the
        cumulative distribution),
        ``"df"`` -- one-row DataFrame with MIN, 1Q, MEDIAN, MEAN, 3Q, MAX,
        STD (all rounded to two decimals) and Library.

    NOTE(review): with fewer than two SMILES ``sim`` is empty and
    ``min(sim)`` raises ``ValueError``; callers must pass enough molecules.
    """
    ms = [Chem.MolFromSmiles(i) for i in SMILES]
    fp = [Pairs.GetAtomPairFingerprintAsBitVect(x) for x in ms]
    sim = [DataStructs.FingerprintSimilarity(y, x)
           for x, y in it.combinations(fp, 2)]
    sim.sort()
    y = np.arange(1, len(sim) + 1) / len(sim)

    # Every statistic uses two decimals. The previous code rounded 1Q and
    # MEDIAN to whole numbers, which collapses a similarity value to an
    # integer, and left MAX unrounded.
    stat = {
        "MIN": [round(min(sim), 2)],
        "1Q": [round(np.percentile(sim, 25), 2)],
        "MEDIAN": [round(st.median(sim), 2)],
        "MEAN": [round(st.mean(sim), 2)],
        "3Q": [round(np.percentile(sim, 75), 2)],
        "MAX": [round(max(sim), 2)],
        "STD": [round(st.stdev(sim), 2)],
        "Library": [str(Library)],
    }
    df = pd.DataFrame.from_dict(stat)
    fp_result = {"sim": sim, "y": y, "df": df}
    return fp_result
示例#10
0
def ATOMPAIRSfpDataFrame(chempandas, namecol, smicol):
    """
    Build a DataFrame of atom-pair bit-vector fingerprints.

    The SMILES in positional column ``smicol`` are parsed; rows that fail to
    parse are dropped. Each remaining molecule is named after the value in
    positional column ``namecol`` and becomes one row of the result, indexed
    by that name. Two extra columns are appended: ``SMILES`` (as written by
    ``Chem.MolToSmiles``) and ``CHEMBL`` (a copy of the index).

    The input must not have more than ``MAXLINES`` rows (checked with
    ``assert``).
    """
    assert chempandas.shape[0] <= MAXLINES
    parsed = [Chem.MolFromSmiles(smi) for smi in chempandas.iloc[:, smicol]]

    molsmi = []
    for row, mol in enumerate(parsed):
        if mol is None:
            # Unparseable SMILES: skip, but `row` still tracks the source row.
            continue
        mol.SetProp("_Name", chempandas.iloc[row, namecol])
        molsmi.append(mol)

    # ATOMPAIRS fingerprints, one per successfully parsed molecule.
    bit_vectors = [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in molsmi]
    fingerprint_matrix = np.matrix(bit_vectors)
    names = [mol.GetProp("_Name") for mol in molsmi]

    df = DataFrame(fingerprint_matrix, index=names)
    df['SMILES'] = [Chem.MolToSmiles(mol) for mol in molsmi]
    df['CHEMBL'] = df.index
    return df
示例#11
0
def _getFPSStream(f, mols, type='morgan', radius=2, n_bits=2048):
    """Write the fingerprints of ``mols`` to ``f`` in FPS text format.

    A three-line header (format marker, ``num_bits``, software version) is
    written first, followed by one ``<fingerprint>\\t<id>`` line per
    molecule. Falsy entries of ``mols`` are skipped.

    The id of a molecule is its ``chembl_id`` property when present;
    otherwise, if InChI support is available, its InChIKey; otherwise (or if
    InChIKey generation fails) its position in ``mols``.

    :param f: writable text stream
    :param mols: iterable of RDKit molecules (entries may be ``None``)
    :param type: ``'morgan'``, ``'pair'`` or ``'maccs'``
    :param radius: Morgan radius (only used for ``'morgan'``)
    :param n_bits: value written to the header and Morgan bit-vector length
    :raises ValueError: if ``type`` is not supported, raised when the first
        molecule is fingerprinted
    """
    # NOTE(review): the header always advertises ``n_bits`` although only
    # the Morgan branch uses it -- confirm the header is right for
    # 'pair'/'maccs'.
    f.write("#FPS1\n#num_bits=%s\n#software=RDKit/%s\n" %
            (n_bits, rdBase.rdkitVersion))
    for i, mol in enumerate(mols):
        if not mol:
            continue

        idx = i
        if mol.HasProp('chembl_id'):
            idx = mol.GetProp('chembl_id')
        elif Chem.INCHI_AVAILABLE:
            try:
                Chem.SanitizeMol(mol)
                idx = Chem.InchiToInchiKey(Chem.MolToInchi(mol))
            except Exception:
                # Best-effort: keep the positional index as identifier.
                # Unlike a bare ``except``, KeyboardInterrupt and SystemExit
                # still propagate.
                pass

        if type == 'morgan':
            fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
                mol, radius, nBits=n_bits)
        elif type == 'pair':
            fp = Pairs.GetAtomPairFingerprintAsBitVect(mol)
        elif type == 'maccs':
            fp = MACCSkeys.GenMACCSKeys(mol)
        else:
            # Previously `fp` was left unbound here and the write below
            # failed with an opaque UnboundLocalError.
            raise ValueError("unsupported fingerprint type: %r" % (type,))
        f.write("%s\t%s\n" % (DataStructs.BitVectToFPSText(fp), idx))
示例#12
0
def rdk_fingerprint(smi, fp_type="rdkit", size=1024, output="bit"):
    """Fingerprint a SMILES string with one of several RDKit methods.

    :param smi: SMILES string
    :param fp_type: ``"rdkit"``, ``"maccs"``, ``"TopologicalTorsion"``,
        ``"Avalon"``, ``"AtomPair"`` or ``"Morgan"`` (radius 2)
    :param size: bit count passed as ``nBits``; only used for
        ``"AtomPair"`` and ``"Morgan"``
    :param output: ``"bit"`` returns the list of on-bit indices; any other
        value returns ``np.array(fp)``
    :raises IOError: if ``fp_type`` is not one of the supported names
    """
    # Fingerprinters that are called with the molecule as only argument.
    single_argument_builders = {
        "rdkit": Chem.rdmolops.RDKFingerprint,
        "maccs": MACCSkeys.GenMACCSKeys,
        "TopologicalTorsion": Torsions.GetTopologicalTorsionFingerprint,
        "Avalon": pyAvalonTools.GetAvalonFP
    }
    mol = Chem.MolFromSmiles(smi)

    builder = single_argument_builders.get(fp_type)
    if builder is not None:
        fp = builder(mol)
    elif fp_type == "AtomPair":
        # NOTE(review): confirm the installed RDKit accepts ``nBits`` here.
        fp = Pairs.GetAtomPairFingerprintAsBitVect(mol, nBits=size)
    elif fp_type == "Morgan":
        fp = GetMorganFingerprintAsBitVect(mol, 2, nBits=size)
    else:
        raise IOError('invalid fingerprint type')

    if output != "bit":
        return np.array(fp)
    return list(fp.GetOnBits())
 def Calc_AtomPairs_Bit(self):
     """Return the atom-pair bit-vector fingerprint of every item in ``self.sd``."""
     return [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in self.sd]
def calculate_similarity_vector(smile_pair):
    """
    Build a similarity feature vector for a pair of SMILES strings.

    Both molecules are fingerprinted with a series of RDKit fingerprinters.
    For each fingerprinter the two fingerprints are compared with
    ``get_similarity_all`` or ``get_similarity_subset``, and the returned
    values are appended to one flat list, in this order:

    1. RDK topological fingerprint (``Chem.RDKFingerprint``)
    2. ``FingerprintMols.FingerprintMol``
    3. MACCS keys
    4. atom-pair bit vector
    5. topological torsions (subset metrics)
    6. Morgan, radius 2 (subset metrics)
    7. Morgan, radius 2, feature-based (subset metrics)

    :param smile_pair: two-item iterable ``(smile1, smile2)``
    :return: list of similarity values
    """
    first_smiles, second_smiles = smile_pair
    mol_a = Chem.MolFromSmiles(first_smiles)
    mol_b = Chem.MolFromSmiles(second_smiles)

    # See http://www.rdkit.org/docs/GettingStartedInPython.html for the
    # fingerprint families used below.
    from rdkit.Chem.Fingerprints import FingerprintMols
    from rdkit.Chem import MACCSkeys
    from rdkit.Chem.AtomPairs import Pairs
    from rdkit.Chem.AtomPairs import Torsions
    from rdkit.Chem import rdMolDescriptors

    def morgan(mol):
        # Circular fingerprint, radius 2, default atom invariants.
        return rdMolDescriptors.GetMorganFingerprint(mol, 2)

    def feature_morgan(mol):
        # Circular fingerprint, radius 2, feature-based invariants.
        return rdMolDescriptors.GetMorganFingerprint(mol, 2, useFeatures=True)

    # (fingerprinter, comparison) pairs; the order defines the layout of
    # the returned vector. Layered and Pattern fingerprints are not used.
    plan = (
        (Chem.RDKFingerprint, get_similarity_all),
        (FingerprintMols.FingerprintMol, get_similarity_all),
        (MACCSkeys.GenMACCSKeys, get_similarity_all),
        (Pairs.GetAtomPairFingerprintAsBitVect, get_similarity_all),
        (Torsions.GetTopologicalTorsionFingerprint, get_similarity_subset),
        (morgan, get_similarity_subset),
        (feature_morgan, get_similarity_subset),
    )

    molecule_similarity = []
    for make_fingerprint, compare in plan:
        fp_a = make_fingerprint(mol_a)
        fp_b = make_fingerprint(mol_b)
        molecule_similarity.extend(compare(fp_a, fp_b))

    return molecule_similarity
示例#15
0
 def atom_pair_fp(self):
     """Return atom-pair bit-vector fingerprints for ``self.Data.SMILES``.

     Every SMILES is parsed first; the fingerprints are then computed in
     the same order.
     """
     molecules = [Chem.MolFromSmiles(smiles) for smiles in self.Data.SMILES]
     return [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in molecules]
def atom_pairs(m):
    """Return the atom-pair bit-vector fingerprint of molecule ``m``.

    Thin wrapper around ``Pairs.GetAtomPairFingerprintAsBitVect``.
    """
    bit_fingerprint = Pairs.GetAtomPairFingerprintAsBitVect(m)
    return bit_fingerprint
示例#17
0
def FptAtomPairs(rdkmol, fptype='bit'):
    """Return the atom-pair fingerprint of ``rdkmol``.

    :param rdkmol: RDKit molecule
    :param fptype: ``'bit'`` (case-insensitive) selects the bit-vector
        form; any other string selects ``Pairs.GetAtomPairFingerprint``
    """
    wants_bit_vector = fptype.lower() == 'bit'
    if not wants_bit_vector:
        return Pairs.GetAtomPairFingerprint(rdkmol)
    return Pairs.GetAtomPairFingerprintAsBitVect(rdkmol)