Пример #1
0
def do_images(xs, molecules, conditions, samples, images_per_condition,
              output_dir):
    """Draw a random sample of molecules per condition, sorted into folders.

    For every condition index ``c`` the slice ``[c * samples, (c + 1) * samples)``
    of ``xs`` and ``molecules`` is paired up, ``images_per_condition`` pairs
    are drawn at random, and each one is rendered to
    ``c_<condition>_all_<index>.png`` inside one of three sub-directories of
    ``output_dir``:

    * ``accurate`` - sanitizes cleanly, passes ``is_valid`` and
      ``get_is_connected``;
    * ``valid``    - sanitizes cleanly and passes ``is_valid`` only;
    * ``invalid``  - everything else.

    Previously the ``invalid`` directory was created but never used: invalid
    molecules were written straight into ``output_dir``.

    Args:
        xs: Sequence sliced per condition; each element is passed to
            ``get_mol`` as its first argument.
        molecules: Sequence parallel to ``xs``; only element ``[0]`` of each
            entry is used.
        conditions: Number of conditions (consecutive slices) to process.
        samples: Number of entries per condition.
        images_per_condition: How many entries to draw for each condition.
        output_dir: Directory that receives the three sub-directories.

    Raises:
        ValueError: From ``random.sample`` when ``images_per_condition``
            exceeds the number of entries available for a condition.
    """
    for bucket in ('valid', 'accurate', 'invalid'):
        os.makedirs(os.path.join(output_dir, bucket), exist_ok=True)

    for c in range(conditions):
        start, stop = c * samples, (c + 1) * samples
        image_mols = random.sample(
            list(zip(xs[start:stop], molecules[start:stop])),
            images_per_condition)
        for i, (x, a) in enumerate(image_mols):
            m = get_mol(x, a[0])
            file_name = 'c_{}_all_{}.png'.format(c, i)
            try:
                err = Chem.rdmolops.SanitizeMol(m, catchErrors=True)
                if err == 0 and is_valid(x, a[0]):
                    bucket = ('accurate'
                              if get_is_connected(x, a[0]) else 'valid')
                else:
                    bucket = 'invalid'
                Draw.MolToImageFile(
                    m, os.path.join(output_dir, bucket, file_name))
            except ValueError:
                # Best effort: skip molecules that cannot be processed/drawn.
                continue
def converter(file_name, save_name):
    """Export the first record of an SD file as SMILES, image and descriptors.

    All records of ``file_name`` are loaded, but only the first one is
    processed (the original code returned from inside its loop on the first
    iteration; that behaviour is kept and made explicit here).

    Side effects:
        * ``<save_name>.smi`` is written with one ``SMILES<TAB>name`` line
          (the file is created empty when the SD file has no records);
        * ``<save_name>.png`` receives a 300x300 drawing of the molecule;
        * the file name, QED and logP are printed.

    Args:
        file_name: Path of the SD file to read.
        save_name: Output path stem (without extension).

    Returns:
        Tuple ``(smiles, qed, logp)`` for the first record, or ``None`` when
        the SD file yields no records. The old ``return 0`` branch guarded a
        ``try`` body that could not raise and was unreachable, so it has been
        removed.
    """
    mols = list(Chem.SDMolSupplier(file_name))

    outname = save_name + ".smi"

    # The context manager guarantees the .smi file is flushed and closed;
    # the original never closed its handle.
    with open(outname, "w") as out_file:
        if not mols:
            return None
        mol = mols[0]
        smi = Chem.MolToSmiles(mol)
        name = mol.GetProp("_Name")
        out_file.write("{}\t{}\n".format(smi, name))

    # Descriptors: QED on the molecule re-parsed from SMILES, logP on the
    # molecule as read from the SD file (same as the original).
    m = Chem.MolFromSmiles(smi)
    m_qed = Chem.QED.qed(m)
    m_LogP = round(Descriptors.MolLogP(mol), 4)

    print(file_name, end="  ")
    print("->", m_qed, m_LogP)

    Draw.MolToImageFile(m, save_name + ".png", size=(300, 300))

    # Hydrogens are added and a conformer is embedded as before; the result
    # is not used further in this function.
    m = Chem.AddHs(m)
    AllChem.EmbedMolecule(m, randomSeed=3)

    return smi, m_qed, m_LogP
Пример #3
0
import os
from rdkit import Chem
from rdkit.Chem import Draw


# get a path
def GetPath(file):
    """Return ``file`` joined onto the normalised first entry of ``sys.path``.

    Args:
        file: File name (or relative path) to place under ``sys.path[0]``.

    Returns:
        The combined path as a string.
    """
    # Imported locally: the snippet's import block only brings in ``os`` and
    # rdkit, so the original raised NameError on ``sys``.
    import sys

    path = os.path.normpath(sys.path[0])
    return os.path.join(path, file)


# SMILES strings of the example molecules drawn below.
SMILES1, SMILES2, SMILES3 = "O", "CCO", "O=C=O"
SMILES4, SMILES5, SMILES6 = "C#N", "C1CCCCC1", "CC"
SMILES7, SMILES8, SMILES9 = "C=C", "C#C", "CC(=O)OCC"

# Draw every molecule, in order, to its own JPEG next to sys.path[0].
for _smiles, _image_name in (
        (SMILES1, "水.jpg"),
        (SMILES2, "乙醇.jpg"),
        (SMILES3, "二氧化碳.jpg"),
        (SMILES4, "氰化氢.jpg"),
        (SMILES5, "环已烷.jpg"),
        (SMILES6, "乙烷.jpg"),
        (SMILES7, "乙烯.jpg"),
        (SMILES8, "乙炔.jpg"),
        (SMILES9, "乙酸乙酯.jpg"),
):
    _molecule = Chem.MolFromSmiles(_smiles)
    Draw.MolToImageFile(_molecule, GetPath(_image_name))
Пример #4
0
from rdkit import Chem
from rdkit.Chem import Draw
# Parse the SMILES string into an RDKit molecule.
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'
m = Chem.MolFromSmiles(smi)
# Write a drawing of the molecule to mol.jpg in the working directory.
Draw.MolToImageFile(m,"mol.jpg")
Пример #5
0
def plot_mol_matrix():
    """Save a drawing of one molecule and heatmaps of its atom/bond matrices.

    Files written to the current working directory:

    * ``mol.pdf`` / ``mol.png``  - structure drawing (rendered from SVG);
    * ``atom.pdf`` / ``atom.png`` - heatmap of the atom matrix;
    * ``bond<i>.pdf`` / ``bond<i>.png`` - one heatmap per entry of the bond
      data.

    Every heatmap is also displayed with ``plt.show()`` before it is saved,
    as in the original.
    """
    import cairosvg
    import seaborn as sns
    import matplotlib.pyplot as plt

    def _save_heatmap(matrix, figsize, stem):
        """Plot ``matrix`` as an unlabelled heatmap; save ``stem``.pdf/.png."""
        fig, ax = plt.subplots(figsize=figsize)
        sns.heatmap(matrix,
                    linewidths=.5,
                    ax=ax,
                    annot_kws={"size": 18},
                    cbar=False,
                    xticklabels=False,
                    yticklabels=False,
                    square=True,
                    cmap="vlag",
                    vmin=-1,
                    vmax=1,
                    linecolor='black')
        plt.show()
        fig.savefig(stem + '.pdf')
        fig.savefig(stem + '.png')
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    smiles = 'CN(C)C(=N)NC(=N)N'  #'CC(C)NC1=CC=CO1'  #'CC1=C(SC(=C1)C(=O)NCC2=NOC=C2)Br'
    # NOTE(review): smiles_to_adj is assumed to return batched data, since
    # only element [0] of each result is used - confirm against its definition.
    bond, atoms = smiles_to_adj(smiles, 'qm9')
    bond = bond[0]
    atoms = atoms[0]

    # The original first wrote 'mol.pdf' via Draw.MolToImageFile and then
    # immediately overwrote it with the cairosvg output below; only the
    # final write is kept.
    svg = Draw.MolsToGridImage([Chem.MolFromSmiles(smiles)],
                               legends=[],
                               molsPerRow=1,
                               subImgSize=(250, 250),
                               useSVG=True)
    svg_bytes = svg.encode('utf-8')
    cairosvg.svg2pdf(bytestring=svg_bytes, write_to="mol.pdf")
    cairosvg.svg2png(bytestring=svg_bytes, write_to="mol.png")

    _save_heatmap(atoms, (2, 3.4), 'atom')

    for i, x in enumerate(bond):
        _save_heatmap(x, (5, 5), 'bond{}'.format(i))
Пример #6
0
# Render the reaction object `rxn` (defined elsewhere) and save the image.
img = Draw.ReactionToImage(
    rxn
)
img.save(
    '/drug_development/studyRdkit/st_rdcit/img/mol31.jpg'
)
# The reaction template is shown in the image saved above.

# From the reaction template we can see that the main changes are: Cl becomes
# a carbonyl oxygen, and an extra methyl group appears on N.
# > Note: this is a retro-reaction (reverse) template.
# The reactant is shown in the image saved below:
mol = Chem.MolFromSmiles(
    'CC(C)(Nc1nc(Cl)c(-c2ccc(F)cc2)c(-c2ccncc2)n1)c1ccccc1')
Draw.MolToImageFile(
    mol,
    "/drug_development/studyRdkit/st_rdcit/img/mol32.jpg",
    size=(350, 300),
    legend='CC(C)(Nc1nc(Cl)c(-c2ccc(F)cc2)c(-c2ccncc2)n1)c1ccccc1'
)
# - Create the engine object for a concrete reaction rule:
#   rxn = AllChem.ReactionFromSmarts(tem)
# - Feed in the reactant and let the engine run the reaction:
#   rxn.RunReactants([productmol])


def getrxns(rxn, product_smi):
    """
    获取反应规则的引擎对象
    product_smi 反应物
    """
    product_mol = Chem.MolFromSmiles(product_smi)
    reactions = rxn.RunReactants([product_mol])
    rxns = []
    for reaction in reactions:
Пример #7
0
                   width,
                   bottom=bottom,
                   color=col,
                   antialiased=True)
            bottom += sort[length - i][1]
            if col != 'black':
                c_index += 0.3
        bottoms.append(bottom)
    except:
        raise
# Use the fingerprint bits themselves as x tick positions and labels.
ax.set_xticks(keys)
ax.set_xticklabels(keys)

plt.xlabel('Fingerprint bit')
plt.ylabel('Absolute substructure frequency')

# Write each bit's count in bold just above its bar; counts of 100 or more
# are shifted a little further left.
text_yoffset = 5
for i in keys:
    count = uniques[i]
    if count < 100:
        text_xoffset = -0.125
    else:
        text_xoffset = -0.185
    plt.text(i + text_xoffset, bottoms[i] + text_yoffset, count,
             weight='bold')

plt.savefig('substructures.png', dpi=400)

# Original author's note: RDKit throws errors here, reason unknown.
for struct_ix in range(len(draw_structs)):
    pattern = Chem.MolFromSmarts(draw_structs[struct_ix])
    Draw.MolToImageFile(pattern,
                        f'substruct_{struct_ix}.png',
                        kekulize=False)
Пример #8
0
#! /usr/bin/python
# coding: utf-8
# @Time: 2020-05-29 14:36:04
# @Author: zeoy
# rdkit 修改分子

# 一、引入所需库
from rdkit import Chem
from rdkit.Chem import Draw


# 2. Adding and removing hydrogen atoms
mol = Chem.MolFromSmiles('OC1C2C1CC2')
# Draw the molecular structure
Draw.MolToImageFile(
    mol,
    '/drug_development/studyRdkit/st_rdcit/img/mol5.jpg'
)

# 2.1 Signature of the AddHs function
#
# AddHs adds hydrogens to the molecular graph. The signature below is
# reference documentation only; it is not valid Python, so it is kept as a
# comment instead of executable code.
#
# rdkit.Chem.rdmolops.AddHs(
#     (Mol)mol   # the molecule to modify
#     [, (bool) explicitOnly=False  # (optional) if set, only explicit Hs are added to the molecule. Default is 0 (add implicit and explicit Hs).
#      [, (bool) addCoords=False  # (optional) if set, the Hs get 3D coordinates. Default is 0 (no 3D coordinates).
#       [, (AtomPairsParameters) onlyOnAtoms=None  # (optional) if this sequence is given, only these atoms are considered for added Hs
#        [, (bool)addResidueInfo=False  # (optional) if true, residue info is added to the hydrogen atoms (useful for PDB files).
#         ]]]]
# )

# 2.2 Adding hydrogen atoms
Пример #9
0
from optimizer import get_mol
import torch
from rdkit import Chem
from rdkit.Chem import Draw

# Inputs handed to get_mol: a 4x13 float tensor and a 4x4 integer tensor.
# NOTE(review): presumably per-atom features and a bond/adjacency matrix -
# confirm against optimizer.get_mol.
x = torch.FloatTensor([[1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])
a = torch.LongTensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 3],
        [0, 0, 3, 1]])
m = get_mol(x, a)

# Sanitize in place; without catchErrors this raises on failure.
Chem.rdmolops.SanitizeMol(m)

# Round-trip through SMILES and print the string.
s = Chem.MolToSmiles(m)
print(s)
m = Chem.MolFromSmiles(s)

# Print every atom's symbol and explicit valence.
# Note: this loop rebinds `a`, replacing the tensor defined above.
for a in m.GetAtoms():
    print(a.GetSymbol(), a.GetExplicitValence())

# Print each bond object (its default representation).
for b in m.GetBonds():
    print(b)
# Write a drawing of the molecule into the images/ directory.
Draw.MolToImageFile(m, 'images/kill_me.png')
Пример #10
0
# coding=utf-8
import io

from PIL import Image
from rdkit import Chem
from rdkit.Chem import Draw

# Parse the SMILES string and write a drawing of the molecule to mol.jpg.
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(m, 'mol.jpg')

import base64
from io import BytesIO

# Alternative kept from the original (disabled): encode an in-memory image.
# img_buffer = BytesIO()
# a.save(img_buffer, format='JPEG')
# byte_data = img_buffer.getvalue()
# base64_str = base64.b64encode(byte_data)
# print(base64_str)
# Convert the image to bytes: read the file back and print it base64-encoded.
with open('mol.jpg', 'rb') as fp:
    tu = base64.b64encode(fp.read())
    print(tu)
Пример #11
0
# Draw the molecules in `mols` (defined elsewhere) as a grid, three per row,
# with empty legends, and save the image.
img = Draw.MolsToGridImage(mols, molsPerRow=3, subImgSize=(
    200, 200), legends=['' for x in mols])
img.save('/drug_development/studyRdkit/st_rdcit/img/mol13.jpg')

# > Note: ReplaceSubstructs() returns a list of molecule objects. If the
# molecule matches in only one place, the list holds one molecule; if it
# matches in two places, the list holds two molecules. To standardise the
# SMILES, convert the result mol -> SMILES -> mol before visualising it.

# 2.3 SAR analysis - visualising the core
# Chem.ReplaceSidechains(m1, core): define a molecule object and a scaffold
# molecule, then call ReplaceSidechains; removing the side chains leaves the
# scaffold for visualisation.
# Define pyrimidine as the core structure and visualise its scaffold.
m1 = Chem.MolFromSmiles('BrCCc1cncnc1C(=O)O')
core = Chem.MolFromSmiles('c1cncnc1')
tmp = Chem.ReplaceSidechains(m1, core)
# The SMILES result is discarded here (only useful in an interactive session).
Chem.MolToSmiles(tmp)

Draw.MolToImageFile(
    tmp, '/drug_development/studyRdkit/st_rdcit/img/mol14.jpg')

# 2.4 SAR analysis - visualising the side chains
m1 = Chem.MolFromSmiles('BrCCc1cncnc1C(=O)O')
core = Chem.MolFromSmiles('c1cncnc1')
tmp = Chem.ReplaceCore(m1, core)
Draw.MolToImageFile(
    tmp, '/drug_development/studyRdkit/st_rdcit/img/mol15.jpg')

# > Note: side chains are numbered from 1 by default, in the order in which
# the algorithm finds them. They can also be numbered by the scaffold atom
# they are attached to, by passing labelByIndex=True to Chem.ReplaceCore.
tmp = Chem.ReplaceCore(m1, core, labelByIndex=True)
Draw.MolToImageFile(
    tmp, '/drug_development/studyRdkit/st_rdcit/img/mol16.jpg')

# 2.5 Fragmentation methods
Пример #12
0
# 3. Extensions supported in SMARTS

# # 3.1 Querying by hybridisation
# In SMARTS the hybridisation state is specified with the ^ symbol, e.g.:

# 1. ^0 matches S hybridised atoms
# 2. ^1 matches SP hybridised atoms
# 3. ^2 matches SP2 hybridised atoms
# 4. ^3 matches SP3 hybridised atoms
# 5. ^4 matches SP3D hybridised atoms
# 6. ^5 matches SP3D2 hybridised atoms

aspirin = Chem.MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')
Draw.MolToImageFile(
    aspirin,
    '/drug_development/studyRdkit/st_rdcit/img/mol52.jpg',
    legend='aspirin'
)
# Aspirin
# sp2 hybridised atoms
sp2_atoms = aspirin.GetSubstructMatches(Chem.MolFromSmarts('[^2]'))
# sp3 hybridised atoms
sp3_atoms = aspirin.GetSubstructMatches(Chem.MolFromSmarts('[^3]'))
print('sp2 atoms', sp2_atoms)
# sp2 atoms ((1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,), (10,), (11,), (12,))
print('sp3 atoms', sp3_atoms)  # sp3 atoms ((0,),)

# In aspirin only atom 0 is sp3 hybridised; all other atoms are sp2.
# > Note: the oxygen in phenol is sp2 hybridised, which is why the hydroxyl oxygen is more acidic. Both oxygens in COO are sp2 hybridised: the carboxyl group has a conjugated system similar to a benzene ring, and the hydroxyl hydrogen of a carboxylic acid is more acidic with more pronounced conjugation, so sp2 is the better description. The oxygen of an alcohol hydroxyl is sp3 hybridised.
# # 3.2 Dative bonds
# RDKit SMARTS denotes dative bonds with -> and <-; the arrow points in the direction of electron transfer.
Пример #13
0
# rdkit smiles支持和扩展

# 一、引入所需库
import os

from rdkit import Chem

from rdkit.Chem import Draw
# rdkit涵盖了Daylight SMILES所有的标准功能以及一些有用的扩展,下面是扩展的部分内容

# 2. Aromaticity
# Te (tellurium, atomic number 52, a silvery-white metalloid), which is in
# the same group as oxygen, can also be aromatic: when it is bonded to two
# aromatic atoms it contributes two pi electrons.
m = Chem.MolFromSmiles('OC(=O)c1[te]ccc1')
Draw.MolToImageFile(
    m,
    '/drug_development/studyRdkit/st_rdcit/img/mol48.jpg',
    legend='tellurophene-2-carboxylic acid'
)
# Tellurophene-2-carboxylic acid molecule
# The Te atom has index 4; check its aromaticity below.
aromatic_atom4 = m.GetAtomWithIdx(4).GetIsAromatic()
print('atom4 is aromatic', aromatic_atom4)  # atom4 is aromatic True
# 3. Dative bonds
# RDKit supports dative bonds via -> and <-. The direction of the arrow
# matters: it indicates which atom donates the electrons.
# A dative bond does not affect the valence of the start atom; it only
# affects the valence of the atom being pointed at.
# (These two explanatory lines were bare text in the original, which made
# the file a syntax error; they are now comments.)
cu_mol = Chem.MolFromSmiles('[Cu](Cl)Cl')
bipy = Chem.MolFromSmiles('C1(C2=NC=CC=C2)=CC=CC=N1')
bipycu = Chem.MolFromSmiles('c1cccn->2c1-c1n->3cccc1.[Cu]23(Cl)Cl')
mols = [cu_mol, bipy, bipycu]
img = Draw.MolsToGridImage(
    mols,
Пример #14
0
def draw_glycine():
    """Draw the molecule parsed from the FASTA string 'G' to glycine.png."""
    glycine = Chem.MolFromFASTA('G')
    Draw.MolToImageFile(glycine, 'glycine.png')
Пример #15
0
            # 如果两个环上有公用的原子
            if nInCommon and (includeSpiro or nInCommon > 1):
                # 公用的原子,说明两个环是并在一起的
                # 将两个环的原子去重合并
                ringAts = ringAts.union(system)
                print(ringAts)
            else:
                nSystems.append(system)
        nSystems.append(ringAts)
        systems = nSystems
    return systems


# Draw the example molecule, then print its ring systems as computed by
# GetRingSystems (defined above this point in the file).
mol = Chem.MolFromSmiles('CN1C(=O)CN=C(C2=C1C=CC(=C2)Cl)C3=CC=CC=C3')
Draw.MolToImageFile(
    mol,
    "/drug_development/studyRdkit/st_rdcit/img/mol42.jpg",
)
ringInfo = GetRingSystems(mol)
print(ringInfo)

# # 4.2 Effect of exocyclic atoms on aromatic rings
# > Note: electronegative atoms attached through exocyclic bonds "steal" the
# valence electrons of the ring atoms, and dummy atoms contribute whatever is
# needed to make the ring aromatic.
# Using fused rings to increase aromaticity can lead to cases where the
# individual rings are not aromatic but the fused ring system is. One example
# is azulene.
# The example below shows two fused rings and the effect of exocyclic double
# bonds.
m = Chem.MolFromSmiles('O=C1C=CC(=O)C2=C1OC=CO2')
Draw.MolToImageFile(
    m,
    "/drug_development/studyRdkit/st_rdcit/img/mol43.jpg",
)
isAromatic6 = m.GetAtomWithIdx(6).GetIsAromatic()
print(isAromatic6)  # True
Пример #16
0
def getNearestNeighbors(query,
                        n,
                        NNDataPath,
                        FPPath=None,
                        resPath=None,
                        idx=0):
    """Return the ``n`` nearest neighbours of a query fingerprint.

    The search is delegated to the external ``fpin`` program, which is run as
    ``fpin <FPPath> <NNDataPath> 0.0 <n>`` with the query (plus a newline) on
    standard input. Each non-blank line of its output is split on tabs; the
    similarity is expected in the first column, followed by the columns of
    ``NNDataPath``.

    Changes from the original implementation:
      * ``fpin`` is started through ``subprocess`` with an argument list
        instead of a shell string passed to the Python-2-only ``commands``
        module, so paths and queries containing spaces or shell
        metacharacters cannot break or alter the command;
      * the header file is closed by a context manager;
      * blank output lines are skipped instead of raising IndexError;
      * a SMILES that RDKit cannot parse yields an empty ``imgPath`` instead
        of an exception from the drawing call;
      * messages are printed with single-argument ``print(...)`` calls, which
        produce the same text under Python 2 and Python 3.

    Args:
        query: Bit string with the query fingerprint.
        n: Number of neighbours to request.
        NNDataPath: Tab-separated data file whose header must contain
            "Molecule SMILES" and "Compound Name".
        FPPath: Fingerprint file handed to ``fpin``.
        resPath: Existing directory that receives one PNG per neighbour,
            named ``NN<idx>_<rank>_<timestamp>.png``. No images are written
            when it is empty or not a directory.
        idx: Number embedded in the image file names.

    Returns:
        A list of dicts ordered as reported by ``fpin``, each with the keys
        "id", "expVal", "similarity", "smi", "imgPath" and "MeanInhib"
        (always ''). An empty list is returned when a required argument is
        missing/falsy or the header lacks the required columns.

    Raises:
        Exception: If ``fpin`` cannot be started or exits with a non-zero
            status; the message carries its output.
    """
    import subprocess

    if not query or not n or not NNDataPath or not FPPath:
        return []

    # Read only the header line to locate the columns of interest.
    with open(NNDataPath, "r") as dataFile:
        header = dataFile.readline().strip().split('\t')

    if "Molecule SMILES" not in header or "Compound Name" not in header:
        print("NN dataset  " + str(NNDataPath) +
              "  have not the correct header. It must contain 'Molecule SMILES' and 'Compound Name' attributes.")
        return []
    # Indices are shifted by one because fpin prepends the similarity column.
    idxID = header.index("Compound Name") + 1
    idxExpVal = len(header)  # last data column after the shift
    idxSMILES = header.index("Molecule SMILES") + 1
    idxSimilarity = 0

    try:
        proc = subprocess.Popen(
            ["fpin", str(FPPath), str(NNDataPath), "0.0", str(n)],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merged, as commands.getstatusoutput did
            universal_newlines=True)
    except OSError as err:
        # Keep the original contract: failures surface as a plain Exception.
        raise Exception(str(err))
    output, _ = proc.communicate(str(query) + "\n")
    status = proc.returncode
    if status:
        print(status)
        print(output)
        raise Exception(str(output))

    #             TS              SMILES                    AZID         DATE       expRes
    # output = "0.7117   CCCC(C)C1(C(=O)NC(=O)NC1=O)CC   AZ10046012   2009-12-02   3.480007"
    # In each row: [0] tanimoto similarity, [1] SMILES, [2] AZID, [-1] expRes.
    TS = [
        line.strip().split('\t') for line in output.split("\n")
        if line.strip()
    ]

    res = []
    timeStamp = str(time.time()).replace(".", '')
    saveImages = bool(resPath) and os.path.isdir(resPath)
    for fidx, nn in enumerate(TS):
        ID = nn[idxID]
        expVal = nn[idxExpVal]
        if miscUtilities.isNumber(expVal):
            expVal = str(round(float(expVal), 2))
        SMILES = nn[idxSMILES]
        imgPath = ""
        if saveImages:
            mol = Chem.MolFromSmiles(SMILES)
            if mol is not None:  # MolFromSmiles returns None on parse failure
                imgPath = os.path.join(
                    resPath, "NN" + str(idx) + "_" + str(fidx + 1) + "_" +
                    timeStamp + ".png")
                # save the respective imgPath...
                Draw.MolToImageFile(mol,
                                    imgPath,
                                    size=(300, 300),
                                    kekulize=True,
                                    wedgeBonds=True)
        res.append({
            "id": ID,
            "expVal": expVal,
            "similarity": nn[idxSimilarity],
            "smi": SMILES,
            "imgPath": imgPath,
            "MeanInhib": ''
        })
    return res
Пример #17
0
from rdkit import Chem
from rdkit.Chem import Draw


if __name__ == '__main__':
    # Parse the SMILES string and write a drawing of the molecule to mol.jpg
    # using size=(200, 300).
    smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'
    m = Chem.MolFromSmiles(smi)
    Draw.MolToImageFile(m, "mol.jpg", size=(200, 300))
Пример #18
0
    def createSignImg(self,
                      smi,
                      signature,
                      atomColor,
                      imgPath,
                      endHeight=None):
        """Draw ``smi`` with the atoms belonging to ``signature`` highlighted.

        The compound's signature list is obtained (through
        ``getSignatures.getSignatures`` when ``self.model.specialType == 1``,
        otherwise through ``self.getClabDescSignList``) together with a
        temporary mol file. The signature list is cut into consecutive chunks
        of ``mol.GetNumAtoms()`` entries; the first chunk containing
        ``signature`` gives the seed atoms, and the chunk's position gives
        the number of neighbour-expansion rounds applied to them.

        Changes from the original implementation:
          * the temporary mol file is deleted with ``os.remove`` right after
            it has been read, so early returns no longer leave it behind and
            no shell is involved;
          * the bare ``except:`` around the drawing step is narrowed to
            ``except Exception`` so KeyboardInterrupt/SystemExit propagate;
          * the mol file is written inside a context manager;
          * messages use single-argument ``print(...)`` calls, giving the
            same text under Python 2 and Python 3.

        Args:
            smi: SMILES of the compound.
            signature: Signature string to locate.
            atomColor: 'r', 'g' or anything else (treated as blue).
            imgPath: Where to save the PNG; when falsy no image is drawn.
            endHeight: Passed through to ``getSignatures.getSignatures``.

        Returns:
            Tuple ``(imgPath, molStr, atoms, colors)``. ``imgPath`` is "" when
            no image was written and "signatureNOTfound" when the signature
            is absent; on missing inputs or an unreadable molecule the tuple
            is ``("", "", [], [])``.
        """
        print("Creating signature image...")
        if not signature or not atomColor or not smi:
            print("Missing inputs: " + str([smi, signature, atomColor]))
            return "", "", [], []
        if hasattr(self.model, "specialType") and self.model.specialType == 1:
            # Create an Orange ExampleTable with a smiles attribute
            smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smi])
            myDomain = orange.Domain([smilesAttr], 0)
            smilesData = dataUtilities.DataTable(myDomain, [[smi]])
            preCalcData = None
            startHeight = 0
            _, _, cmpdSignList, sdfStr = getSignatures.getSignatures(
                smilesData,
                startHeight,
                endHeight,
                preCalcData,
                returnAtomID=True)
            cmpdSignList = cmpdSignList[0]
            # create a mol file from the first SDF record (up to "$$$$")
            tmpFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
            molLines = []
            for line in sdfStr[0]:
                if "$$$$" in line:
                    break
                molLines.append(line)
            molStr = "".join(molLines)
            with open(tmpFile, "w") as molFile:
                molFile.write(molStr)
        else:
            _, cmpdSignList, tmpFile, molStr = self.getClabDescSignList(
                smi, getMolFile=True)
        if not cmpdSignList or not tmpFile:
            print("Couldn't get the cmpd list or the mol file")
            return "", "", [], []
        # create an RDKit mol; retry without sanitization if the first
        # attempt fails
        mol = Chem.MolFromMolFile(tmpFile, True, False)
        if not mol:
            mol = Chem.MolFromMolFile(tmpFile, False, False)
        # The temporary file is no longer needed once it has been parsed.
        try:
            os.remove(tmpFile)
        except OSError:
            pass  # best-effort cleanup, as with the original `rm`
        if not mol:
            print("Could not create mol for:  " + str(smi))
            return "", "", [], []
        adj = GetAdjacencyMatrix(mol)
        # Split the signature list into one chunk per height.
        nAtoms = mol.GetNumAtoms()
        hights = []
        for i in miscUtilities.Range(0, len(cmpdSignList), nAtoms):
            hList = cmpdSignList[i:i + nAtoms]
            if len(hList):
                hights.append(hList)

        # Seed atoms: positions of the signature in the first chunk holding it.
        atoms = []
        hight = None
        for hIdx, h in enumerate(hights):
            if signature in h:
                atoms = [i for i, a in enumerate(h) if a == signature]
                hight = hIdx
                break
        if len(atoms) == 0:
            print("ERROR: Could not find the atom for  " + str(signature))
            return "signatureNOTfound", "", [], []
        # Grow the atom set by one shell of neighbours per height level.
        visitedAtoms = set()
        for _ in range(hight):
            for atom in list(atoms):  # snapshot: `atoms` grows in the loop
                if atom not in visitedAtoms:
                    visitedAtoms.add(atom)
                    for lnn in findNeighbors(atom, adj):
                        if lnn not in atoms:
                            atoms.append(lnn)
        atoms.sort()
        #Specify the atom colors
        colors = [atomColor] * len(atoms)

        if not imgPath:
            return "", molStr, atoms, colors
        try:
            #Draw the image
            MolDrawing.elemDict = defaultdict(lambda: (0, 0, 0))
            Draw.MolToImageFile(mol,
                                imgPath,
                                size=(300, 300),
                                kekulize=True,
                                wedgeBonds=True,
                                highlightAtoms=atoms)
            #Color the Highlighted atoms with the choosen atomColor.
            # Only using one color
            rgb = {'r': (255, 0, 0), 'g': (0, 255, 0)}.get(atomColor,
                                                           (0, 0, 255))

            # Replace every pure-red pixel with the requested colour.
            img = Image.open(imgPath)
            img = img.convert("RGBA")
            newData = [
                rgb + (255, ) if item[0] == 255 and item[1] == 0
                and item[2] == 0 else item for item in img.getdata()
            ]
            img.putdata(newData)
            img.save(imgPath)

            if os.path.isfile(imgPath):
                return imgPath, molStr, atoms, colors
            return "", molStr, atoms, colors
        except Exception:
            # Drawing is best effort: report "no image" but keep the atoms.
            return "", molStr, atoms, colors
# `hierarch` is defined above this point in the file.
print('smi=', hierarch.smiles)  # smi= CCC(=O)OCCOc1ccccc1

# Each node keeps track of its children in a dictionary keyed by SMILES.
ks = hierarch.children.keys()
print(sorted(ks))
# ['*C(=O)CC', '*CCOC(=O)CC', '*CCOc1ccccc1', '*OCCOc1ccccc1', '*c1ccccc1']
# # 3.2 The BRICS method
# RDKit offers another way of cutting a molecule into fragments: BRICS. BRICS breaks the molecule mainly along synthetically accessible bonds, so the data it returns are the distinct fragments of the molecule; the dummy atoms (*) show where the cuts were made.

# Apply BRICS decomposition to the molecule drawn below.
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(
    m,
    "/drug_development/studyRdkit/st_rdcit/img/mol34.jpg",
    size=(600, 400),
    legend=
    'zanubrutinib(C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N)'
)

frags = (BRICS.BRICSDecompose(m))
print(frags)

# Turn every fragment SMILES back into a molecule for drawing.
mols = []
for fsmi in frags:
    mols.append(Chem.MolFromSmiles(fsmi))

# Grid image of the fragments, three per row, with empty legends.
img = Draw.MolsToGridImage(mols,
                           molsPerRow=3,
                           subImgSize=(200, 200),
                           legends=['' for x in mols])
from rdkit.Chem import ChemicalFeatures
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from rdkit.Chem.Pharm2D import Generate, Gobbi_Pharm2D

# 2. Chemical features
# Build a chemical-feature factory object; the features of a molecule can
# then be obtained through it.

# NOTE(review): the second component is an absolute path, so on POSIX
# os.path.join discards RDConfig.RDDataDir and returns that path unchanged -
# confirm this is intended.
fdefName = os.path.join(
    RDConfig.RDDataDir,
    '/drug_development/studyRdkit/st_rdcit/data/BaseFeatures.fdef')
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(
    m,
    "/drug_development/studyRdkit/st_rdcit/img/mol38.jpg",
)
# Search for features using the feature factory.
feats = factory.GetFeaturesForMol(m)
print(len(feats))  # 16

# Every feature found carries its feature family (e.g. acceptor, donor),
# its feature type, the atoms it corresponds to, its index, and so on.
for f in feats:
    print(
        f.GetFamily(),  # feature family
        f.GetType(),  # feature type
        f.GetAtomIds()  # atoms the feature corresponds to
    )

# Donor SingleAtomDonor (4,)
# Donor SingleAtomDonor (13,)
Пример #21
0
# rdkit支持从Smiles、mol、sdf文件中读入分子获取分子对象。
# Smiles、mol通常用于保存单个分子;而sdf格式是作为分子库形式设计的。
# 因此读入sdf得到的是分子迭代器,读入Smiles、mol文件得到分子对象。

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

# 1. Reading molecules
# 1.1 Reading from SMILES
smi = 'CC(C)OC(=O)C(C)NP(=O)(OCC1C(C(C(O1)N2C=CC(=O)NC2=O)(C)F)O)OC3=CC=CC=C3'
mol = Chem.MolFromSmiles(smi)  # convert the SMILES into a mol object
# Draw the structure of the molecule and store it at the given path
Draw.MolToImageFile(
    mol,  # mol object
    "/drug_development/studyRdkit/st_rdcit/img/mol2.jpg"  # where the structure image is stored
)
print('mol的类型=', type(mol))  # mol的类型=<class 'rdkit.Chem.rdchem.Mol'>

# 1.2 Reading from a mol file

# Convert the mol file into a mol object
mol3 = Chem.MolFromMolFile(
    '/drug_development/studyRdkit/st_rdcit/data/952883.mol')
# Draw the structure of the molecule and store it at the given path
Draw.MolToImageFile(
    mol3,  # mol object
    "/drug_development/studyRdkit/st_rdcit/img/mol3.jpg"  # where the structure image is stored
)
# Fixed: the original printed type(mol) under the label for mol3.
print('mol3的类型=', type(mol3))  # mol3的类型=<class 'rdkit.Chem.rdchem.Mol'>