def do_images(xs, molecules, conditions, samples, images_per_condition, output_dir):
    """Draw a random sample of molecules per condition, sorted into category folders.

    For every condition index ``c`` the slice ``[c * samples:(c + 1) * samples]``
    of ``xs`` and ``molecules`` is paired up, ``images_per_condition`` pairs are
    drawn at random, and each pair is rendered to
    ``<output_dir>/<category>/c_<c>_all_<i>.png`` where the category is:

    * ``accurate`` - sanitizes without error, passes ``is_valid`` and
      ``get_is_connected``;
    * ``valid``    - sanitizes and passes ``is_valid`` but is not connected;
    * ``invalid``  - everything else.

    Args:
        xs: sliceable sequence of per-molecule inputs handed to ``get_mol``.
        molecules: sliceable sequence aligned with ``xs``; only element ``[0]``
            of each entry is used (presumably the adjacency data - TODO confirm).
        conditions: number of conditions to iterate over.
        samples: number of consecutive entries belonging to each condition.
        images_per_condition: how many entries to draw per condition.
        output_dir: directory that receives the three category folders.

    Raises:
        ValueError: from ``random.sample`` when ``images_per_condition`` exceeds
            the number of entries available for a condition.
    """
    # One sub-directory per category, created up front.
    category_dirs = {
        name: os.path.join(output_dir, name)
        for name in ('valid', 'accurate', 'invalid')
    }
    for path in category_dirs.values():
        os.makedirs(path, exist_ok=True)

    for c in range(conditions):
        start, stop = c * samples, (c + 1) * samples
        image_mols = random.sample(
            list(zip(xs[start:stop], molecules[start:stop])),
            images_per_condition)
        for i, (x, a) in enumerate(image_mols):
            m = get_mol(x, a[0])
            file_name = 'c_{}_all_{}.png'.format(c, i)
            try:
                err = Chem.rdmolops.SanitizeMol(m, catchErrors=True)
                if err == 0 and is_valid(x, a[0]):
                    category = 'accurate' if get_is_connected(x, a[0]) else 'valid'
                else:
                    # Rejected molecules go into the dedicated 'invalid' folder
                    # rather than the root of output_dir.
                    category = 'invalid'
                Draw.MolToImageFile(
                    m, os.path.join(category_dirs[category], file_name))
            except ValueError:
                # Best effort: a molecule that cannot be processed is skipped.
                continue
def converter(file_name, save_name):
    """Export an SD file as SMILES and report descriptors for its last molecule.

    Every readable record of ``file_name`` is written to ``<save_name>.smi`` as
    ``<smiles>\\t<name>``. The last readable molecule is then re-parsed from
    its SMILES, its QED and LogP are printed, a 300x300 picture is saved to
    ``<save_name>.png`` and a 3D embedding is attempted on a hydrogenated copy.

    Args:
        file_name: path of the SD file to read.
        save_name: output path without extension.

    Returns:
        Tuple ``(smiles, qed, logp)`` for the last readable molecule; ``logp``
        is rounded to 4 decimals.

    Raises:
        ValueError: if the SD file contains no readable molecule.
    """
    smi = None
    last_mol = None
    outname = save_name + ".smi"
    # The context manager guarantees the .smi file is flushed and closed.
    with open(outname, "w") as out_file:
        for mol in Chem.SDMolSupplier(file_name):
            if mol is None:
                # SDMolSupplier yields None for records it cannot parse.
                continue
            smi = Chem.MolToSmiles(mol)
            name = mol.GetProp("_Name")
            out_file.write("{}\t{}\n".format(smi, name))
            last_mol = mol

    if last_mol is None:
        raise ValueError("no readable molecule in {}".format(file_name))

    m = Chem.MolFromSmiles(smi)
    m_qed = Chem.QED.qed(m)
    m_LogP = round(Descriptors.MolLogP(last_mol), 4)
    print(file_name, end=" ")
    print("->", m_qed, m_LogP)
    Draw.MolToImageFile(m, save_name + ".png", size=(300, 300))

    # 3D embedding of the hydrogenated molecule; the follow-up force-field
    # optimisation and mol-file export are currently disabled.
    m = Chem.AddHs(m)
    AllChem.EmbedMolecule(m, randomSeed=3)
    return smi, m_qed, m_LogP
import os
import sys

from rdkit import Chem
from rdkit.Chem import Draw


# get a path
def GetPath(file):
    """Return ``file`` joined onto the normalized ``sys.path[0]`` directory."""
    path = sys.path[0]
    path = os.path.normpath(path)
    return os.path.join(path, file)


SMILES1 = "O"
SMILES2 = "CCO"
SMILES3 = "O=C=O"
SMILES4 = "C#N"
SMILES5 = "C1CCCCC1"
SMILES6 = "CC"
SMILES7 = "C=C"
SMILES8 = "C#C"
SMILES9 = "CC(=O)OCC"

# Render each molecule to a JPEG next to sys.path[0]; the file names are the
# Chinese names of the compounds.
for smiles, file_name in (
    (SMILES1, "水.jpg"),
    (SMILES2, "乙醇.jpg"),
    (SMILES3, "二氧化碳.jpg"),
    (SMILES4, "氰化氢.jpg"),
    (SMILES5, "环已烷.jpg"),
    (SMILES6, "乙烷.jpg"),
    (SMILES7, "乙烯.jpg"),
    (SMILES8, "乙炔.jpg"),
    (SMILES9, "乙酸乙酯.jpg"),
):
    Draw.MolToImageFile(Chem.MolFromSmiles(smiles), GetPath(file_name))
from rdkit import Chem
from rdkit.Chem import Draw

# Parse one SMILES string and save its 2D depiction as mol.jpg in the
# current working directory.
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(
    m,
    "mol.jpg",
)
def plot_mol_matrix():
    """Export drawings and heat maps for one hard-coded example molecule.

    Files written to the current directory:

    * ``mol.pdf`` / ``mol.png`` - the molecule drawing (the first ``mol.pdf``
      written by ``MolToImageFile`` is overwritten by the cairosvg export);
    * ``atom.pdf`` / ``atom.png`` - heat map of the atom matrix;
    * ``bond<i>.pdf`` / ``bond<i>.png`` - one heat map per entry of the bond data.

    Every figure is shown with ``plt.show()`` before it is saved.
    """
    import cairosvg
    import seaborn as sns
    import matplotlib.pyplot as plt

    # Alternative examples: 'CC(C)NC1=CC=CO1', 'CC1=C(SC(=C1)C(=O)NCC2=NOC=C2)Br'
    smiles = 'CN(C)C(=N)NC(=N)N'
    bond_batch, atom_batch = smiles_to_adj(smiles, 'qm9')
    # Only the first element of each result is plotted
    # (presumably a batch dimension - TODO confirm).
    bond_channels = bond_batch[0]
    atom_matrix = atom_batch[0]

    Draw.MolToImageFile(Chem.MolFromSmiles(smiles), 'mol.pdf')
    svg = Draw.MolsToGridImage(
        [Chem.MolFromSmiles(smiles)],
        legends=[],
        molsPerRow=1,
        subImgSize=(250, 250),
        useSVG=True,
    )
    svg_bytes = svg.encode('utf-8')
    cairosvg.svg2pdf(bytestring=svg_bytes, write_to="mol.pdf")
    cairosvg.svg2png(bytestring=svg_bytes, write_to="mol.png")

    # Styling shared by every heat map.
    heatmap_style = dict(
        linewidths=.5,
        annot_kws={"size": 18},
        cbar=False,
        xticklabels=False,
        yticklabels=False,
        square=True,
        cmap="vlag",
        vmin=-1,
        vmax=1,
        linecolor='black',
    )

    def render(matrix, figsize, targets):
        # Draw one heat map, show it, then save it under every target name.
        fig, ax = plt.subplots(figsize=figsize)
        sns.heatmap(matrix, ax=ax, **heatmap_style)
        plt.show()
        for target in targets:
            fig.savefig(target)

    render(atom_matrix, (2, 3.4), ('atom.pdf', 'atom.png'))
    for i, channel in enumerate(bond_channels):
        render(channel, (5, 5),
               ('bond{}.pdf'.format(i), 'bond{}.png'.format(i)))
img = Draw.ReactionToImage( rxn ) img.save( '/drug_development/studyRdkit/st_rdcit/img/mol31.jpg' ) # 反应模板如下图所示: # 从反应模板中,我们看到主要的变化是Cl变成羰基氧,N上多了一个甲基 # >注:这是一个逆反应模板 # 反应物如下图所示 : mol = Chem.MolFromSmiles( 'CC(C)(Nc1nc(Cl)c(-c2ccc(F)cc2)c(-c2ccncc2)n1)c1ccccc1') Draw.MolToImageFile( mol, "/drug_development/studyRdkit/st_rdcit/img/mol32.jpg", size=(350, 300), legend='CC(C)(Nc1nc(Cl)c(-c2ccc(F)cc2)c(-c2ccncc2)n1)c1ccccc1' ) # .创建具体反应规则的引擎对象rxn = AllChem.ReactionFromSmarts(tem) # .输入反应物,借助引擎产生反应rxn.RunReactants([productmol]) def getrxns(rxn, product_smi): """ 获取反应规则的引擎对象 product_smi 反应物 """ product_mol = Chem.MolFromSmiles(product_smi) reactions = rxn.RunReactants([product_mol]) rxns = [] for reaction in reactions:
width, bottom=bottom, color=col, antialiased=True) bottom += sort[length - i][1] if col != 'black': c_index += 0.3 bottoms.append(bottom) except: raise ax.set_xticks(keys) ax.set_xticklabels(keys) plt.ylabel('Absolute substructure frequency') plt.xlabel('Fingerprint bit') text_yoffset = 5 for i in keys: text_xoffset = -0.125 if uniques[i] < 100 else -0.185 plt.text(i + text_xoffset, bottoms[i] + text_yoffset, uniques[i], weight='bold') plt.savefig('substructures.png', dpi=400) # RDKit throws errors, don't know why for struct_ix in range(len(draw_structs)): Draw.MolToImageFile(Chem.MolFromSmarts(draw_structs[struct_ix]), f'substruct_{struct_ix}.png', kekulize=False)
#! /usr/bin/python
# coding: utf-8
# @Time: 2020-05-29 14:36:04
# @Author: zeoy
# Modifying molecules with RDKit

# 1. Import the required libraries
from rdkit import Chem
from rdkit.Chem import Draw

# 2. Adding and removing hydrogen atoms
mol = Chem.MolFromSmiles('OC1C2C1CC2')
# Draw the molecular structure
Draw.MolToImageFile(
    mol,
    '/drug_development/studyRdkit/st_rdcit/img/mol5.jpg'
)

# 2.1 Signature of the hydrogen-adding function
# Adds hydrogens to the molecular graph. The signature below is reference
# text, not executable Python, so it is kept as a comment:
#
# rdkit.Chem.rdmolops.AddHs(
#     (Mol)mol                                   # the molecule to modify
#     [, (bool)explicitOnly=False                # (optional) if set, only explicit Hs are added to the
#                                                #   molecule. Default is 0 (add implicit and explicit Hs).
#     [, (bool)addCoords=False                   # (optional) if set, the added Hs get 3D coordinates.
#                                                #   Default is 0 (no 3D coordinates).
#     [, (AtomPairsParameters)onlyOnAtoms=None   # (optional) if this sequence is given, only these atoms
#                                                #   are considered for added Hs.
#     [, (bool)addResidueInfo=False              # (optional) if true, residue info is added to the
#                                                #   hydrogen atoms (useful for PDB files).
#     ]]]]
# )

# 2.2 Adding hydrogen atoms
from optimizer import get_mol
import torch
from rdkit import Chem
from rdkit.Chem import Draw

# Hand-written inputs for get_mol: four rows of 13 float values and a 4x4
# integer matrix (presumably node features and bond types - TODO confirm).
x = torch.FloatTensor([
    [1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
    [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
    [1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
    [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
])
a = torch.LongTensor([
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 3],
    [0, 0, 3, 1],
])

# Build the molecule, sanitize it and print its SMILES.
m = get_mol(x, a)
Chem.rdmolops.SanitizeMol(m)
s = Chem.MolToSmiles(m)
print(s)

# Re-parse the SMILES and dump atoms and bonds before drawing the result.
m = Chem.MolFromSmiles(s)
for atom in m.GetAtoms():
    print(atom.GetSymbol(), atom.GetExplicitValence())
for bond in m.GetBonds():
    print(bond)
Draw.MolToImageFile(m, 'images/kill_me.png')
# coding=utf-8
import io
import base64
from io import BytesIO

from PIL import Image
from rdkit import Chem
from rdkit.Chem import Draw

# Draw the molecule to mol.jpg.
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(m, 'mol.jpg')

# Convert the image to bytes: read the file back and print it base64-encoded.
with open('mol.jpg', 'rb') as image_file:
    raw_bytes = image_file.read()
tu = base64.b64encode(raw_bytes)
print(tu)
img = Draw.MolsToGridImage(
    mols,
    molsPerRow=3,
    subImgSize=(200, 200),
    legends=['' for _ in mols],
)
img.save('/drug_development/studyRdkit/st_rdcit/img/mol13.jpg')
# > Note: ReplaceSubstructs() returns a list of molecule objects. If the
# molecule matches in one place the list holds one molecule; if it matches in
# two places the list holds two. To get standardized SMILES, convert the
# result mol -> smiles -> mol before visualizing it.

# 2.3 SAR analysis - visualizing the core
# Chem.ReplaceSidechains(m1, core): define the molecule and the scaffold
# molecule, then call ReplaceSidechains; removing the side chains leaves the
# scaffold to visualize.
# Use pyrimidine as the core structure and visualize its scaffold.
m1 = Chem.MolFromSmiles('BrCCc1cncnc1C(=O)O')
core = Chem.MolFromSmiles('c1cncnc1')
tmp = Chem.ReplaceSidechains(m1, core)
Chem.MolToSmiles(tmp)
Draw.MolToImageFile(
    tmp,
    '/drug_development/studyRdkit/st_rdcit/img/mol14.jpg',
)

# 2.4 SAR analysis - visualizing the side chains
m1 = Chem.MolFromSmiles('BrCCc1cncnc1C(=O)O')
core = Chem.MolFromSmiles('c1cncnc1')
tmp = Chem.ReplaceCore(m1, core)
Draw.MolToImageFile(
    tmp,
    '/drug_development/studyRdkit/st_rdcit/img/mol15.jpg',
)
# > Note: side chains are numbered from 1 by default, in the order in which
# the algorithm finds them.
# They can also be numbered by the core atom each side chain is attached to.
tmp = Chem.ReplaceCore(m1, core, labelByIndex=True)
Draw.MolToImageFile(
    tmp,
    '/drug_development/studyRdkit/st_rdcit/img/mol16.jpg',
)

# 2.5 Fragmentation approaches
# 3. Extensions supported by SMARTS
#
# 3.1 Hybridization queries
# In SMARTS the hybridization state is specified with the ^ symbol, e.g.:
# 1. ^0 matches S-hybridized atoms
# 2. ^1 matches SP-hybridized atoms
# 3. ^2 matches SP2-hybridized atoms
# 4. ^3 matches SP3-hybridized atoms
# 5. ^4 matches SP3D-hybridized atoms
# 6. ^5 matches SP3D2-hybridized atoms
aspirin = Chem.MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')
Draw.MolToImageFile(
    aspirin,
    '/drug_development/studyRdkit/st_rdcit/img/mol52.jpg',
    legend='aspirin',
)  # aspirin

# Query patterns for sp2- and sp3-hybridized atoms
sp2_query = Chem.MolFromSmarts('[^2]')
sp3_query = Chem.MolFromSmarts('[^3]')
sp2_atoms = aspirin.GetSubstructMatches(sp2_query)
sp3_atoms = aspirin.GetSubstructMatches(sp3_query)
print('sp2 atoms', sp2_atoms)
# sp2 atoms ((1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,), (10,), (11,), (12,))
print('sp3 atoms', sp3_atoms)
# sp3 atoms ((0,),)
# In aspirin only atom 0 is sp3-hybridized; every other atom is sp2-hybridized.
# > Note: the oxygen in phenol is sp2-hybridized, which is why the hydroxyl
# oxygen is more acidic. Both oxygens in COO are sp2-hybridized: the carboxyl
# group has a conjugated system similar to a benzene ring, and the hydroxyl
# hydrogen of a carboxylic acid is more acidic and the conjugation more
# pronounced, so sp2 fits better. The oxygen of an alcohol hydroxyl is
# sp3-hybridized.
#
# 3.2 Dative bonds
# RDKit SMARTS writes dative bonds with the -> and <- symbols; the arrow
# direction is the direction of electron transfer.
# rdkit smiles支持和扩展 # 一、引入所需库 import os from rdkit import Chem from rdkit.Chem import Draw # rdkit涵盖了Daylight SMILES所有的标准功能以及一些有用的扩展,下面是扩展的部分内容 # 二、芳香性 # 和氧同族的Te(碲 , 拼音 : dì , 原子序数52 , 是银白色的类金属 ) 元素也可能具有芳香性 , 当其连接2个芳香原子时 , 它贡献2个pi电子 。 m = Chem.MolFromSmiles('OC(=O)c1[te]ccc1') Draw.MolToImageFile( m, '/drug_development/studyRdkit/st_rdcit/img/mol48.jpg', legend='tellurophene-2-carboxylic acid' ) # 碲吩-2甲酸分子 # Te原子的编号是4,下面检查其芳香性 aromatic_atom4 = m.GetAtomWithIdx(4).GetIsAromatic() print('atom4 is aromatic', aromatic_atom4) # atom4 is aromatic True # 三 配位键 rdkit通过 -> 和 < -来支持配位键表示 , 箭头的方向非常重要 , 代表了谁提供电子 配位键不会影响起始原子的价态 , 只会影响指向原子的价态 cu_mol = Chem.MolFromSmiles('[Cu](Cl)Cl') bipy = Chem.MolFromSmiles('C1(C2=NC=CC=C2)=CC=CC=N1') bipycu = Chem.MolFromSmiles('c1cccn->2c1-c1n->3cccc1.[Cu]23(Cl)Cl') mols = [cu_mol, bipy, bipycu] img = Draw.MolsToGridImage( mols,
def draw_glycine():
    """Build glycine from the FASTA sequence 'G' and save it as 'glycine.png'."""
    glycine = Chem.MolFromFASTA('G')
    Draw.MolToImageFile(glycine, 'glycine.png')
# 如果两个环上有公用的原子 if nInCommon and (includeSpiro or nInCommon > 1): # 公用的原子,说明两个环是并在一起的 # 将两个环的原子去重合并 ringAts = ringAts.union(system) print(ringAts) else: nSystems.append(system) nSystems.append(ringAts) systems = nSystems return systems mol = Chem.MolFromSmiles('CN1C(=O)CN=C(C2=C1C=CC(=C2)Cl)C3=CC=CC=C3') Draw.MolToImageFile( mol, "/drug_development/studyRdkit/st_rdcit/img/mol42.jpg", ) ringInfo = GetRingSystems(mol) print(ringInfo) # # 4.2 环外原子对芳香环(Aromatic)的影响 # >注:环键上连接的负电性原子会“窃取”环原子的价电子,且这些亚原子,提供了使环芳香性所必须的元素。 # 使用稠环来增加芳香度可能导致单个环不是芳香的情况,但稠环系统是芳香性的。其中一个例子就是azulene(甘菊蓝) # 下面的例子,展示了两个稠环和环外双键的影响 m = Chem.MolFromSmiles('O=C1C=CC(=O)C2=C1OC=CO2') Draw.MolToImageFile( m, "/drug_development/studyRdkit/st_rdcit/img/mol43.jpg", ) isAromatic6 = m.GetAtomWithIdx(6).GetIsAromatic() print(isAromatic6) # True
def getNearestNeighbors(query, n, NNDataPath, FPPath=None, resPath=None, idx=0):
    """ Get the n nearest neighbors of a query fingerprint.

        The search is delegated to the external ``fpin`` command: the query is
        piped to its stdin and FPPath, NNDataPath, ``0.0`` and n are passed on
        its command line.

        query:      bin string with the query fingerprint
        n:          number of neighbors requested
        NNDataPath: tab separated data set; its header line must contain the
                    'Molecule SMILES' and 'Compound Name' attributes
        FPPath:     passed to fpin; although it defaults to None, an empty
                    list is returned when it is not given
        resPath:    existing directory where the neighbor images are saved;
                    no image is drawn when it is empty or not a directory
        idx:        number embedded in the image file names

        Returns a list with one dict per line of fpin output, in output order:
            [ {
                "id"        : ID,
                "expVal"    : ExpValues,
                "similarity": TanimotoSimilarity,
                "smi"       : smiles,
                "imgPath"   : imgPath ("" when no image was drawn),
                "MeanInhib" : ''
              }, ... ]
        Returns [] when a required argument is missing/empty, when the header
        is not as expected, or when fpin produced no output.

        Raises Exception (carrying fpin's output) when fpin exits non-zero.

        The images are saved in resPath as NN<idx>_<rank>_<timestamp>.png,
        with rank starting at 1.
    """
    if not query or not n or not NNDataPath or not FPPath:
        return []

    # Get the header; the context manager closes the file even if reading fails.
    with open(NNDataPath, "r") as dataFile:
        header = dataFile.readline().strip().split('\t')
    if "Molecule SMILES" not in header or "Compound Name" not in header:
        # Single formatted string: prints identically on Python 2 and 3.
        print("%s %s %s" % (
            "NN dataset ", NNDataPath,
            " have not the correct header. It must contain 'Molecule SMILES' and 'Compound Name' attributes."))
        return []

    # Column indexes are shifted by 1 because fpin prepends the Tanimoto
    # similarity to every output line.
    idxID = header.index("Compound Name") + 1
    idxExpVal = len(header)
    idxSMILES = header.index("Molecule SMILES") + 1
    idxSimilarity = 0

    # NOTE(review): the command is assembled by string concatenation and run
    # through a shell; query, FPPath and NNDataPath must come from trusted input.
    cmdStr = 'echo "' + query + '" | fpin ' + FPPath + " " + NNDataPath + ' 0.0 ' + str(n)
    status, output = commands.getstatusoutput(cmdStr)
    if status:
        print(status)
        print(output)
        raise Exception(str(output))

    # Each output line looks like: TS SMILES AZID DATE expRes, e.g.
    # "0.7117 CCCC(C)C1(C(=O)NC(=O)NC1=O)CC AZ10046012 2009-12-02 3.480007"
    #   TS[k][0]  - tanimoto similarity
    #   TS[k][1]  - SMILES
    #   TS[k][2]  - AZID
    #   TS[k][-1] - expRes
    # Blank lines are skipped so that empty fpin output yields [] instead of
    # an IndexError further down.
    TS = [line.strip().split('\t') for line in output.split("\n") if line.strip()]

    res = []
    timeStamp = str(time.time()).replace(".", '')
    drawImages = bool(resPath) and os.path.isdir(resPath)
    for fidx, nn in enumerate(TS):
        ID = nn[idxID]
        rawExpVal = nn[idxExpVal]
        if miscUtilities.isNumber(rawExpVal):
            expVal = str(round(float(rawExpVal), 2))
        else:
            expVal = rawExpVal
        SMILES = nn[idxSMILES]
        if drawImages:
            imgPath = os.path.join(
                resPath,
                "NN" + str(idx) + "_" + str(fidx + 1) + "_" + timeStamp + ".png")
            mol = Chem.MolFromSmiles(SMILES)
            # Save the picture of this neighbor under imgPath.
            Draw.MolToImageFile(mol, imgPath, size=(300, 300), kekulize=True, wedgeBonds=True)
        else:
            imgPath = ""
        res.append({
            "id": ID,
            "expVal": expVal,
            "similarity": nn[idxSimilarity],
            "smi": SMILES,
            "imgPath": imgPath,
            "MeanInhib": ''
        })
    return res
from rdkit import Chem
from rdkit.Chem import Draw

if __name__ == '__main__':
    # Parse one SMILES string and save a 200x300 depiction as mol.jpg.
    smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'
    m = Chem.MolFromSmiles(smi)
    image_size = (200, 300)
    Draw.MolToImageFile(m, "mol.jpg", size=image_size)
def createSignImg(self, smi, signature, atomColor, imgPath, endHeight=None):
    """Draw the molecule with the atoms covered by a signature highlighted.

    The atoms whose signature equals ``signature`` are located in the
    compound's signature list; the selection is then grown by one shell of
    neighbors per signature height, and the molecule is drawn with those
    atoms highlighted in the requested color.

    smi:        SMILES of the compound
    signature:  signature to look for
    atomColor:  'r' (red) or 'g' (green); any other value gives blue
    imgPath:    where to save the image; if empty, no image is drawn
    endHeight:  forwarded to getSignatures.getSignatures (only used when
                self.model.specialType == 1)

    Returns a tuple (imgPath, molStr, atoms, colors):
        ("", "", [], [])                  - missing input, or the signature
                                            list / mol file / RDKit mol could
                                            not be obtained
        ("signatureNOTfound", "", [], []) - signature not in the compound
        ("", molStr, atoms, colors)       - no imgPath given or drawing failed
        (imgPath, molStr, atoms, colors)  - image written
    where atoms is the sorted list of highlighted atom indexes and colors
    holds atomColor once per highlighted atom.
    """
    print("Creating signature image...")
    if not signature or not atomColor or not smi:
        print("%s %s" % ("Missing inputs:", str([smi, signature, atomColor])))
        return "", "", [], []

    if hasattr(self.model, "specialType") and self.model.specialType == 1:
        # Create an Orange ExampleTable with a smiles attribute
        smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smi])
        myDomain = orange.Domain([smilesAttr], 0)
        smilesData = dataUtilities.DataTable(myDomain, [[smi]])
        preCalcData = None
        startHeight = 0
        dataSign, cmpdSignDict, cmpdSignList, sdfStr = getSignatures.getSignatures(
            smilesData, startHeight, endHeight, preCalcData, returnAtomID=True)
        cmpdSignList = cmpdSignList[0]
        # Create a mol file holding everything up to the "$$$$" record end.
        tmpFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
        molStr = ""
        with open(tmpFile, "w") as molFile:
            for line in sdfStr[0]:
                if "$$$$" in line:
                    break
                molStr += line
                molFile.write(line)
    else:
        CLabDesc, cmpdSignList, tmpFile, molStr = self.getClabDescSignList(
            smi, getMolFile=True)

    if not cmpdSignList or not tmpFile:
        print("Couldn't get the cmpd list or the mol file")
        return "", "", [], []

    # Create an RDKit mol; fall back to the second flag combination if the
    # first parse fails.
    mol = Chem.MolFromMolFile(tmpFile, True, False)
    if not mol:
        mol = Chem.MolFromMolFile(tmpFile, False, False)
    if not mol:
        print("%s %s" % ("Could not create mol for: ", smi))
        return "", "", [], []
    adj = GetAdjacencyMatrix(mol)

    # Split the flat signature list into one chunk of nAtoms entries per height.
    nAtoms = mol.GetNumAtoms()
    hights = []
    for i in miscUtilities.Range(0, len(cmpdSignList), nAtoms):
        hList = cmpdSignList[i:i + nAtoms]
        if len(hList):
            hights.append(hList)

    # First height containing the signature gives the center atoms.
    atoms = []
    hight = None
    for hIdx, h in enumerate(hights):
        if signature in h:
            atoms = [i for i, a in enumerate(h) if a == signature]
            hight = hIdx
            break
    if len(atoms) == 0:
        print("%s %s" % ("ERROR: Could not find the atom for ", signature))
        return "signatureNOTfound", "", [], []

    # Grow the selection by one shell of neighbors per height level.
    visitedAtoms = set()
    for _ in range(hight):
        # Iterate over a snapshot: atoms is extended inside the loop.
        for atom in list(atoms):
            if atom in visitedAtoms:
                continue
            visitedAtoms.add(atom)
            for lnn in findNeighbors(atom, adj):
                if lnn not in atoms:
                    atoms.append(lnn)
    atoms.sort()

    # Best-effort removal of the temporary mol file, without spawning a shell.
    try:
        os.remove(tmpFile)
    except OSError:
        pass

    # Specify the atom colors
    colors = [atomColor] * len(atoms)

    if not imgPath:
        return "", molStr, atoms, colors
    try:
        # Draw the image with every element in black.
        MolDrawing.elemDict = defaultdict(lambda: (0, 0, 0))
        Draw.MolToImageFile(mol, imgPath, size=(300, 300), kekulize=True,
                            wedgeBonds=True, highlightAtoms=atoms)
        # Recolor the highlight, drawn in pure red, with the chosen
        # atomColor. Only one color is used.
        rgb = {'r': (255, 0, 0), 'g': (0, 255, 0)}.get(atomColor, (0, 0, 255))
        img = Image.open(imgPath)
        img = img.convert("RGBA")
        newData = [
            rgb + (255, ) if (item[0] == 255 and item[1] == 0 and item[2] == 0) else item
            for item in img.getdata()
        ]
        img.putdata(newData)
        img.save(imgPath)
        if os.path.isfile(imgPath):
            return imgPath, molStr, atoms, colors
        return "", molStr, atoms, colors
    except Exception:
        # Drawing is best effort: report "no image" but keep the atom data.
        return "", molStr, atoms, colors
print('smi=', hierarch.smiles)
# smi= CCC(=O)OCCOc1ccccc1
# Each node tracks its children in a dictionary keyed by SMILES.
ks = hierarch.children.keys()
print(sorted(ks))
# ['*C(=O)CC', '*CCOC(=O)CC', '*CCOc1ccccc1', '*OCCOc1ccccc1', '*c1ccccc1']

# 3.2 The BRICS method
# RDKit offers another way of cutting a molecule into fragments: BRICS.
# BRICS breaks the molecule at synthetically accessible bonds, so the data
# structure it returns holds the distinct fragments of that molecule; the
# dummy atoms (*) show where the cuts were made.
# Run the BRICS decomposition on the molecule drawn below.
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(
    m,
    "/drug_development/studyRdkit/st_rdcit/img/mol34.jpg",
    size=(600, 400),
    legend='zanubrutinib(C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N)',
)
frags = BRICS.BRICSDecompose(m)
print(frags)
# Turn every fragment SMILES back into a molecule and lay them out in a grid.
mols = [Chem.MolFromSmiles(fsmi) for fsmi in frags]
img = Draw.MolsToGridImage(
    mols,
    molsPerRow=3,
    subImgSize=(200, 200),
    legends=['' for _ in mols],
)
from rdkit.Chem import ChemicalFeatures
from rdkit.Chem.Pharm2D.SigFactory import SigFactory
from rdkit.Chem.Pharm2D import Generate, Gobbi_Pharm2D

# 2. Chemical features
# Build a feature factory; the chemical features of a molecule are obtained
# through this object.
# NOTE(review): the second argument is an absolute path, so on POSIX
# os.path.join discards RDConfig.RDDataDir and returns that path unchanged.
fdefName = os.path.join(
    RDConfig.RDDataDir,
    '/drug_development/studyRdkit/st_rdcit/data/BaseFeatures.fdef',
)
factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(m, "/drug_development/studyRdkit/st_rdcit/img/mol38.jpg")

# Search for features with the feature factory
feats = factory.GetFeaturesForMol(m)
print(len(feats))  # 16

# Every feature found carries its feature family (e.g. acceptor, donor), its
# feature type, the atoms it corresponds to, its index, and so on.
for f in feats:
    family = f.GetFamily()    # feature family
    kind = f.GetType()        # feature type
    atom_ids = f.GetAtomIds() # atoms the feature corresponds to
    print(family, kind, atom_ids)
# Donor SingleAtomDonor (4,)
# Donor SingleAtomDonor (13,)
# RDKit can read molecules from SMILES strings, mol files and sdf files.
# SMILES and mol usually hold a single molecule, whereas the sdf format is
# designed as a molecule library.
# Reading an sdf therefore yields a molecule iterator, while reading SMILES
# or a mol file yields a molecule object.
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

# 1. Reading molecules
# 1.1 Read from SMILES
smi = 'CC(C)OC(=O)C(C)NP(=O)(OCC1C(C(C(O1)N2C=CC(=O)NC2=O)(C)F)O)OC3=CC=CC=C3'
mol = Chem.MolFromSmiles(smi)  # convert the SMILES string into a mol object
# Draw the structure of the molecule and store it at the given path
Draw.MolToImageFile(
    mol,  # mol object
    "/drug_development/studyRdkit/st_rdcit/img/mol2.jpg"  # where the picture is stored
)
print('mol的类型=', type(mol))
# mol的类型=<class 'rdkit.Chem.rdchem.Mol'>

# 1.2 Read from a mol file
# Convert the mol file into a mol object
mol3 = Chem.MolFromMolFile(
    '/drug_development/studyRdkit/st_rdcit/data/952883.mol')
# Draw the structure of the molecule and store it at the given path
Draw.MolToImageFile(
    mol3,  # mol object
    "/drug_development/studyRdkit/st_rdcit/img/mol3.jpg"  # where the picture is stored
)
# Report the type of mol3 itself (not of mol from section 1.1).
print('mol3的类型=', type(mol3))
# mol3的类型=<class 'rdkit.Chem.rdchem.Mol'>