def generate_chemicals_from_fragments(smiles_list, n=10):
    """Reconstruct chemicals from fragments with BRICS.

    Parameters
    ----------
    smiles_list : list of str
        SMILES strings of the fragments.
    n : int
        Maximum number of chemicals to generate.

    Returns
    -------
    list of str
        SMILES strings of the newly generated molecules. The list holds
        fewer than ``n`` entries when the builder runs out of products.
    """
    from itertools import islice

    # Convert SMILES to mol objects; MolFromSmiles returns None for SMILES
    # it cannot parse, and such entries cannot take part in the build.
    all_components = [Chem.MolFromSmiles(f) for f in smiles_list]
    all_components = [mol for mol in all_components if mol is not None]

    builder = BRICS.BRICSBuild(all_components)

    generated_smiles = []
    # islice stops quietly when the builder is exhausted, instead of letting
    # StopIteration escape from a bare next() call.
    for mol in islice(builder, max(n, 0)):
        mol.UpdatePropertyCache(strict=True)
        generated_smiles.append(Chem.MolToSmiles(mol))
    return generated_smiles
def fragmenter(thefile):
    """Decompose each SMILES in a file with BRICS and rebuild molecules.

    Every non-empty line of ``thefile`` is read as one SMILES string. Each
    molecule is decomposed with ``BRICS.BRICSDecompose`` and all molecules
    that ``BRICS.BRICSBuild`` yields from its fragments are collected.
    The resulting SMILES are written one per line to ``output.txt`` in the
    current working directory (any previous file is replaced).

    Parameters
    ----------
    thefile : str
        Path of the text file holding one SMILES per line.

    Returns
    -------
    list of str
        SMILES of every rebuilt molecule, in generation order.
    """
    # Drop a stale result first, so a failure below does not leave old data
    # behind; a missing file on the first run is not an error.
    if os.path.exists('output.txt'):
        os.remove('output.txt')

    with open(thefile) as handle:
        smiles_lines = [line.strip() for line in handle]

    mylist = []
    for smiles in smiles_lines:
        if not smiles:
            continue  # blank line, nothing to decompose
        base = Chem.MolFromSmiles(smiles)
        if base is None:
            continue  # unparsable SMILES; BRICSDecompose cannot handle None
        catalog = BRICS.BRICSDecompose(base)
        mcat = [Chem.MolFromSmiles(x) for x in catalog]
        for m in BRICS.BRICSBuild(mcat):
            mylist.append(Chem.MolToSmiles(m))

    with open('output.txt', 'w') as out:
        for smi in mylist:
            print(smi, file=out)
    return mylist
def combine_frag(self):
    """Build one compound per fragment template and record the new ones.

    Calls ``self.generate_frag_templates()`` and then, for each entry of
    ``self.potential_cpd_templates``, converts its sorted fragment SMILES
    to molecules and takes the first product of ``BRICS.BRICSBuild``.
    A product SMILES not yet present in ``self.templates`` is printed,
    appended to ``self.templates`` and written as one line to
    ``self.chembank``. Templates that yield no product are skipped.
    """
    self.generate_frag_templates()
    print('Merging fragments together to generate compounds...')
    for current_template in self.potential_cpd_templates:
        fragms = [Chem.MolFromSmiles(x) for x in sorted(current_template)]
        # Only the first product is used. A default of None keeps an empty
        # builder from raising StopIteration and aborting the whole run.
        first_product = next(BRICS.BRICSBuild(fragms), None)
        if first_product is None:
            continue
        # NOTE: a filter requiring every mini-fragment of the template
        # (self.collect_mini_frags_from_each_template) to occur in the
        # product SMILES used to sit here as commented-out code.
        sampler = Chem.MolToSmiles(first_product, True)
        if sampler not in self.templates:
            print(sampler)
            self.templates.append(sampler)
            self.chembank.write(sampler + '\n')
molecule for molecule in Chem.SDMolSupplier('logSdataset1290_2d.sdf') if molecule is not None ] # molecules = [molecule for molecule in Chem.SmilesMolSupplier('logSdataset1290_2d.smi', # delimiter='\t', titleLine=False) # if molecule is not None] print(len(molecules)) fragments = set() for molecule in molecules: fragment = BRICS.BRICSDecompose(molecule, minFragmentSize=2) # print(fragment) # print(list(BRICS.FindBRICSBonds(molecule))) fragments.update(fragment) print(len(fragments)) # print (fragments) generated_structures = BRICS.BRICSBuild( [Chem.MolFromSmiles(smiles) for smiles in fragments]) writer = Chem.SDWriter('generated_structures.sdf') # writer = Chem.SmilesWriter('generated_structures.smi') number_of_generated_structures = 0 for generated_structure in generated_structures: generated_structure.UpdatePropertyCache(True) AllChem.Compute2DCoords(generated_structure) writer.write(generated_structure) number_of_generated_structures += 1 if number_of_generated_structures >= max_number_of_generated_structures: break writer.close()
# Decompose molecule `m` into BRICS fragments and draw them on a grid.
frags = BRICS.BRICSDecompose(m)
print(frags)
mols = [Chem.MolFromSmiles(fragment_smiles) for fragment_smiles in frags]
img = Draw.MolsToGridImage(
    mols,
    molsPerRow=3,
    subImgSize=(200, 200),
    legends=[''] * len(mols),
)
img.save('/drug_development/studyRdkit/st_rdcit/img/mol35.jpg')

# 4. Combining molecular fragments -- the BRICS method
# Recombine the fragments above with BRICS to produce new molecules.
newms = list(BRICS.BRICSBuild(mols))
# Original author's note: 76 new molecules (including a few chemically
# unreasonable structures).
print('新分子数:', len(newms))

# Visualize the first 3 structures.
mols = [newms[position] for position in range(3)]
img = Draw.MolsToGridImage(
    mols,
    molsPerRow=3,
    subImgSize=(200, 200),
    legends=[''] * len(mols),
)
img.save('/drug_development/studyRdkit/st_rdcit/img/mol36.jpg')

# 5. Custom fragment generation
# Besides the automatic decomposition shown above, RDKit offers more
# flexible functions that cut user-defined bonds to produce fragments,
# for example every bond between a ring atom and a non-ring atom.
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
# Number of chemical structures generated per iteration.
number_of_generating_structures = 100
# Number of iterations. Up to (number_of_generating_structures x
# number_of_iterations) structures are generated before de-duplication.
number_of_iterations = 10

# Load the data set holding the SMILES of the seed structures.
dataset = pd.read_csv('molecules.csv', index_col=0)
molecules = [Chem.MolFromSmiles(smiles) for smiles in dataset.iloc[:, 0]]
# MolFromSmiles returns None for SMILES it cannot parse; drop those so that
# BRICSDecompose below is never handed None.
molecules = [molecule for molecule in molecules if molecule is not None]
print('種となる分子の数 :', len(molecules))

# Conversion into fragments.
fragments = set()
for molecule in molecules:
    fragment = BRICS.BRICSDecompose(molecule, minFragmentSize=1)
    fragments.update(fragment)
print('生成されたフラグメントの数 :', len(fragments))

# The fragment molecules do not change between iterations, so build them once.
fragment_molecules = [Chem.MolFromSmiles(fragment) for fragment in fragments]

# Structure generation.
generated_structures = []
for iteration in range(number_of_iterations):
    print(iteration + 1, '/', number_of_iterations)
    generated_structures_all = BRICS.BRICSBuild(fragment_molecules)
    for index, generated_structure in enumerate(generated_structures_all):
        generated_structure.UpdatePropertyCache(True)
        generated_structures.append(Chem.MolToSmiles(generated_structure))
        # Stop this iteration once the per-iteration quota is reached.
        if index + 1 >= number_of_generating_structures:
            break

# Remove duplicated structures.
generated_structures = list(set(generated_structures))
generated_structures = pd.DataFrame(generated_structures, columns=['SMILES'])
# Save as csv. Note that an existing file with the same name is overwritten.
generated_structures.to_csv('generated_structures_brics.csv')
TARGET = ['measured log solubility in mols per litre'] df['mol'] = df['smiles'].apply(Chem.MolFromSmiles) fragments = set() for ix, mol in df[['mol']].iterrows(): f = BRICS.BRICSDecompose(mol[0], returnMols=True) fragments.update(list(f)) else: print(len(fragments)) NUM_ITER = 100000 from random import seed #--- starts parallel BRICS start = time() seed(20200315) builder = BRICS.BRICSBuild(fragments) with open('./results/mol_single.smi', 'w') as f: for i in range(NUM_ITER): m = next(builder) m.UpdatePropertyCache(strict=True) smi = Chem.MolToSmiles(m) f.write(smi + '\n') print('Elapsed time', stopwatch(start), '[mins]') #--- starts parallel BRICS start2 = time() c = 0 seed(20200315) builder = BRICS.BRICSBuild(fragments) with Pool(4) as p: f = open('./results/mol_quad.smi', 'w')
def make_virtual_lib(method_name):
    """Generate a BRICS virtual library and save it as images plus a CSV.

    Reads the CSV named by ``t_csv_filepath.get()``, converts the column
    named by ``t_smiles.get()`` with ``apply_molfromsmiles``, decomposes
    every molecule with ``BRICS.BRICSDecompose`` and asks
    ``BRICS.BRICSBuild`` for up to ``virtual_libraly_num`` new molecules.
    Each molecule is drawn to a PNG under
    ``<parent_path>/results/<theme>/<method_name>/<high_low>/brics_virtual/
    molecular-structure`` and the SMILES are written to ``virtual.csv`` in
    the ``brics_virtual`` directory.

    Besides ``method_name`` the function depends on these module-level
    names: ``t_theme_name``, ``t_csv_filepath``, ``t_smiles``, ``t_id``,
    ``t_task`` (objects exposing ``.get()``), ``apply_molfromsmiles``,
    ``virtual_libraly_num``, ``parent_path`` and ``high_low``.

    Parameters
    ----------
    method_name : str
        Path component used in the result directory.

    Returns
    -------
    Path-like
        Location of the written ``virtual.csv`` (``parent_path`` joined
        with the components above).
    """
    theme_name = t_theme_name.get()
    df_brics = pd.read_csv(t_csv_filepath.get())
    df_brics['mols'] = df_brics[t_smiles.get()].map(apply_molfromsmiles)
    df_brics = df_brics.dropna()

    allfrags = set()
    for mol in df_brics['mols']:
        allfrags.update(BRICS.BRICSDecompose(mol))
    print('the number of allfrags', len(allfrags))

    allcomponents = [apply_molfromsmiles(f) for f in allfrags]
    Nonecomponents = [f for f in allcomponents if f is None or f == ""]
    print('len(Nonecomponents)', len(Nonecomponents))
    # Keep only usable fragment molecules for the builder.
    allcomponents = [f for f in allcomponents if f is not None and f != ""]

    builder = BRICS.BRICSBuild(allcomponents)
    print(builder)

    virtual_mols = []
    error_cnt = 0
    for i in range(virtual_libraly_num):
        try:
            m = next(builder)
            m.UpdatePropertyCache(strict=True)
        except StopIteration:
            # The builder is exhausted, so every remaining request would
            # fail the same way: count them all as errors and stop.
            error_cnt += virtual_libraly_num - i
            break
        except Exception as exc:
            # A single bad molecule must not abort the library build.
            print('error', repr(exc))
            error_cnt += 1
        else:
            virtual_mols.append(m)

    print('The total number : ', virtual_libraly_num)
    print('The number of error : ', error_cnt)
    # Guard against a requested library size of zero.
    error_ratio = error_cnt / virtual_libraly_num if virtual_libraly_num else 0.0
    print('The ratio of error : ', error_ratio)

    out_dir = (parent_path / 'results' / theme_name / method_name / high_low
               / 'brics_virtual')
    for i, mol in enumerate(virtual_mols):
        image_name = 'tmp-' + str(i).zfill(6) + '.png'
        Draw.MolToFile(mol, str(out_dir / 'molecular-structure' / image_name))

    # One row per molecule: running id, SMILES and a placeholder task value 0.
    virtual_list = [[i, Chem.MolToSmiles(mol), 0]
                    for i, mol in enumerate(virtual_mols)]
    df_virtual = pd.DataFrame(
        virtual_list, columns=[t_id.get(), t_smiles.get(), t_task.get()])

    csv_path = out_dir / 'virtual.csv'
    df_virtual.to_csv(csv_path)
    return csv_path
if __name__ == "__main__": allfrags = set() for m in supp: pieces = BRICS.BRICSDecompose(m) allfrags.update(pieces) print len(allfrags) currtime = time() #make new molecules from fragments import random random.seed(127) fragms = [Chem.MolFromSmiles(x) for x in allfrags] ms = BRICS.BRICSBuild(fragms) prods = [ms.next() for x in range(10000)] #clean up generated molecules for prod in prods: prod.UpdatePropertyCache(strict=False) #srpin340 is a low affinity but selective SRPK1 inhibitor srpin340Mol = Chem.MolFromSmiles( 'C1CCN(CC1)C2=C(C=C(C=C2)C(F)(F)F)NC(=O)C3=CC=NC=C3') srpin340fps = Generate.Gen2DFingerprint(srpin340Mol, sigFactory) #sphinx is a higher affinity but selective SRPK1 inhibitor sphinxMol = Chem.MolFromSmiles(