def generate_chemicals_from_fragments(smiles_list, n=10):
    """
    Reconstruct chemicals from fragments with the BRICS builder.

    Parameters
    ----------
    smiles_list: list of string
        list of smiles of fragments
    n: int
        maximum number of chemicals to be generated

    Returns
    -------
    smiles_list: list of string
        list of newly generated smiles; it holds fewer than ``n`` entries
        when the builder runs out of structures first

    Raises
    ------
    ValueError
        if one of the fragment smiles cannot be parsed
    """

    # convert smiles to mol objects, failing early on anything unparsable
    # instead of handing a None to the BRICS builder
    all_components = []
    for fragment_smiles in smiles_list:
        component = Chem.MolFromSmiles(fragment_smiles)
        if component is None:
            raise ValueError(
                'cannot parse fragment SMILES: {!r}'.format(fragment_smiles))
        all_components.append(component)
    builder = BRICS.BRICSBuild(all_components)

    # zip() stops at whichever runs out first: the requested count or the
    # builder itself. This avoids leaking StopIteration out of the function
    # when fewer than n structures can be built.
    generated_smiles = []
    for _, mol in zip(range(n), builder):
        mol.UpdatePropertyCache(strict=True)
        generated_smiles.append(Chem.MolToSmiles(mol))

    return generated_smiles
Пример #2
0
def fragmenter(thefile):
    """Decompose each SMILES of a file with BRICS and rebuild molecules from the pieces.

    Every line of ``thefile`` is stripped and parsed as one SMILES string.
    Each molecule is decomposed with ``BRICS.BRICSDecompose``, its fragments
    are recombined with ``BRICS.BRICSBuild``, and the SMILES of every built
    molecule is collected.

    Parameters
    ----------
    thefile: string
        path of a text file with one SMILES per line

    Returns
    -------
    mylist: list of string
        SMILES of all rebuilt molecules, in generation order; the same
        strings are written to ``output.txt``, one per line
    """
    # Drop the result of a previous run before doing any work. A missing
    # file (first run) is not an error.
    try:
        os.remove('output.txt')
    except FileNotFoundError:
        pass

    with open(thefile) as source:
        smiles_lines = [line.strip() for line in source]

    mylist = []
    for smiles in smiles_lines:
        base = Chem.MolFromSmiles(smiles)
        catalog = BRICS.BRICSDecompose(base)
        mcat = [Chem.MolFromSmiles(x) for x in catalog]
        for m in BRICS.BRICSBuild(mcat):
            mylist.append(Chem.MolToSmiles(m))

    # The context manager closes the file even if a write fails.
    with open('output.txt', 'w') as sink:
        for rebuilt in mylist:
            print(rebuilt, file=sink)

    return mylist
Пример #3
0
    def combine_frag(self):
        """Build one compound per fragment template and record the new ones.

        Calls ``self.generate_frag_templates()`` first. Then, for every entry
        of ``self.potential_cpd_templates``, the fragment SMILES are sorted,
        parsed, and passed to ``BRICS.BRICSBuild``; only the first product of
        the builder is used. A product whose SMILES is not yet in
        ``self.templates`` is printed, appended to ``self.templates`` and
        written as one line to ``self.chembank``.
        """
        self.generate_frag_templates()
        print('Merging fragments together to generate compounds...')
        for template in self.potential_cpd_templates:
            building_blocks = [
                Chem.MolFromSmiles(piece) for piece in sorted(template)
            ]
            builder = BRICS.BRICSBuild(building_blocks)
            # Only the first molecule offered by the builder is kept.
            first_product = next(builder)
            product_smiles = Chem.MolToSmiles(first_product, True)

            if product_smiles in self.templates:
                continue  # already recorded

            print(product_smiles)
            self.templates.append(product_smiles)
            self.chembank.write(product_smiles + '\n')
Пример #4
0
    molecule for molecule in Chem.SDMolSupplier('logSdataset1290_2d.sdf')
    if molecule is not None
]
# molecules = [molecule for molecule in Chem.SmilesMolSupplier('logSdataset1290_2d.smi',
#                                                              delimiter='\t', titleLine=False)
#              if molecule is not None]

print(len(molecules))

# Collect the unique BRICS fragments of every input molecule.
fragments = set()
for molecule in molecules:
    fragments.update(BRICS.BRICSDecompose(molecule, minFragmentSize=2))
print(len(fragments))

# Recombine the fragments and write the built structures to an SD file.
# The loop stops once the count reaches max_number_of_generated_structures,
# which is defined outside this excerpt.
fragment_molecules = [Chem.MolFromSmiles(smiles) for smiles in fragments]
generated_structures = BRICS.BRICSBuild(fragment_molecules)
writer = Chem.SDWriter('generated_structures.sdf')
number_of_generated_structures = 0
for number_of_generated_structures, generated_structure in enumerate(
        generated_structures, start=1):
    generated_structure.UpdatePropertyCache(True)
    AllChem.Compute2DCoords(generated_structure)
    writer.write(generated_structure)
    if number_of_generated_structures >= max_number_of_generated_structures:
        break
writer.close()
frags = BRICS.BRICSDecompose(m)
print(frags)

# Parse every fragment SMILES and draw the fragments in a grid image.
mols = [Chem.MolFromSmiles(fsmi) for fsmi in frags]
img = Draw.MolsToGridImage(mols,
                           molsPerRow=3,
                           subImgSize=(200, 200),
                           legends=[''] * len(mols))
img.save('/drug_development/studyRdkit/st_rdcit/img/mol35.jpg')

# 4. Combining molecular fragments -- the BRICS method
# Recombine the fragments above with BRICS to generate molecules.
newms = list(BRICS.BRICSBuild(mols))
# The original author's run reported 76 new molecules (including a few
# chemically unreasonable structures).
print('新分子数:', len(newms))

# Visualize the first three structures.
mols = [newms[position] for position in range(3)]
img = Draw.MolsToGridImage(mols,
                           molsPerRow=3,
                           subImgSize=(200, 200),
                           legends=[''] * len(mols))
img.save('/drug_development/studyRdkit/st_rdcit/img/mol36.jpg')

# 5. Custom fragment generation
# Besides the automatic decomposition shown above, RDKit offers more flexible
# functions that cut user-defined bonds to produce fragments, for example
# cutting every bond between a ring atom and a non-ring atom.
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
Пример #6
0
# Number of structures generated per iteration.
number_of_generating_structures = 100
# Number of iterations; up to
# (number_of_generating_structures x number_of_iterations) structures are
# generated in total.
number_of_iterations = 10

# Load the dataset holding the SMILES of the seed structures.
dataset = pd.read_csv('molecules.csv', index_col=0)
molecules = [Chem.MolFromSmiles(smiles) for smiles in dataset.iloc[:, 0]]
print('種となる分子の数 :', len(molecules))

# Convert the seed molecules into fragments.
fragments = set()
for molecule in molecules:
    fragments.update(BRICS.BRICSDecompose(molecule, minFragmentSize=1))
print('生成されたフラグメントの数 :', len(fragments))

# Generate chemical structures.
generated_structures = []
for iteration in range(number_of_iterations):
    print(iteration + 1, '/', number_of_iterations)
    fragment_molecules = [
        Chem.MolFromSmiles(fragment) for fragment in fragments
    ]
    generated_structures_all = BRICS.BRICSBuild(fragment_molecules)
    for count, generated_structure in enumerate(generated_structures_all,
                                                start=1):
        generated_structure.UpdatePropertyCache(True)
        generated_structures.append(Chem.MolToSmiles(generated_structure))
        if count >= number_of_generating_structures:
            break

# Remove duplicate structures.
unique_structures = list(set(generated_structures))
generated_structures = pd.DataFrame(unique_structures, columns=['SMILES'])
# Save as a csv file. Note that an existing file with the same name is
# overwritten.
generated_structures.to_csv('generated_structures_brics.csv')
Пример #7
0
    TARGET = ['measured log solubility in mols per litre']
    df['mol'] = df['smiles'].apply(Chem.MolFromSmiles)

    fragments = set()
    for ix, mol in df[['mol']].iterrows():
        f = BRICS.BRICSDecompose(mol[0], returnMols=True)
        fragments.update(list(f))
    else:
        print(len(fragments))

    NUM_ITER = 100000
    from random import seed
    #--- starts single-process BRICS
    start = time()
    seed(20200315)
    builder = BRICS.BRICSBuild(fragments)
    with open('./results/mol_single.smi', 'w') as f:
        for i in range(NUM_ITER):
            m = next(builder)
            m.UpdatePropertyCache(strict=True)
            smi = Chem.MolToSmiles(m)
            f.write(smi + '\n')
    print('Elapsed time', stopwatch(start), '[mins]')

    #--- starts parallel BRICS
    start2 = time()
    c = 0
    seed(20200315)
    builder = BRICS.BRICSBuild(fragments)
    with Pool(4) as p:
        f = open('./results/mol_quad.smi', 'w')
Пример #8
0
def make_virtual_lib(method_name):
    """Build a virtual compound library with BRICS and save it as a CSV file.

    The input CSV path and column names are read from the module-level
    ``t_*`` settings objects via ``.get()``. Every molecule of the input is
    decomposed into BRICS fragments, the fragments are recombined into at
    most ``virtual_libraly_num`` molecules, one PNG per molecule is drawn,
    and the molecules are written to ``virtual.csv``. All output goes below
    ``parent_path / 'results' / <theme> / method_name / high_low /
    'brics_virtual'``.

    Parameters
    ----------
    method_name:
        name of the results sub-directory used for this run

    Returns
    -------
    csv_path:
        path of the written ``virtual.csv``
    """
    theme_name = t_theme_name.get()
    df_brics = pd.read_csv(t_csv_filepath.get())
    df_brics['mols'] = df_brics[t_smiles.get()].map(apply_molfromsmiles)
    # NOTE(review): dropna() drops a row when ANY column has a missing value,
    # not only 'mols' -- confirm this is intended.
    df_brics = df_brics.dropna()

    # Collect the unique BRICS fragments of all input molecules.
    allfrags = set()
    for mol in df_brics['mols']:
        allfrags.update(BRICS.BRICSDecompose(mol))

    print('the number of allfrags', len(allfrags))

    # Keep only fragments that could be converted back into molecules.
    parsed_components = [apply_molfromsmiles(f) for f in allfrags]
    allcomponents = [
        f for f in parsed_components if f is not None and f != ""
    ]
    print('len(Nonecomponents)', len(parsed_components) - len(allcomponents))

    builder = BRICS.BRICSBuild(allcomponents)
    print(builder)

    virtual_mols = []
    error_cnt = 0

    for i in range(virtual_libraly_num):
        try:
            m = next(builder)
            m.UpdatePropertyCache(strict=True)
        except StopIteration:
            # The builder is exhausted, so every remaining request would fail
            # the same way. Count them as errors (as before) and stop looping.
            error_cnt += virtual_libraly_num - i
            break
        except Exception:
            # A single bad structure must not abort the run, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print('error')
            error_cnt += 1
            continue
        virtual_mols.append(m)

    # Avoid a ZeroDivisionError when no molecules were requested.
    error_ratio = error_cnt / virtual_libraly_num if virtual_libraly_num else 0.0
    print('The total number : ', virtual_libraly_num)
    print('The number of error : ', error_cnt)
    print('The ratio of error : ', error_ratio)

    out_dir = (parent_path / 'results' / theme_name / method_name / high_low /
               'brics_virtual')

    for i, mol in enumerate(virtual_mols):
        Draw.MolToFile(
            mol,
            str(out_dir / 'molecular-structure' /
                ('tmp-' + str(i).zfill(6) + '.png')))

    # One row per molecule: running index, SMILES, and a placeholder task
    # value of 0.
    virtual_list = [[i, Chem.MolToSmiles(mol), 0]
                    for i, mol in enumerate(virtual_mols)]

    df_virtual = pd.DataFrame(
        virtual_list, columns=[t_id.get(),
                               t_smiles.get(),
                               t_task.get()])

    csv_path = out_dir / 'virtual.csv'
    df_virtual.to_csv(csv_path)
    return csv_path
if __name__ == "__main__":

    allfrags = set()
    for m in supp:
        pieces = BRICS.BRICSDecompose(m)
        allfrags.update(pieces)

    print len(allfrags)

    currtime = time()
    #make new molecules from fragments
    import random
    random.seed(127)
    fragms = [Chem.MolFromSmiles(x) for x in allfrags]
    ms = BRICS.BRICSBuild(fragms)

    prods = [ms.next() for x in range(10000)]
    #clean up generated molecules
    for prod in prods:
        prod.UpdatePropertyCache(strict=False)

    #srpin340 is a low affinity but selective SRPK1 inhibitor

    srpin340Mol = Chem.MolFromSmiles(
        'C1CCN(CC1)C2=C(C=C(C=C2)C(F)(F)F)NC(=O)C3=CC=NC=C3')
    srpin340fps = Generate.Gen2DFingerprint(srpin340Mol, sigFactory)

    #sphinx is a higher affinity but selective SRPK1 inhibitor

    sphinxMol = Chem.MolFromSmiles(