def get_sgs(record_dict, n_min, n_max, method="exhaustive"):
    """Collect subgraphs of ``record_dict["mol"]`` with the chosen method.

    Args:
        record_dict: mapping that holds the RDKit molecule under ``"mol"``.
        n_min: lower size bound forwarded to the subgraph search / filter.
        n_max: upper size bound forwarded to the subgraph search / filter.
        method: ``"exhaustive"`` enumerates every subgraph with RDKit,
            ``"RECAP"`` and ``"BRICS"`` map the respective fragments back
            onto the molecule.

    Returns:
        The result of ``FindAllSubgraphsOfLengthMToN`` for the exhaustive
        method, the result of ``subset_sgs_sizes`` for the fragment based
        methods, and ``None`` for any other ``method`` value.
    """
    if method == "exhaustive":
        return Chem.rdmolops.FindAllSubgraphsOfLengthMToN(
            record_dict["mol"], n_min, n_max)

    # Both fragment-based methods only differ in how the fragment molecules
    # are produced; everything after that is shared.
    if method == "RECAP":
        recap_tree = Recap.RecapDecompose(record_dict["mol"])
        pieces = (node.mol for node in recap_tree.GetAllChildren().values())
    elif method == "BRICS":
        brics_smiles = BRICS.BRICSDecompose(record_dict["mol"])
        pieces = (Chem.MolFromSmiles(smi) for smi in brics_smiles)
    else:
        return None

    matched = []
    for piece in pieces:
        # Strip the dummy atoms that mark the cut points before matching.
        stripped = Chem.DeleteSubstructs(piece, Chem.MolFromSmarts('[#0]'))
        bond_idxs = get_substructure_bond_idx(stripped, record_dict["mol"])
        if bond_idxs is not None:
            matched.append(bond_idxs)
    return subset_sgs_sizes([matched], n_min, n_max)
Exemplo n.º 2
0
def get_ddi_mask(atc42SMLES, med_voc):
    """Build a binary medication-by-fragment matrix from BRICS fragments.

    Args:
        atc42SMLES: mapping from a vocabulary word to an iterable of SMILES
            strings.
        med_voc: vocabulary object exposing ``idx2word`` (a dict); one matrix
            row is produced per entry, in the dict's iteration order.

    Returns:
        ``numpy.ndarray`` of shape ``(len(med_voc.idx2word), n_fragments)``
        whose cell ``[i, j]`` is 1 when fragment ``j`` occurs in any molecule
        of entry ``i`` and 0 otherwise. Columns follow the lexicographically
        sorted fragment SMILES so the layout is reproducible between runs.

    Raises:
        KeyError: if a vocabulary word is missing from ``atc42SMLES``.
    """
    # One fragment set per vocabulary entry.
    per_med_frags = []
    for word in med_voc.idx2word.values():
        frags = set()
        for smiles in atc42SMLES[word]:
            try:
                mol = Chem.MolFromSmiles(smiles)
                if mol is None:
                    # Unparseable SMILES contributes no fragments.
                    continue
                frags.update(BRICS.BRICSDecompose(mol))
            except Exception:
                # Best effort by design: a molecule that cannot be
                # decomposed is skipped instead of aborting the whole mask.
                continue
        per_med_frags.append(frags)

    all_frags = sorted(set().union(*per_med_frags))
    # Dict lookup instead of list.index keeps the fill step linear.
    column_of = {frag: col for col, frag in enumerate(all_frags)}

    ddi_matrix = np.zeros((len(med_voc.idx2word), len(all_frags)))
    for row, frags in enumerate(per_med_frags):
        for frag in frags:
            ddi_matrix[row, column_of[frag]] = 1
    return ddi_matrix
Exemplo n.º 3
0
def pair_frags(fname, out, method='Recap', is_mf=True):
    """Write (fragment combination, molecule) SMILES pairs to a TSV file.

    Every molecule in the ``Smiles`` column of ``fname`` is decomposed, the
    largest ``voc.n_frags`` non-redundant fragments are kept, and each
    fragment combination that is a substructure of the molecule is paired
    with the molecule's SMILES.

    Args:
        fname: path of a tab-separated table with a ``Smiles`` column.
        out: path of the tab-separated output file (columns ``Frags`` and
            ``Smiles``).
        method: ``'recap'`` (case-insensitive) selects RECAP leaves; any
            other value selects BRICS. The comparison used to be
            case-sensitive, so the default ``'Recap'`` silently fell through
            to BRICS.
        is_mf: if true, combinations of every size are emitted, otherwise
            only single fragments.
    """
    smiles = pd.read_table(fname).Smiles.dropna()
    use_recap = method.lower() == 'recap'
    # Dummy atom and hydrogen used to cap the cut points; loop-invariant.
    du, hy = Chem.MolFromSmiles('*'), Chem.MolFromSmiles('[H]')
    pairs = []
    for smile in tqdm(smiles):
        smile = utils.clean_mol(smile)
        mol = Chem.MolFromSmiles(smile)
        if use_recap:
            frags = np.array(sorted(Recap.RecapDecompose(mol).GetLeaves().keys()))
        else:
            frags = BRICS.BRICSDecompose(mol)
            # Drop the BRICS isotope labels so equal fragments collapse.
            frags = np.array(sorted({re.sub(r'\[\d+\*\]', '*', f) for f in frags}))
        if len(frags) == 1:
            continue
        subs = np.array([Chem.MolFromSmiles(f) for f in frags])
        subs = np.array([Chem.RemoveHs(Chem.ReplaceSubstructs(f, du, hy, replaceAll=True)[0]) for f in subs])
        subs = np.array([m for m in subs if m.GetNumAtoms() > 1])
        # Keep only fragments that are not contained in another fragment
        # (each one matches nothing but itself).
        match = np.array([[m.HasSubstructMatch(f) for f in subs] for m in subs])
        frags = subs[match.sum(axis=0) == 1]
        frags = sorted(frags, key=lambda x: -x.GetNumAtoms())[:voc.n_frags]
        frags = [Chem.MolToSmiles(Chem.RemoveHs(f)) for f in frags]

        max_comb = len(frags) if is_mf else 1
        for ix in range(1, max_comb + 1):
            for comb in combinations(frags, ix):
                frag_smiles = '.'.join(comb)
                if len(frag_smiles) > len(smile):
                    continue
                if mol.HasSubstructMatch(Chem.MolFromSmarts(frag_smiles)):
                    pairs.append([frag_smiles, smile])
    df = pd.DataFrame(pairs, columns=['Frags', 'Smiles'])
    df.to_csv(out, sep='\t', index=False)
Exemplo n.º 4
0
def brics(
    mol: Chem.Mol,
    singlepass: bool = True,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Decompose a molecule with BRICS and optionally clean up the fragments.

    Args:
        mol: a molecule.
        singlepass: forwarded to `BRICSDecompose` as `singlePass`.
        remove_parent: drop the first fragment of the list, which is
            treated as the parent molecule.
        sanitize: Whether to run `dm.sanitize_mol` on every fragment.
        fix: Whether to run `dm.fix_mol` on every fragment.

    Returns:
        The list of fragments, without any entry that ended up as `None`.
    """
    fragments = list(
        BRICS.BRICSDecompose(mol, returnMols=True, singlePass=singlepass))

    # Fixing runs before sanitizing; the parent is removed afterwards so the
    # positional drop always refers to the same entry.
    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))
    if remove_parent:
        del fragments[0]

    return [frag for frag in fragments if frag is not None]
def fragmentate_chemicals(SMILES_list, return_only_fragments=True):
    """
    fragmentate chemicals by BRICs algorithm

    Parameters
    ---------------------
    SMILES_list: list of string
       list of smiles
    return_only_fragments: bool
       if true, return only the SMILES that contain a dummy atom ("*"),
       i.e. drop entries that were not cut at all

    Returns
    ----------------------
    fragmentated_smiles: list of string
        list of unique fragmentated chemicals (order is not defined)
    """

    mols = [Chem.MolFromSmiles(SMILES) for SMILES in SMILES_list]

    # Merge the per-molecule fragment collections straight into one set
    # instead of building a nested list and flattening it afterwards.
    unique_fragments = set()
    for mol in tqdm(mols):
        unique_fragments.update(BRICS.BRICSDecompose(mol))
    fragmentated_smiles = list(unique_fragments)

    if return_only_fragments:
        # Membership test rather than ``find("*") > 0``: the latter would
        # wrongly discard a SMILES that starts with the dummy atom.
        fragmentated_smiles = [
            smi for smi in fragmentated_smiles if "*" in smi
        ]

    return fragmentated_smiles
Exemplo n.º 6
0
def fragmenter(thefile):
    """Recombine the BRICS fragments of every molecule listed in a file.

    Each line of ``thefile`` is stripped and parsed as a SMILES string. The
    molecule is decomposed with BRICS and its fragments are rebuilt with
    ``BRICSBuild``. All generated SMILES are written, one per line, to
    ``output.txt`` in the current working directory.

    Args:
        thefile: path of a text file with one SMILES string per line.

    Returns:
        list of str: the generated SMILES, in generation order.
    """
    # Drop a stale result first; a missing file is not an error.
    try:
        os.remove('output.txt')
    except FileNotFoundError:
        pass

    with open(thefile) as handle:
        smiles_lines = [line.strip() for line in handle]

    mylist = []
    for smiles in smiles_lines:
        base = Chem.MolFromSmiles(smiles)
        catalog = BRICS.BRICSDecompose(base)
        mcat = [Chem.MolFromSmiles(x) for x in catalog]
        for built in BRICS.BRICSBuild(mcat):
            mylist.append(Chem.MolToSmiles(built))

    with open('output.txt', 'w+') as out_file:
        for smi in mylist:
            print(smi, file=out_file)

    return mylist
Exemplo n.º 7
0
Arquivo: ChemX.py Projeto: zinph/ChemX
    def fragment_database(self):
        """Create the fragment catalog and the BRICS fragment database.

        Sets ``self.fcat`` to a ``FragCatalog`` built from the RDKit
        functional-group file, and ``self.synthetic_frag_database`` to the
        list of unique BRICS fragments of every molecule whose SMILES is in
        the second comma-separated column of
        ``self.directory + 'data/smiles_database.csv'`` (first row skipped
        as header).
        """
        # NOTE(review): hard-coded, machine-specific RDKit data path.
        fName = 'C:/RDKit_2017_03_2/Data/FunctionalGroups.txt'
        fparams = FragmentCatalog.FragCatParams(1, 6, fName)
        self.fcat = FragmentCatalog.FragCatalog(fparams)

        zinc_file = 'data/smiles_database.csv'
        # Context manager so the file handle is closed deterministically.
        with open(self.directory + zinc_file, 'r') as handle:
            rows = handle.read().splitlines()
        # Skip the header before splitting, so a header without a comma no
        # longer raises.
        zinc_suppl = [row.split(',')[1] for row in rows[1:]]
        zinc_ms = [Chem.MolFromSmiles(smi) for smi in zinc_suppl]

        unique_fragments = set()
        for mol in zinc_ms:
            unique_fragments.update(BRICS.BRICSDecompose(mol))
        self.synthetic_frag_database = list(unique_fragments)
Exemplo n.º 8
0
    def get_scaffolds(self,
                      scaffolding_method=ScaffoldingMethod.MurckoScaffold):
        """Compute deemed scaffolds for a given compound.

        Murcko-type scaffolds are mapped back onto ``self.mol`` through the
        ``name`` atom property and stored in ``self._scaffolds`` under the
        method name. BRICS fragments are matched against ``self.mol_no_h``
        and stored under ``<method name>_<fragment SMILES>`` keys.

        Args:
            scaffolding_method (ScaffoldingMethod, optional):
                Defaults to MurckoScaffold. Scaffolding method to use

        Returns:
            list[rdkit.Chem.rdchem.Mol]: Scaffolds found in the component.

        Raises:
            CCDUtilsError: If RDKit raises a RuntimeError or ValueError
                while the scaffolds are computed.
        """
        try:
            scaffolds = []

            if scaffolding_method == ScaffoldingMethod.MurckoScaffold:
                scaffolds = [(MurckoScaffold.GetScaffoldForMol(self.mol_no_h))]

            elif scaffolding_method == ScaffoldingMethod.MurckoGeneric:
                scaffolds = [
                    (MurckoScaffold.MakeScaffoldGeneric(self.mol_no_h))
                ]

            elif scaffolding_method == ScaffoldingMethod.Brics:
                scaffolds = BRICS.BRICSDecompose(self.mol_no_h)
                brics_smiles = [
                    re.sub(r"(\[[0-9]*\*\])", "[H]", i) for i in scaffolds
                ]  # replace dummy atoms with H's to get matches https://sourceforge.net/p/rdkit/mailman/message/35261974/

                # RemoveHs returns a new molecule instead of editing its
                # argument, so the result has to be kept.
                brics_mols = [
                    rdkit.Chem.RemoveHs(rdkit.Chem.MolFromSmiles(x))
                    for x in brics_smiles
                ]

                brics_hits = [
                    self.mol_no_h.GetSubstructMatches(i) for i in brics_mols
                ]

                name = scaffolding_method.name
                source = 'RDKit scaffolds'
                for brics_mol, brics_hit in zip(brics_mols, brics_hits):
                    smiles = rdkit.Chem.MolToSmiles(brics_mol)
                    key = f'{name}_{smiles}'
                    brics_hit = conversions.listit(brics_hit)

                    if not smiles:
                        continue

                    # The first fragment registered under a key wins.
                    if key not in self._scaffolds:
                        self._scaffolds[key] = SubstructureMapping(
                            name, smiles, source, brics_hit)

                return brics_mols

            for s in scaffolds:
                scaffold_atom_names = [
                    atom.GetProp('name') for atom in s.GetAtoms()
                ]
                # Translate scaffold atoms into indices of the full molecule
                # by looking up the first atom carrying the same name.
                mapping = []
                for at_name in scaffold_atom_names:
                    idx = [
                        atom.GetIdx() for atom in self.mol.GetAtoms()
                        if atom.GetProp('name') == at_name
                    ][0]
                    mapping.append(idx)

                smiles = rdkit.Chem.MolToSmiles(s)
                name = scaffolding_method.name
                source = 'RDKit scaffolds'

                if not smiles:
                    continue

                if name in self._scaffolds:
                    self._scaffolds[name].mappings.append(mapping)
                else:
                    self._scaffolds[name] = SubstructureMapping(
                        name, smiles, source, [mapping])

            return scaffolds

        except (RuntimeError, ValueError) as err:
            raise CCDUtilsError(
                f'Computing scaffolds using method {scaffolding_method.name} failed.'
            ) from err
Exemplo n.º 9
0
# Command-line BRICS fragmenter: reads molecules from an SD file, collects
# the unique BRICS fragments and writes them, with 2D coordinates, to a new
# SD file. Written so that it runs unchanged on Python 2 and Python 3.
print('Argument List: ' + str(sys.argv))

if len(sys.argv) == 3:
    inp_sdf_file = sys.argv[1]
    out_sdf_file = sys.argv[2]
else:
    sys.exit("Usage: fragmenter.py infile outfile")


try:
    suppl = Chem.SDMolSupplier(inp_sdf_file)
    catalog = set()
    for mol in suppl:
        # The supplier yields None for records it cannot parse.
        if mol is None:
            continue
        print(mol.GetNumAtoms())
        pieces = BRICS.BRICSDecompose(mol)
        catalog.update(pieces)

    print('Generated: %d fragments.' % len(catalog))
    ofile = Chem.SDWriter(out_sdf_file)
    try:
        for frg in catalog:
            cmol = Chem.MolFromSmiles(frg)
            AllChem.Compute2DCoords(cmol)
            ofile.write(cmol)
    finally:
        # Close explicitly so buffered records are flushed to disk.
        ofile.close()

except IOError:
    sys.stderr.write("Input file could not be opened\n")
    sys.exit(1)
Exemplo n.º 10
0
# Functional-group table: one group per line, tab-separated as
# <label> <SMARTS> <description>; runs of tabs count as one separator.
fgData = """AcidChloride	C(=O)Cl	Acid Chloride
CarboxylicAcid	C(=O)[O;H,-]	Carboxylic acid
SulfonylChloride	[$(S-!@[#6])](=O)(=O)(Cl)	Sulfonyl Chloride
Amine				[N;!H0;$(N-[#6]);!$(N-[!#6]);!$(N-C=[O,N,S])]	Amine
BoronicAcid			[$(B-!@[#6])](O)(O)		Boronic Acid
Isocyanate			[$(N-!@[#6])](=!@C=!@O)	Isocyanate
Alcohol				[O;H1;$(O-!@[#6;!$(C=!@[O,N,S])])]	Alcohol
Aldehyde			[CH;D2;!$(C-[!#6])]=O	Aldehyde
Halogen				[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]	Halogen"""
fglines = [re.split(r'\t+', row.strip()) for row in fgData.split('\n')]
hLabels = [fields[0] for fields in fglines]
patts = [Chem.MolFromSmarts(fields[1]) for fields in fglines]

# Header: the input's own columns, one column per functional group, and a
# final flag telling whether BRICS found a bond to cut.
labels = inLs[0].strip().split(delim) + hLabels + ['HasBRICSBond?']
print(delim.join(labels))
for line in inLs[1:]:
    splitL = line.strip().split(delim)
    # The SMILES is read from the second column of each input row.
    mol = Chem.MolFromSmiles(splitL[1])
    splitL.extend(
        'True' if mol.HasSubstructMatch(patt) else 'False' for patt in patts)

    # More than one BRICS piece means at least one bond was cut.
    bricsRes = BRICS.BRICSDecompose(mol)
    splitL.append('True' if len(bricsRes) > 1 else 'False')
    print(delim.join(splitL))
# ['*C(=O)CC', '*CCOC(=O)CC', '*CCOc1ccccc1', '*OCCOc1ccccc1', '*c1ccccc1']
# # 3.2 The BRICS method
# RDKit offers a second way of cutting a molecule into fragments: BRICS.
# BRICS breaks bonds that are synthetically accessible, so what it returns
# are the distinct fragments of the molecule; the dummy atoms (*) record how
# the molecule was cut.

# BRICS decomposition of the molecule shown in the figure below
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(
    m,
    "/drug_development/studyRdkit/st_rdcit/img/mol34.jpg",
    size=(600, 400),
    legend='zanubrutinib(C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N)',
)

frags = BRICS.BRICSDecompose(m)
print(frags)

# Parse every fragment SMILES back into a molecule for drawing/building.
mols = [Chem.MolFromSmiles(fsmi) for fsmi in frags]

img = Draw.MolsToGridImage(
    mols, molsPerRow=3, subImgSize=(200, 200), legends=[''] * len(mols))
img.save('/drug_development/studyRdkit/st_rdcit/img/mol35.jpg')
# 4. Combining molecular fragments -- the BRICS method
# Generate new molecules by BRICS recombination of the fragments above
newms = list(BRICS.BRICSBuild(mols))
Exemplo n.º 12
0
# In[ ]:




# In[8]:

from rdkit.Chem import AllChem
from rdkit.Chem import BRICS
from rdkit.Chem import rdMolDescriptors


# In[21]:

# Canonical SMILES of ``mol`` and its BRICS decomposition; ``mol`` is
# defined in a cell that is not part of this excerpt.
smiles = Chem.MolToSmiles(mol)
frag = BRICS.BRICSDecompose(mol)
print(smiles)


# In[20]:

# Number of BRICS fragments found.
print(len(frag))


# In[11]:

# The fragments themselves.
print(frag)


# In[12]:
Exemplo n.º 13
0
Arquivo: ChemX.py Projeto: zinph/ChemX
 def fragment_target(self):
     """Store the BRICS fragments of ``self.target`` as a list.

     ``self.target`` is parsed as a SMILES string; the result is written to
     ``self.target_fragments``.
     """
     target_mol = Chem.MolFromSmiles(self.target)
     brics_pieces = BRICS.BRICSDecompose(target_mol)
     self.target_fragments = list(brics_pieces)
Exemplo n.º 14
0
    multiprocessing.Pool.imapに渡す関数。
    Molオブジェクトの更新をしたあとでSMILESに変換。
    """
    mol.UpdatePropertyCache(strict=True)
    return Chem.MolToSmiles(mol)


if __name__ == '__main__':
    # Load the dataset and parse every SMILES string into an RDKit molecule.
    df = pd.read_csv('./data/delaney-solubility/delaney-processed.csv',
                     index_col=0)
    TARGET = ['measured log solubility in mols per litre']
    df['mol'] = df['smiles'].apply(Chem.MolFromSmiles)

    # Pool the BRICS fragments of all molecules; ``returnMols=True`` asks
    # for fragment molecules rather than SMILES strings.
    # NOTE(review): presumably Mol objects are not de-duplicated by the set
    # the way SMILES strings would be -- confirm.
    fragments = set()
    for ix, mol in df[['mol']].iterrows():
        f = BRICS.BRICSDecompose(mol[0], returnMols=True)
        fragments.update(list(f))
    else:
        # The loop contains no ``break``, so this branch always runs once
        # the iteration is finished.
        print(len(fragments))

    NUM_ITER = 100000
    from random import seed
    # Timed, seeded BRICS build.
    # NOTE(review): the original comment called this "parallel", but the
    # visible code draws molecules one at a time from a single builder.
    start = time()
    seed(20200315)
    builder = BRICS.BRICSBuild(fragments)
    with open('./results/mol_single.smi', 'w') as f:
        for i in range(NUM_ITER):
            # ``next`` raises StopIteration if the builder runs out before
            # NUM_ITER molecules were produced.
            m = next(builder)
            m.UpdatePropertyCache(strict=True)
            smi = Chem.MolToSmiles(m)
Exemplo n.º 15
0
def make_virtual_lib(method_name):
    """Generate a virtual library by BRICS recombination and save it.

    The molecules listed in the CSV selected through the module-level
    ``t_*`` settings are decomposed with BRICS, the fragments are recombined
    with ``BRICSBuild``, and up to ``virtual_libraly_num`` generated
    molecules are drawn as PNG files and written to ``virtual.csv`` below
    ``parent_path / 'results' / theme_name / method_name / high_low /
    'brics_virtual'``.

    Args:
        method_name: name of the sub-directory the results are stored under.

    Returns:
        Path of the written ``virtual.csv``.
    """
    theme_name = t_theme_name.get()
    df_brics = pd.read_csv(t_csv_filepath.get())
    df_brics['mols'] = df_brics[t_smiles.get()].map(apply_molfromsmiles)
    df_brics = df_brics.dropna()

    allfrags = set()
    for mol in df_brics['mols']:
        allfrags.update(BRICS.BRICSDecompose(mol))

    print('the number of allfrags', len(allfrags))

    # Fragments that could not be turned into molecules are reported and
    # then left out of the building step.
    parsed = [apply_molfromsmiles(f) for f in allfrags]
    allcomponents = [f for f in parsed if f is not None and f != ""]
    print('len(Nonecomponents)', len(parsed) - len(allcomponents))

    builder = BRICS.BRICSBuild(allcomponents)
    print(builder)

    virtual_mols = []
    error_cnt = 0

    for i in range(virtual_libraly_num):
        try:
            m = next(builder)
            m.UpdatePropertyCache(strict=True)
        except StopIteration:
            # The builder is exhausted: every remaining request would fail
            # the same way, so count them as errors and stop asking.
            error_cnt += virtual_libraly_num - i
            break
        except Exception:
            print('error')
            error_cnt += 1
        else:
            virtual_mols.append(m)

    # Avoid a ZeroDivisionError when no molecules were requested.
    error_ratio = error_cnt / virtual_libraly_num if virtual_libraly_num else 0.0
    print('The total number : ', virtual_libraly_num)
    print('The number of error : ', error_cnt)
    print('The ratio of error : ', error_ratio)

    out_dir = (parent_path / 'results' / theme_name / method_name / high_low /
               'brics_virtual')

    for i, mol in enumerate(virtual_mols):
        Draw.MolToFile(
            mol,
            str(out_dir / 'molecular-structure' /
                ('tmp-' + str(i).zfill(6) + '.png')))

    # One row per generated molecule: running id, SMILES and a 0 placeholder
    # in the task column.
    virtual_list = [[i, Chem.MolToSmiles(mol), 0]
                    for i, mol in enumerate(virtual_mols)]

    df_virtual = pd.DataFrame(
        virtual_list, columns=[t_id.get(),
                               t_smiles.get(),
                               t_task.get()])

    csv_path = out_dir / 'virtual.csv'
    df_virtual.to_csv(csv_path)
    return csv_path
Exemplo n.º 16
0
        if (len(Chem.MolToSmiles(rm3).split(".")) > 1):
            count1 = count1 + 1
            [functional_list.append(i) for i in Chem.MolToSmiles(rm3).split(".")]
        else: 
            if(len(Chem.MolToSmiles(rm2).split(".")) > 1):
                count2 = count2 + 1
                [functional_list.append(i) for i in Chem.MolToSmiles(rm2).split(".")]

            else:
                if(len(Chem.MolToSmiles(rm).split(".")) > 1):
                    count3 = count3 + 1
                    [functional_list.append(i) for i in Chem.MolToSmiles(rm).split(".")]
                    
                else:
                    pieces_smi = Chem.BRICS.BRICSDecompose(temp)
                    pieces = [Chem.MolFromSmiles(x) for x in BRICS.BRICSDecompose(temp)]
                    count_fail_no_match += 1
                    print(can_smi)

                    
    except:
        count_fail += 1
# Summary: the distinct functional groups that were found, how many there
# are, and the per-outcome counters.
found_groups = list(set(functional_list))  # retrieve only the found functional groups
print(found_groups)
print(len(found_groups))
print(count1, count2, count3)
print(f"total processed: {count1 + count2 + count3}")
print(f"no substructured: {count_fail_no_match}")
print(f"fail processed: {count_fail}")


Exemplo n.º 17
0
# Required number of '*' characters per fragment. With 0, the loop below
# flags every fragment that contains at least one '*'.
number_of_free_bonds = 0  # The number of free bond(s) restricted. If 0, all fragments are saved

# load molecules (entries the supplier could not parse come back as None
# and are dropped)
molecules = [
    molecule for molecule in Chem.SmilesMolSupplier(
        'logS_molecules_1290.smi', delimiter='\t', titleLine=False)
    if molecule is not None
]
# molecules = [molecule for molecule in Chem.SDMolSupplier('logSdataset1290_2d.sdf') if molecule is not None]
print('number of molecules :', len(molecules))

# generate fragments: pool the BRICS fragments of all molecules in one set
fragments = set()
for molecule in molecules:
    fragment = BRICS.BRICSDecompose(molecule, minFragmentSize=2)
    fragments.update(fragment)

# select and arrange fragments
new_fragments = []
number_of_generated_structures = 0
for fragment in fragments:
    # Positions of the '*' characters in the fragment string.
    # NOTE(review): the empty-list assignment is immediately overwritten.
    free_bond = []
    free_bond = [index for index, atom in enumerate(fragment) if atom == '*']
    # ``flag`` marks fragments whose '*' count satisfies the setting above;
    # how it is used is not visible in this excerpt.
    flag = False
    if number_of_free_bonds == 0:
        if len(free_bond):
            flag = True
    else:
        if len(free_bond) == number_of_free_bonds:
            flag = True
Exemplo n.º 18
0
def update_atom_position(mol1, mol2):
    """Place a copy of ``mol2`` onto ``mol1`` along their common substructure.

    The maximum common substructure (MCS) of both molecules is determined,
    the matching atoms of the copy are moved onto the coordinates of their
    counterparts in ``mol1`` and, if that moved them by more than 0.1 in
    total, the remaining atoms are relaxed with an MMFF minimization while
    the MCS atoms stay fixed.

    Args:
        mol1: template molecule with a conformer; it is not modified.
        mol2: molecule with a conformer; it is copied, not modified.

    Returns:
        The repositioned copy of ``mol2``.

    Raises:
        IndexError: if the MCS query does not match one of the molecules.
    """
    mol_copy = Chem.Mol(mol2)

    # This is a work-around to get a seedSmarts for the FMCS algorithm
    # and prevent the occasional hanging of FMCS
    # Might be unnecessary with future versions of rdkit
    core_frags = BRICS.BRICSDecompose(Chem.RemoveHs(mol1))
    frag_smarts = []
    for frag in core_frags:
        # Turn BRICS labels such as "[16*]" into plain dummies. The pattern
        # is a raw string with escaped brackets; the previous form relied on
        # an accidental character class and an invalid "\*" string escape.
        smi_str = re.sub(r'\[[1-9][0-9]{0,2}\*\]', '[*]', frag)
        # Generalize the query: any bond type, dummy atoms match anything.
        frag_smarts.append(
            Chem.MolToSmarts(Chem.MolFromSmiles(smi_str)).replace(
                ':', '~').replace('-', '~').replace('=',
                                                    '~').replace('#0', '*'))

    seed = None
    for query in frag_smarts:
        if mol_copy.HasSubstructMatch(Chem.MolFromSmarts(query)):
            seed = query
            break

    # Now get MCSS
    # NOTE(review): seedSmarts is a string argument; None is presumably
    # rejected by RDKit, so the keyword is only passed when a seed exists.
    if seed is None:
        res = rdFMCS.FindMCS([mol1, mol_copy])
    else:
        res = rdFMCS.FindMCS([mol1, mol_copy], seedSmarts=seed)
    mcs_q = Chem.MolFromSmarts(res.smartsString)

    # Get atom IDs
    template = mol1.GetSubstructMatches(mcs_q)[0]
    hit_atom = mol_copy.GetSubstructMatches(mcs_q)[0]

    # Update XYZ coords of MCSS
    template_conf = mol1.GetConformer()
    copy_conf = mol_copy.GetConformer()
    running_distance = 0
    for template_idx, hit_idx in zip(template, hit_atom):
        origin = template_conf.GetAtomPosition(template_idx)
        pos = copy_conf.GetAtomPosition(hit_idx)

        p1 = np.array([origin.x, origin.y, origin.z])
        p2 = np.array([pos.x, pos.y, pos.z])

        sq_dist = np.sum((p1 - p2)**2, axis=0)
        running_distance += np.sqrt(sq_dist)

        copy_conf.SetAtomPosition(hit_idx, (origin.x, origin.y, origin.z))

    if running_distance > 0.1:
        # relax atoms outside MCSS
        fixed_atoms = set(hit_atom)
        res_atom = [
            atom.GetIdx() for atom in mol_copy.GetAtoms()
            if atom.GetIdx() not in fixed_atoms
        ]

        # do minimization
        mp = ChemicalForceFields.MMFFGetMoleculeProperties(mol_copy)
        ff = ChemicalForceFields.MMFFGetMoleculeForceField(mol_copy, mp)

        for val in hit_atom:
            ff.AddFixedPoint(val)
        for val in res_atom:
            ff.MMFFAddPositionConstraint(val, 1, 5)

        ff.Minimize()

    return mol_copy