Exemplo n.º 1
0
def GetScaffold(mol, generic_framework=False):
    """
    #################################################################
    Calculate the Murcko scaffold of a molecule as a SMILES string.

    Usage:

        result = GetScaffold(mol)

        Input: mol is an RDKit molecule object.

        generic_framework is a boolean flag. When it is truthy, the
        scaffold is additionally passed through MakeScaffoldGeneric and
        the SMILES of that generic framework is returned instead.

        Output: result is a string form of the molecule's scaffold.
    #################################################################
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    # Plain truthiness instead of "== True": same result for real booleans,
    # and no surprise for other truthy flags.
    if generic_framework:
        scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(scaffold)
Exemplo n.º 2
0
 def __call__(self, smiles, radius=3, bit_len=4096, scaffold=0):
     """Compute Morgan bit-vector fingerprints for a sequence of SMILES.

     Args:
         smiles: sized iterable of SMILES strings.
         radius: radius passed to ``GetMorganFingerprintAsBitVect``.
         bit_len: number of bits per fingerprint (``nBits``).
         scaffold: 0 fingerprints the parsed molecule itself, 1 fingerprints
             ``GetScaffoldForMol(mol)``, 2 fingerprints
             ``MakeScaffoldGeneric(mol)``.

     Returns:
         A ``pandas.DataFrame`` with one row per input SMILES and ``bit_len``
         columns. Rows whose computation failed are all zeros; the exception
         is appended to ``self.builder.errors``.
     """
     fps = np.zeros((len(smiles), bit_len))
     for i, smile in enumerate(smiles):
         arr = np.zeros((1, ))
         try:
             # Parse inside the try block so that a non-string entry is
             # recorded as an error like any other failure.
             mol = Chem.MolFromSmiles(smile)
             # Validate the parse result BEFORE any scaffold reduction;
             # otherwise an invalid SMILES makes the scaffold call fail first
             # and the descriptive message below is never produced.
             if not mol:
                 raise Exception(
                     f'Failed to calculate Morgan fingerprint (creating RDKit instance from smiles failed: {smile})'
                 )
             if scaffold == 1:
                 mol = MurckoScaffold.GetScaffoldForMol(mol)
             elif scaffold == 2:
                 mol = MurckoScaffold.MakeScaffoldGeneric(mol)
             fp = AllChem.GetMorganFingerprintAsBitVect(mol,
                                                        radius,
                                                        nBits=bit_len)
             DataStructs.ConvertToNumpyArray(fp, arr)
             fps[i, :] = arr
         except Exception as exp:
             # TODO: use a more specific exception related to descriptor errors
             self.builder.errors.append(exp)
             fps[i, :] = 0
     return pd.DataFrame(fps)
Exemplo n.º 3
0
def extract_murcko_scaffolds(mols, verbose=True):
    """ Extract Bemis-Murcko scaffolds and generic frameworks from molecules.

    :param mols: iterable of molecules in rdkit mol format; ``None`` entries
        are skipped and produce no output entry.
    :param verbose: if True, print how many scaffolds were extracted and the
        elapsed time.
    :return: tuple ``(scaf, generic_scaf)`` of two equally long lists holding
        the scaffold SMILES and the generic framework SMILES. When a
        ``ValueError`` is raised for a molecule, both lists receive the
        placeholder ``['error']`` for it.
    """
    scaf = []
    generic_scaf = []
    start = time.time()
    for mol in mols:
        if mol is None:
            continue
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            fw = MurckoScaffold.MakeScaffoldGeneric(core)
            # Compute both SMILES before appending anything so that a failure
            # half-way cannot leave the two output lists misaligned.
            core_smiles = Chem.MolToSmiles(core, isomericSmiles=True)
            fw_smiles = Chem.MolToSmiles(fw, isomericSmiles=True)
        except ValueError as e:
            print(e)
            # Placeholder kept as a one-element list for backward compatibility.
            core_smiles = ['error']
            fw_smiles = ['error']
        scaf.append(core_smiles)
        generic_scaf.append(fw_smiles)
    if verbose:
        print('Extracted', len(scaf), 'scaffolds in',
              time.time() - start, 'seconds.')
    return scaf, generic_scaf
Exemplo n.º 4
0
def get_scaffolds(compounds):
    """Annotate each compound record with its scaffold SMILES, in place.

    Every record must provide a ``"canonical_smiles"`` entry. Two entries are
    written back to it: ``"scaffold"`` (SMILES of the Murcko scaffold) and
    ``"generic_scaffold"`` (SMILES of that scaffold after
    ``MakeScaffoldGeneric``). The same ``compounds`` object is returned.
    """
    for idx, record in enumerate(compounds):
        parsed = Chem.MolFromSmiles(record["canonical_smiles"])
        murcko = MurckoScaffold.GetScaffoldForMol(parsed)
        compounds[idx]["scaffold"] = Chem.MolToSmiles(murcko)
        generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
        compounds[idx]["generic_scaffold"] = Chem.MolToSmiles(generic)
    return compounds
Exemplo n.º 5
0
 def getGenericScaffold(self, smile):
     """Return the non-isomeric SMILES of the generic Murcko scaffold.

     The SMILES is parsed with ``Chem.MolFromSmiles``; when parsing yields a
     falsy result an empty string is returned instead.
     """
     parsed = Chem.MolFromSmiles(smile)
     if not parsed:
         return ''
     murcko = MurckoScaffold.GetScaffoldForMol(parsed)
     generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
     return Chem.MolToSmiles(generic, isomericSmiles=False)
Exemplo n.º 6
0
def GetMurckoScaffold(mol):
    """Return the generic scaffold of ``mol`` as an RDKit molecule object.

    ``mol`` is an RDKit ``RWMol`` or ``Mol``. It is handed directly to
    ``MakeScaffoldGeneric``; ``GetScaffoldForMol`` is not called here.
    """
    from rdkit.Chem.Scaffolds.MurckoScaffold import MakeScaffoldGeneric

    return MakeScaffoldGeneric(mol)
Exemplo n.º 7
0
def AddMurckoToFrame(frame, molCol = 'ROMol', MurckoCol = 'Murcko_SMILES', Generic = False):
  '''
  Write the SMILES of each row's Murcko scaffold into column MurckoCol of the
  pandas DataFrame (modified in place, nothing is returned). The molecule is
  read from column molCol. With Generic set to true the scaffold is made
  generic first, so the column holds the SMILES of the generic framework.
  '''
  def scaffold_smiles(row):
    core = MurckoScaffold.GetScaffoldForMol(row[molCol])
    if Generic:
      core = MurckoScaffold.MakeScaffoldGeneric(core)
    return MolToSmiles(core)

  frame[MurckoCol] = frame.apply(scaffold_smiles, axis=1)
Exemplo n.º 8
0
def _getscaffold(mol, stype='Murcko'):
    """
    *Internal used only*

    Return the canonical, non-isomeric SMILES of the scaffold of ``mol``.
    ``stype`` must be ``'Murcko'`` (plain Murcko scaffold) or ``'Carbon'``
    (the scaffold after ``MakeScaffoldGeneric``); any other value fails the
    assertion below.
    """
    assert stype in ('Murcko', 'Carbon'), \
        'scaffold type must be a member of "Murcko" or "Carbon"'
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    if stype != 'Murcko':
        scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(scaffold, isomericSmiles=False, canonical=True)
 def _calculate_scaffold(self, smile):
     """Return the non-isomeric SMILES of the generic Murcko scaffold.

     An empty string is returned when the SMILES cannot be parsed or when
     the scaffold computation raises ``ValueError``.
     """
     parsed = Chem.MolFromSmiles(smile)
     if not parsed:
         return ''
     try:
         murcko = MurckoScaffold.GetScaffoldForMol(parsed)
         generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
         return Chem.MolToSmiles(generic, isomericSmiles=False)
     except ValueError:
         return ''
Exemplo n.º 10
0
def computeFramwork(df):
    """Add Murcko and carbon-skeleton SMILES columns to ``df``.

    SMILES are read from column ``'can'``. Two columns are written to the
    same DataFrame: ``'murckos'`` (Murcko scaffold SMILES) and ``'carbons'``
    (SMILES of the scaffold after ``MakeScaffoldGeneric``). The modified
    ``df`` is returned.
    """
    murcko_smiles = []
    carbon_smiles = []
    for smiles in df['can']:
        parsed = Chem.MolFromSmiles(smiles)
        murcko_core = MurckoScaffold.GetScaffoldForMol(parsed)
        carbon_core = MurckoScaffold.MakeScaffoldGeneric(murcko_core)
        # Convert the Murcko scaffold and the carbon skeleton to SMILES.
        murcko_smiles.append(Chem.MolToSmiles(murcko_core))
        carbon_smiles.append(Chem.MolToSmiles(carbon_core))
    df['murckos'] = murcko_smiles
    df['carbons'] = carbon_smiles
    return df
Exemplo n.º 11
0
    def __init__(self, smi):
        """Parse ``smi`` and precompute scaffold-derived attributes.

        Args:
            smi: SMILES string; it is stored and parsed with
                ``Chem.MolFromSmiles``.
        """
        self._smi = smi
        self._mol = Chem.MolFromSmiles(smi)
        # Order matters here: the molecule is made generic FIRST and the
        # Murcko scaffold is then taken from that generic molecule.
        self._scaf = MurckoScaffold.GetScaffoldForMol(
            MurckoScaffold.MakeScaffoldGeneric(self._mol))
        self._scaf_atoms = self._scaf.GetAtoms()
        self._scaf_bonds = self._scaf.GetBonds()
        self._scaf_smi = Chem.MolToSmiles(self._scaf)
        # Both helpers are methods of this class defined outside this view;
        # they run after the scaffold attributes above have been set.
        self._ring_system = self.GetRingSystemsofscaf()
        self._ring_system_count = self.count_ring_systems()
        # NOTE(review): presumably bin boundaries used together with
        # _chain_binning below -- confirm against the methods that read it.
        self._bin_values = [1, 2, 3, 4, 7]

        # Linkers: [direct bond between rings, linear chain between rings, branched chain between rings]
        self._linkers = [0, 0, 0]
        self._chain_binning = [0, 0, 0, 0, 0]
Exemplo n.º 12
0
def get_murcko_scaffold(mol, generic=False):
    """Compute the Murcko scaffold of a molecule.

    Parameters
    ----------
    mol (Chem.Mol): an rdkit molecule
    generic (bool): when True, the scaffold is additionally passed through
        ``MakeScaffoldGeneric`` (generic scaffold, CSK)

    Returns
    -------
    murcko (Chem.Mol): an rdkit molecule (scaffold)
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    return MurckoScaffold.MakeScaffoldGeneric(scaffold) if generic else scaffold
def SmilesToFrameInChIKey(SMILES):
    """Return the InChIKey of the Murcko frame of a SMILES string.

    The molecule is obtained through ``GetMol(SMILES)``. Its Murcko scaffold
    is made generic when possible; if ``MakeScaffoldGeneric`` fails, the
    plain scaffold is used instead. ``None`` is returned when ``GetMol``
    yields a falsy result or when the resulting frame is falsy.
    """
    mol = GetMol(SMILES)
    if not mol:
        return None

    frame = MurckoScaffold.GetScaffoldForMol(mol)
    try:
        frame = MurckoScaffold.MakeScaffoldGeneric(frame)
    except Exception:
        # Deliberate best effort: keep the non-generic scaffold. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        pass

    return Chem.MolToInchiKey(frame) if frame else None
Exemplo n.º 14
0
 def ECFP_from_SMILES(cls, smiles, radius=3, bit_len=4096, scaffold=0, index=None):
     """Compute Morgan bit-vector fingerprints for a sequence of SMILES.

     Args:
         smiles: sized iterable of SMILES strings.
         radius: radius passed to ``GetMorganFingerprintAsBitVect``.
         bit_len: number of bits per fingerprint (``nBits``).
         scaffold: 0 uses the parsed molecule, 1 uses
             ``GetScaffoldForMol(mol)``, 2 uses ``MakeScaffoldGeneric(mol)``.
         index: optional row index for the result; defaults to ``smiles``.

     Returns:
         A ``pandas.DataFrame`` with one row per SMILES and ``bit_len``
         columns. When a fingerprint cannot be computed the SMILES is
         printed and its row is left as zeros.
     """
     fps = np.zeros((len(smiles), bit_len))
     for i, smile in enumerate(smiles):
         arr = np.zeros((1,))
         try:
             # Parsing is inside the try block so an unparsable or
             # non-string entry is handled like any other failure.
             mol = Chem.MolFromSmiles(smile)
             if scaffold == 1:
                 mol = MurckoScaffold.GetScaffoldForMol(mol)
             elif scaffold == 2:
                 mol = MurckoScaffold.MakeScaffoldGeneric(mol)
             fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=bit_len)
             DataStructs.ConvertToNumpyArray(fp, arr)
             fps[i, :] = arr
         except Exception:
             # Exception instead of a bare except, so KeyboardInterrupt and
             # SystemExit are not swallowed during long runs.
             print(smile)
             fps[i, :] = 0
     return pd.DataFrame(fps, index=(smiles if index is None else index))
Exemplo n.º 15
0
def scaffold2smiles(mol, generic=True, return_smiles = True):
    ''' Returns the Murcko scaffold of a given molecule, as SMILES or as a molecule.

    With generic=True the molecule is made generic first (all atoms -> carbon
    and all bonds -> single) and the Murcko scaffold is taken from that
    generic molecule; errors in this branch propagate to the caller.
    With generic=False the plain Murcko scaffold is computed; if that fails
    the SMILES is '' and the scaffold object is np.nan.

    return_smiles selects whether the SMILES string (True) or the scaffold
    object (False) is returned.
    '''
    if generic:
        # Makes a Murcko scaffold generic (all atoms -> carbon and all bonds -> single)
        scff = MurckoScaffold.MakeScaffoldGeneric(mol)
        scff = MurckoScaffold.GetScaffoldForMol(scff)
        scff_smiles = MolToSmiles(scff)
    else:
        try:
            scff = MurckoScaffold.GetScaffoldForMol(mol)
            scff_smiles = MolToSmiles(scff)
        except Exception:
            # Exception rather than a bare except: interpreter-exit signals
            # such as KeyboardInterrupt must not be turned into ''.
            scff_smiles = ''
            scff = np.nan
    return scff_smiles if return_smiles else scff
Exemplo n.º 16
0
def get_murcko_scaffold(mol, generic=False):
    """Return the Murcko scaffold of ``mol``.

    Parameters
    ----------
    mol : rdkit.rdchem.Chem.Mol
    generic : bool
        If True the scaffold is passed through ``MakeScaffoldGeneric``
        before being returned (generic scaffold, CSK).

    Returns
    -------
    murcko : rdkit.Chem.rdchem.Mol
        Murcko scaffold.

    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    if not generic:
        return scaffold
    return MurckoScaffold.MakeScaffoldGeneric(scaffold)
Exemplo n.º 17
0
def CalculateMaxSizeSystemRing(mol):
    """
    Number of atoms involved in the biggest system ring
    ---> maxring

    The generic Murcko framework of the molecule is built, every bond
    between two atoms shared by a pair of rings is removed, the result is
    round-tripped through SMILES, and the size of the largest ring of that
    re-parsed molecule is returned (0 when the framework has no ring).

    :param mol: molecular
    :type mol: rdkit.Chem.rdchem.Mol
    :return: number of atoms involved in the biggest system ring
    :rtype: int

    """
    # 0. Build the generic framework of the Murcko scaffold.
    framework = MurckoScaffold.MakeScaffoldGeneric(
        MurckoScaffold.GetScaffoldForMol(mol))

    # 1. Atom indices of every ring, one set per ring.
    ring_atom_sets = [set(ring) for ring in framework.GetRingInfo().AtomRings()]
    if not ring_atom_sets:
        return 0

    # 2. For each pair of rings sharing two or more atoms, drop the bonds
    #    between those shared atoms.
    editable = Chem.RWMol(framework)
    for ring_a, ring_b in combinations(ring_atom_sets, 2):
        shared = list(ring_a & ring_b)
        if len(shared) < 2:
            continue
        for atom_pair in combinations(shared, 2):
            editable.RemoveBond(*atom_pair)

    # 3. Re-parse so ring information is recomputed, then take the largest ring.
    merged = Chem.MolFromSmiles(Chem.MolToSmiles(editable))
    return max((len(ring) for ring in merged.GetRingInfo().BondRings()),
               default=0)
Exemplo n.º 18
0
    def extract_descriptors(self, individual):
        """
        Return the list of descriptors of ``individual`` selected by
        ``self.descriptor_key``.

        Supported keys are "gen_scaffolds", "ifg", "atoms", "shg_1" and
        "checkmol"; for any other key ``None`` is returned.
        :param individual: object the descriptors are extracted from
        :return: list of descriptors, or None for an unsupported key
        """
        key = self.descriptor_key

        if key == "gen_scaffolds":
            mol = MolFromSmiles(individual.to_smiles())
            return [MolToSmiles(MurckoScaffold.MakeScaffoldGeneric(mol))]

        if key == "ifg":
            groups = ifg.identify_functional_groups(
                MolFromSmiles(individual.to_smiles()))
            # Third field of each identified group, de-duplicated.
            return list(set(group[2] for group in groups))

        if key == "atoms":
            return list(set(individual.get_atom_types()))

        if key == "shg_1":
            return list(extract_shingles(individual, 1))

        if key == "checkmol":
            return list(set(extract_checkmol(individual)))

        return None
Exemplo n.º 19
0
 def func(x):
     """Return the SMILES of the generic Murcko scaffold of ``x[molCol]``."""
     murcko = MurckoScaffold.GetScaffoldForMol(x[molCol])
     generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
     return Chem.MolToSmiles(generic)
    del df_mesfp
    
    # Collect the index labels of the three result frames (the original note
    # describes these as the CIDs of the top x compounds).
    TPec = list(TS_ec_df.index)
    TPhts = list(TS_hts_df.index)
    TPmes = list(TS_mes_df.index)
    
    # Calculate the generic (topological) scaffold SMILES of each compound and
    # gather the distinct scaffolds per fingerprint type.
    cmpd_lists = {'ecfp4':TPec, 'htsfp':TPhts, 'mesfp':TPmes}
    Generic_sets_dict = {}
    for FP_name, cmpds in cmpd_lists.items():
        gen_scaf_set = set()
        for cid in cmpds:
            if str(cid) in cid2smi:                      
                # MakeScaffoldGeneric is applied to the whole molecule here;
                # GetScaffoldForMol is not called first.
                g_scaf = Chem.MolToSmiles(ms.MakeScaffoldGeneric(Chem.MolFromSmiles(cid2smi[str(cid)])))
                gen_scaf_set.add(g_scaf)
            else: 
                # No SMILES known for this CID: report it and move on.
                print('NA???, thats not meant to happen....')
                continue
        
        # Report "<name>  <number of distinct scaffolds> / <numba>".
        print('{}\t   {}     /    {}'.format(FP_name,len(gen_scaf_set),numba))
        Generic_sets_dict[FP_name] = gen_scaf_set
    
    ec_scafs = Generic_sets_dict['ecfp4']
    hts_scafs = Generic_sets_dict['htsfp']
    mes_scafs = Generic_sets_dict['mesfp']

    plt.figure(i, figsize=(6,5))
    plt.title('AID:'+assay, size=15)
Exemplo n.º 21
0
def fuzzy_scaffolding(
    mols: List[Chem.rdchem.Mol],
    enforce_subs: List[str] = None,
    n_atom_cuttoff: int = 8,
    additional_templates: List[Chem.rdchem.Mol] = None,
    ignore_non_ring: bool = False,
    mcs_params: Dict[Any, Any] = None,
):
    """Generate fuzzy scaffold with enforceable group that needs to appear
    in the core, forcing to keep the full side chain if required.

    Molecules are first grouped by their generic Murcko scaffold. For each
    group the maximum common substructure (MCS) is computed, an R-group
    decomposition is run against it, and side chains are trimmed with
    ``trim_side_chain`` to obtain the final scaffolds.

    NOTE(hadim): consider parallelize this (if possible).

    NOTE: ``RemoveAllConformers()`` is called on the input molecules, so
    they are modified in place.

    Args:
        mols: List of all molecules
        enforce_subs: List of substructure to enforce on the scaffold.
        n_atom_cuttoff: Minimum number of atom a core should have.
        additional_templates: Additional template to use to generate scaffolds.
        ignore_non_ring: Whether to ignore atom no in murcko ring system, even if they are in the framework.
        mcs_params: Arguments of MCS algorithm.

    Returns:
        scaffolds: set
            All found scaffolds in the molecules as valid smiles
        scaffold_infos: dict of dict
            Infos on the scaffold mapping, ignoring any side chain that had to be enforced.
            Key corresponds to generic scaffold smiles
            Values at ['smarts'] corresponds to smarts representation of the true scaffold (from MCS)
            Values at ['mols'] corresponds to list of molecules matching the scaffold
        scaffold_to_group: dict of list
            Map between each generic scaffold and the R-groups decomposition row
    """

    if enforce_subs is None:
        enforce_subs = []

    if additional_templates is None:
        additional_templates = []

    if mcs_params is None:
        mcs_params = {}

    rg_params = rdRGroupDecomposition.RGroupDecompositionParameters()
    rg_params.removeAllHydrogenRGroups = True
    rg_params.removeHydrogensPostMatch = True
    rg_params.alignment = rdRGroupDecomposition.RGroupCoreAlignment.MCS
    rg_params.matchingStrategy = rdRGroupDecomposition.RGroupMatching.Exhaustive
    rg_params.rgroupLabelling = rdRGroupDecomposition.RGroupLabelling.AtomMap
    rg_params.labels = rdRGroupDecomposition.RGroupLabels.AtomIndexLabels

    core_query_param = AdjustQueryParameters()
    core_query_param.makeDummiesQueries = True
    core_query_param.adjustDegree = False
    core_query_param.makeBondsGeneric = True

    # group molecules by they generic Murcko scaffold, allowing
    # side chain that contains cycle (might be a bad idea)
    scf2infos = collections.defaultdict(dict)
    scf2groups = {}
    all_scaffolds = set()

    for m in mols:
        generic_m = MurckoScaffold.MakeScaffoldGeneric(m)
        scf = MurckoScaffold.GetScaffoldForMol(m)
        try:
            scf = MurckoScaffold.MakeScaffoldGeneric(scf)
        except Exception:
            # Best effort: keep the non-generic scaffold. Exception (not a
            # bare except) so KeyboardInterrupt/SystemExit still propagate.
            pass

        if ignore_non_ring:
            rw_scf = Chem.RWMol(scf)
            atms = [a.GetIdx() for a in rw_scf.GetAtoms() if not a.IsInRing()]
            # Remove from the highest index down so that the remaining
            # indices stay valid while atoms are deleted.
            atms.sort(reverse=True)
            for a in atms:
                rw_scf.RemoveAtom(a)
            # NOTE(review): with asMols=False these keys are whatever
            # GetMolFrags returns, not SMILES strings as in the other
            # branch -- confirm this is intended.
            scfs = list(rdmolops.GetMolFrags(rw_scf, asMols=False))
        else:
            scfs = [dm.to_smiles(scf)]

        # add templates mols if exists:
        for tmp in additional_templates:
            tmp = dm.to_mol(tmp)
            tmp_scf = MurckoScaffold.MakeScaffoldGeneric(tmp)
            if generic_m.HasSubstructMatch(tmp_scf):
                scfs.append(dm.to_smiles(tmp_scf))

        for scf in scfs:
            scf2infos[scf].setdefault("mols", []).append(m)

    for scf in scf2infos:
        # cheat by adding murcko as last mol always
        popout = False
        cluster_mols = scf2infos[scf]["mols"]
        if len(cluster_mols) < 2:
            # A new list is built so the stored "mols" entry is not modified.
            cluster_mols = cluster_mols + [
                MurckoScaffold.GetScaffoldForMol(cluster_mols[0])
            ]
            popout = True

        # compute the MCS of the cluster
        mcs = rdFMCS.FindMCS(
            cluster_mols,
            atomCompare=rdFMCS.AtomCompare.CompareAny,
            bondCompare=rdFMCS.BondCompare.CompareAny,
            completeRingsOnly=True,
            **mcs_params,
        )

        mcsM = Chem.MolFromSmarts(mcs.smartsString)
        mcsM.UpdatePropertyCache(False)
        Chem.SetHybridization(mcsM)

        # Cores that are too small are skipped: no "smarts" entry and no
        # scf2groups entry is written for them.
        if mcsM.GetNumAtoms() < n_atom_cuttoff:
            continue

        scf2infos[scf]["smarts"] = dm.to_smarts(mcsM)
        if popout:
            # Drop the artificial Murcko scaffold added above.
            cluster_mols = cluster_mols[:-1]

        core_groups = []
        # generate rgroups based on the mcs core
        success_mols = []
        try:
            rg = rdRGroupDecomposition.RGroupDecomposition(mcsM, rg_params)
            for i, analog in enumerate(cluster_mols):
                analog.RemoveAllConformers()
                res = rg.Add(analog)
                if not (res < 0):
                    success_mols.append(i)
            rg.Process()
            core_groups = rg.GetRGroupsAsRows()
        except Exception:
            # Decomposition failure leaves core_groups empty for this cluster.
            pass

        cluster_mols = [cluster_mols[i] for i in success_mols]
        scf2groups[scf] = core_groups
        for mol, gp in zip(cluster_mols, core_groups):
            core = gp["Core"]
            acceptable_groups = [
                a.GetAtomMapNum() for a in core.GetAtoms()
                if (a.GetAtomMapNum() and not a.IsInRing())
            ]

            rgroups = [
                gp[f"R{k}"] for k in acceptable_groups if f"R{k}" in gp.keys()
            ]
            if enforce_subs:
                # R-groups matching an enforced substructure are removed
                # from the list handed to trim_side_chain.
                rgroups = [
                    rgp for rgp in rgroups if not any(
                        len(rgp.GetSubstructMatch(frag)) > 0
                        for frag in enforce_subs)
                ]
            try:
                scaff = trim_side_chain(
                    mol, AdjustQueryProperties(core, core_query_param),
                    rgroups)
            except Exception:
                # Skip molecules whose side chains cannot be trimmed.
                continue
            all_scaffolds.add(dm.to_smiles(scaff))

    return all_scaffolds, scf2infos, scf2groups
Exemplo n.º 22
0
    def get_scaffolds(self,
                      scaffolding_method=ScaffoldingMethod.MurckoScaffold):
        """Compute deemed scaffolds for a given compound.

        The scaffolds found are also registered in ``self._scaffolds`` as
        ``SubstructureMapping`` entries.

        Args:
            scaffolding_method (ScaffoldingMethod, optional):
                Defaults to MurckoScaffold. Scaffolding method to use

        Returns:
            list[rdkit.Chem.rdchem.Mol]: Scaffolds found in the component.

        Raises:
            CCDUtilsError: If a ``RuntimeError`` or ``ValueError`` is raised
                while the scaffolds are computed.
        """
        try:
            scaffolds = []

            if scaffolding_method == ScaffoldingMethod.MurckoScaffold:
                scaffolds = [(MurckoScaffold.GetScaffoldForMol(self.mol_no_h))]

            elif scaffolding_method == ScaffoldingMethod.MurckoGeneric:
                scaffolds = [
                    (MurckoScaffold.MakeScaffoldGeneric(self.mol_no_h))
                ]

            elif scaffolding_method == ScaffoldingMethod.Brics:
                scaffolds = BRICS.BRICSDecompose(self.mol_no_h)
                brics_smiles = [
                    re.sub(r"(\[[0-9]*\*\])", "[H]", i) for i in scaffolds
                ]  # replace dummy atoms with H's to get matches https://sourceforge.net/p/rdkit/mailman/message/35261974/
                parsed_mols = [
                    rdkit.Chem.MolFromSmiles(x) for x in brics_smiles
                ]

                # RemoveHs returns a new molecule and leaves its argument
                # untouched, so the result must be kept; it used to be
                # discarded.
                brics_mols = [rdkit.Chem.RemoveHs(mol) for mol in parsed_mols]

                brics_hits = [
                    self.mol_no_h.GetSubstructMatches(i) for i in brics_mols
                ]

                for index, brics_hit in enumerate(brics_hits):
                    smiles = rdkit.Chem.MolToSmiles(brics_mols[index])
                    name = scaffolding_method.name
                    source = 'RDKit scaffolds'
                    key = f'{name}_{smiles}'
                    brics_hit = conversions.listit(brics_hit)

                    if not smiles:
                        continue

                    # Each distinct fragment is registered once.
                    if key not in self._scaffolds:
                        self._scaffolds[key] = SubstructureMapping(
                            name, smiles, source, brics_hit)

                return brics_mols

            for s in scaffolds:
                scaffold_atom_names = [
                    atom.GetProp('name') for atom in s.GetAtoms()
                ]
                # Map every scaffold atom to the index of the first atom of
                # self.mol carrying the same 'name' property.
                mapping = []
                for at_name in scaffold_atom_names:
                    idx = [
                        atom.GetIdx() for atom in self.mol.GetAtoms()
                        if atom.GetProp('name') == at_name
                    ][0]
                    mapping.append(idx)

                smiles = rdkit.Chem.MolToSmiles(s)
                name = scaffolding_method.name
                source = 'RDKit scaffolds'

                if not smiles:
                    continue

                if name in self._scaffolds:
                    self._scaffolds[name].mappings.append(mapping)
                else:
                    self._scaffolds[name] = SubstructureMapping(
                        name, smiles, source, [mapping])

            return scaffolds

        except (RuntimeError, ValueError) as err:
            # Chain the original error so the root cause stays visible.
            raise CCDUtilsError(
                f'Computing scaffolds using method {scaffolding_method.name} failed.'
            ) from err
Exemplo n.º 23
0
def gframecheck(s):
    """Return the SMILES of the generic scaffold of SMILES ``s``.

    ``MakeScaffoldGeneric`` is applied to the molecule parsed from ``s``.
    If any step fails, ``None`` is returned.
    """
    try:
        return Chem.MolToSmiles(ms.MakeScaffoldGeneric(Chem.MolFromSmiles(s)))
    except Exception:
        # Explicit None instead of a bare "except: pass"; KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return None
Exemplo n.º 24
0
    # Collect the index labels of the three result frames (the original note
    # describes these as the CIDs of the top x compounds).
    TPec = list(ecfp_r.index)
    TPhts = list(htsfp_r.index)
    TPces = list(BaSH_r.index)

    # Calculate the generic (topological) scaffold SMILES of each compound and
    # gather the distinct scaffolds per fingerprint type.
    cmpd_lists = {'ecfp': TPec, 'htsfp': TPhts, 'cesfp': TPces}
    Generic_sets_dict = {}
    for FP_name, cmpds in cmpd_lists.items():
        gen_scaf_set = set()
        for cid in cmpds:
            if str(cid) in cid2smi:
                # Murcko scaffold first, then its generic form.
                m_scaf = ms.GetScaffoldForMol(
                    Chem.MolFromSmiles(cid2smi[str(cid)]))
                g_scaf = Chem.MolToSmiles(ms.MakeScaffoldGeneric(m_scaf))
                gen_scaf_set.add(g_scaf)
            else:
                # No SMILES known for this CID: report it and move on.
                print('NA???, thats not meant to happen....')
                continue

        # Report "<name>  <number of distinct scaffolds> / <numba>".
        print('{}\t   {}     /    {}'.format(FP_name, len(gen_scaf_set),
                                             numba))
        Generic_sets_dict[FP_name] = gen_scaf_set

    ec_scafs = Generic_sets_dict['ecfp']
    hts_scafs = Generic_sets_dict['htsfp']
    ces_scafs = Generic_sets_dict['cesfp']

    #    o = ec_scafs.union(hts_scafs)
    #    l=len(o)
Exemplo n.º 25
0
def genericize_scaffold(s):
    """Return ``MakeScaffoldGeneric(s)``, or ``None`` if it raises ``ValueError``."""
    try:
        generic = MurckoScaffold.MakeScaffoldGeneric(s)
    except ValueError:
        generic = None
    return generic
Exemplo n.º 26
0
#!/usr/bin/env python3

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Scaffolds import MurckoScaffold

# Load the molecules from 'drugbank.sdf', dropping entries that are falsy
# (records the supplier could not turn into a molecule).
drugbank_input = Chem.SDMolSupplier('drugbank.sdf')
drugbank = [m for m in drugbank_input if m]

# Worked example on a single structure: its Murcko (atomic) scaffold and the
# generic (graph) scaffold derived from it, drawn side by side.
basic_structure = drugbank[222]
atomic_scaffold = MurckoScaffold.GetScaffoldForMol(basic_structure)
atomic_scaffold.Compute2DCoords()
graph_scaffold = MurckoScaffold.MakeScaffoldGeneric(atomic_scaffold)
Draw.MolsToGridImage([basic_structure, atomic_scaffold, graph_scaffold])

# Murcko scaffolds for the whole data set, each given 2D coordinates.
drugbank_atomic_scaffolds = [
    MurckoScaffold.GetScaffoldForMol(mol) for mol in drugbank
]
for i in drugbank_atomic_scaffolds:
    i.Compute2DCoords()


def genericize_scaffold(s):
    """Make scaffold ``s`` generic; yield ``None`` when RDKit raises ``ValueError``."""
    try:
        return MurckoScaffold.MakeScaffoldGeneric(s)
    except ValueError:
        pass
    return None


drugbank_grafh_scaffolds = [
    genericize_scaffold(s) for s in drugbank_atomic_scaffolds
Exemplo n.º 27
0
def GenerateMolecularFrameworks():
    """Generate Bemis Murcko molecular frameworks.

    All settings are read from the module-level ``OptionsInfo`` dictionary.
    Molecules are read from ``Infile``; for each non-empty molecule either
    the generic framework (``UseGraphFrameworks``) or the Murcko scaffold is
    computed and written to ``Outfile``. With ``RemoveDuplicateFrameworks``
    frameworks whose canonical SMILES was already seen go to
    ``DuplicatesOutfile`` instead. With ``SortFrameworks`` frameworks are
    collected and written at the end through ``SortAndWriteFrameworks``.
    Summary counts are printed at the end.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    UseChirality = OptionsInfo["UseChirality"]

    RemoveDuplicateFrameworks = OptionsInfo["RemoveDuplicateFrameworks"]
    UseGraphFrameworks = OptionsInfo["UseGraphFrameworks"]

    SortFrameworks = OptionsInfo["SortFrameworks"]
    if SortFrameworks:
        FrameworkMolIDs = []
        FrameworkMolIDToMolMap = {}
        FrameworkMolIDToAtomCountMap = {}

        DuplicateFrameworkMolIDs = []
        DuplicateFrameworkMolIDToMolMap = {}
        DuplicateFrameworkMolIDToAtomCountMap = {}

    DuplicatesOutfile = ""
    if RemoveDuplicateFrameworks:
        DuplicatesOutfile = OptionsInfo["DuplicatesOutfile"]

    # Setup a molecule reader...
    MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
    Mols  = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Set up a molecular framework  writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)

    # Set up a duplicate molecular framework writer...
    if RemoveDuplicateFrameworks:
        DuplicatesWriter = RDKitUtil.MoleculesWriter(DuplicatesOutfile, **OptionsInfo["OutfileParams"])
        # Check the duplicates writer itself; this used to re-test Writer,
        # so a failed duplicates writer went unnoticed.
        if DuplicatesWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for duplicates output fie %s " % DuplicatesOutfile)

    if RemoveDuplicateFrameworks:
        MiscUtil.PrintInfo("Generating files: %s and %s..." % (Outfile, DuplicatesOutfile))
    else:
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Process molecules...
    MolCount = 0
    ValidMolCount = 0

    FrameworksCount = 0
    UniqueFrameworksCount = 0
    DuplicateFrameworksCount = 0

    # Canonical SMILES of every framework seen so far (used for membership tests).
    CanonicalSMILESMap = {}

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    for Mol in Mols:
        MolCount += 1

        if Mol is None:
            continue

        if RDKitUtil.IsMolEmpty(Mol):
            MolName = RDKitUtil.GetMolName(Mol, MolCount)
            MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
            continue

        ValidMolCount += 1

        if UseGraphFrameworks:
            FrameworksMol = MurckoScaffold.MakeScaffoldGeneric(Mol)
        else:
            FrameworksMol = MurckoScaffold.GetScaffoldForMol(Mol)

        if Compute2DCoords:
            AllChem.Compute2DCoords(FrameworksMol)

        if SortFrameworks:
            HeavyAtomCount = FrameworksMol.GetNumHeavyAtoms()

        FrameworksCount += 1

        if RemoveDuplicateFrameworks:
            CanonicalSMILES = Chem.MolToSmiles(FrameworksMol, isomericSmiles = UseChirality, canonical = True)
            if CanonicalSMILES in CanonicalSMILESMap:
                DuplicateFrameworksCount += 1
                if SortFrameworks:
                    # Track duplicate frameworks...
                    DuplicateFrameworkMolIDs.append(DuplicateFrameworksCount)
                    DuplicateFrameworkMolIDToMolMap[DuplicateFrameworksCount] = FrameworksMol
                    DuplicateFrameworkMolIDToAtomCountMap[DuplicateFrameworksCount] = HeavyAtomCount
                else:
                    # Write it out...
                    DuplicatesWriter.write(FrameworksMol)
            else:
                UniqueFrameworksCount += 1
                CanonicalSMILESMap[CanonicalSMILES] = CanonicalSMILES
                if SortFrameworks:
                    # Track unique frameworks...
                    FrameworkMolIDs.append(UniqueFrameworksCount)
                    FrameworkMolIDToMolMap[UniqueFrameworksCount] = FrameworksMol
                    FrameworkMolIDToAtomCountMap[UniqueFrameworksCount] = HeavyAtomCount
                else:
                    # Write it out...
                    Writer.write(FrameworksMol)
        elif SortFrameworks:
            # Track for sorting...
            FrameworkMolIDs.append(FrameworksCount)
            FrameworkMolIDToMolMap[FrameworksCount] = FrameworksMol
            FrameworkMolIDToAtomCountMap[FrameworksCount] = HeavyAtomCount
        else:
            # Write it out...
            Writer.write(FrameworksMol)

    if SortFrameworks:
        ReverseOrder = OptionsInfo["DescendingSortOrder"]
        SortAndWriteFrameworks(Writer, FrameworkMolIDs, FrameworkMolIDToMolMap, FrameworkMolIDToAtomCountMap, ReverseOrder)
        if RemoveDuplicateFrameworks:
            SortAndWriteFrameworks(DuplicatesWriter, DuplicateFrameworkMolIDs, DuplicateFrameworkMolIDToMolMap, DuplicateFrameworkMolIDToAtomCountMap, ReverseOrder)

    Writer.close()
    if RemoveDuplicateFrameworks:
        DuplicatesWriter.close()

    MiscUtil.PrintInfo("\nTotal number of molecular frameworks: %d" % FrameworksCount)
    if RemoveDuplicateFrameworks:
        MiscUtil.PrintInfo("Number of unique molecular frameworks: %d" % UniqueFrameworksCount)
        MiscUtil.PrintInfo("Number of duplicate molecular frameworks: %d" % DuplicateFrameworksCount)

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
Exemplo n.º 28
0
    # The molecule id is the second whitespace-separated field of the line.
    molid = line.split()[1].strip()

    # The SMILES string is the first field.
    smi = line.split()[0].strip()

    # get RDKit canonical smiles
    can = Chem.MolToSmiles(Chem.MolFromSmiles(smi))

    # Murcko scaffold SMILES of the canonical SMILES, re-canonicalised by a
    # round trip through an RDKit molecule.
    murcko = ms.MurckoScaffoldSmiles(can)
    murcko = Chem.MolToSmiles(Chem.MolFromSmiles(murcko))

    # get generic murcko smiles
    gen_murcko = Chem.MolToSmiles(
        ms.MakeScaffoldGeneric(Chem.MolFromSmiles(murcko)))

    # For each molid store, in this order: input SMILES, canonical SMILES,
    # Murcko SMILES and generic Murcko SMILES.
    mol_dict[molid] = [smi, can, murcko, gen_murcko]

    # Group the molids by their GENERIC Murcko scaffold SMILES.
    if gen_murcko in ms_dict:
        ms_dict[gen_murcko].append(molid)
    else:
        ms_dict[gen_murcko] = [molid]

# The distinct generic scaffolds observed.
uniq_scaffs = ms_dict.keys()

# NOTE: Python 2 print statement -- this fragment is Python 2 code.
print "molid,scaffid"
for molid in mol_dict:
    # Index 3 is the generic Murcko SMILES stored above.
    scaff = mol_dict[molid][3]
Exemplo n.º 29
0
def calc_murcko_frame(mol):
    """Return the SMILES of ``mol`` after ``MakeScaffoldGeneric`` (Murcko generic frame)."""
    generic_frame = MurckoScaffold.MakeScaffoldGeneric(mol)
    return Chem.MolToSmiles(generic_frame)