Exemplo n.º 1
0
    def get_fragments(self):
        """Collect Murcko-scaffold SMILES of the RECAP fragments and of the molecule.

        Returns:
            ``None`` when any of ``is_mol()``, ``is_small()`` or
            ``has_2_rings()`` returns ``False``. Otherwise a list of scaffold
            SMILES: first the scaffolds of the direct RECAP children that have
            at least two rings, then the scaffold of the molecule itself.
            Duplicates are not removed.
        """
        # All three checks are evaluated (once each) before deciding.
        requirements = [self.is_mol(), self.is_small(), self.has_2_rings()]
        if False in requirements:
            return None

        fragments = []

        # 1st: scaffolds of the RECAP fragments.
        children = Recap.RecapDecompose(self.get_mol()).children
        for node in children.values():
            frag = node.mol
            if frag is None:
                continue
            # Chem.GetSSSR returns a ring count in older RDKit releases and
            # the rings themselves in newer ones; normalise to a count so the
            # comparison below works with either.
            sssr = Chem.GetSSSR(frag)
            ring_count = sssr if isinstance(sssr, int) else len(sssr)
            if ring_count < 2:
                continue
            try:
                core = MurckoScaffold.GetScaffoldForMol(frag)
            except ValueError:  # scaffold calculation not possible
                continue
            smiles_scf = Chem.MolToSmiles(core)
            # Keep only scaffolds whose SMILES can be parsed back.
            if Chem.MolFromSmiles(smiles_scf) is not None:
                fragments.append(smiles_scf)

        # 2nd: scaffold of the molecule itself.
        try:
            core = MurckoScaffold.GetScaffoldForMol(self.get_mol())
            smiles_scf = Chem.MolToSmiles(core)
            if Chem.MolFromSmiles(smiles_scf) is not None:
                fragments.append(smiles_scf)
        except ValueError:  # scaffold calculation not possible
            pass

        return fragments
Exemplo n.º 2
0
def AddMurckoToFrame(frame, molCol='ROMol', MurckoCol='Murcko_SMILES', Generic=False):
  '''
  Add a column with the Murcko scaffold SMILES of every row to a pandas DataFrame.

  The molecule is read from column `molCol` and the SMILES is written to
  column `MurckoCol`. The frame is modified in place; nothing is returned.

  With Generic set to true the scaffold is passed through
  MakeScaffoldGeneric first, so the SMILES describes the generic framework.
  '''
  def scaffold_smiles(row):
    core = MurckoScaffold.GetScaffoldForMol(row[molCol])
    if Generic:
      core = MurckoScaffold.MakeScaffoldGeneric(core)
    return Chem.MolToSmiles(core)

  frame[MurckoCol] = frame.apply(scaffold_smiles, axis=1)
Exemplo n.º 3
0
def calc_scaffold_similarity(s1: str, s2: str) -> float:
    """Return the Tanimoto similarity of the Murcko scaffolds of two SMILES.

    Both scaffolds are described by Morgan fingerprints of radius 3.
    ``-1.0`` is returned when either SMILES cannot be parsed or when any of
    the scaffold / fingerprint / similarity steps raises.
    """
    parsed = [Chem.MolFromSmiles(smi) for smi in (s1, s2)]
    if any(mol is None for mol in parsed):
        return -1.0
    try:
        scaffolds = [MurckoScaffold.GetScaffoldForMol(mol) for mol in parsed]
        fp_a, fp_b = [AllChem.GetMorganFingerprint(scf, 3) for scf in scaffolds]
        return DataStructs.TanimotoSimilarity(fp_a, fp_b)
    except Exception:
        return -1.0
Exemplo n.º 4
0
def pipe_sim_filter(stream,
                    query,
                    cutoff=80,
                    summary=None,
                    comp_id="pipe_sim_filter"):
    """Yield the records of `stream` whose similarity to the `query` Smiles
    is greater than or equal to `cutoff` (in percent).

    The query fingerprint is always computed from the Murcko scaffold of the
    query. For a record, the field `FP_b64` (e.g. pre-calculated) is used if
    present; otherwise the fingerprint of the record's Murcko scaffold is
    generated on-the-fly (much slower). Records without a `mol` field are
    skipped. Each yielded record gets a `Sim` field holding the similarity in
    percent, rounded to two decimals. When `summary` is given, the running
    number of yielded records is stored in it under `comp_id`.

    If the query Smiles cannot be parsed, an error is printed and nothing is
    yielded."""

    def scaffold_fingerprint(mol):
        # Fingerprint of the Murcko scaffold, type selected by USE_FP.
        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        if USE_FP == "morgan":
            return Desc.rdMolDescriptors.GetMorganFingerprintAsBitVect(
                scaffold, 2)
        if USE_FP == "avalon":
            return pyAv.GetAvalonFP(scaffold, 1024)
        return FingerprintMols.FingerprintMol(scaffold)

    query_mol = Chem.MolFromSmiles(query)
    if not query_mol:
        print("* {} ERROR: could not generate query from SMILES.".format(
            comp_id))
        return None

    query_fp = scaffold_fingerprint(query_mol)
    passed = 0

    for rec in stream:
        if "mol" not in rec:
            continue

        if "FP_b64" in rec:
            # NOTE(review): pickle.loads executes arbitrary code when fed
            # untrusted data - confirm that `stream` only carries trusted
            # fingerprints.
            mol_fp = pickle.loads(b64.b64decode(rec["FP_b64"]))
        else:
            mol_fp = scaffold_fingerprint(rec["mol"])

        sim = DataStructs.FingerprintSimilarity(query_fp, mol_fp)
        if sim * 100 >= cutoff:
            passed += 1
            rec["Sim"] = np.round(sim * 100, 2)

            if summary is not None:
                summary[comp_id] = passed

            yield rec
Exemplo n.º 5
0
def GetScaffold(mol, generic_framework=False):
    """
    #################################################################
    Return the Murcko scaffold of a molecule as a SMILES string.

    Usage:

        result = GetScaffold(mol)

        Input: mol is a molecule object.

        generic_framework is a boolean value. If it equals True, the scaffold
        is converted with MakeScaffoldGeneric before being written out, so the
        result is the generic framework.

        Output: result is the SMILES string of the (generic) scaffold.
    #################################################################
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    # Equality test (not truthiness) is kept on purpose: only values that
    # compare equal to True select the generic framework.
    if generic_framework == True:
        scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(scaffold)
Exemplo n.º 6
0
 def getScaffold(self, smile):
     """Return the non-isomeric Murcko scaffold SMILES of `smile`.

     An empty string is returned when the SMILES cannot be parsed.
     """
     parsed = Chem.MolFromSmiles(smile)
     if not parsed:
         return ''
     core = MurckoScaffold.GetScaffoldForMol(parsed)
     return Chem.MolToSmiles(core, isomericSmiles=False)
Exemplo n.º 7
0
def get_annotated_murcko_scaffold(mol, scaffold=None, as_mol=True):
    """
    Build the annotated Murcko scaffold of a molecule, i.e. the scaffold
    with its side chains replaced by dummy atoms ('*').

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
    scaffold : rdkit.Chem.rdchem.Mol, optional
        Pre-computed Murcko scaffold of `mol` to use as the template.
        When omitted (or falsy) the scaffold is computed here.
        The default is None.

    as_mol : bool, optional
        If True the result of ReplaceSidechains is returned unchanged,
        otherwise its SMILES string. The default is True.

    Returns
    -------
    {str, rdkit.Chem.rdchem.Mol}
        Annotated Murcko scaffold. With ``as_mol=False`` an empty string
        is returned when the side-chain replacement yields None.

    """
    template = scaffold if scaffold else MurckoScaffold.GetScaffoldForMol(mol)
    annotated = rdmolops.ReplaceSidechains(mol, template)
    if not as_mol:
        return '' if annotated is None else MolToSmiles(annotated)
    return annotated
Exemplo n.º 8
0
def SMILES_2_ECFP(smiles, radius=3, bit_len=4096, index=None):
    """
    Transform a list of SMILES strings into Morgan bit fingerprints of their
    Murcko scaffolds.

    Parameters
    ----------
    smiles: List of SMILES strings to transform
    radius: Morgan fingerprint radius (default 3)
    bit_len: Number of bits per fingerprint (default 4096)
    index: Optional index for the returned frame; the SMILES themselves are
        used when it is None

    Returns
    -------
    pandas.DataFrame with one row per SMILES and `bit_len` columns. When the
    scaffold or fingerprint of a SMILES cannot be computed, the SMILES is
    printed and its row is left all-zero.
    """
    fps = np.zeros((len(smiles), bit_len))
    for i, smile in enumerate(smiles):
        mol = Chem.MolFromSmiles(smile)
        arr = np.zeros((1, ))
        try:
            scaffold = MurckoScaffold.GetScaffoldForMol(mol)
            fp = AllChem.GetMorganFingerprintAsBitVect(scaffold,
                                                       radius,
                                                       nBits=bit_len)
            DataStructs.ConvertToNumpyArray(fp, arr)
            fps[i, :] = arr
        except Exception:
            # Best effort: report the offending SMILES and keep a zero row.
            # `Exception` (not a bare except) so Ctrl-C still interrupts.
            print(smile)
            fps[i, :] = 0
    return pd.DataFrame(fps, index=(smiles if index is None else index))
def get_murcko_scaffold(smiles_dict):
    """Group molecule ids by the SMILES of their Murcko scaffold.

    Reads a dictionary mapping an id to a SMILES string, e.g.
    'CHEMBL189352': 'COc1ccc2c(cnn2n1)c3ccnc(Nc4ccc(cc4)C#N)n3'
    and returns a dictionary mapping each scaffold SMILES to the list of ids
    whose molecule has that scaffold.

    Molecules whose scaffold cannot be computed are reported on stdout and
    left out of the result.

    :param smiles_dict: dictionary of id -> SMILES
    :return: dictionary of scaffold SMILES -> list of ids
    """
    scaffolds = {}
    for chembl_id, smiles in smiles_dict.items():
        mol = Chem.MolFromSmiles(smiles)
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            scaffold = Chem.MolToSmiles(core)
        except Exception:
            print("rdkit could not read {}".format(chembl_id))
            # Skip this molecule; otherwise the id would be filed under the
            # scaffold left over from the previous iteration.
            continue
        scaffolds.setdefault(scaffold, []).append(chembl_id)

    return scaffolds
Exemplo n.º 10
0
def extract_murcko_scaffolds(mols, verbose=True):
    """ Extract Bemis-Murcko scaffolds and generic frameworks from molecules.

    ``None`` entries in `mols` are skipped. When a ValueError is raised for a
    molecule, the error is printed and the placeholder ``['error']`` is
    appended to both result lists, so the two lists always stay aligned.

    :param mols: molecule data set in rdkit mol format.
    :param verbose: {bool} print the number of scaffolds and the elapsed time.
    :return: tuple of two lists: scaffold smiles strings and generic
        framework smiles strings.
    """
    scaf = []
    generic_scaf = []
    start = time.time()
    for mol in mols:
        if mol is None:
            continue
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            fw = MurckoScaffold.MakeScaffoldGeneric(core)
            # Compute both strings before touching either list so a failure
            # in the second conversion cannot leave the lists misaligned.
            core_smiles = Chem.MolToSmiles(core, isomericSmiles=True)
            fw_smiles = Chem.MolToSmiles(fw, isomericSmiles=True)
        except ValueError as e:
            print(e)
            scaf.append(['error'])
            generic_scaf.append(['error'])
            continue
        scaf.append(core_smiles)
        generic_scaf.append(fw_smiles)
    if verbose:
        print('Extracted', len(scaf), 'scaffolds in',
              time.time() - start, 'seconds.')
    return scaf, generic_scaf
Exemplo n.º 11
0
def main(name, argv):
    """Write the scaffold of every sulfonyl-fluoride molecule of a SMILES file.

    ``argv`` must hold two paths: the input file (whitespace separated, SMILES
    in the first field and a second field that is copied through) and the
    output file. Otherwise the usage text is printed and nothing is done.

    For each molecule matching the reactant pattern, the S(=O)(=O)F group is
    first converted to a tetrazole-substituted sulfonyl so that it takes part
    in a ring, the Murcko scaffold is computed, and the tetrazole is converted
    back to fluorine. If the scaffold no longer contains the sulfur-tetrazole
    pattern, 'S(=O)(=O)F' is written as the scaffold. Output lines are
    ``scaffold<TAB>smiles<TAB>second field``.

    Lines with fewer than two fields, unparsable SMILES and molecules without
    the reactant pattern are skipped.
    """
    if not len(argv) == 2:
        print_usage(name)
        return

    rxn = rdChemReactions.ReactionFromSmarts(
        '[S:1](=[O:2])(=[O:3])F>>[S:1](=[O:2])(=[O:3])n1nnnn1')
    reactents_smarts = rxn.GetReactants()
    back = rdChemReactions.ReactionFromSmarts('[S:1]n1nnnn1>>[S:1]F')
    back_smarts = back.GetReactants()

    # Read everything before opening the output file.
    with open(argv[0], 'r') as f:
        records = [line.split() for line in f]

    with open(argv[1], 'w') as f:
        for fields in records:
            if len(fields) < 2:
                continue  # blank or malformed line
            smiles, label = fields[0], fields[1]
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                continue  # unparsable SMILES
            if not mol.HasSubstructMatch(reactents_smarts[0]):
                continue
            fake_ring = rxn.RunReactants((mol, ))[0][0]
            # Round-trip through SMILES to get a freshly parsed product.
            fake_ring = Chem.MolFromSmiles(Chem.MolToSmiles(fake_ring))
            core = MurckoScaffold.GetScaffoldForMol(fake_ring)
            if core.HasSubstructMatch(back_smarts[0]):
                scaffold = back.RunReactants((core, ))[0][0]
            else:
                scaffold = Chem.MolFromSmiles('S(=O)(=O)F')
            f.write('%s\t%s\t%s\n' %
                    (Chem.MolToSmiles(scaffold), smiles, label))
Exemplo n.º 12
0
def extract_side_chains(mol, remove_duplicates=False, mark='[*]'):
    """ Extract side chains from a smiles string. Core is handled as Murcko scaffold.

    :param mol: {str} smiles string of a molecule.
    :param remove_duplicates: {bool} Keep or remove duplicates. Removing
        duplicates goes through a set, so the order of the result is not
        preserved in that case.
    :param mark: string that replaces the numbered attachment points
        ``[0*]`` ... ``[19*]``.
    :return: smiles strings of side chains in a list, attachment points
        replaced by `mark`. An empty list is returned when the scaffold or the
        side chains cannot be computed.
    """
    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        side_chain = ReplaceCore(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(side_chain, isomericSmiles=True)
    except Exception:
        # Best effort: any RDKit failure means "no side chains". `Exception`
        # instead of a bare except so KeyboardInterrupt is not swallowed.
        return []
    for i in range(20):
        smi = smi.replace('[' + str(i) + '*]', mark)
    chains = smi.split('.')
    if remove_duplicates:
        return list(set(chains))
    return chains
Exemplo n.º 13
0
 def __call__(self, smiles, radius=3, bit_len=4096, scaffold=0):
     """Compute Morgan bit fingerprints for a sequence of SMILES.

     `scaffold` selects what is fingerprinted: 1 uses the Murcko scaffold of
     the molecule, 2 uses the result of MakeScaffoldGeneric applied to the
     molecule, any other value uses the molecule itself.

     Returns a pandas DataFrame with one row per SMILES and `bit_len`
     columns. When anything fails for a SMILES, the exception is appended to
     `self.builder.errors` and the row is all zeros.
     """
     matrix = np.zeros((len(smiles), bit_len))
     for row, smile in enumerate(smiles):
         mol = Chem.MolFromSmiles(smile)
         bits = np.zeros((1, ))
         try:
             if scaffold == 1:
                 mol = MurckoScaffold.GetScaffoldForMol(mol)
             elif scaffold == 2:
                 mol = MurckoScaffold.MakeScaffoldGeneric(mol)
             if not mol:
                 raise Exception(
                     f'Failed to calculate Morgan fingerprint (creating RDKit instance from smiles failed: {smile})'
                 )
             fingerprint = AllChem.GetMorganFingerprintAsBitVect(
                 mol, radius, nBits=bit_len)
             DataStructs.ConvertToNumpyArray(fingerprint, bits)
             matrix[row, :] = bits
         except Exception as err:
             # TODO: use a more specific exception related to descriptor errors
             self.builder.errors.append(err)
             matrix[row, :] = 0
     return pd.DataFrame(matrix)
Exemplo n.º 14
0
    def findCluster(self, smiles):
        """Assign a SMILES to a scaffold cluster.

        The cluster key is the non-isomeric SMILES of the Murcko scaffold and
        the scaffold is described by its atom-pair fingerprint.

        Returns a tuple ``(cluster, fingerprint, is_new)``:

        * ``("", "", False)`` when the SMILES cannot be parsed or no scaffold
          can be computed;
        * ``(cluster, fp, False)`` when the scaffold is already a key of
          ``self.getFingerprints()``;
        * ``(closest_key, fp, False)`` when the most similar known
          fingerprint reaches ``self.minsimilarity`` (Tanimoto);
        * ``(cluster, fp, True)`` otherwise, i.e. the scaffold starts a new
          cluster.
        """
        no_cluster = ("", "", False)

        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return no_cluster
        try:
            scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        except Exception:
            # `Exception` rather than a bare except so that
            # KeyboardInterrupt/SystemExit still propagate.
            return no_cluster
        if not scaffold:
            return no_cluster
        cluster = Chem.MolToSmiles(scaffold, isomericSmiles=False)

        fp = Pairs.GetAtomPairFingerprint(scaffold)  # Change to Tanimoto?

        # Work on a single snapshot so keys and values are guaranteed to
        # line up when indexing with `closest` below.
        known = self.getFingerprints()
        if cluster in known:
            return cluster, fp, False

        keys = list(known.keys())
        fps = list(known.values())
        sims = DataStructs.BulkTanimotoSimilarity(fp, fps)
        if len(sims) == 0:
            return cluster, fp, True
        closest = np.argmax(sims)
        if sims[closest] >= self.minsimilarity:
            return keys[closest], fp, False
        return cluster, fp, True
Exemplo n.º 15
0
def generate_molecule_image(path, limit=25):
    """Render the Murcko scaffolds of the top hits of a file as PNG images.

    The first `limit` lines of `path` are read; each is expected to hold a
    SMILES and an id separated by whitespace. For every such hit the scaffold
    is drawn, the id is written into the top-left corner and the image is
    saved as ``GUI/images/molecules/<smiles>.png``.

    Nothing happens when `path` does not exist. Lines with fewer than two
    fields and SMILES that cannot be parsed are skipped.
    """
    if not os.path.exists(path):
        return

    # Imported only once there is work to do.
    from rdkit.Chem.Scaffolds import MurckoScaffold
    from rdkit.Chem import MolFromSmiles
    from rdkit.Chem.Draw import MolToImage
    from PIL import ImageDraw

    # Read the hits file
    hits = []
    with open(path, 'r') as top_hits:
        for line_number, line in enumerate(top_hits):
            if line_number >= limit:
                break
            # split() without argument also drops the trailing newline that
            # would otherwise end up in the id.
            fields = line.split()
            if len(fields) < 2:
                continue
            hits.append((fields[0], fields[1]))

    # Generate scaffold
    for smile, mid in hits:
        parsed = MolFromSmiles(smile)
        if parsed is None:
            continue
        mol = MurckoScaffold.GetScaffoldForMol(parsed)
        image = MolToImage(mol)

        # Add text to the image
        draw = ImageDraw.Draw(image)
        draw.text((5, 5), mid, fill="black", align="right")
        # NOTE(review): SMILES may contain '/' or '\\', which are path
        # separators - confirm how consumers look these files up before
        # changing the naming scheme.
        image.save("GUI/images/molecules/{}.png".format(smile))
Exemplo n.º 16
0
def get_scaffolds(compounds):
    """Annotate every compound with its Murcko scaffold SMILES.

    Each entry must provide a ``"canonical_smiles"`` item. The items
    ``"scaffold"`` (Murcko scaffold) and ``"generic_scaffold"`` (scaffold
    after MakeScaffoldGeneric) are added in place; the same `compounds`
    object is returned.
    """
    for record in compounds:
        parsed = Chem.MolFromSmiles(record["canonical_smiles"])
        core = MurckoScaffold.GetScaffoldForMol(parsed)
        record["scaffold"] = Chem.MolToSmiles(core)
        generic = MurckoScaffold.MakeScaffoldGeneric(core)
        record["generic_scaffold"] = Chem.MolToSmiles(generic)
    return compounds
Exemplo n.º 17
0
def compute_scaffold(mol, min_rings=2):
    """Return the Murcko scaffold SMILES of `mol`, or None if it is too small.

    `mol` is first passed through `get_mol`. None is returned when the
    scaffold SMILES is empty or the scaffold has fewer than `min_rings`
    rings (as counted by `get_n_rings`).
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(get_mol(mol))
    ring_count = get_n_rings(scaffold)
    smiles = Chem.MolToSmiles(scaffold)
    rejected = smiles == '' or ring_count < min_rings
    return None if rejected else smiles
Exemplo n.º 18
0
def get_scaffold(mol):
    """Computes the Bemis-Murcko scaffold for a molecule.
    Args:
      mol: RDKit Mol.
    Returns:
      Isomeric SMILES string of the scaffold.
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    return Chem.MolToSmiles(scaffold, isomericSmiles=True)
Exemplo n.º 19
0
def scaffold2smiles(mol, generic=True, return_smiles = True):
    ''' Returns the Murcko Scaffold of a given molecule, as SMILES or as molecule.

    generic: if true, the molecule is first made generic (all atoms -> carbon
        and all bonds -> single) and the scaffold is taken from that; errors
        propagate to the caller. If false, the plain Murcko scaffold is used
        and any failure yields '' (SMILES) / np.nan (molecule).
    return_smiles: if true the SMILES string is returned, otherwise the
        scaffold molecule.
    '''
    if generic:
        # Makes a Murcko scaffold generic (all atoms -> carbon and all bonds -> single)
        scff = MurckoScaffold.MakeScaffoldGeneric(mol)
        scff = MurckoScaffold.GetScaffoldForMol(scff)
        scff_smiles = MolToSmiles(scff)
    else:
        # Return a smiles scaffold
        try:
            scff = MurckoScaffold.GetScaffoldForMol(mol)
            scff_smiles = MolToSmiles(scff)
        except Exception:
            # Best effort; `Exception` instead of a bare except so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            scff_smiles = ''
            scff = np.nan
    return scff_smiles if return_smiles else scff
Exemplo n.º 20
0
def compute_scaffold(mol, min_rings=2):
    """Return the Murcko scaffold SMILES of `mol`, or None if unusable.

    `mol` is first passed through `get_mol`. None is returned when the
    scaffold computation raises ValueError or RuntimeError, when the scaffold
    SMILES is empty, or when the scaffold has fewer than `min_rings` rings
    (as counted by `get_n_rings`).
    """
    parsed = get_mol(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(parsed)
    except (ValueError, RuntimeError):
        return None
    else:
        ring_count = get_n_rings(core)
        smiles = Chem.MolToSmiles(core)
    if smiles == '' or ring_count < min_rings:
        return None
    return smiles
Exemplo n.º 21
0
def _getscaffold(mol, stype='Murcko'):
    """
    *Internal used only*

    Return the canonical, non-isomeric SMILES of the scaffold of `mol`.
    `stype` is 'Murcko' for the Murcko scaffold or 'Carbon' for the scaffold
    after MakeScaffoldGeneric; any other value fails the assertion.
    """
    assert stype in (
        'Murcko', 'Carbon'
    ), 'scaffold type must be a member of "Murcko" or "Carbon"'
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    if stype != 'Murcko':
        scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(scaffold, isomericSmiles=False, canonical=True)
Exemplo n.º 22
0
 def _calculate_scaffold(self, smile):
     """Return the non-isomeric Murcko scaffold SMILES of `smile`.

     An empty string is returned when the SMILES cannot be parsed or the
     scaffold computation raises ValueError.
     """
     parsed = Chem.MolFromSmiles(smile)
     if not parsed:
         return ''
     try:
         core = MurckoScaffold.GetScaffoldForMol(parsed)
         return Chem.MolToSmiles(core, isomericSmiles=False)
     except ValueError:
         return ''
Exemplo n.º 23
0
 def get_murckoscf(self):
     """Return the Murcko scaffold SMILES of this molecule, or None.

     None is returned when any of ``is_mol()``, ``is_small()`` or
     ``has_2_rings()`` returns False, or when the scaffold computation
     raises ValueError.
     """
     # All three checks are evaluated (once each) before deciding.
     requirements = [self.is_mol(), self.is_small(), self.has_2_rings()]
     if False in requirements:
         return None
     try:
         scaffold = MurckoScaffold.GetScaffoldForMol(self.get_mol())
     except ValueError:  # scaffold calculation not possible
         return None
     if scaffold is None:
         return None
     return Chem.MolToSmiles(scaffold)
Exemplo n.º 24
0
def extract_murcko_scaffolds(mol):
    """ Extract the Bemis-Murcko scaffold from a smile string.

    :param mol: {str} smiles string of a molecule.
    :return: isomeric smiles string of the scaffold, or an empty string when
        the scaffold cannot be computed.
    """
    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        return MolToSmiles(core, isomericSmiles=True)
    except Exception:
        # Best effort; `Exception` instead of a bare except so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return ''
Exemplo n.º 25
0
def init_scaffold_tbl():
    """Link every Molecule to the Scaffold row of its Murcko scaffold.

    For each Molecule the Murcko scaffold of ``mol.structure`` is computed.
    When its SMILES is non-empty, the Scaffold with that SMILES is fetched or
    created, its ``structure`` is set to the scaffold, and the molecule's
    ``scaffold`` is pointed at it; both objects are saved. Molecules whose
    scaffold cannot be computed are skipped.
    """
    for mol in Molecule.objects.all().iterator():
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol.structure)
        except Exception:
            # Skip molecules RDKit cannot handle. `Exception` instead of a
            # bare except so the job can still be interrupted.
            continue
        core_smiles = Chem.MolToSmiles(core)
        if not core_smiles:
            continue
        scaffold, _ = Scaffold.objects.get_or_create(smiles=core_smiles)
        scaffold.structure = core
        mol.scaffold = scaffold
        scaffold.save()
        mol.save()
Exemplo n.º 26
0
def computeFramwork(df):
    """Add Murcko-scaffold and carbon-skeleton SMILES columns to `df`.

    The SMILES of each row are read from column ``'can'``. Column
    ``'murckos'`` receives the Murcko scaffold SMILES and column
    ``'carbons'`` the SMILES of the scaffold after MakeScaffoldGeneric.
    The frame is modified in place and returned.
    """
    murcko_smiles = []
    carbon_smiles = []
    for smiles in df['can']:
        scaffold = MurckoScaffold.GetScaffoldForMol(Chem.MolFromSmiles(smiles))
        skeleton = MurckoScaffold.MakeScaffoldGeneric(scaffold)
        # Convert the Murcko scaffold and the carbon skeleton to SMILES.
        murcko_smiles.append(Chem.MolToSmiles(scaffold))
        carbon_smiles.append(Chem.MolToSmiles(skeleton))
    df['murckos'] = murcko_smiles
    df['carbons'] = carbon_smiles
    return df
Exemplo n.º 27
0
    def get_stripped_core(self, core_smi):
        """Return the core of ``self.smi`` with its side chains stripped.

        NOTE(review): despite its name, ``self.smi`` is passed to RDKit
        functions that take a molecule, so it is presumably a Mol - confirm.

        * With `core_smi` given, the core is parsed from it and the result of
          ``Chem.ReplaceSidechains(self.smi, core)`` is returned.
        * With `core_smi` None, the Murcko scaffold is used as the core in
          the same way. If the scaffold is identical to the whole molecule
          (same SMILES), the longest RECAP child SMILES is parsed and
          returned instead.
        """
        if core_smi is not None:
            core = Chem.MolFromSmiles(core_smi)
            return Chem.ReplaceSidechains(self.smi, core)

        print('No core provided generating core with Murcko Scaffold')
        core = MurckoScaffold.GetScaffoldForMol(self.smi)
        if Chem.MolToSmiles(core) != Chem.MolToSmiles(self.smi):
            # The scaffold is usable: strip the side chains exactly like for
            # an explicit core. (Previously nothing was assigned on this
            # path and the final return raised UnboundLocalError.)
            return Chem.ReplaceSidechains(self.smi, core)

        print(
            'Murcko Scaffold failed selecting largest fragment as core'
        )
        hierarch = Recap.RecapDecompose(self.smi).children.keys()
        return Chem.MolFromSmiles(max(hierarch, key=len))
Exemplo n.º 28
0
    def __init__(self, smi):
        """Parse `smi` and prepare the scaffold data used by this object.

        The scaffold is the Murcko scaffold of the molecule after it has been
        made generic with MakeScaffoldGeneric.
        """
        molecule = Chem.MolFromSmiles(smi)
        generic = MurckoScaffold.MakeScaffoldGeneric(molecule)
        scaffold = MurckoScaffold.GetScaffoldForMol(generic)

        self._smi = smi
        self._mol = molecule
        self._scaf = scaffold
        self._scaf_atoms = scaffold.GetAtoms()
        self._scaf_bonds = scaffold.GetBonds()
        self._scaf_smi = Chem.MolToSmiles(scaffold)
        # The two helpers run after the scaffold attributes are set and in
        # this order, as in the original initialisation sequence.
        self._ring_system = self.GetRingSystemsofscaf()
        self._ring_system_count = self.count_ring_systems()
        # NOTE(review): presumably the bin boundaries for _chain_binning -
        # confirm against the code that fills it.
        self._bin_values = [1, 2, 3, 4, 7]

        # Linkers: [direct bond between rings, linear chain between rings,
        # branched chain between rings]
        self._linkers = [0, 0, 0]
        self._chain_binning = [0, 0, 0, 0, 0]
Exemplo n.º 29
0
def getModeMurckoScaffoldImage(SMILES_list):
    """
    Return the most common Murcko scaffold of a list of SMILES as an image.

    Scaffolds are compared by their SMILES string. On a tie, the scaffold
    that occurs first in `SMILES_list` wins. SMILES that cannot be parsed are
    ignored.

    Raises ValueError when no scaffold could be computed (e.g. empty list).
    """
    from collections import Counter

    counts = Counter()
    representative = {}  # scaffold SMILES -> first scaffold molecule seen

    # Looping through and getting the scaffolds for each smile
    for smile in SMILES_list:
        m1 = Chem.MolFromSmiles(smile)
        if m1 is None:
            continue
        core = MurckoScaffold.GetScaffoldForMol(m1)
        # Mol objects hash by identity, so counting them directly would make
        # every scaffold unique; count their SMILES instead.
        key = Chem.MolToSmiles(core)
        counts[key] += 1
        representative.setdefault(key, core)

    if not counts:
        raise ValueError('no valid SMILES to determine the mode scaffold')

    # Finding the mode scaffold
    mode_smiles, _ = counts.most_common(1)[0]
    return MolToImage(representative[mode_smiles], size=(700, 700))
Exemplo n.º 30
0
def main(name, argv):
    """Print the Murcko scaffold SMILES of the first molecules of a file.

    ``argv`` must hold exactly one path; otherwise the usage text is printed.
    Only the first 10 lines of the file are considered and the SMILES is
    taken from the first whitespace-separated field. Blank lines and SMILES
    that cannot be parsed are skipped.
    """
    if not len(argv) == 1:
        print_usage(name)
        return

    with open(argv[0], 'r') as f:
        lines = f.readlines()[:10]
    smiles = [line.split()[0] for line in lines if line.strip()]

    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            continue
        core = MurckoScaffold.GetScaffoldForMol(mol)
        # print() call form: valid on Python 3 and on Python 2 as well.
        print(Chem.MolToSmiles(core))