Пример #1
0
 def __call__(self, smiles, radius=3, bit_len=4096, scaffold=0):
     """Compute Morgan bit-vector fingerprints for a sequence of SMILES.

     Args:
         smiles: sized iterable of SMILES strings.
         radius: Morgan fingerprint radius passed to RDKit.
         bit_len: number of bits per fingerprint (the column count).
         scaffold: ``0`` fingerprints the parsed molecule, ``1`` its Murcko
             scaffold, ``2`` the result of
             ``MurckoScaffold.MakeScaffoldGeneric`` applied to the molecule.

     Returns:
         ``pandas.DataFrame`` with one row per input SMILES and ``bit_len``
         columns. A row whose fingerprint could not be computed stays all
         zeros and the exception is appended to ``self.builder.errors``.
     """
     fps = np.zeros((len(smiles), bit_len))
     for i, smile in enumerate(smiles):
         mol = Chem.MolFromSmiles(smile)
         try:
             # Validate the parse *before* any scaffold call: passing None
             # to the scaffold helpers fails with an unrelated error and
             # the descriptive message below would never be recorded.
             if not mol:
                 raise Exception(
                     f'Failed to calculate Morgan fingerprint (creating RDKit instance from smiles failed: {smile})'
                 )
             if scaffold == 1:
                 mol = MurckoScaffold.GetScaffoldForMol(mol)
             elif scaffold == 2:
                 mol = MurckoScaffold.MakeScaffoldGeneric(mol)
             fp = AllChem.GetMorganFingerprintAsBitVect(mol,
                                                        radius,
                                                        nBits=bit_len)
             # Placeholder array; ConvertToNumpyArray fills it from ``fp``.
             arr = np.zeros((1, ))
             DataStructs.ConvertToNumpyArray(fp, arr)
             fps[i, :] = arr
         except Exception as exp:
             # TODO: use a more specific exception related to descriptor errors
             self.builder.errors.append(exp)
             fps[i, :] = 0
     return pd.DataFrame(fps)
Пример #2
0
    def get_fragments(self):
        """Collect scaffold SMILES of the RECAP fragments and of the molecule.

        Returns:
            ``None`` when any of ``is_mol()``, ``is_small()`` or
            ``has_2_rings()`` returns ``False``; otherwise a list with the
            Murcko-scaffold SMILES of every entry in the RECAP root's
            ``children`` that has at least two rings, followed by the
            scaffold SMILES of the molecule itself. Scaffolds whose SMILES
            cannot be parsed back are left out.
        """
        fragments = None
        # All three predicates are evaluated (no short-circuit), as before.
        if False not in [self.is_mol(),
                         self.is_small(),
                         self.has_2_rings()]:  # 3 requirements fulfilled
            fragments = []

            # 1st add scf of the fragments
            hierarch = Recap.RecapDecompose(self.get_mol())
            for obj in hierarch.children.values():
                m = obj.mol
                if m is None:
                    continue
                # Depending on the RDKit version GetSSSR returns either the
                # ring count or the rings themselves; accept both.
                sssr = Chem.GetSSSR(m)
                ring_count = sssr if isinstance(sssr, int) else len(sssr)
                if ring_count < 2:
                    continue
                # Fragments into scaffolds conversion
                try:
                    core = MurckoScaffold.GetScaffoldForMol(m)
                except ValueError:  # scf calculation not possible
                    continue
                smiles_scf = Chem.MolToSmiles(core)
                if Chem.MolFromSmiles(smiles_scf) is None:
                    continue
                fragments.append(smiles_scf)

            # 2nd add scf of itself
            try:
                core = MurckoScaffold.GetScaffoldForMol(self.get_mol())
                smiles_scf = Chem.MolToSmiles(core)
                if Chem.MolFromSmiles(smiles_scf) is not None:
                    fragments.append(smiles_scf)
            except ValueError:  # scf calculation not possible
                pass

        return fragments
Пример #3
0
def GetScaffold(mol, generic_framework=False):
    """
    #################################################################
    Return the Murcko scaffold of a molecule as a SMILES string.

    Usage:

        result = GetScaffold(mol)

        Input: mol is a molecule object.

        generic_framework is a boolean value. When it equals True the
        scaffold is first passed through MakeScaffoldGeneric, so the
        generic framework is returned instead.

        Output: result is the SMILES string of the scaffold (or of the
        generic framework).
    #################################################################
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    # The explicit comparison with True is kept on purpose: truthy values
    # that do not compare equal to True keep selecting the plain scaffold.
    if not (generic_framework == True):
        return Chem.MolToSmiles(scaffold)
    framework = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(framework)
Пример #4
0
def extract_murcko_scaffolds(mols, verbose=True):
    """Extract Bemis-Murcko scaffolds and generic frameworks from molecules.

    :param mols: iterable of RDKit molecule objects; ``None`` entries are
        skipped and produce no output element.
    :param verbose: when true, print how many entries were produced and the
        elapsed wall-clock time.
    :return: tuple ``(scaf, generic_scaf)`` of two equally long lists holding
        the scaffold SMILES and the generic-framework SMILES. A molecule
        that raises ``ValueError`` contributes the placeholder ``['error']``
        to both lists.
    """
    scaf = []
    generic_scaf = []
    start = time.time()
    for mol in mols:
        if mol is None:
            continue
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            fw = MurckoScaffold.MakeScaffoldGeneric(core)
            # Compute both strings before appending anything, so a failure
            # in the second conversion cannot leave the lists misaligned.
            core_smiles = Chem.MolToSmiles(core, isomericSmiles=True)
            fw_smiles = Chem.MolToSmiles(fw, isomericSmiles=True)
        except ValueError as e:
            print(e)
            core_smiles = ['error']
            fw_smiles = ['error']
        scaf.append(core_smiles)
        generic_scaf.append(fw_smiles)
    if verbose:
        print('Extracted', len(scaf), 'scaffolds in',
              time.time() - start, 'seconds.')
    return scaf, generic_scaf
Пример #5
0
def AddMurckoToFrame(frame, molCol = 'ROMol', MurckoCol = 'Murcko_SMILES', Generic = False):
  '''
  Store the SMILES of each row's Murcko scaffold in column MurckoCol of the
  pandas DataFrame (modified in place, nothing is returned). The molecule is
  read from column molCol. With Generic set to true the scaffold is converted
  to its generic framework before being written as SMILES.
  '''
  def _row_to_smiles(row):
    scaffold = MurckoScaffold.GetScaffoldForMol(row[molCol])
    if Generic:
      scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return MolToSmiles(scaffold)

  frame[MurckoCol] = frame.apply(_row_to_smiles, axis=1)
Пример #6
0
 def getGenericScaffold(self, smile):
     """Return the generic Murcko scaffold of ``smile`` as non-isomeric SMILES.

     An empty string is returned when the SMILES cannot be parsed.
     """
     parsed = Chem.MolFromSmiles(smile)
     if not parsed:
         return ''
     murcko = MurckoScaffold.GetScaffoldForMol(parsed)
     generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
     return Chem.MolToSmiles(generic, isomericSmiles=False)
Пример #7
0
def get_scaffolds(compounds):
    """Annotate every compound record with its scaffold SMILES.

    Each element of ``compounds`` is read through its ``"canonical_smiles"``
    key and receives two new keys, ``"scaffold"`` (Murcko scaffold SMILES)
    and ``"generic_scaffold"`` (SMILES of the generic version of that
    scaffold). The same ``compounds`` object is returned.
    """
    for idx, entry in enumerate(compounds):
        parsed = Chem.MolFromSmiles(entry["canonical_smiles"])
        murcko = MurckoScaffold.GetScaffoldForMol(parsed)
        compounds[idx]["scaffold"] = Chem.MolToSmiles(murcko)
        generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
        compounds[idx]["generic_scaffold"] = Chem.MolToSmiles(generic)
    return compounds
Пример #8
0
def _getscaffold(mol, stype='Murcko'):
    """
    *Internal used only*

    Return the canonical, non-isomeric SMILES of the scaffold of ``mol``.
    ``stype`` is 'Murcko' for the Murcko scaffold or 'Carbon' for the
    generic version produced by MakeScaffoldGeneric; any other value
    fails the assertion below.
    """
    assert stype in [
        'Murcko', 'Carbon'
    ], 'scaffold type must be a member of "Murcko" or "Carbon"'
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    if stype != 'Murcko':
        scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(scaffold, isomericSmiles=False, canonical=True)
 def _calculate_scaffold(self, smile):
     """Return the generic Murcko scaffold of ``smile`` as non-isomeric SMILES.

     The empty string is returned when the SMILES cannot be parsed or when
     the scaffold computation raises ``ValueError``.
     """
     parsed = Chem.MolFromSmiles(smile)
     if not parsed:
         return ''
     try:
         murcko = MurckoScaffold.GetScaffoldForMol(parsed)
         generic = MurckoScaffold.MakeScaffoldGeneric(murcko)
         return Chem.MolToSmiles(generic, isomericSmiles=False)
     except ValueError:
         return ''
Пример #10
0
def calc_scaffold_similarity(s1: str, s2: str) -> float:
    """Tanimoto similarity between the Murcko scaffolds of two SMILES.

    Both scaffolds are described by a Morgan fingerprint of radius 3.
    ``-1.0`` is returned when either SMILES cannot be parsed or when any
    step of the computation raises.
    """
    parsed = [Chem.MolFromSmiles(s1), Chem.MolFromSmiles(s2)]
    if any(m is None for m in parsed):
        return -1.0
    try:
        scaffolds = [MurckoScaffold.GetScaffoldForMol(m) for m in parsed]
        fp_a, fp_b = [AllChem.GetMorganFingerprint(s, 3) for s in scaffolds]
        return DataStructs.TanimotoSimilarity(fp_a, fp_b)
    except Exception:
        return -1.0
Пример #11
0
def _murcko_fingerprint(mol):
    """Fingerprint the Murcko scaffold of ``mol`` with the type set in ``USE_FP``."""
    murcko_mol = MurckoScaffold.GetScaffoldForMol(mol)
    if USE_FP == "morgan":
        return Desc.rdMolDescriptors.GetMorganFingerprintAsBitVect(
            murcko_mol, 2)
    if USE_FP == "avalon":
        return pyAv.GetAvalonFP(murcko_mol, 1024)
    return FingerprintMols.FingerprintMol(murcko_mol)


def pipe_sim_filter(stream,
                    query,
                    cutoff=80,
                    summary=None,
                    comp_id="pipe_sim_filter"):
    """Filter for compounds that have a similarity greater or equal
    than `cutoff` (in percent) to the `query` Smiles.
    If the field `FP_b64` (e.g. pre-calculated) is present, this will be used,
    otherwise the fingerprint of the Murcko scaffold will be generated on-the-fly (much slower).

    This is a generator. Records without a `mol` field are skipped. Every
    record that passes gets a `Sim` field (similarity in percent, rounded
    to two decimals) and is yielded. When `summary` is given,
    `summary[comp_id]` is updated with the running count of passed records.
    If `query` cannot be parsed an error is printed and nothing is yielded."""
    rec_counter = 0

    query_mol = Chem.MolFromSmiles(query)
    if not query_mol:
        print("* {} ERROR: could not generate query from SMILES.".format(
            comp_id))
        return

    query_fp = _murcko_fingerprint(query_mol)

    for rec in stream:
        if "mol" not in rec:
            continue

        if "FP_b64" in rec:  # use the pre-defined fingerprint if it is present in the stream
            # SECURITY NOTE(review): unpickling executes arbitrary code when
            # the stream is not trusted. Left unchanged because producers of
            # `FP_b64` depend on this format; only feed trusted streams.
            mol_fp = pickle.loads(b64.b64decode(rec["FP_b64"]))
        else:
            mol_fp = _murcko_fingerprint(rec["mol"])

        sim = DataStructs.FingerprintSimilarity(query_fp, mol_fp)
        if sim * 100 >= cutoff:
            rec_counter += 1
            rec["Sim"] = np.round(sim * 100, 2)

            if summary is not None:
                summary[comp_id] = rec_counter

            yield rec
Пример #12
0
    def __init__(self, smi):
        """Parse ``smi`` and precompute scaffold-related attributes.

        The input string, the parsed molecule, the scaffold (with its atoms,
        bonds and SMILES) and the ring-system data are stored on the
        instance, followed by zero-initialised linker and chain counters.

        NOTE(review): the result of ``Chem.MolFromSmiles`` is not checked
        for ``None`` before being handed to the scaffold helpers.
        """
        self._smi = smi
        self._mol = Chem.MolFromSmiles(smi)
        # The molecule is made generic first and the scaffold is taken from
        # that generic molecule (not the other way round).
        self._scaf = MurckoScaffold.GetScaffoldForMol(
            MurckoScaffold.MakeScaffoldGeneric(self._mol))
        self._scaf_atoms = self._scaf.GetAtoms()
        self._scaf_bonds = self._scaf.GetBonds()
        self._scaf_smi = Chem.MolToSmiles(self._scaf)
        # Both helpers are called only after the ``_scaf*`` attributes exist;
        # presumably they read them - verify before reordering.
        self._ring_system = self.GetRingSystemsofscaf()
        self._ring_system_count = self.count_ring_systems()
        # NOTE(review): presumably bin boundaries for chain lengths, matching
        # the five ``_chain_binning`` counters below - confirm against usage.
        self._bin_values = [1, 2, 3, 4, 7]

        # Linkers: [direct bond between rings, linear chain between rings, branched chain between rings]
        self._linkers = [0, 0, 0]
        self._chain_binning = [0, 0, 0, 0, 0]
Пример #13
0
def computeFramwork(df):
    """Add Murcko-scaffold and carbon-skeleton SMILES columns to ``df``.

    SMILES are read from column ``'can'``. The Murcko scaffold SMILES go to
    column ``'murckos'`` and the SMILES of the generic (carbon) skeleton of
    that scaffold to column ``'carbons'``. ``df`` is modified in place and
    returned.
    """
    murcko_smiles = []
    carbon_smiles = []
    for smi in df['can']:
        core = MurckoScaffold.GetScaffoldForMol(Chem.MolFromSmiles(smi))
        skeleton = MurckoScaffold.MakeScaffoldGeneric(core)
        # Convert the Murcko scaffold and the carbon skeleton to SMILES.
        murcko_smiles.append(Chem.MolToSmiles(core))
        carbon_smiles.append(Chem.MolToSmiles(skeleton))
    df['murckos'] = murcko_smiles
    df['carbons'] = carbon_smiles
    return df
Пример #14
0
def get_hieriarchical_frags(mol_or_smi):
    """Hierarchically (recursively) split a molecule into ring fragments.

    Only non-ring bonds are cut, and a cut is kept only when it yields
    exactly two pieces that each contain at least one ring. Every kept
    piece is reduced to its Murcko scaffold SMILES and split again.

    Takes a mol object or a Smiles string as input.
    Returns the collected scaffold Smiles (including the scaffold of the
    input itself) as a list, longest string first. An empty list is
    returned when the scaffold of the input raises ``ValueError``."""
    def _recursive_split(s, n=0):
        parent = Chem.MolFromSmiles(s)
        if parent is None:
            return
        acyclic_bond_ids = [
            bond.GetIdx() for bond in parent.GetBonds()
            if not bond.IsInRing()
        ]
        pieces = []
        for bond_id in acyclic_bond_ids:
            cut = Chem.FragmentOnBonds(parent, [bond_id], addDummies=False)
            try:
                parts = Chem.GetMolFrags(cut, asMols=True)
            except ValueError:
                continue
            # keep the cut only when it separated two ring-bearing parts
            if len(parts) != 2:
                continue
            if Chem.CalcNumRings(parts[0]) > 0 and Chem.CalcNumRings(
                    parts[1]) > 0:
                pieces.extend(parts)
        for piece in pieces:
            try:
                scaffold_smi = MurckoScaffold.MurckoScaffoldSmiles(mol=piece)
            except ValueError:
                continue
            if scaffold_smi in result:
                continue
            result[scaffold_smi] = True
            if "[CH]" in scaffold_smi:
                print(f"{scaffold_smi}  ({Chem.MolToSmiles(piece)})")
            _recursive_split(scaffold_smi, n + 1)

    # Strings go in through the ``smiles`` keyword, everything else as ``mol``.
    if isinstance(mol_or_smi, str):
        source = {"smiles": mol_or_smi}
    else:
        source = {"mol": mol_or_smi}
    try:
        murcko = MurckoScaffold.MurckoScaffoldSmiles(**source)
    except ValueError:
        return []
    result = {murcko: True}
    _recursive_split(murcko)
    return sorted(result, key=len, reverse=True)
Пример #15
0
    def _recurse(scaf):
        """Recursively collect multi-ring sub-scaffolds of ``scaf``.

        Every non-ring bond of the scaffold is removed in turn. When the
        removal leaves at least two ring-containing fragments, the Murcko
        scaffold SMILES of each fragment with more than one ring is
        appended to the enclosing ``scaf_list`` (if new) and split further.
        Scaffolds with fewer than three rings are not split.
        """
        orig_mol = Chem.MolFromSmiles(scaf)
        if orig_mol is None:
            # Unparsable scaffold SMILES: nothing to split.
            return
        if orig_mol.GetRingInfo().NumRings() < 3:
            return
        for bond in orig_mol.GetBonds():
            if bond.IsInRing():
                continue
            # Work on a fresh editable copy for every removed bond.
            rwmol = Chem.RWMol(orig_mol)
            rwmol.RemoveBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
            frag_list = Chem.MolToSmiles(rwmol.GetMol()).split(".")

            # One entry per fragment, so positions always line up with
            # ``frag_list``. Fragments of at most two characters are not
            # parsed (as before) and count as ring-free.
            rings_per_frag = []
            for frag in frag_list:
                num_rings = 0
                if len(frag) > 2:
                    mol = Chem.MolFromSmiles(frag)
                    if mol is not None:
                        num_rings = mol.GetRingInfo().NumRings()
                rings_per_frag.append(num_rings)

            # have we split between two rings?
            ring_split = sum(1 for count in rings_per_frag if count > 0)
            if ring_split < 2:
                continue
            for frag, num_rings in zip(frag_list, rings_per_frag):
                if num_rings > 1:
                    murcko_frag = MurckoScaffold.MurckoScaffoldSmiles(frag)
                    if murcko_frag not in scaf_list:
                        scaf_list.append(murcko_frag)
                        _recurse(murcko_frag)
Пример #16
0
def generate_molecule_image(path, limit=25):
    """Render scaffold images for the first ``limit`` lines of a hits file.

    Each line is expected to start with a SMILES followed by an identifier,
    separated by whitespace. For every such line the Murcko scaffold is
    drawn, the identifier is written into the image, and the image is saved
    as ``GUI/images/molecules/<SMILES>.png``.

    Lines with fewer than two fields and SMILES that cannot be parsed are
    skipped. Nothing happens (``None`` is returned) when ``path`` does not
    exist.

    NOTE(review): the SMILES is used verbatim as the file name; characters
    such as ``/`` or ``\\`` in a SMILES will not form a valid path. Left
    unchanged because consumers may look the images up by SMILES.
    """
    if not os.path.exists(path):
        return

    # Imported only once there is work to do.
    from rdkit.Chem.Scaffolds import MurckoScaffold
    from rdkit.Chem import MolFromSmiles
    from rdkit.Chem.Draw import MolToImage
    from PIL import ImageDraw

    # Read the hits file
    hits = []
    with open(path, 'r') as top_hits:
        for line_number, line in enumerate(top_hits):
            if line_number >= limit:
                break
            fields = line.split()
            if len(fields) < 2:
                continue  # blank or malformed line
            hits.append((fields[0], fields[1]))

    # Generate scaffold
    for smile, mid in hits:
        parsed = MolFromSmiles(smile)
        if parsed is None:
            continue
        mol = MurckoScaffold.GetScaffoldForMol(parsed)
        image = MolToImage(mol)

        # Add text to the image
        draw = ImageDraw.Draw(image)
        draw.text((5, 5), mid, fill="black", align="right")
        image.save("GUI/images/molecules/{}.png".format(smile))
Пример #17
0
def main(name, argv):
    """Write the scaffold of every sulfonyl-fluoride molecule in a SMILES file.

    ``argv`` must hold two paths: the input file (whitespace-separated
    ``SMILES identifier`` per line) and the output file. For each molecule
    matching the sulfonyl fluoride pattern, the fluorine is replaced by a
    tetrazole ring, the Murcko scaffold is taken, and the ring is turned
    back into fluorine. One tab-separated line ``scaffold, SMILES,
    identifier`` is written per molecule. Lines with fewer than two fields,
    unparsable SMILES and non-matching molecules are skipped. With any
    other number of arguments the usage text is printed.
    """
    if len(argv) != 2:
        print_usage(name)
        return

    rxn = rdChemReactions.ReactionFromSmarts(
        '[S:1](=[O:2])(=[O:3])F>>[S:1](=[O:2])(=[O:3])n1nnnn1')
    reactents_smarts = rxn.GetReactants()
    back = rdChemReactions.ReactionFromSmarts('[S:1]n1nnnn1>>[S:1]F')
    back_smarts = back.GetReactants()

    # The input is read completely before the output file is opened.
    with open(argv[0], 'r') as f:
        records = [line.split() for line in f]

    with open(argv[1], 'w') as f:
        for record in records:
            if len(record) < 2:
                continue  # blank or malformed line
            smiles, ident = record[0], record[1]
            mol = Chem.MolFromSmiles(smiles)
            # Check the molecule itself (the old code tested a list that
            # could never be None).
            if mol is None:
                continue
            if not mol.HasSubstructMatch(reactents_smarts[0]):
                continue
            fake_ring = rxn.RunReactants((mol, ))[0][0]
            # Round-trip through SMILES to obtain a freshly parsed molecule.
            fake_ring = Chem.MolFromSmiles(Chem.MolToSmiles(fake_ring))
            core = MurckoScaffold.GetScaffoldForMol(fake_ring)
            if core.HasSubstructMatch(back_smarts[0]):
                scaffold = back.RunReactants((core, ))[0][0]
            else:
                scaffold = Chem.MolFromSmiles('S(=O)(=O)F')
            f.write('%s\t%s\t%s\n' %
                    (Chem.MolToSmiles(scaffold), smiles, ident))
Пример #18
0
 def _recursive_split(s, n=0):
     """Split the molecule given as SMILES ``s`` at non-ring bonds.

     A cut is kept only when it yields exactly two pieces that each contain
     at least one ring. The Murcko scaffold SMILES of every kept piece is
     recorded in the enclosing ``result`` mapping and, when new, split
     again. ``n`` is the recursion depth handed to the next call.
     """
     parent = Chem.MolFromSmiles(s)
     if parent is None:
         return
     acyclic_bond_ids = [
         bond.GetIdx() for bond in parent.GetBonds() if not bond.IsInRing()
     ]
     pieces = []
     for bond_id in acyclic_bond_ids:
         cut = Chem.FragmentOnBonds(parent, [bond_id], addDummies=False)
         try:
             parts = Chem.GetMolFrags(cut, asMols=True)
         except ValueError:
             continue
         # keep the cut only when it separated two ring-bearing parts
         if len(parts) != 2:
             continue
         if Chem.CalcNumRings(parts[0]) > 0 and Chem.CalcNumRings(
                 parts[1]) > 0:
             pieces.extend(parts)
     for piece in pieces:
         try:
             scaffold_smi = MurckoScaffold.MurckoScaffoldSmiles(mol=piece)
         except ValueError:
             continue
         if scaffold_smi in result:
             continue
         result[scaffold_smi] = True
         if "[CH]" in scaffold_smi:
             print(f"{scaffold_smi}  ({Chem.MolToSmiles(piece)})")
         _recursive_split(scaffold_smi, n + 1)
def get_murcko_scaffold(smiles_dict):
    """Group molecule ids by the SMILES of their Murcko scaffold.

    Reads a smile dictionary in this format
    'CHEMBL189352': 'COc1ccc2c(cnn2n1)c3ccnc(Nc4ccc(cc4)C#N)n3'
    and returns a dictionary that maps each scaffold SMILES to the list of
    ids sharing it, e.g. {'<scaffold smiles>': ['CHEMBL379218', ...]}.
    Entries whose scaffold cannot be computed are reported on stdout and
    left out of the result.

    :param smiles_dict: dictionary mapping chembl_id to SMILES
    :return: dictionary mapping scaffold SMILES to a list of chembl_id
    """
    scaffolds = {}
    for chembl_id, smiles in smiles_dict.items():
        mol = Chem.MolFromSmiles(smiles)
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            scaffold = Chem.MolToSmiles(core)
        except Exception:
            # RDKit signals an unparsable molecule with its own exception
            # types, hence the broad catch.
            print("rdkit could not read {}".format(chembl_id))
            # Skip the entry; falling through would file this id under the
            # scaffold left over from the previous iteration.
            continue
        scaffolds.setdefault(scaffold, []).append(chembl_id)

    return scaffolds
Пример #20
0
def SMILES_2_ECFP(smiles, radius=3, bit_len=4096, index=None):
    """
    Transform a list of SMILES strings into Morgan (ECFP-like) bit vectors
    computed on the Murcko scaffold of each molecule.
    ----------
    smiles: List of SMILES strings to transform
    radius: Morgan fingerprint radius (default 3)
    bit_len: number of bits per fingerprint (default 4096)
    index: optional row labels for the result; the SMILES themselves are
        used when it is None
    Returns
    -------
    A pandas DataFrame with one row per SMILES and ``bit_len`` columns.
    A SMILES whose fingerprint cannot be computed is printed and its row is
    left as all zeros.
    """
    fps = np.zeros((len(smiles), bit_len))
    for i, smile in enumerate(smiles):
        mol = Chem.MolFromSmiles(smile)
        try:
            mol = MurckoScaffold.GetScaffoldForMol(mol)
            fp = AllChem.GetMorganFingerprintAsBitVect(mol,
                                                       radius,
                                                       nBits=bit_len)
            # Placeholder array; ConvertToNumpyArray fills it from ``fp``.
            arr = np.zeros((1, ))
            DataStructs.ConvertToNumpyArray(fp, arr)
            fps[i, :] = arr
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt / SystemExit must
            # still be able to stop a long run.
            print(smile)
            fps[i, :] = 0
    return pd.DataFrame(fps, index=(smiles if index is None else index))
def SmilesToFrameInChIKey(SMILES):
    """Return the InChIKey of the generic Murcko framework of ``SMILES``.

    The molecule is obtained through ``GetMol``. Its Murcko scaffold is
    made generic when possible; if that step fails the plain scaffold is
    used instead. ``None`` is returned when no molecule could be built.
    """
    mol = GetMol(SMILES)
    if not mol:
        return None

    frame = MurckoScaffold.GetScaffoldForMol(mol)
    try:
        frame = MurckoScaffold.MakeScaffoldGeneric(frame)
    except Exception:
        # Deliberate best effort: keep the non-generic scaffold. Narrowed
        # from a bare ``except`` so interrupts are not swallowed.
        pass

    return Chem.MolToInchiKey(frame) if frame else None
Пример #22
0
def get_annotated_murcko_scaffold(mol, scaffold=None, as_mol=True):
    """
    Build the annotated murcko scaffold of a molecule, i.e. the scaffold
    with every side chain replaced by a dummy atom ('*').

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
    scaffold : rdkit.Chem.rdchem.Mol, optional
        A murcko scaffold already calculated for `mol`, used as the
        template. When omitted (or falsy) it is computed here.
        The default is None.

    as_mol : bool, optional
        If True return the result of ``ReplaceSidechains`` unchanged,
        otherwise return its SMILES string. The default is True.

    Returns
    -------
    {str, rdkit.Chem.rdchem.Mol}
        Annotated Murcko scaffold. With ``as_mol=False`` an empty string
        is returned when ``ReplaceSidechains`` yields None.

    """
    template = scaffold if scaffold else MurckoScaffold.GetScaffoldForMol(mol)
    annotated = rdmolops.ReplaceSidechains(mol, template)
    if not as_mol:
        return '' if annotated is None else MolToSmiles(annotated)
    return annotated
Пример #23
0
def main(name, argv):
    """Write size-filtered RECAP fragments and their scaffolds to a file.

    ``argv`` must hold two paths. The first token of the first line of the
    input file is taken as SMILES and decomposed with RECAP. For the
    molecule itself and every RECAP child, ``[*]`` is replaced by ``[H]``.
    A candidate with more than 7 and at most 25 heavy atoms is kept when it
    has at most 5 rotatable bonds; its Murcko scaffold is kept as well when
    the scaffold has at most 5 rotatable bonds and more than 7 heavy atoms.
    The unique, non-empty SMILES are written one per line as
    ``SMILES<TAB>running number``, in sorted order so that repeated runs
    give the same numbering. With any other number of arguments the usage
    text is printed.
    """
    if len(argv) != 2:
        print_usage(name)
        return

    with open(argv[0], 'r') as f:
        smile = f.readline().split()[0]
    mol = Chem.MolFromSmiles(smile)
    hierarch = Recap.RecapDecompose(mol)
    children = []
    # ``keys()`` is a view on Python 3 and cannot be added to a list.
    candidates = list(hierarch.GetAllChildren().keys()) + [smile]
    for child in candidates:
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if not (7 < new_size <= 25):
            continue
        if rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))
        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        if core is None:
            continue  # scaffold SMILES could not be parsed back
        if rdMolDescriptors.CalcNumRotatableBonds(
                core) <= 5 and core.GetNumHeavyAtoms() > 7:
            children.append(core_smile)
    with open(argv[1], 'w') as f:
        i = 1
        for m in sorted(set(children)):
            if len(m) > 0:
                f.write(m + '\t' + str(i) + '\n')
                i += 1
Пример #24
0
 def get_scaffold(self, smiles):
     """Return the Murcko scaffold SMILES of ``smiles``.

     Chirality is kept or dropped according to ``self.include_chirality``.
     """
     from rdkit.Chem.Scaffolds import MurckoScaffold
     parsed = Chem.MolFromSmiles(smiles)
     keep_chirality = self.include_chirality
     return MurckoScaffold.MurckoScaffoldSmiles(
         mol=parsed, includeChirality=keep_chirality)
Пример #25
0
def get_murcko_scaffold(mol, generic=False):
    """Compute the murcko scaffold of a molecule.

    Parameters
    ----------
    mol (Chem.Mol): an rdkit molecule
    generic (bool): when true, the scaffold is additionally passed through
        ``MakeScaffoldGeneric`` (generic scaffold, CSK)

    Returns
    -------
    murcko (Chem.Mol): an rdkit molecule (scaffold)
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    return MurckoScaffold.MakeScaffoldGeneric(scaffold) if generic else scaffold
Пример #26
0
    def findCluster(self, smiles):
        """Assign ``smiles`` to a scaffold cluster.

        The Murcko scaffold of the molecule is described by an atom-pair
        fingerprint and compared with the fingerprints returned by
        ``self.getFingerprints()`` (a mapping from cluster SMILES to
        fingerprint).

        Returns:
            Tuple ``(cluster, fingerprint, is_new)``:

            * ``("", "", False)`` when the SMILES cannot be parsed or no
              scaffold can be computed;
            * the scaffold SMILES with ``is_new=False`` when it is already
              a known cluster;
            * the most similar known cluster with ``is_new=False`` when its
              Tanimoto similarity is at least ``self.minsimilarity``;
            * otherwise the scaffold SMILES with ``is_new=True``.
        """
        mol = Chem.MolFromSmiles(smiles)
        if not mol:
            return "", "", False
        try:
            scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        except Exception:
            # Narrowed from a bare ``except`` so interrupts still propagate.
            return "", "", False
        if not scaffold:
            return "", "", False
        cluster = Chem.MolToSmiles(scaffold, isomericSmiles=False)

        fp = Pairs.GetAtomPairFingerprint(scaffold)  # Change to Tanimoto?
        # Take one snapshot so cluster names and fingerprints stay aligned.
        known = self.getFingerprints()
        if cluster in known:
            return cluster, fp, False

        names = list(known.keys())
        fps = list(known.values())
        sims = DataStructs.BulkTanimotoSimilarity(fp, fps)
        if len(sims) == 0:
            return cluster, fp, True
        closest = np.argmax(sims)
        if sims[closest] >= self.minsimilarity:
            return names[closest], fp, False
        return cluster, fp, True
Пример #27
0
def get_fragments(insmiles):
    """Split a molecule into its Murcko core and the remaining side chains.

    Args:
        insmiles (str): SMILES for molecule

    Returns:
        core: scaffold SMILES (empty string when there is no scaffold)
        side: list of side chain SMILES, obtained by deleting the core
            substructure from the molecule; ``[insmiles]`` when the core
            is empty
    """
    mol_prev = Chem.MolFromSmiles(insmiles)

    # Core via Murcko fragmentation.
    core = Murcko.MurckoScaffoldSmilesFromSmiles(insmiles)
    if core == "":
        return core, [insmiles]

    # Side chains are whatever is left after removing the core.
    mol_core = Chem.MolFromSmiles(core)
    mol_side = Chem.rdmolops.DeleteSubstructs(mol_prev, mol_core)
    return core, Chem.MolToSmiles(mol_side).split(".")
Пример #28
0
def extract_side_chains(mol, remove_duplicates=False, mark='[*]'):
    """ Extract side chains from a smiles string. Core is handled as Murcko scaffold.

    :param mol: {str} smiles string of a molecule.
    :param remove_duplicates: {bool} Keep or remove duplicates. With True the
        order of the returned list is not defined.
    :param mark: string that replaces the numbered attachment points
        ``[0*]`` .. ``[19*]`` in the output.
    :return: smiles strings of side chains in a list, attachment points
        replaced by ``mark``; an empty list when the side chains cannot be
        computed.
    """
    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        side_chain = ReplaceCore(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(side_chain, isomericSmiles=True)
    except Exception:
        # Narrowed from a bare ``except`` so interrupts still propagate.
        return []
    for label in range(20):
        smi = smi.replace('[' + str(label) + '*]', mark)
    chains = smi.split('.')
    if remove_duplicates:
        return list(set(chains))
    return chains
Пример #29
0
 def ECFP_from_SMILES(cls, smiles, radius=3, bit_len=4096, scaffold=0, index=None):
     """Compute Morgan bit-vector fingerprints for a sequence of SMILES.

     Args:
         smiles: sized iterable of SMILES strings.
         radius: Morgan fingerprint radius passed to RDKit.
         bit_len: number of bits per fingerprint (the column count).
         scaffold: ``0`` fingerprints the parsed molecule, ``1`` its Murcko
             scaffold, ``2`` the result of
             ``MurckoScaffold.MakeScaffoldGeneric`` applied to the molecule.
         index: optional row labels; the SMILES are used when it is None.

     Returns:
         ``pandas.DataFrame`` with one row per SMILES and ``bit_len``
         columns. A SMILES whose fingerprint cannot be computed is printed
         and its row is left as all zeros.
     """
     fps = np.zeros((len(smiles), bit_len))
     for i, smile in enumerate(smiles):
         mol = Chem.MolFromSmiles(smile)
         try:
             if scaffold == 1:
                 mol = MurckoScaffold.GetScaffoldForMol(mol)
             elif scaffold == 2:
                 mol = MurckoScaffold.MakeScaffoldGeneric(mol)
             fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=bit_len)
             # Placeholder array; ConvertToNumpyArray fills it from ``fp``.
             arr = np.zeros((1,))
             DataStructs.ConvertToNumpyArray(fp, arr)
             fps[i, :] = arr
         except Exception:
             # Not a bare ``except``: KeyboardInterrupt / SystemExit must
             # still be able to stop a long run.
             print(smile)
             fps[i, :] = 0
     return pd.DataFrame(fps, index=(smiles if index is None else index))
Пример #30
0
def GetMurckoScaffold(mol):
    """Return the generic form of ``mol`` as an RDKit molecule.

    mol: rdkit RWMol or Mol

    NOTE(review): ``MakeScaffoldGeneric`` is applied to the input directly;
    ``GetScaffoldForMol`` is not called here, so callers presumably pass a
    molecule that already is a scaffold - confirm against call sites.
    """
    from rdkit.Chem.Scaffolds import MurckoScaffold

    return MurckoScaffold.MakeScaffoldGeneric(mol)