Exemplo n.º 1
0
def split_mol_by_residues(mol):
    """Break a molecule into one fragment per residue.

    The molecule is first split with ``SplitMolByPDBResidues``; every
    resulting piece is then separated into its connected components.
    A component whose atoms map to more than one residue identifier is
    cut again on the bonds selected by ``is_peptide_bond``.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        The molecule to fragment

    Returns
    -------
    residues : list
        A list of :class:`rdkit.Chem.rdchem.Mol`

    Notes
    -----
    Code adapted from Maciek Wójcikowski on the RDKit discussion list
    """
    fragments = []
    for residue_mol in SplitMolByPDBResidues(mol).values():
        components = GetMolFrags(residue_mol, asMols=True,
                                 sanitizeFrags=False)
        for component in components:
            # map each atom index to the residue it belongs to
            atom_resids = {atom.GetIdx(): ResidueId.from_atom(atom)
                           for atom in component.GetAtoms()}
            n_residues = len(set(atom_resids.values()))
            if n_residues <= 1:
                # the component already holds a single residue
                fragments.append(component)
                continue
            # several residues are still bonded together: cut the
            # peptide bonds and keep every resulting piece
            peptide_bonds = [bond.GetIdx() for bond in component.GetBonds()
                             if is_peptide_bond(bond, atom_resids)]
            cut = FragmentOnBonds(component, peptide_bonds, addDummies=False)
            fragments.extend(
                GetMolFrags(cut, asMols=True, sanitizeFrags=False))
    return fragments
Exemplo n.º 2
0
    def keep_biggest(cls, mol_in):
        """Strip small fragments from compound.

        Returns a new compound where only the "biggest" fragment is conserved
        according to (i) the number of non-Hs atoms and if there is tie then
        according to (ii) the molecular weight.

        If the input has a single connected component it is returned
        unchanged. Otherwise ``cls._copy_properties`` is called to carry the
        properties of the input over to the selected fragment.

        :param  mol_in:  RDKit Mol
        :return mol_out: new RDKit Mol having only one connected component
        """
        def count_non_hs_atom(mol):
            # number of atoms whose atomic number is not 1 (hydrogen)
            return sum(1 for atm in mol.GetAtoms() if atm.GetAtomicNum() != 1)

        # Remove "other" molecules
        molfrag = GetMolFrags(mol_in, asMols=True, sanitizeFrags=False)
        if len(molfrag) <= 1:
            return mol_in

        mol_out = mol_in
        accepted_nbr_atm = 0  # heavy-atom count of the best fragment so far
        # Molecular weight of the best fragment so far. It must be bound
        # before the loop: the tie-break below reads it as soon as a
        # fragment has as many heavy atoms as the current best, which can
        # already happen on the first fragment (zero heavy atoms).
        accepted_mass = 0
        for f in molfrag:
            nbr_atm = count_non_hs_atom(f)
            if nbr_atm < accepted_nbr_atm:
                continue
            mass = MolWt(f)
            if nbr_atm > accepted_nbr_atm or mass > accepted_mass:
                accepted_nbr_atm = nbr_atm
                accepted_mass = mass
                mol_out = f  # keep only the biggest fragment
        cls._copy_properties(mol_in, mol_out)  # save the name and stuff
        return mol_out
    def testB1BondGenMol(self):
        """Run a short seeded simulation and check the generated molecule.

        Verifies that ``generate_mol`` writes nothing to stderr for the
        simulated adjacency matrix, and that the number of fragments RDKit
        finds in the resulting mol block equals the sum of the values
        reported under ``CHAIN_LEN`` by ``analyze_adj_matrix``.
        """
        seed = 55
        monomer_cap = 12
        ratio = 1.0
        starting_types = [S, S, S, G, S]

        monomers = [Monomer(kind, idx)
                    for idx, kind in enumerate(starting_types)]
        events = create_initial_events(monomers, DEF_RXN_RATES)
        events.append(Event(GROW, [], rate=1e4))
        state = create_initial_state(events, monomers)

        kmc_out = run_kmc(DEF_RXN_RATES, state, events,
                          n_max=monomer_cap, t_max=2,
                          random_seed=seed, sg_ratio=ratio)
        nodes = kmc_out[MONO_LIST]
        adj = kmc_out[ADJ_MATRIX]

        # building the molecule must not emit anything on stderr
        with capture_stderr(generate_mol, adj, nodes) as output:
            self.assertFalse(output)

        mol_block = generate_mol(adj, nodes)
        mol = MolFromMolBlock(mol_block)
        rdkit_frags = GetMolFrags(mol)

        chain_lengths = analyze_adj_matrix(adj)[CHAIN_LEN]
        expected_count = np.sum(list(chain_lengths.values()))

        # Make sure there are the same number of separate fragments
        # calculated by RDKit as we get from just separating the
        # alternate B1
        self.assertEqual(expected_count, len(rdkit_frags))
Exemplo n.º 4
0
def get_scaffold_frags(frag, hash_func=None):
    """Collect the fragments of a disconnected structure as scaffolds.

    This function is primarily used during molecular fragmentation.

    Parameters
    ----------
    frag : rdkit.Chem.rdchem.Mol
        An rdkit Mol containing disconnected structures.
    hash_func : callable, optional
        Scaffold hash function, the default is the canonical smiles.

    Returns
    -------
    set
        A set of scaffoldgraph.core.Scaffold objects which are unique
        fragments obtained from the disconnected structures in the
        input molecule.

    Notes
    -----
    The function also performs a `partial sanitization` of the input.
    If that step raises ``ValueError`` the error is logged at debug
    level and an empty set is returned. This case can occur when an
    aromatic ring system is dissected in a way in which the resultant
    system is no longer aromatic.

    """
    try:
        partial_sanitization(frag)
    except ValueError as err:
        # Dissecting an aromatic ring system may leave the remainder in
        # an undefined, no-longer-aromatic state. Nothing is done to
        # prevent that; the failure is only logged for reference.
        # This may even be desirable for a scaffold tree and is
        # equivalent to the behavior of SNG (I believe...)
        logger.debug(err)
        return set()

    scaffolds = set()
    for component in GetMolFrags(frag, asMols=True, sanitizeFrags=False):
        scaffolds.add(Scaffold(component, hash_func))
    return scaffolds
Exemplo n.º 5
0
def get_scaffold_frags(frag):
    """Return the set of scaffolds found in a disconnected structure.

    Used by fragmentation methods. The input is partially sanitized
    first; if that raises ``ValueError`` the error is logged at debug
    level and an empty set is returned.
    """
    try:
        partial_sanitization(frag)
    except ValueError as err:
        # Cutting through an aromatic ring system can leave a result
        # that is no longer aromatic and therefore in an undefined
        # state. That is not prevented here, only logged for reference.
        # This behaviour may be desirable for a scaffold tree and is
        # equivalent to the behavior of SNG (I believe...)
        logger.debug(err)
        return set()
    components = GetMolFrags(frag, asMols=True, sanitizeFrags=False)
    return set(map(Scaffold, components))
Exemplo n.º 6
0
def keep_largest_fragment(mol):
    """Pick the largest fragment of a disconnected molecule.

    "Largest" simply means the fragment with the highest atom count;
    when several fragments share that count the first one wins.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        rdkit molecule containing disconnected fragments.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        Molecule containing the largest disconnected
        fragment. The input itself is returned when it has at
        most one fragment.

    """
    fragments = GetMolFrags(mol, asMols=True)
    if len(fragments) > 1:
        largest = fragments[0]
        largest_size = largest.GetNumAtoms()
        for candidate in fragments[1:]:
            candidate_size = candidate.GetNumAtoms()
            # strict comparison keeps the earliest fragment on ties
            if candidate_size > largest_size:
                largest, largest_size = candidate, candidate_size
        return largest
    return mol