示例#1
0
def atomtypingValidityChecks(mol):
    """Run a series of sanity checks on a Molecule before atom typing.

    The checks run in a fixed order and the first one that fails raises.
    The molecule itself is never modified; the segment prediction works on
    a copy.

    Parameters
    ----------
    mol : Molecule object
        The molecule to validate

    Raises
    ------
    RuntimeError
        If there are no protein atoms, if there are atoms that are neither
        protein nor one of the ``metal_atypes`` elements, if bonds are
        duplicated, if all segids or all chains are empty, if the number of
        segids differs from what autoSegment2 predicts, or if no hydrogens
        are present.
    ValueError
        If the molecule has fewer bonds than (number of atoms - 1).
    """
    notice = (
        "Checking validity of Molecule before atomtyping. "
        "If it gives incorrect results or to improve performance disable it with validitychecks=False. "
        "Most of these checks can be passed by using the moleculekit.atomtyper.prepareProteinForAtomtyping function. "
        "But make sure you understand what you are doing."
    )
    logger.info(notice)

    is_protein = mol.atomselect("protein")
    is_metal = mol.atomselect(f"element {' '.join(metal_atypes)}")
    forbidden = ~(is_protein | is_metal)

    if not is_protein.any():
        raise RuntimeError("No protein atoms found in Molecule")

    if forbidden.any():
        offending = np.unique(mol.resname[forbidden])
        raise RuntimeError(
            "Found atoms with resnames {} in the Molecule which can cause issues with the voxelization. Please make sure to only pass protein atoms and metals."
            .format(offending)
        )

    # A connected structure needs at least N-1 bonds for N atoms.
    min_bonds = mol.numAtoms - 1
    if mol.bonds.shape[0] < min_bonds:
        raise ValueError(
            "The protein has less bonds than (number of atoms - 1). This seems incorrect. You can assign bonds with `mol.bonds = mol._getBonds()`"
        )

    from moleculekit.molecule import calculateUniqueBonds

    # If de-duplication shrinks the bond list, the input contained repeats.
    unique_bonds, _ = calculateUniqueBonds(mol.bonds, mol.bondtype)
    if unique_bonds.shape[0] != mol.bonds.shape[0]:
        raise RuntimeError(
            "The protein has duplicate bond information. This will mess up atom typing. Please keep only unique bonds in the molecule. If you want you can use moleculekit.molecule.calculateUniqueBonds for this."
        )

    if (mol.segid == "").all() or (mol.chain == "").all():
        raise RuntimeError(
            "Please assign segments to the segid and chain fields of the molecule using autoSegment2"
        )

    from moleculekit.tools.autosegment import autoSegment2

    # Predict the segmentation on a scratch copy whose segid/chain are blanked,
    # so existing names cannot clash with the ones autoSegment2 assigns.
    scratch = mol.copy()
    scratch.segid[:] = ""
    scratch.chain[:] = ""
    predicted = autoSegment2(scratch, fields=("chain", "segid"), _logger=False)

    n_predicted = np.unique(predicted.segid).size
    n_present = np.unique(mol.segid).size
    if n_present != n_predicted:
        raise RuntimeError(
            "The molecule contains {} segments while we predict {}. Make sure you used autoSegment2 on the protein"
            .format(n_present, n_predicted)
        )

    if not (mol.element == "H").any():
        raise RuntimeError(
            "No hydrogens found in the Molecule. Make sure to use systemPrepare before passing it to voxelization. Also you might need to recalculate the bonds after this."
        )
示例#2
0
def prepareProteinForAtomtyping(mol,
                                guessBonds=True,
                                protonate=True,
                                pH=7,
                                segment=True,
                                verbose=True):
    """Return a copy of a protein Molecule made ready for atom typing.

    The input is copied first, so the caller's object is left untouched.
    The optional steps run in this order: protonation, bond guessing,
    segmentation.

    Parameters
    ----------
    mol : Molecule object
        The protein to prepare
    guessBonds : bool
        Replaces the bonds of the molecule with freshly guessed ones
    protonate : bool
        Runs proteinPrepare on the protein at the given pH
    pH : float
        The pH passed to proteinPrepare
    segment : bool
        Runs autoSegment2 to assign the segid and chain fields
    verbose : bool
        Set to False to turn off the printing

    Returns
    -------
    mol : Molecule object
        The prepared Molecule

    Raises
    ------
    RuntimeError
        If the molecule has no protein atoms, or has any non-protein atoms.
    """
    prepared = mol.copy()
    is_protein = prepared.atomselect('protein')

    if not is_protein.any():
        raise RuntimeError('No protein atoms found in Molecule')

    non_protein = ~is_protein
    if non_protein.any():
        resnames = np.unique(prepared.resname[non_protein])
        raise RuntimeError(
            'Found non-protein atoms with resnames {} in the Molecule. Please make sure to only pass protein atoms.'
            .format(resnames))

    if protonate:
        from moleculekit.tools.preparation import proteinPrepare

        # Keep the preparation logger quiet unless the caller asked for output.
        log_level = 'INFO' if verbose else 'ERROR'
        prepared = proteinPrepare(
            prepared, pH=pH, verbose=verbose, _loggerLevel=log_level)

    if guessBonds:
        prepared.bonds = prepared._guessBonds()

    if segment:
        from moleculekit.tools.autosegment import autoSegment2

        prepared = autoSegment2(
            prepared, fields=('segid', 'chain'), _logger=verbose)

    return prepared
示例#3
0
def atomtypingValidityChecks(mol):
    """Run a series of sanity checks on a protein Molecule before atom typing.

    The checks run in a fixed order and the first one that fails raises.
    The molecule itself is never modified; the segment prediction works on
    a copy.

    Parameters
    ----------
    mol : Molecule object
        The molecule to validate

    Raises
    ------
    RuntimeError
        If there are no protein atoms, if there are any non-protein atoms,
        if all segids or all chains are empty, if the number of segids
        differs from what autoSegment2 predicts, or if no hydrogens are
        present.
    ValueError
        If the molecule has fewer bonds than (number of atoms - 1).
    """
    logger.info(
        'Checking validity of Molecule before atomtyping. '
        'If it gives incorrect results or to improve performance disable it with validitychecks=False. '
        'Most of these checks can be passed by using the moleculekit.atomtyper.prepareProteinForAtomtyping function. '
        'But make sure you understand what you are doing.'
    )
    is_protein = mol.atomselect('protein')

    if not is_protein.any():
        raise RuntimeError('No protein atoms found in Molecule')

    non_protein = ~is_protein
    if non_protein.any():
        offending = np.unique(mol.resname[non_protein])
        raise RuntimeError(
            'Found non-protein atoms with resnames {} in the Molecule. Please make sure to only pass protein atoms.'
            .format(offending)
        )

    # A connected structure needs at least N-1 bonds for N atoms.
    min_bonds = mol.numAtoms - 1
    if mol.bonds.shape[0] < min_bonds:
        raise ValueError(
            'The protein has less bonds than (number of atoms - 1). This seems incorrect. You can assign bonds with `mol.bonds = mol._getBonds()`'
        )

    if (mol.segid == '').all() or (mol.chain == '').all():
        raise RuntimeError(
            'Please assign segments to the segid and chain fields of the molecule using autoSegment2'
        )

    from moleculekit.tools.autosegment import autoSegment2

    # Predict the segmentation on a scratch copy whose segid/chain are blanked,
    # so existing names cannot clash with the ones autoSegment2 assigns.
    scratch = mol.copy()
    scratch.segid[:] = ''
    scratch.chain[:] = ''
    predicted = autoSegment2(scratch, fields=('chain', 'segid'), _logger=False)

    n_predicted = np.unique(predicted.segid).size
    n_present = np.unique(mol.segid).size
    if n_present != n_predicted:
        raise RuntimeError(
            'The molecule contains {} segments while we predict {}. Make sure you used autoSegment2 on the protein'
            .format(n_present, n_predicted)
        )

    if not (mol.element == 'H').any():
        raise RuntimeError(
            'No hydrogens found in the Molecule. Make sure to use proteinPrepare before passing it to voxelization. Also you might need to recalculate the bonds after this.'
        )
示例#4
0
def prepareProteinForAtomtyping(mol,
                                guessBonds=True,
                                protonate=True,
                                pH=7.4,
                                segment=True,
                                verbose=True):
    """Prepares a Molecule object for atom typing.

    The input is copied and then split into protein, metal and water parts.
    Only the protein part is protonated, re-bonded and segmented; the metal
    and water parts are appended back (in that order) at the end.

    Parameters
    ----------
    mol : Molecule object
        The molecule to prepare. May contain protein atoms, atoms whose
        element is listed in ``metal_atypes``, and waters.
    guessBonds : bool
        Drops the bonds in the molecule and guesses them from scratch
    protonate : bool
        Protonates the protein for the given pH and optimizes hydrogen networks
    pH : float
        The pH for protonation
    segment : bool
        Automatically guesses the segments of a protein by using the guessed bonds.
        When enabled, metals are additionally moved to segid "ME" / chain "Z"
        and waters to segid "WX" / chain "W", with renumbered resids.
    verbose : bool
        Set to False to turn off the printing

    Returns
    -------
    mol : Molecule object
        The prepared Molecule

    Raises
    ------
    RuntimeError
        If the molecule has no protein atoms, or contains atoms that are
        neither protein, metal nor water.
    AssertionError
        If the segmented protein already uses one of the chain/segid names
        reserved for metals ("Z"/"ME") or waters ("W"/"WX").
    """
    from moleculekit.tools.autosegment import autoSegment2
    from moleculekit.util import sequenceID

    mol = mol.copy()
    if guessBonds:
        # Need to guess bonds at the start for atom selection and for autoSegment
        mol.bondtype = np.array([], dtype=object)
        mol.bonds = mol._guessBonds()

    protsel = mol.atomselect("protein")
    metalsel = mol.atomselect(f"element {' '.join(metal_atypes)}")
    watersel = mol.atomselect("water")
    notallowed = ~(protsel | metalsel | watersel)

    if not np.any(protsel):
        raise RuntimeError("No protein atoms found in Molecule")

    if np.any(notallowed):
        resnames = np.unique(mol.resname[notallowed])
        # Waters are accepted by the selection above, so the message lists them too.
        raise RuntimeError(
            "Found atoms with resnames {} in the Molecule which can cause issues with the voxelization. Please make sure to only pass protein atoms, metals and waters."
            .format(resnames))

    # Split the system so that only the protein goes through preparation.
    protmol = mol.copy()
    protmol.filter(protsel, _logger=False)
    metalmol = mol.copy()
    metalmol.filter(metalsel, _logger=False)
    watermol = mol.copy()
    watermol.filter(watersel, _logger=False)

    if protonate:
        from moleculekit.tools.preparation import systemPrepare

        if np.all(protmol.segid == "") and np.all(protmol.chain == ""):
            protmol = autoSegment2(
                protmol,
                fields=("segid", "chain"),
                basename="K",
                _logger=verbose)  # We need segments to prepare the protein
        protmol = systemPrepare(
            protmol,
            pH=pH,
            verbose=verbose,
            _logger_level="INFO" if verbose else "ERROR",
        )

    if guessBonds:
        protmol.bonds = protmol._guessBonds()
        # TODO: Should we remove bonds between metals and protein?

    if segment:
        protmol = autoSegment2(
            protmol, fields=("segid", "chain"),
            _logger=verbose)  # Reassign segments after preparation

        # Assign separate segment to the metals just in case pybel takes that into account
        if np.any(protmol.chain == "Z") or np.any(protmol.segid == "ME"):
            raise AssertionError(
                "Report this issue on the moleculekit github issue tracker. Too many chains in the protein."
            )
        metalmol.segid[:] = "ME"
        metalmol.chain[:] = "Z"
        # Just in case, let's put a residue gap between the metals so that
        # they are considered separate chains no matter what happens
        metalmol.resid[:] = (
            np.arange(0, 2 * metalmol.numAtoms, 2) + protmol.resid.max() + 1
        )

        if watermol.numAtoms != 0:
            if np.any(protmol.chain == "W") or np.any(protmol.segid == "WX"):
                raise AssertionError(
                    "Report this issue on the moleculekit github issue tracker. Too many chains in the protein."
                )
            # Renumber from the ORIGINAL resid/segid/chain before the segid
            # and chain are overwritten below.
            watermol.resid[:] = sequenceID(
                (watermol.resid, watermol.segid, watermol.chain), step=2)
            watermol.segid[:] = "WX"
            watermol.chain[:] = "W"

    mol = protmol.copy()
    mol.append(metalmol)
    mol.append(watermol)
    return mol
示例#5
0
def prepareProteinForAtomtyping(mol,
                                guessBonds=True,
                                protonate=True,
                                pH=7,
                                segment=True,
                                verbose=True):
    """Return a copy of a protein (plus metals) Molecule ready for atom typing.

    The input is copied and split into a protein part and a metal part.
    Only the protein part is protonated, re-bonded and segmented; the metal
    part is appended back at the end.

    Parameters
    ----------
    mol : Molecule object
        The molecule to prepare. May contain protein atoms and atoms whose
        element is listed in ``metal_atypes``.
    guessBonds : bool
        Drops the bonds in the molecule and guesses them from scratch
    protonate : bool
        Runs proteinPrepare on the protein part at the given pH
    pH : float
        The pH passed to proteinPrepare
    segment : bool
        Runs autoSegment2 on the protein part and moves the metals to
        segid 'ME' / chain 'Z' with renumbered resids
    verbose : bool
        Set to False to turn off the printing

    Returns
    -------
    mol : Molecule object
        The prepared Molecule

    Raises
    ------
    RuntimeError
        If the molecule has no protein atoms, or contains atoms that are
        neither protein nor metal.
    AssertionError
        If the segmented protein already uses chain 'Z' or segid 'ME'.
    """
    from moleculekit.tools.autosegment import autoSegment2

    work = mol.copy()
    if guessBonds:
        # Bonds have to exist up front: both the atom selection and
        # autoSegment rely on them.
        work.bondtype = np.array([], dtype=object)
        work.bonds = work._guessBonds()

    is_protein = work.atomselect('protein')
    is_metal = work.atomselect('element {}'.format(' '.join(metal_atypes)))
    rejected = ~(is_protein | is_metal)

    if not is_protein.any():
        raise RuntimeError('No protein atoms found in Molecule')

    if rejected.any():
        resnames = np.unique(work.resname[rejected])
        raise RuntimeError(
            'Found atoms with resnames {} in the Molecule which can cause issues with the voxelization. Please make sure to only pass protein atoms and metals.'
            .format(resnames))

    # Split the system so that only the protein goes through preparation.
    protein = work.copy()
    protein.filter(is_protein, _logger=False)
    metals = work.copy()
    metals.filter(is_metal, _logger=False)

    if protonate:
        from moleculekit.tools.preparation import proteinPrepare

        unsegmented = np.all(protein.segid == '') and np.all(protein.chain == '')
        if unsegmented:
            # proteinPrepare needs segments, so assign provisional ones.
            protein = autoSegment2(
                protein, fields=('segid', 'chain'), basename='K', _logger=verbose)

        log_level = 'INFO' if verbose else 'ERROR'
        protein = proteinPrepare(
            protein, pH=pH, verbose=verbose, _loggerLevel=log_level)

    if guessBonds:
        protein.bonds = protein._guessBonds()
        # TODO: Should we remove bonds between metals and protein? Should we remove metals before guessing bonds and add them back in? Might crash otherwise?

    if segment:
        # Segments are reassigned because preparation may have changed the protein.
        protein = autoSegment2(
            protein, fields=('segid', 'chain'), _logger=verbose)

        # The metals get their own segment/chain names, so those names must
        # not already be taken by the protein.
        name_clash = np.any(protein.chain == 'Z') or np.any(protein.segid == 'ME')
        if name_clash:
            raise AssertionError(
                'Report this issue on the moleculekit github issue tracker. Too many chains in the protein.'
            )
        metals.segid[:] = 'ME'
        metals.chain[:] = 'Z'
        # Number the metals after the last protein residue, leaving a gap of
        # one resid between consecutive metals so each counts as separate.
        spaced_offsets = np.arange(metals.numAtoms) * 2
        metals.resid[:] = spaced_offsets + protein.resid.max() + 1

    merged = protein.copy()
    merged.append(metals)
    return merged