def atomtypingValidityChecks(mol):
    """Run sanity checks on a Molecule before it is atom-typed.

    The checks run in a fixed order and the first failing one raises:
    protein atoms must exist, only protein and metal atoms may be present,
    the bond list must be large enough and free of duplicates, segid and
    chain must be assigned, the number of segments must match what
    autoSegment2 predicts, and hydrogens must be present.

    Parameters
    ----------
    mol : Molecule object
        The molecule to validate. The segment prediction is done on a copy.

    Raises
    ------
    RuntimeError
        If any check other than the bond-count check fails
    ValueError
        If the molecule has fewer bonds than (number of atoms - 1)
    """
    logger.info(
        "Checking validity of Molecule before atomtyping. "
        "If it gives incorrect results or to improve performance disable it with validitychecks=False. "
        "Most of these checks can be passed by using the moleculekit.atomtyper.prepareProteinForAtomtyping function. "
        "But make sure you understand what you are doing."
    )

    is_protein = mol.atomselect("protein")
    is_metal = mol.atomselect(f"element {' '.join(metal_atypes)}")
    is_other = ~(is_protein | is_metal)

    if not np.any(is_protein):
        raise RuntimeError("No protein atoms found in Molecule")

    if np.any(is_other):
        offending = np.unique(mol.resname[is_other])
        raise RuntimeError(
            f"Found atoms with resnames {offending} in the Molecule which can cause issues with the voxelization. Please make sure to only pass protein atoms and metals."
        )

    if mol.numAtoms - 1 > mol.bonds.shape[0]:
        raise ValueError(
            "The protein has less bonds than (number of atoms - 1). This seems incorrect. You can assign bonds with `mol.bonds = mol._getBonds()`"
        )

    from moleculekit.molecule import calculateUniqueBonds

    # Every bond must be listed exactly once
    unique_bonds, _ = calculateUniqueBonds(mol.bonds, mol.bondtype)
    if unique_bonds.shape[0] != mol.bonds.shape[0]:
        raise RuntimeError(
            "The protein has duplicate bond information. This will mess up atom typing. Please keep only unique bonds in the molecule. If you want you can use moleculekit.molecule.calculateUniqueBonds for this."
        )

    # Either field being completely empty counts as "not segmented"
    if any(np.all(field == "") for field in (mol.segid, mol.chain)):
        raise RuntimeError(
            "Please assign segments to the segid and chain fields of the molecule using autoSegment2"
        )

    from moleculekit.tools.autosegment import autoSegment2

    # Predict the segments on a blank copy and compare the counts.
    # segid and chain are cleared to avoid name clashes in autoSegment2.
    blank = mol.copy()
    blank.segid[:] = ""
    blank.chain[:] = ""
    predicted = autoSegment2(blank, fields=("chain", "segid"), _logger=False)

    n_predicted = np.unique(predicted.segid).size
    n_present = np.unique(mol.segid).size
    if n_present != n_predicted:
        raise RuntimeError(
            f"The molecule contains {n_present} segments while we predict {n_predicted}. Make sure you used autoSegment2 on the protein"
        )

    if not np.any(mol.element == "H"):
        raise RuntimeError(
            "No hydrogens found in the Molecule. Make sure to use systemPrepare before passing it to voxelization. Also you might need to recalculate the bonds after this."
        )
def prepareProteinForAtomtyping(mol, guessBonds=True, protonate=True, pH=7, segment=True, verbose=True):
    """ Prepares a Molecule object for atom typing.

    Works on a copy of the input. Optionally protonates the protein, guesses
    its bonds from scratch and re-assigns its segments, in that order.

    Parameters
    ----------
    mol : Molecule object
        The protein to prepare
    guessBonds : bool
        Drops the bonds (and their bond types) in the molecule and guesses them from scratch
    protonate : bool
        Protonates the protein for the given pH and optimizes hydrogen networks
    pH : float
        The pH for protonation
    segment : bool
        Automatically guesses the segments of a protein by using the guessed bonds
    verbose : bool
        Set to False to turn off the printing

    Returns
    -------
    mol : Molecule object
        The prepared Molecule

    Raises
    ------
    RuntimeError
        If the molecule contains no protein atoms, or contains any non-protein atoms
    """
    mol = mol.copy()

    protsel = mol.atomselect('protein')
    if not np.any(protsel):
        raise RuntimeError('No protein atoms found in Molecule')

    if np.any(~protsel):
        resnames = np.unique(mol.resname[~protsel])
        raise RuntimeError(
            'Found non-protein atoms with resnames {} in the Molecule. Please make sure to only pass protein atoms.'
            .format(resnames))

    if protonate:
        from moleculekit.tools.preparation import proteinPrepare

        mol = proteinPrepare(mol, pH=pH, verbose=verbose, _loggerLevel='INFO' if verbose else 'ERROR')

    if guessBonds:
        # The old bond types describe the bonds being dropped. Clear them so
        # bondtype cannot disagree in length with the newly guessed bonds.
        mol.bondtype = np.array([], dtype=object)
        mol.bonds = mol._guessBonds()

    if segment:
        from moleculekit.tools.autosegment import autoSegment2

        mol = autoSegment2(mol, fields=('segid', 'chain'), _logger=verbose)

    return mol
def atomtypingValidityChecks(mol):
    """Run sanity checks on a protein Molecule before it is atom-typed.

    The checks run in a fixed order and the first failing one raises:
    protein atoms must exist, no non-protein atoms may be present, the bond
    list must be large enough, segid and chain must be assigned, the number
    of segments must match what autoSegment2 predicts, and hydrogens must be
    present.

    Parameters
    ----------
    mol : Molecule object
        The molecule to validate. The segment prediction is done on a copy.

    Raises
    ------
    RuntimeError
        If any check other than the bond-count check fails
    ValueError
        If the molecule has fewer bonds than (number of atoms - 1)
    """
    logger.info(
        'Checking validity of Molecule before atomtyping. '
        'If it gives incorrect results or to improve performance disable it with validitychecks=False. '
        'Most of these checks can be passed by using the moleculekit.atomtyper.prepareProteinForAtomtyping function. '
        'But make sure you understand what you are doing.'
    )

    is_protein = mol.atomselect('protein')
    if not np.any(is_protein):
        raise RuntimeError('No protein atoms found in Molecule')

    if np.any(~is_protein):
        offending = np.unique(mol.resname[~is_protein])
        raise RuntimeError(
            f'Found non-protein atoms with resnames {offending} in the Molecule. Please make sure to only pass protein atoms.'
        )

    if mol.numAtoms - 1 > mol.bonds.shape[0]:
        raise ValueError(
            'The protein has less bonds than (number of atoms - 1). This seems incorrect. You can assign bonds with `mol.bonds = mol._getBonds()`'
        )

    # Either field being completely empty counts as "not segmented"
    if any(np.all(field == '') for field in (mol.segid, mol.chain)):
        raise RuntimeError(
            'Please assign segments to the segid and chain fields of the molecule using autoSegment2'
        )

    from moleculekit.tools.autosegment import autoSegment2

    # Predict the segments on a blank copy and compare the counts.
    # segid and chain are cleared to avoid name clashes in autoSegment2.
    blank = mol.copy()
    blank.segid[:] = ''
    blank.chain[:] = ''
    predicted = autoSegment2(blank, fields=('chain', 'segid'), _logger=False)

    n_predicted = np.unique(predicted.segid).size
    n_present = np.unique(mol.segid).size
    if n_present != n_predicted:
        raise RuntimeError(
            f'The molecule contains {n_present} segments while we predict {n_predicted}. Make sure you used autoSegment2 on the protein'
        )

    if not np.any(mol.element == 'H'):
        raise RuntimeError(
            'No hydrogens found in the Molecule. Make sure to use proteinPrepare before passing it to voxelization. Also you might need to recalculate the bonds after this.'
        )
def prepareProteinForAtomtyping(
    mol, guessBonds=True, protonate=True, pH=7.4, segment=True, verbose=True
):
    """Prepares a Molecule object for atom typing.

    Works on a copy of the input. The molecule is split into protein, metal
    and water parts. Only the protein part is protonated, re-bonded and
    re-segmented; metals and waters are appended back onto it at the end.

    Parameters
    ----------
    mol : Molecule object
        The protein to prepare. May also contain metal atoms and waters.
    guessBonds : bool
        Drops the bonds in the molecule and guesses them from scratch
    protonate : bool
        Protonates the protein for the given pH and optimizes hydrogen networks
    pH : float
        The pH for protonation
    segment : bool
        Automatically guesses the segments of a protein by using the guessed bonds
    verbose : bool
        Set to False to turn off the printing

    Returns
    -------
    mol : Molecule object
        The prepared Molecule

    Raises
    ------
    RuntimeError
        If there are no protein atoms, or there are atoms which are neither
        protein, metal nor water
    AssertionError
        If `segment` is True and the segmented protein already uses the
        chain/segid reserved for metals ("Z"/"ME") while metals are present,
        or the one reserved for waters ("W"/"WX") while waters are present
    """
    from moleculekit.tools.autosegment import autoSegment2
    from moleculekit.util import sequenceID

    mol = mol.copy()
    if guessBonds:
        # Need to guess bonds at the start for atom selection and for autoSegment
        mol.bondtype = np.array([], dtype=object)
        mol.bonds = mol._guessBonds()

    protsel = mol.atomselect("protein")
    metalsel = mol.atomselect(f"element {' '.join(metal_atypes)}")
    watersel = mol.atomselect("water")
    notallowed = ~(protsel | metalsel | watersel)

    if not np.any(protsel):
        raise RuntimeError("No protein atoms found in Molecule")

    if np.any(notallowed):
        resnames = np.unique(mol.resname[notallowed])
        raise RuntimeError(
            "Found atoms with resnames {} in the Molecule which can cause issues with the voxelization. Please make sure to only pass protein atoms and metals.".format(
                resnames
            )
        )

    protmol = mol.copy()
    protmol.filter(protsel, _logger=False)
    metalmol = mol.copy()
    metalmol.filter(metalsel, _logger=False)
    watermol = mol.copy()
    watermol.filter(watersel, _logger=False)

    if protonate:
        from moleculekit.tools.preparation import systemPrepare

        if np.all(protmol.segid == "") and np.all(protmol.chain == ""):
            # We need segments to prepare the protein
            protmol = autoSegment2(
                protmol, fields=("segid", "chain"), basename="K", _logger=verbose
            )
        protmol = systemPrepare(
            protmol,
            pH=pH,
            verbose=verbose,
            _logger_level="INFO" if verbose else "ERROR",
        )

    if guessBonds:
        protmol.bonds = protmol._guessBonds()
        # TODO: Should we remove bonds between metals and protein?

    if segment:
        # Reassign segments after preparation
        protmol = autoSegment2(protmol, fields=("segid", "chain"), _logger=verbose)

        # Assign separate segment to the metals just in case pybel takes that into account.
        # Only done when metals exist, same as for the waters below, so a
        # protein that uses chain "Z" is not rejected when there is nothing to clash with.
        if metalmol.numAtoms != 0:
            if np.any(protmol.chain == "Z") or np.any(protmol.segid == "ME"):
                raise AssertionError(
                    "Report this issue on the moleculekit github issue tracker. Too many chains in the protein."
                )
            metalmol.segid[:] = "ME"
            metalmol.chain[:] = "Z"
            # Just in case, let's put a residue gap between the metals so that they
            # are considered separate chains no matter what happens
            metalmol.resid[:] = (
                np.arange(0, 2 * metalmol.numAtoms, 2) + protmol.resid.max() + 1
            )

        if watermol.numAtoms != 0:
            if np.any(protmol.chain == "W") or np.any(protmol.segid == "WX"):
                raise AssertionError(
                    "Report this issue on the moleculekit github issue tracker. Too many chains in the protein."
                )
            # Renumber before overwriting segid/chain, since both feed the renumbering
            watermol.resid[:] = sequenceID(
                (watermol.resid, watermol.segid, watermol.chain), step=2
            )
            watermol.segid[:] = "WX"
            watermol.chain[:] = "W"

    mol = protmol.copy()
    mol.append(metalmol)
    mol.append(watermol)
    return mol
def prepareProteinForAtomtyping(mol, guessBonds=True, protonate=True, pH=7, segment=True, verbose=True):
    """ Prepares a Molecule object for atom typing.

    Works on a copy of the input. The molecule is split into protein and
    metal parts. Only the protein part is protonated, re-bonded and
    re-segmented; the metals are appended back onto it at the end.

    Parameters
    ----------
    mol : Molecule object
        The protein to prepare. May also contain metal atoms.
    guessBonds : bool
        Drops the bonds in the molecule and guesses them from scratch
    protonate : bool
        Protonates the protein for the given pH and optimizes hydrogen networks
    pH : float
        The pH for protonation
    segment : bool
        Automatically guesses the segments of a protein by using the guessed bonds
    verbose : bool
        Set to False to turn off the printing

    Returns
    -------
    mol : Molecule object
        The prepared Molecule

    Raises
    ------
    RuntimeError
        If there are no protein atoms, or there are atoms which are neither
        protein nor metal
    AssertionError
        If `segment` is True, metals are present, and the segmented protein
        already uses the chain "Z" or segid "ME" reserved for the metals
    """
    from moleculekit.tools.autosegment import autoSegment2

    mol = mol.copy()
    if guessBonds:
        # Need to guess bonds at the start for atom selection and for autoSegment
        mol.bondtype = np.array([], dtype=object)
        mol.bonds = mol._guessBonds()

    protsel = mol.atomselect('protein')
    metalsel = mol.atomselect('element {}'.format(' '.join(metal_atypes)))
    notallowed = ~(protsel | metalsel)

    if not np.any(protsel):
        raise RuntimeError('No protein atoms found in Molecule')

    if np.any(notallowed):
        resnames = np.unique(mol.resname[notallowed])
        raise RuntimeError(
            'Found atoms with resnames {} in the Molecule which can cause issues with the voxelization. Please make sure to only pass protein atoms and metals.'
            .format(resnames))

    protmol = mol.copy()
    protmol.filter(protsel, _logger=False)
    metalmol = mol.copy()
    metalmol.filter(metalsel, _logger=False)

    if protonate:
        from moleculekit.tools.preparation import proteinPrepare

        if np.all(protmol.segid == '') and np.all(protmol.chain == ''):
            # We need segments to prepare the protein
            protmol = autoSegment2(protmol, fields=('segid', 'chain'), basename='K', _logger=verbose)
        protmol = proteinPrepare(protmol, pH=pH, verbose=verbose, _loggerLevel='INFO' if verbose else 'ERROR')

    if guessBonds:
        protmol.bonds = protmol._guessBonds()
        # TODO: Should we remove bonds between metals and protein?
        # Should we remove metals before guessing bonds and add them back in? Might crash otherwise?

    if segment:
        # Reassign segments after preparation
        protmol = autoSegment2(protmol, fields=('segid', 'chain'), _logger=verbose)

        # Assign separate segment to the metals just in case pybel takes that into account.
        # Only done when metals exist, so a protein that uses chain 'Z' is
        # not rejected when there is nothing to clash with.
        if metalmol.numAtoms != 0:
            if np.any(protmol.chain == 'Z') or np.any(protmol.segid == 'ME'):
                raise AssertionError(
                    'Report this issue on the moleculekit github issue tracker. Too many chains in the protein.'
                )
            metalmol.segid[:] = 'ME'
            metalmol.chain[:] = 'Z'
            # Just in case, let's put a residue gap between the metals so that they
            # are considered separate chains no matter what happens
            metalmol.resid[:] = np.arange(metalmol.numAtoms) * 2 + protmol.resid.max() + 1

    mol = protmol.copy()
    mol.append(metalmol)
    return mol