def get_prop(mol, left_most_point):
    """Return atom occupancies of ``mol`` on a fixed 24 x 24 x 24 grid.

    Parameters
    ----------
    mol :
        Object passed to ``vd._getAtomtypePropertiesPDBQT`` and
        ``vd._getRadii``; its ``coords`` and ``frame`` attributes are read.
    left_most_point :
        Forwarded as ``llc`` to ``vd._getGridCenters``.

    Returns
    -------
    features :
        Result of ``vd._getOccupancyC`` reshaped to ``(24, 24, 24, -1)``.
    """
    grid_shape = [24, 24, 24]  # number of voxels along each axis

    # Per-atom channel flags, scaled row-wise by the per-atom radii.
    atom_channels = vd._getAtomtypePropertiesPDBQT(mol)
    atom_radii = vd._getRadii(mol)
    atom_channels = atom_radii[:, None] * atom_channels.astype(float)

    # Grid centers at resolution 1, flattened to one row per voxel.
    n_voxels = np.prod(grid_shape)
    grid_centers = vd._getGridCenters(
        llc=left_most_point, N=grid_shape, resolution=1
    ).reshape(n_voxels, 3)

    # Occupancies for the molecule's current frame.
    frame_coords = mol.coords[:, :, mol.frame]
    occupancies = vd._getOccupancyC(frame_coords, grid_centers, atom_channels)
    return occupancies.reshape(
        grid_shape[0], grid_shape[1], grid_shape[2], -1
    )
def voxelize(multisigmas, coords, center, displacement=2., rotation=True):
    """Generate a randomly augmented voxel representation of a molecule.

    Parameters
    ----------
    multisigmas :
        Per-atom channel values, forwarded to ``_getOccupancyC``.
    coords :
        Atom coordinates; cast to ``float32`` before voxelization.
    center :
        Point the grid is centered on (before the random shift) and the
        center passed to ``rotate``.
    displacement : float
        Scale of the random translation: each component of the shift is
        ``(np.random.rand() - 0.5) * 2 * displacement``.
    rotation : bool
        If true, apply ``uniformRandomRotation()`` to ``coords`` first.

    Returns
    -------
    occupancy :
        ``float32`` array of shape ``(8, size, size, size)`` (channels
        first), where ``size`` is a module-level name.
    """
    # Optional random rotation of the coordinates about ``center``.
    if rotation:
        coords = rotate(coords, uniformRandomRotation(), center=center)

    # Random translation of the grid center.
    shift = (np.random.rand(3) - 0.5) * 2 * displacement
    center = center + shift

    # ``global_centers`` is a module-level grid that is moved onto the
    # shifted center.
    shifted_grid = global_centers + center
    raw = _getOccupancyC(
        coords.astype(np.float32),
        shifted_grid.reshape(-1, 3),
        multisigmas,
    )
    volume = raw.reshape(size, size, size, 8)

    # Move the channel axis to the front.
    return volume.astype(np.float32).transpose(3, 0, 1, 2)
def get_voxels(self, center=None, size=24, resolution=1., rotation=None, displacement=None, dtype=np.float32):
    """
    Computes molecule voxelization.

    Parameters
    ----------
    center: array-like
        Geometrical coordinates where descriptors will be computed.
        Defaults to the molecule center returned by ``self.get_center``.
    size: int
        Size of resulting descriptor array.
    resolution: float
        Grid resolution of resulting array.
    rotation : array-like of shape (3,)
        Prior to voxelization rotates the molecule around its center given
        the rotation angles in radians. The three rotation matrices are
        built for the axes ``[1, 0, 0]``, ``[0, 1, 0]`` and ``[0, 0, 1]``
        and applied in that order.
    displacement: array-like of shape (3,)
        Prior to voxelization displaces the molecule by provided (X, Y, Z)
        distance before returning the voxelized representation.
    dtype : numpy datatype
        returns array of the specified type.

    Returns
    -------
    voxels: array-like
        Computed descriptors, reshaped to ``(size, size, size, 8)``.
    """
    coords = self.get_coords()
    lig_center = self.get_center(coords=coords)

    if center is None:
        center = lig_center

    if rotation is not None:
        rotation = list(rotation)
        matx = get_rotationMatrix([1, 0, 0], rotation[0])
        maty = get_rotationMatrix([0, 1, 0], rotation[1])
        matz = get_rotationMatrix([0, 0, 1], rotation[2])

        # The rotation center is always the molecule center, even when a
        # different grid ``center`` was requested.
        coords = rotate(coords, matx, center=lig_center)
        coords = rotate(coords, maty, center=lig_center)
        coords = rotate(coords, matz, center=lig_center)

    if displacement is not None:
        # Build a new array instead of ``+=`` so the array handed back by
        # ``get_coords()`` is never modified in place (it may not be a copy).
        coords = coords + np.asarray(displacement)

    multisigmas = self._get_channel_radii()

    cache_key = (size, resolution)
    if cache_key not in SmallMol.array_cache:
        N = [size, size, size]
        bbm = (np.zeros(3) - float(size * resolution / 2))
        centers = _getGridCenters(bbm, N, resolution)

        # Cache the origin-centered grid, flattened to one row per voxel.
        SmallMol.array_cache[cache_key] = centers.reshape(size**3, 3)

    # Always use the cached, flattened grid so the first call (cache miss)
    # passes exactly the same shape to _getOccupancyC as later calls.
    centers2D = SmallMol.array_cache[cache_key] + center

    voxels = _getOccupancyC(coords.astype(np.float32),
                            centers2D,
                            multisigmas).reshape(size, size, size, 8).astype(dtype)
    return voxels
def getVoxelDescriptors(mol, usercenters=None, voxelsize=1, buffer=0, channels=None, method='C'):
    """ Calculate descriptors of atom properties for voxels in a grid bounding the Molecule object.

    Constructs a bounding box around Molecule with some buffer space.
    Then it computes pharmacophoric-like descriptors on a defined grid.

    Parameters
    ----------
    mol :
        A Molecule object. Needs to be read from Autodock 4 .pdbqt format
    usercenters : np.ndarray
        A 2D array specifying the centers of the voxels. If None is given, it will discretize the bounding box of the
        Molecule plus any buffer space requested into voxels of voxelsize.
    voxelsize : float
        The voxel size in A
    buffer : float
        The buffer space to add to the bounding box. This adds zeros to the grid around the protein so that properties
        which are at the edge of the box can be found in the center of one. Should be usually set to localimagesize/2.
    channels : np.ndarray
        A 2D array of size (mol.numAtoms, nchannels) where nchannels is the number of channels we want to have.
        Each column i then has True (or a float) in the rows of the atoms which belong to channel i and False (or 0)
        otherwise. Such boolean arrays can be obtained for example by using mol.atomselect.
        If the array is boolean, each atom will get assigned its VdW radius. If the array is float, these floats will
        be used as the corresponding atom radii. Make sure the numpy array is of dtype=bool if passing boolean values.
        If no channels are given, the default ('hydrophobic', 'aromatic', 'hbond_acceptor', 'hbond_donor',
        'positive_ionizable', 'negative_ionizable', 'metal', 'occupancies') channels will be used.
    method : str
        Voxel descriptors can be calculated either with our C implementation or CUDA or NUMBA implementations.
        One of 'C', 'CUDA' or 'NUMBA' (case-insensitive).

    Returns
    -------
    features : np.ndarray
        A 2D array of size (centers, channels) containing the effect of each channel in the voxel with that center.
    centers : np.ndarray
        A list of the centers of all boxes
    N : np.ndarray
        Is returned only when no user centers are passed. It corresponds to the number of centers in each of the x,y,z
        dimensions

    Raises
    ------
    ValueError
        If `method` is not one of the supported implementations.

    Examples
    --------
    >>> mol = Molecule('3PTB')
    >>> mol.filter('protein')
    >>> features, centers, N = getVoxelDescriptors(mol, buffer=8)
    >>> # Features can be reshaped to a 4D array (3D for each grid center in xyz, 1D for the properties) like this:
    >>> features = features.reshape(N[0], N[1], N[2], features.shape[1])
    >>> # The user can provide his own centers
    >>> features, centers = getVoxelDescriptors(mol, usercenters=[[0, 0, 0], [16, 24, -5]], buffer=8)
    """
    # Validate the implementation name before doing any work, so an unknown
    # value fails with a clear message instead of an UnboundLocalError when
    # `features` is returned.
    backend = method.upper()
    if backend not in ('C', 'CUDA', 'NUMBA'):
        raise ValueError(
            "Unknown method '{}'. Valid options are 'C', 'CUDA' and 'NUMBA'.".format(method)
        )

    if channels is None:
        channels = _getAtomtypePropertiesPDBQT(mol)

    if channels.dtype == bool:
        # Calculate for each channel the atom sigmas
        sigmas = _getRadii(mol)
        channels = sigmas[:, np.newaxis] * channels.astype(float)

    N = None
    if usercenters is None:
        # Calculate the bbox and the number of voxels
        [bbm, bbM] = boundingBox(mol)
        bbm -= buffer
        bbM += buffer
        N = np.ceil((bbM - bbm) / voxelsize).astype(int) + 1

        # Calculate grid centers
        centers = _getGridCenters(bbm, N, voxelsize)
        centers = centers.reshape(np.prod(N), 3)
    else:
        # The docstring example passes a plain list of lists; convert it so
        # the occupancy routines always receive an ndarray. An ndarray input
        # is passed through unchanged (no copy).
        centers = np.asarray(usercenters)

    # Calculate features for the molecule's current frame
    coords = mol.coords[:, :, mol.frame]
    if backend == 'C':
        features = _getOccupancyC(coords, centers, channels)
    elif backend == 'CUDA':
        features = _getOccupancyCUDA(coords, centers, channels)
    else:  # 'NUMBA'
        features = _getOccupancyNUMBA(coords, centers, channels, 5)

    if N is None:
        return features, centers
    return features, centers, N
if __name__ == '__main__':
    # Self-test: compare the voxel descriptors of 3PTB against reference
    # arrays stored in the package test data, then check that the C and
    # NUMBA occupancy implementations agree.
    from htmd.molecule.molecule import Molecule
    from htmd.home import home
    import os
    import numpy as np

    testf = os.path.join(home(), 'data', 'test-voxeldescriptors')

    def _test_file(name):
        """Return the path of ``name`` inside the test-data directory."""
        return os.path.join(testf, name)

    # --- Full pipeline against the stored reference output ---------------
    protein = Molecule(_test_file('3ptb.pdbqt'))
    occ, cent, N = getVoxelDescriptors(protein, buffer=8, voxelsize=1)
    occ = occ.reshape(N[0], N[1], N[2], occ.shape[1])

    expected_occ = np.load(_test_file('3PTB_occ.npy'))
    expected_cent = np.load(_test_file('3PTB_center.npy'))

    assert np.allclose(occ, expected_occ)
    assert np.allclose(cent, expected_cent)

    # --- Cross-check of the occupancy implementations --------------------
    from htmd.molecule.voxeldescriptors import _getOccupancyC, _getOccupancyCUDA

    centers = np.load(_test_file('3PTB_centers_inp.npy'))
    coords = np.load(_test_file('3PTB_coords_inp.npy'))
    sigmas = np.load(_test_file('3PTB_channels_inp.npy'))

    # Only every 10th center is compared (presumably to keep the check quick).
    centers = centers[::10, :].copy()

    res_C = _getOccupancyC(coords, centers, sigmas)
    res_numba = _getOccupancyNUMBA(coords, centers, sigmas, 5)
    assert np.abs(res_C - res_numba).max() < 1e-4

    # The CUDA comparison was commented out in the original and stays disabled:
    # res_cuda = _getOccupancyCUDA(coords, centers, sigmas, 5)
    # assert np.abs(res_C - res_cuda).max() < 1e-4
def getVoxelDescriptors(mol, usercenters=None, voxelsize=1, buffer=0, channels=None, method='C'):
    """ Calculate descriptors of atom properties for voxels in a grid bounding the Molecule object.

    Constructs a bounding box around Molecule with some buffer space.
    Then it computes pharmacophoric-like descriptors on a defined grid.

    Parameters
    ----------
    mol :
        A Molecule object. Needs to be read from Autodock 4 .pdbqt format
    usercenters : np.ndarray
        A 2D array specifying the centers of the voxels. If None is given, it will discretize the bounding box of the
        Molecule plus any buffer space requested into voxels of voxelsize.
    voxelsize : float
        The voxel size in A
    buffer : float
        The buffer space to add to the bounding box. This adds zeros to the grid around the protein so that properties
        which are at the edge of the box can be found in the center of one. Should be usually set to localimagesize/2.
    channels : np.ndarray
        A 2D array of size (mol.numAtoms, nchannels) where nchannels is the number of channels we want to have.
        Each column i then has True (or a float) in the rows of the atoms which belong to channel i and False (or 0)
        otherwise. Such boolean arrays can be obtained for example by using mol.atomselect.
        If the array is boolean, each atom will get assigned its VdW radius. If the array is float, these floats will
        be used as the corresponding atom radii. Make sure the numpy array is of dtype=bool if passing boolean values.
        If no channels are given, the default ('hydrophobic', 'aromatic', 'hbond_acceptor', 'hbond_donor',
        'positive_ionizable', 'negative_ionizable', 'metal', 'occupancies') channels will be used.
    method : str
        Voxel descriptors can be calculated either with our C implementation or CUDA or NUMBA implementations.
        One of 'C', 'CUDA' or 'NUMBA' (case-insensitive).

    Returns
    -------
    features : np.ndarray
        A 2D array of size (centers, channels) containing the effect of each channel in the voxel with that center.
    centers : np.ndarray
        A list of the centers of all boxes
    N : np.ndarray
        Is returned only when no user centers are passed. It corresponds to the number of centers in each of the x,y,z
        dimensions

    Raises
    ------
    ValueError
        If `method` is not one of the supported implementations.

    Examples
    --------
    >>> mol = Molecule('3PTB')
    >>> mol.filter('protein')
    >>> features, centers, N = getVoxelDescriptors(mol, buffer=8)
    >>> # Features can be reshaped to a 4D array (3D for each grid center in xyz, 1D for the properties) like this:
    >>> features = features.reshape(N[0], N[1], N[2], features.shape[1])
    >>> # The user can provide his own centers
    >>> features, centers = getVoxelDescriptors(mol, usercenters=[[0, 0, 0], [16, 24, -5]], buffer=8)
    """
    # Validate the implementation name before doing any work, so an unknown
    # value fails with a clear message instead of an UnboundLocalError when
    # `features` is returned.
    backend = method.upper()
    if backend not in ('C', 'CUDA', 'NUMBA'):
        raise ValueError(
            "Unknown method '{}'. Valid options are 'C', 'CUDA' and 'NUMBA'.".format(method)
        )

    if channels is None:
        channels = _getAtomtypePropertiesPDBQT(mol)

    if channels.dtype == bool:
        # Calculate for each channel the atom sigmas
        sigmas = _getRadii(mol)
        channels = sigmas[:, np.newaxis] * channels.astype(float)

    N = None
    if usercenters is None:
        # Calculate the bbox and the number of voxels
        [bbm, bbM] = boundingBox(mol)
        bbm -= buffer
        bbM += buffer
        N = np.ceil((bbM - bbm) / voxelsize).astype(int) + 1

        # Calculate grid centers
        centers = _getGridCenters(bbm, N, voxelsize)
        centers = centers.reshape(np.prod(N), 3)
    else:
        # The docstring example passes a plain list of lists; convert it so
        # the occupancy routines always receive an ndarray. An ndarray input
        # is passed through unchanged (no copy).
        centers = np.asarray(usercenters)

    # Calculate features for the molecule's current frame
    coords = mol.coords[:, :, mol.frame]
    if backend == 'C':
        features = _getOccupancyC(coords, centers, channels)
    elif backend == 'CUDA':
        features = _getOccupancyCUDA(coords, centers, channels)
    else:  # 'NUMBA'
        features = _getOccupancyNUMBA(coords, centers, channels, 5)

    if N is None:
        return features, centers
    return features, centers, N
# Self-test fragment: compares the voxel descriptors of 3PTB against stored
# reference arrays and cross-checks the C and NUMBA occupancy routines.
# NOTE(review): `Molecule`, `getVoxelDescriptors` and `_getOccupancyNUMBA`
# are not imported in this fragment; they must already be in scope.
from htmd.home import home
import os
import numpy as np

testf = os.path.join(home(), 'data', 'test-voxeldescriptors')

# --- Full pipeline against the stored reference output -------------------
pdbqt_path = os.path.join(testf, '3ptb.pdbqt')
resOcc, resCent, N = getVoxelDescriptors(Molecule(pdbqt_path), buffer=8, voxelsize=1)
grid_shape = (N[0], N[1], N[2], resOcc.shape[1])
resOcc = resOcc.reshape(*grid_shape)

refOcc = np.load(os.path.join(testf, '3PTB_occ.npy'))
refCent = np.load(os.path.join(testf, '3PTB_center.npy'))
assert np.allclose(resOcc, refOcc)
assert np.allclose(resCent, refCent)

# --- Cross-check of the occupancy implementations ------------------------
from htmd.molecule.voxeldescriptors import _getOccupancyC, _getOccupancyCUDA

centers = np.load(os.path.join(testf, '3PTB_centers_inp.npy'))
coords = np.load(os.path.join(testf, '3PTB_coords_inp.npy'))
sigmas = np.load(os.path.join(testf, '3PTB_channels_inp.npy'))

# Only every 10th center is compared (presumably to keep the check quick).
centers = centers[::10, :].copy()

res_C = _getOccupancyC(coords, centers, sigmas)
res_numba = _getOccupancyNUMBA(coords, centers, sigmas, 5)
max_abs_diff = np.abs(res_C - res_numba).max()
assert max_abs_diff < 1e-4

# The CUDA comparison was commented out in the original and stays disabled:
# res_cuda = _getOccupancyCUDA(coords, centers, sigmas, 5)
# assert np.abs(res_C - res_cuda).max() < 1e-4