def kabsch_sander(traj):
    r"""Compute the Kabsch-Sander hydrogen bond energy between each pair
    of residues in every frame.

    Hydrogen bonds are defined using an electrostatic definition, assuming
    partial charges of -0.42 e and +0.20 e to the carbonyl oxygen and amide
    hydrogen respectively, their opposites assigned to the carbonyl carbon
    and amide nitrogen. A hydrogen bond is identified if E in the following
    equation is less than -0.5 kcal/mol:

    .. math::

        E = 0.42 \cdot 0.2 \cdot 33.2 kcal/(mol \cdot nm) * \\
            (1/r_{ON} + 1/r_{CH} - 1/r_{OH} - 1/r_{CN})

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.

    Returns
    -------
    matrices : list of scipy.sparse matrix
        The return value is a list of length equal to the number of frames
        in the trajectory. Each element is an n_residues x n_residues sparse
        matrix, where the existence of an entry at row `i`, column `j` with
        value `x` means that there exists a hydrogen bond between a backbone
        CO group at residue `i` with a backbone NH group at residue `j` whose
        Kabsch-Sander energy is less than -0.5 kcal/mol (the threshold for
        existence of the "bond"). The exact value of the energy is given by
        the value `x`.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.

    See Also
    --------
    wernet_nilsson, baker_hubbard

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637.
       doi:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')

    import scipy.sparse

    xyz, nco_indices, ca_indices, proline_indices = (
        _prep_kabsch_sander_arrays(traj))
    n_frames = xyz.shape[0]
    n_residues = len(ca_indices)

    # Output buffers handed to the compiled routine. Each residue has two
    # slots per frame; -1 / NaN mark a slot that was left unfilled.
    hbonds = np.full((n_frames, n_residues, 2), -1, dtype=np.int32)
    henergies = np.full((n_frames, n_residues, 2), np.nan, dtype=np.float32)
    _geometry._kabsch_sander(xyz, nco_indices, ca_indices, proline_indices,
                             hbonds, henergies)

    # The C code returns its info in a pretty inconvenient format.
    # Let's change it to a list of scipy CSR matrices.
    matrices = []
    hbonds_mask = (hbonds != -1)
    for hbonds_frame, mask, henergies_frame in zip(hbonds, hbonds_mask,
                                                   henergies):
        # Apologies for this cryptic code -- we need to deal with the low
        # level aspects of the CSR matrix format: indptr[k]:indptr[k + 1]
        # delimits the entries of row k, so it is the running total of
        # filled slots per residue.
        indptr = np.zeros(n_residues + 1, np.int32)
        indptr[1:] = np.cumsum(mask.sum(axis=1))
        # Boolean-mask indexing already yields flat 1-D copies in row-major
        # order, which is the ordering indptr assumes.
        indices = hbonds_frame[mask]
        data = henergies_frame[mask]
        # NOTE(review): the transpose presumably puts the CO residue on the
        # rows as documented above -- confirm against the C routine. In scipy
        # `.T` of a CSR matrix is stored in CSC format, so the elements are
        # not literally csr_matrix instances.
        matrices.append(
            scipy.sparse.csr_matrix((data, indices, indptr),
                                    shape=(n_residues, n_residues)).T)

    return matrices
def kabsch_sander(traj):
    r"""Compute the Kabsch-Sander hydrogen bond energy between each pair
    of residues in every frame.

    Hydrogen bonds are defined using an electrostatic definition, assuming
    partial charges of -0.42 e and +0.20 e to the carbonyl oxygen and amide
    hydrogen respectively, their opposites assigned to the carbonyl carbon
    and amide nitrogen. A hydrogen bond is identified if E in the following
    equation is less than -0.5 kcal/mol:

    .. math::

        E = 0.42 \cdot 0.2 \cdot 33.2 kcal/(mol \cdot nm) * \\
            (1/r_{ON} + 1/r_{CH} - 1/r_{OH} - 1/r_{CN})

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.

    Returns
    -------
    matrices : list of scipy.sparse.csr_matrix
        The return value is a list of length equal to the number of frames
        in the trajectory. Each element is an n_residues x n_residues sparse
        matrix, where the existence of an entry at row `i`, column `j` with
        value `x` means that there exists a hydrogen bond between a backbone
        CO group at residue `i` with a backbone NH group at residue `j` whose
        Kabsch-Sander energy is less than -0.5 kcal/mol (the threshold for
        existence of the "bond"). The exact value of the energy is given by
        the value `x`.

    Raises
    ------
    ValueError
        If ``traj.topology`` is None.
    IndexError
        If a residue has no atom named 'N', 'C' or 'O', or a non-proline
        residue has no atom named 'CA'.

    See Also
    --------
    baker_hubbard

    References
    ----------
    .. [1] Kabsch W, Sander C (1983). "Dictionary of protein secondary
       structure: pattern recognition of hydrogen-bonded and geometrical
       features". Biopolymers 22 (12): 2577-637.
       doi:10.1002/bip.360221211
    """
    if traj.topology is None:
        raise ValueError('kabsch_sander requires topology')

    import scipy.sparse

    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)

    def first_index(residue, atom_name):
        # Index of the first atom in `residue` called `atom_name`; the [0]
        # raises IndexError when the residue has no such atom.
        return [a.index for a in residue.atoms if a.name == atom_name][0]

    ca_indices, nco_indices = [], []
    for residue in traj.topology.residues:
        # Proline is flagged with -1 instead of its CA index (presumably so
        # the C routine can skip it as a donor -- confirm against the C code).
        if residue.name == 'PRO':
            ca_indices.append(-1)
        else:
            ca_indices.append(first_index(residue, 'CA'))

        # Every residue, proline included, contributes an (N, C, O) row so
        # that nco_indices stays aligned with ca_indices.
        nco_indices.append([first_index(residue, 'N'),
                            first_index(residue, 'C'),
                            first_index(residue, 'O')])

    nco_indices = np.array(nco_indices, np.int32)
    ca_indices = np.array(ca_indices, np.int32)
    n_frames = xyz.shape[0]
    n_residues = len(ca_indices)

    # Output buffers handed to the compiled routine. Each residue has two
    # slots per frame; -1 / NaN mark a slot that was left unfilled.
    hbonds = np.full((n_frames, n_residues, 2), -1, dtype=np.int32)
    henergies = np.full((n_frames, n_residues, 2), np.nan, dtype=np.float32)
    _geometry._kabsch_sander(xyz, nco_indices, ca_indices, hbonds, henergies)

    # The C code returns its info in a pretty inconvenient format.
    # Let's change it to a list of scipy CSR matrices.
    matrices = []
    hbonds_mask = (hbonds != -1)
    for hbonds_frame, mask, henergies_frame in zip(hbonds, hbonds_mask,
                                                   henergies):
        # Apologies for this cryptic code -- we need to deal with the low
        # level aspects of the CSR matrix format: indptr[k]:indptr[k + 1]
        # delimits the entries of row k, so it is the running total of
        # filled slots per residue.
        indptr = np.zeros(n_residues + 1, np.int32)
        indptr[1:] = np.cumsum(mask.sum(axis=1))
        # Boolean-mask indexing already yields flat 1-D copies in row-major
        # order, which is the ordering indptr assumes.
        indices = hbonds_frame[mask]
        data = henergies_frame[mask]
        matrices.append(
            scipy.sparse.csr_matrix((data, indices, indptr),
                                    shape=(n_residues, n_residues)))

    return matrices