def calc_contact_order(chimera: Chimera = None, filename: str = None, diss_cutoff: int = 8):
    """
    Compute the relative contact order of a protein structure.

    The contact order of a protein is a measure of the locality of the inter-amino acid contacts in the
    native folded state. It is computed as the average sequence distance between residues that form
    contacts below a threshold in the folded protein, divided by the total length of the protein.

    Exactly one of ``chimera`` and ``filename`` must be given.

    :param chimera: A Chimera object with n residues. ``renumberResidues()`` is called on it.
    :param filename: path to a pdb file
    :param diss_cutoff: The maximum distance in Angstroms between two residues to be in contact,
        default 8 Angstroms
    :return: the contact order (%). 0.0 is returned when no contact is detected.
    :raises ValueError: if both, or neither, of ``chimera`` and ``filename`` are specified
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    if filename:
        chimera = Chimera(filename=filename)

    # Called for its effect on the object (the return value is unused).
    # NOTE(review): presumably makes residue numbering sequential -- confirm.
    chimera.renumberResidues()

    metr = MetricSelfDistance(
        "protein and noh",
        groupsel="residue",
        metric="contacts",
        threshold=diss_cutoff,
        pbc=False,
    )
    a = metr.project(chimera)
    mapping = metr.getMapping(chimera)
    # Only the first row of the projection is used.
    matrix, _, _ = contactVecToMatrix(a[0], mapping.atomIndexes)

    # np.triu zeroes everything below the main diagonal, so a pair (i, j) and
    # its mirror (j, i) cannot both be counted.
    triang = np.triu(matrix)
    idx1, idx2 = np.where(triang)
    total_contacts = len(idx1)
    total_residues = chimera.numResidues
    summation = np.sum(idx2 - idx1)

    if total_contacts == 0:
        # Without any contact the average separation is 0/0; report 0 instead
        # of failing with a division by zero.
        co = 0.0
    else:
        co = 1 / (total_contacts * total_residues) * summation

    print(f"Contact order is {co*100} %")
    return co * 100
def getCrystalCO(crystal):
    """Return the long-range CA contacts of ``crystal`` as a flat array.

    The flattened "contacts" projection of the CA self-distance metric
    (threshold 10, no periodic boundary conditions) is combined element-wise
    with the mask returned by ``getLongContacts``.

    :param crystal: structure object accepted by ``MetricSelfDistance.project``
    :return: element-wise ``&`` of the contact projection and the long-range mask
    """
    ca_contact_metric = MetricSelfDistance(
        "protein and name CA",
        metric="contacts",
        threshold=10,
        pbc=False,
    )
    contact_flags = ca_contact_metric.project(crystal).flatten()
    # NOTE(review): assumes the mask and the projection list the pairs in the
    # same order (both are built from the same selection and threshold).
    long_range_mask = getLongContacts(crystal)
    return contact_flags & long_range_mask
def getCrystalCO(crystal):
    """Compute which CA-CA contacts of ``crystal`` are also long-range.

    :param crystal: structure object passed to ``MetricSelfDistance.project``
        and to ``getLongContacts``
    :return: the flattened contact projection combined with ``&`` against the
        result of ``getLongContacts(crystal)``
    """
    metric_kwargs = dict(metric='contacts', threshold=10, pbc=False)
    projected = MetricSelfDistance('protein and name CA', **metric_kwargs).project(crystal)
    in_contact = projected.flatten()
    far_in_sequence = getLongContacts(crystal)
    # Keep only the entries that are set in both arrays.
    return in_contact & far_in_sequence
def getLongContacts(crystal, long=8):
    """Flag the CA pairs whose residue ids differ by more than ``long``.

    The pairs come from the mapping of the CA self-distance "contacts" metric
    (threshold 10, no periodic boundary conditions).

    :param crystal: structure object exposing ``resid`` and accepted by
        ``MetricSelfDistance.getMapping``
    :param long: residue-id difference a pair must exceed, default 8
    :return: boolean array, one entry per row of the mapping's ``atomIndexes``
    """
    ca_contact_metric = MetricSelfDistance(
        "protein and name CA",
        metric="contacts",
        threshold=10,
        pbc=False,
    )
    pair_table = ca_contact_metric.getMapping(crystal)
    # Stack the per-pair index entries into one 2-D array; columns 0 and 1 are
    # then used as the two atoms of each pair.
    atom_pairs = np.vstack(pair_table.atomIndexes.values)
    second_atoms = atom_pairs[:, 1]
    first_atoms = atom_pairs[:, 0]
    resid_gap = crystal.resid[second_atoms] - crystal.resid[first_atoms]
    return resid_gap > long
def calc_dist_matrix(chimera: Chimera = None, filename: str = None, selection: str = 'residue', type='contacts',
                     plot=False):
    """
    Return an inter-residue distance matrix, or the contact map derived from it.

    Exactly one of ``chimera`` and ``filename`` must be given.

    :param chimera: A Chimera object with n residues.
    :param filename: path to a pdb file
    :param selection: How to compute the distance. 'residue' (the closest two atoms between two
        residues) or 'alpha' (distance of the alpha carbons).
    :param type: 'contacts' (contact map: True where the distance is below 8 Angstroms) or
        'distances' (the distances themselves).
    :param plot: whether to plot the matrix. Default is False
    :return: matrix. np.array. An n by n distance matrix (boolean when ``type`` is 'contacts').
    :raises ValueError: if both, or neither, of ``chimera`` and ``filename`` are specified, or if
        ``selection`` or ``type`` is not one of the accepted values
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    # Reject bad options before loading or projecting anything.
    if selection not in ('residue', 'alpha'):
        raise ValueError("Specify a selection type: 'residue' or 'alpha'")
    if type not in ('contacts', 'distances'):
        raise ValueError("Please select contact type between 'contacts' or distances")

    if filename:
        chimera = Chimera(filename=filename)

    if selection == 'residue':
        metr = MetricSelfDistance("protein", groupsel="residue", metric="distances", pbc=False)
    else:
        metr = MetricSelfDistance("protein and name CA", metric="distances", pbc=False)

    a = metr.project(chimera)
    mapping = metr.getMapping(chimera)
    # Only the first row of the projection is converted, for both selections
    # (the 'alpha' branch used to pass the whole projection array).
    matrix, _, _ = contactVecToMatrix(a[0], mapping.atomIndexes)

    if type == "contacts":
        # Hard-coded contact cutoff of 8 (Angstroms, per the docstring).
        matrix = matrix < 8

    if plot:
        fig = plt.figure(figsize=(12, 12))
        ax = fig.add_subplot(111)
        # Contact maps are drawn in black and white; distances get a colour
        # scale and a colorbar.
        cmap = 'gist_rainbow' if type == 'distances' else 'binary'
        cax = ax.imshow(matrix, cmap=cmap, interpolation='nearest', origin="lower")
        if type == 'distances':
            fig.colorbar(cax)
        plt.xticks(fontsize=22)
        plt.yticks(fontsize=22)
        plt.xlabel("Residue index", fontsize=24)
        plt.ylabel("Residue index", fontsize=24)

    return matrix
md.projection = MetricDistance('protein and name CA', 'resname BEN and noh') # md.goalprojection = MetricRmsd(Molecule(htmd.home() + '/data/adaptive/generators/1/structure.pdb'), # 'protein and name CA') md.goalfunction = rmsdgoal # md.app = LocalGPUQueue() # md.run() # Some real testing now from moleculekit.projections.metricsecondarystructure import MetricSecondaryStructure from moleculekit.projections.metricdistance import MetricSelfDistance import numpy as np os.chdir(path.join(home(), 'data', 'test-adaptive')) goalProjectionDict = {'ss': MetricSecondaryStructure(), 'contacts': MetricSelfDistance('protein and name CA', metric='contacts', threshold=10), 'ss_contacts': [MetricSecondaryStructure(), MetricSelfDistance('protein and name CA', metric='contacts', threshold=10)]} def getLongContacts(crystal, long=8): crystalMap = MetricSelfDistance('protein and name CA', metric='contacts', threshold=10, pbc=False).getMapping( crystal) indexes = np.vstack(crystalMap.atomIndexes.values) return crystal.resid[indexes[:, 1]] - crystal.resid[indexes[:, 0]] > long def getCrystalSS(crystal): return MetricSecondaryStructure().project(crystal)[0].flatten() def getCrystalCO(crystal): crystalCO = MetricSelfDistance('protein and name CA', metric='contacts', threshold=10, pbc=False).project( crystal).flatten()
return datatica if __name__ == '__main__': from htmd.simlist import simlist from glob import glob from moleculekit.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir='villin') sims = simlist(glob(join(testfolder, '*', '')), join(testfolder, 'filtered.pdb')) met = Metric(sims[0:2]) met.set(MetricSelfDistance('protein and name CA')) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units='ns', dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [[3.69098878, -0.33862674, 0.85779184], [3.77816105, -0.31887317, 0.87724227], [3.83537507, -0.11878026, 0.65236956]] assert np.allclose(np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)), rtol=0, atol=0.01) assert np.allclose(np.abs(datatica5.trajectories[0].projection[-3:, -3:]),
# md.app = LocalGPUQueue() # md.run() # Some real testing now from moleculekit.projections.metricsecondarystructure import ( MetricSecondaryStructure, ) from moleculekit.projections.metricdistance import MetricSelfDistance os.chdir(path.join(home(), "data", "test-adaptive")) goalProjectionDict = { "ss": MetricSecondaryStructure(), "contacts": MetricSelfDistance("protein and name CA", metric="contacts", threshold=10), "ss_contacts": [ MetricSecondaryStructure(), MetricSelfDistance("protein and name CA", metric="contacts", threshold=10), ], } def getLongContacts(crystal, long=8): crystalMap = MetricSelfDistance("protein and name CA", metric="contacts", threshold=10, pbc=False).getMapping(crystal) indexes = np.vstack(crystalMap.atomIndexes.values)
return datatica if __name__ == "__main__": from htmd.simlist import simlist from glob import glob from moleculekit.projections.metricdistance import MetricSelfDistance from htmd.home import home from os.path import join testfolder = home(dataDir="villin") sims = simlist(glob(join(testfolder, "*", "")), join(testfolder, "filtered.pdb")) met = Metric(sims[0:2]) met.set(MetricSelfDistance("protein and name CA")) data = met.project() data.fstep = 0.1 tica = TICA(data, 2, dimensions=range(2, 10)) datatica = tica.project(2) tica5 = TICA(data, 0.2, units="ns", dimensions=range(2, 10)) datatica5 = tica5.project(2) expected = [ [3.69098878, -0.33862674, 0.85779184], [3.77816105, -0.31887317, 0.87724227], [3.83537507, -0.11878026, 0.65236956], ] assert np.allclose( np.abs(datatica.trajectories[0].projection[-3:, -3:]), np.abs(np.array(expected, dtype=np.float32)),