def __init__(self, sel1, sel2, numshells=4, shellwidth=3, pbc=True, gap=None, truncate=None):
    """Configure the shell projection and the distance metric it wraps.

    Parameters
    ----------
    sel1 : str
        Atom selection forwarded to ``MetricDistance`` as ``sel1``.
    sel2 : str
        Atom selection forwarded to ``MetricDistance`` as ``sel2``.
    numshells : int, optional
        Stored on the instance as ``numshells``.
    shellwidth : int, optional
        Stored on the instance as ``shellwidth``.
    pbc : bool, optional
        Forwarded unchanged to ``MetricDistance``.
    gap : optional
        Accepted for interface compatibility; not used by this constructor.
    truncate : float, optional
        Forwarded unchanged to ``MetricDistance``.
    """
    super().__init__()
    from moleculekit.projections.metricdistance import MetricDistance

    # Fixed options of the wrapped metric: plain, ungrouped distances.
    distance_options = {
        "groupsel1": None,
        "groupsel2": None,
        "metric": "distances",
        "threshold": 8,
        "pbc": pbc,
        "truncate": truncate,
    }

    # Identical selection strings mark the projection as symmetrical.
    self.symmetrical = sel1 == sel2
    self.metricdistance = MetricDistance(sel1=sel1, sel2=sel2, **distance_options)
    self.numshells = numshells
    self.shellwidth = shellwidth
    self.description = None
    self.shellcenters = None
def mygoalfunction(mol):
    """Score ``mol`` by the negated projection of one atom-pair distance.

    The distance metric is built between the CA atom of HID 42 and atom C11
    of residue MOL, projected on ``mol``, and every projected value below
    20.0 is overwritten with 1.0 before the array is negated and returned.
    The projected values are also printed.
    """
    protein_sel = 'protein and resname HID and resid 42 and name CA'
    ligand_sel = 'resname MOL and name C11'
    projected = MetricDistance(protein_sel, ligand_sel).project(mol)

    # Everything under the 20.0 cutoff is flattened to the same value.
    under_cutoff = projected < 20.0
    projected[under_cutoff] = 1.0

    print('THE PROJECTION VALUES:', projected)
    return -projected  # or even 1/distance
def compute_salt_bridges(self):
    """Detect salt bridges and highlight the residues involved.

    All existing representations are removed, a contact metric is computed
    between side-chain acidic oxygens and side-chain basic nitrogens
    (threshold 3.2, no periodic wrapping), and one entry is recorded per
    residue pair in contact. Both residues of every recorded pair get a
    Licorice representation.

    Returns
    -------
    salts : list
        One ``{"residues": [resid1, resid2], "chain": [chain1, chain2]}``
        dict per distinct residue pair, or the result of
        ``add_networks_salts`` when the component labelling is non-empty.

    Raises
    ------
    Exception
        Any error raised while projecting or processing the contacts is
        logged and re-raised unchanged.
    """
    salts = []
    # Residue pairs already recorded. The previous check compared a
    # ``[resid1, resid2]`` list against a list of dicts, which never
    # matched, so every atom-atom contact produced a duplicate entry.
    seen_pairs = set()

    # Remove representations back to front so earlier indexes stay valid.
    for index in reversed(range(len(self.reps.replist))):
        self.reps.remove(index)

    metr = MetricDistance('sidechain and acidic and element O',
                          'sidechain and basic and element N',
                          metric="contacts",
                          threshold=3.2,
                          pbc=False)
    try:
        data = metr.project(self)
        mapping = metr.getMapping(self)
        if len(np.shape(data)) > 1:
            data = data[0].copy()  # handling NMR structures
        self.reps.add(sel='protein', style='NewCartoon', color=8)
        contacts = mapping[data].atomIndexes.values
        if contacts.any():
            for salt in contacts:
                first_residue = f"same residue as index {salt[0]}"
                second_residue = f"same residue as index {salt[1]}"
                resid1 = self.get("resid", sel=first_residue)[0]
                chain1 = self.get("chain", sel=first_residue)[0]
                resid2 = self.get("resid", sel=second_residue)[0]
                chain2 = self.get("chain", sel=second_residue)[0]

                # The chain is part of the key so equal residue numbers
                # on different chains are kept as separate bridges.
                pair_key = (int(resid1), str(chain1), int(resid2), str(chain2))
                if pair_key in seen_pairs:
                    continue
                seen_pairs.add(pair_key)

                salts.append({
                    "residues": [int(resid1), int(resid2)],
                    "chain": [chain1, chain2]
                })
                self.reps.add(f"protein and resid {resid1}",
                              style="Licorice",
                              color="1")
                self.reps.add(f"protein and resid {resid2}",
                              style="Licorice",
                              color="0")
    except Exception:
        logger.error("Molecule has no basic or acidic residues")
        raise

    graph = make_graph_salts(salts)
    comp, _ = label_components(graph)
    if comp.a.size != 0:
        salts = add_networks_salts(graph, comp)
    else:
        logger.warning('No salt bridges present in the structure')
    return salts
def setUpClass(self):
    """Project the bundled adaptive data set twice and keep both results.

    ``self.data1`` holds a residue-grouped contact projection between the
    CA atom of residue 10 and the heavy atoms of BEN; ``self.data2`` holds
    the default ``MetricDihedral`` projection of the same simulations.
    """
    from glob import glob
    from os.path import join

    from htmd.home import home
    from htmd.projections.metric import Metric
    from htmd.simlist import simlist, simfilter
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.util import tempname

    trajectory_dirs = glob(join(home(dataDir="adaptive"), "data", "*", ""))
    input_dirs = glob(join(home(dataDir="adaptive"), "input", "*"))
    all_sims = simlist(trajectory_dirs, input_dirs)

    # Water is stripped into a temporary directory before projecting.
    dry_sims = simfilter(all_sims, tempname(), "not water")
    projector = Metric(dry_sims)

    contact_metric = MetricDistance(
        "protein and resid 10 and name CA",
        "resname BEN and noh",
        periodic="selections",
        metric="contacts",
        groupsel1="residue",
        threshold=4,
    )
    projector.set(contact_metric)
    self.data1 = projector.project()

    projector.set(MetricDihedral())
    self.data2 = projector.project()
def setUpClass(self):
    """Project the bundled adaptive data set twice and keep both results.

    ``self.data1`` holds a residue-grouped contact projection between the
    CA atom of residue 10 and the heavy atoms of BEN; ``self.data2`` holds
    the default ``MetricDihedral`` projection of the same simulations.
    """
    from glob import glob
    from os.path import join

    from htmd.home import home
    from htmd.projections.metric import Metric
    from htmd.simlist import simlist, simfilter
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.util import tempname

    trajectory_dirs = glob(join(home(dataDir='adaptive'), 'data', '*', ''))
    input_dirs = glob(join(home(dataDir='adaptive'), 'input', '*'))
    all_sims = simlist(trajectory_dirs, input_dirs)

    # Water is stripped into a temporary directory before projecting.
    dry_sims = simfilter(all_sims, tempname(), 'not water')
    projector = Metric(dry_sims)

    contact_metric = MetricDistance(
        'protein and resid 10 and name CA',
        'resname BEN and noh',
        metric='contacts',
        groupsel1='residue',
        threshold=4,
    )
    projector.set(contact_metric)
    self.data1 = projector.project()

    projector.set(MetricDihedral())
    self.data2 = projector.project()
def main(argv):
    """Command-line entry point: find salt bridges and write the results.

    Parameters
    ----------
    argv : list of str
        Command-line arguments without the program name. Recognised
        options are ``-h``, ``-i/--ifile <inputfile>`` and
        ``-o/--ofile <outputfile>``.

    Raises
    ------
    SystemExit
        With code 2 when the options cannot be parsed, and with no code
        after printing the usage line for ``-h``.
    Exception
        Any error raised while projecting the contact metric is logged and
        re-raised unchanged.
    """
    usage = 'salt_bridges.py -i <inputfile> -o <outputfile>'
    inputfile = ''
    outputfile = ''
    try:
        opts, _args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        # The except target must be the exception class. The previous
        # code used an instance, which raised TypeError on a bad option
        # instead of showing the usage line.
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg

    # 1. Load molecule
    logger.info("Filtering and writing PDB")
    mol = filter_mol(inputfile)

    # 2. Compute distances
    logger.info("Computing distances among all polar residues")
    metr = MetricDistance('chain A and sidechain and acidic and element O',
                          'chain A and sidechain and basic and element N',
                          metric="contacts",
                          threshold=3.2,
                          pbc=False)
    try:
        data = metr.project(mol)
    except Exception:
        logger.error("Molecule has no basic or acidic residues")
        raise
    if len(np.shape(data)) > 1:
        data = data[0].copy()  # handling NMR structures
    mapping = metr.getMapping(mol)

    # 3. Write txt and vmd session out
    write_salt_bridges(data, mapping, mol, outputfile)
    # The slice drops the last four characters of the input name
    # (presumably a ".pdb" extension) before appending the chain suffix.
    inputfile_processed = f"{inputfile[:-4]}-chainA.pdb"
    postprocess_session(inputfile_processed, outputfile)
    logger.info("Saving VMD session")
def compute_salt_bridges(self):
    """Detect salt bridges and highlight the residues involved.

    Existing representations are removed, a contact metric is computed
    between side-chain acidic oxygens and side-chain basic nitrogens
    (threshold 3.2, no periodic wrapping), and each distinct
    ``[resid1, resid2]`` pair found in contact is collected and shown as
    Licorice.

    Returns
    -------
    salts : list of [resid1, resid2]
        The distinct residue-number pairs in contact; empty when none are
        found (a warning is logged in that case).
    """
    salts = []

    # Walk the representation indexes from last to first while removing.
    for position in reversed(range(len(self.reps.replist))):
        self.reps.remove(position)

    metr = MetricDistance('sidechain and acidic and element O',
                          'sidechain and basic and element N',
                          metric="contacts",
                          threshold=3.2,
                          pbc=False)
    try:
        data = metr.project(self)
    except BaseException:
        logger.error("Molecule has no basic or acidic residues")
        raise

    multiple_frames = len(np.shape(data)) > 1
    if multiple_frames:
        data = data[0].copy()  # handling NMR structures

    mapping = metr.getMapping(self)
    self.reps.add(sel='protein', style='NewCartoon', color=8)

    contact_atoms = mapping[data].atomIndexes.values
    if not contact_atoms.any():
        logger.warning("No salt bridges found in this protein")
        return salts

    for bond in contact_atoms:
        resid1 = self.get("resid", sel=f"same residue as index {bond[0]}")[0]
        resid2 = self.get("resid", sel=f"same residue as index {bond[1]}")[0]
        pair = [resid1, resid2]
        if pair in salts:
            continue
        salts.append(pair)
        self.reps.add(f"protein and resid {resid1}", style="Licorice", color="1")
        self.reps.add(f"protein and resid {resid2}", style="Licorice", color="0")
    return salts
def __init__(
    self,
    sel1,
    sel2,
    periodic,
    numshells=4,
    shellwidth=3,
    pbc=None,
    gap=None,
    truncate=None,
):
    """Configure the shell projection and the distance metric it wraps.

    Parameters
    ----------
    sel1 : str
        Atom selection forwarded to ``MetricDistance`` as ``sel1``.
    sel2 : str
        Atom selection forwarded to ``MetricDistance`` as ``sel2``.
    periodic : str
        Forwarded unchanged to ``MetricDistance``.
    numshells : int, optional
        Stored on the instance as ``numshells``.
    shellwidth : int, optional
        Stored on the instance as ``shellwidth``.
    pbc : optional
        Deprecated; any value other than ``None`` raises.
    gap : optional
        Accepted for interface compatibility; not used by this constructor.
    truncate : float, optional
        Forwarded unchanged to ``MetricDistance``.

    Raises
    ------
    DeprecationWarning
        Raised as an exception when ``pbc`` is given.
    """
    super().__init__()

    pbc_was_given = pbc is not None
    if pbc_was_given:
        message = "The `pbc` option is deprecated please use the `periodic` option as described in MetricDistance."
        raise DeprecationWarning(message)

    from moleculekit.projections.metricdistance import MetricDistance

    # Fixed options of the wrapped metric: plain, ungrouped distances.
    distance_options = {
        "periodic": periodic,
        "groupsel1": None,
        "groupsel2": None,
        "metric": "distances",
        "threshold": 8,
        "truncate": truncate,
    }

    # Identical selection strings mark the projection as symmetrical.
    self.symmetrical = sel1 == sel2
    self.metricdistance = MetricDistance(sel1=sel1, sel2=sel2, **distance_options)
    self.numshells = numshells
    self.shellwidth = shellwidth
    self.description = None
    self.shellcenters = None
def test_adaptive(self):
    """Run an ``AdaptiveBandit`` with ``dryrun`` enabled on a local CPU queue.

    Both random generators are seeded so the clustering gives the same
    results on every run.
    """
    import random

    import numpy as np
    from jobqueues.localqueue import LocalCPUQueue
    from moleculekit.projections.metricdistance import MetricDistance
    from sklearn.cluster import MiniBatchKMeans

    # Needed for the clustering to always give same results
    np.random.seed(0)
    random.seed(0)

    ligand_distance = MetricDistance(
        'protein resid 173 and name CA',
        'resname BEN and name C1 C2 C3 C7',
        periodic='selections',
    )

    bandit = AdaptiveBandit()
    bandit.app = LocalCPUQueue()

    # Folder layout and input handling.
    bandit.generatorspath = 'generators'
    bandit.inputpath = 'input'
    bandit.datapath = 'data'
    bandit.coorname = 'input.coor'
    bandit.filter = True
    bandit.filtersel = 'all'

    # Projection and clustering.
    bandit.clustmethod = MiniBatchKMeans
    bandit.projection = ligand_distance
    bandit.ticadim = 2

    # Run limits.
    bandit.nmin = 1
    bandit.nmax = 2
    bandit.nepochs = 9999
    bandit.nframes = 1000000

    # Bandit-specific settings.
    bandit.reward_method = 'mean'
    bandit.exploration = 0.01
    bandit.actionspace = 'tica'
    bandit.actionpool = 0
    bandit.recluster = False
    bandit.save = True
    bandit.dryrun = True

    bandit.run()
if __name__ == "__main__":
    import os
    import shutil

    import htmd.home
    from htmd.util import tempname
    from moleculekit.projections.metricdistance import MetricDistance

    # Work inside a temporary copy of the bundled adaptive data set.
    tmpdir = tempname()
    shutil.copytree(f"{htmd.home.home()}/data/adaptive/", tmpdir)
    os.chdir(tmpdir)

    md = AdaptiveMD()
    # md.dryrun = True
    md.nmin = 1
    md.nmax = 2
    md.nepochs = 3
    md.ticalag = 2
    md.ticadim = 3
    md.updateperiod = 5

    # The projection is set first to a single metric, then to a list of two.
    md.projection = MetricDistance('protein and name CA', 'resname BEN and noh')
    md.projection = [
        MetricDistance('protein and name CA', 'resname BEN and noh')
        for _ in range(2)
    ]
    # md.generatorspath = htmd.home()+'/data/dhfr'
    # md.datapath = 'input'
    # md.app = AcemdLocal(inputfile='input.acemd')
    # md.app = AcemdLocal(datadir='data')
    # md.run()  # Takes too long (2 minutes on 780).
# Calculating how many timescales are above the lag time to limit number of macrostates from pyemma.msm import timescales_msm timesc = timescales_msm(data.St.tolist(), lags=self.lag, nits=macronum).get_timescales() macronum = min(self.macronum, max(np.sum(timesc > self.lag), 2)) return macronum if __name__ == "__main__": import htmd.home import os import shutil from htmd.util import tempname from moleculekit.projections.metricdistance import MetricDistance tmpdir = tempname() shutil.copytree(htmd.home.home()+'/data/adaptive/', tmpdir) os.chdir(tmpdir) md = AdaptiveMD() # md.dryrun = True md.nmin = 1 md.nmax = 2 md.nepochs = 3 md.ticalag = 2 md.ticadim = 3 md.updateperiod = 5 md.projection = MetricDistance('protein and name CA', 'resname BEN and noh', periodic='selections') md.projection = [MetricDistance('protein and name CA', 'resname BEN and noh', periodic='selections'), MetricDistance('protein and name CA', 'resname BEN and noh', periodic='selections')]
class MetricShell(Projection):
    """ Calculates the density of atoms around other atoms.

    The density of a set of interchangeable atoms (``sel2``) is computed in
    concentric spherical shells around some other atoms (``sel1``). This
    makes it possible to treat identical molecules (like water or ions) and
    to calculate summary values such as the changes in water density around
    atoms. The result is an n-by-s dimensional vector, where n is the
    number of atoms in the first selection and s the number of shells
    around each of those atoms.

    Parameters
    ----------
    sel1 : str
        Atom selection string for the first set of atoms around which the shells will be calculated.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    sel2 : str
        Atom selection string for the second set of atoms whose density will be calculated in shells around `sel1`.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    numshells : int, optional
        Number of shells to use around atoms of `sel1`
    shellwidth : int, optional
        The width of each concentric shell in Angstroms
    pbc : bool, optional
        Set to false to disable distance calculations using periodic distances
    gap : int, optional
        Not functional yet
    truncate : float, optional
        Set all distances larger than `truncate` to `truncate`
    """

    def __init__(self, sel1, sel2, numshells=4, shellwidth=3, pbc=True, gap=None, truncate=None):
        super().__init__()
        from moleculekit.projections.metricdistance import MetricDistance

        # Fixed options of the wrapped metric: plain, ungrouped distances.
        distance_options = {
            'groupsel1': None,
            'groupsel2': None,
            'metric': 'distances',
            'threshold': 8,
            'pbc': pbc,
            'truncate': truncate,
        }
        self.metricdistance = MetricDistance(sel1=sel1, sel2=sel2, **distance_options)
        self.numshells = numshells
        self.shellwidth = shellwidth
        self.description = None
        self.shellcenters = None

    def _calculateMolProp(self, mol, props='all'):
        """Compute the requested per-molecule properties.

        ``'map'`` is the stacked atom-index mapping of the wrapped distance
        metric; ``'shellcenters'`` are the unique values of its first
        column. ``props='all'`` requests both.
        """
        if props == 'all':
            props = ('shellcenters', 'map')

        pair_indexes = np.vstack(self.metricdistance.getMapping(mol).atomIndexes)

        computed = {}
        if 'map' in props:
            computed['map'] = pair_indexes
        if 'shellcenters' in props:
            computed['shellcenters'] = np.unique(pair_indexes[:, 0])
        return computed

    def project(self, mol):
        """ Project molecule.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>`
            A :class:`Molecule <moleculekit.molecule.Molecule>` object to project.

        Returns
        -------
        data : np.ndarray
            An array containing the projected data.
        """
        molprops = self._getMolProp(mol, 'all')
        distances = self.metricdistance.project(mol)

        # A one-dimensional result is promoted to a single-row 2D array.
        if distances.ndim == 1:
            distances = distances[None, :]

        first_atoms = molprops['map'][:, 0]
        return _shells(distances, first_atoms, molprops['shellcenters'],
                       self.numshells, self.shellwidth)

    def getMapping(self, mol):
        """ Returns the description of each projected dimension.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
            A Molecule object which will be used to calculate the descriptions of the projected dimensions.

        Returns
        -------
        map : :class:`DataFrame <pandas.core.frame.DataFrame>` object
            A DataFrame containing the descriptions of each dimension
        """
        sel1_mask = self.metricdistance._getMolProp(mol, 'sel1')
        from pandas import DataFrame

        # One row per (center atom, shell number), shells varying fastest.
        rows = [
            (atom, shell)
            for atom in np.where(sel1_mask)[0]
            for shell in range(self.numshells)
        ]
        types = ['shell' for _ in rows]
        indexes = [atom for atom, _ in rows]
        description = [
            'Density of sel2 atoms in shell {}-{} A centered on atom {} {} {}'
            .format(shell * self.shellwidth, (shell + 1) * self.shellwidth,
                    mol.resname[atom], mol.resid[atom], mol.name[atom])
            for atom, shell in rows
        ]
        return DataFrame({
            'type': types,
            'atomIndexes': indexes,
            'description': description
        })
if __name__ == '__main__':
    from htmd.simlist import simlist, simfilter
    from glob import glob
    from htmd.projections.metric import Metric
    from moleculekit.projections.metricdistance import MetricDistance
    from moleculekit.projections.metricdihedral import MetricDihedral
    from moleculekit.util import tempname
    from htmd.home import home
    from os.path import join

    # Build the simulation list from the bundled adaptive data set and
    # strip water into a temporary directory before projecting.
    testfolder = home(dataDir='adaptive')

    sims = simlist(glob(join(testfolder, 'data', '*', '')), glob(join(testfolder, 'input', '*', 'structure.pdb')))
    fsims = simfilter(sims, tempname(), 'not water')

    # First projection: residue-grouped contacts between CA of residue 10 and BEN heavy atoms.
    metr = Metric(fsims)
    metr.set(MetricDistance('protein and resid 10 and name CA', 'resname BEN and noh', metric='contacts', groupsel1='residue', threshold=4))
    data1 = metr.project()
    # Second projection: default dihedral metric on the same simulations.
    metr.set(MetricDihedral())
    data2 = metr.project()

    # Testing combining of metrics
    data1.combine(data2)

    # Testing dimensions
    assert np.array_equal(data1.description.shape, (897, 3)), 'combine not working correct'
    assert np.array_equal(data1.trajectories[0].projection.shape, (6, 897)), 'combine not working correct'
    # The first nine dimensions are expected to be the contact ones.
    assert np.array_equal(np.where(data1.description.type == 'contact')[0], [0, 1, 2, 3, 4, 5, 6, 7, 8]), 'combine not working correct'

    # Testing dimension dropping / keeping
    datatmp = data1.copy()
    data1.dropDimensions(range(9))
def rmsdgoal(proj): return -proj # Lower RMSDs should give higher score tmpdir = tempname() shutil.copytree(htmd.home.home() + "/data/adaptive/", tmpdir) os.chdir(tmpdir) md = AdaptiveGoal() md.dryrun = True md.nmin = 1 md.nmax = 2 md.nepochs = 3 md.ticalag = 2 md.ticadim = 3 md.updateperiod = 5 md.projection = MetricDistance("protein and name CA", "resname BEN and noh", periodic="selections") # md.goalprojection = MetricRmsd(Molecule(htmd.home() + '/data/adaptive/generators/1/structure.pdb'), # 'protein and name CA') md.goalfunction = rmsdgoal # md.app = LocalGPUQueue() # md.run() # Some real testing now from moleculekit.projections.metricsecondarystructure import ( MetricSecondaryStructure, ) from moleculekit.projections.metricdistance import MetricSelfDistance os.chdir(path.join(home(), "data", "test-adaptive")) goalProjectionDict = {
class MetricShell(Projection):
    """Calculates the density of atoms around other atoms.

    The MetricShell class calculates the density of a set of interchangeable atoms in concentric spherical shells around
    some other atoms. Thus it can treat identical molecules (like water or ions) and calculate summary values like the
    changes in water density around atoms. It produces a n-by-s dimensional vector where n the number of atoms in the
    first selection and s the number of shells around each of the n atoms.

    Parameters
    ----------
    sel1 : str
        Atom selection string for the first set of atoms around which the shells will be calculated.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    sel2 : str
        Atom selection string for the second set of atoms whose density will be calculated in shells around `sel1`.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__
    periodic : str
        See the documentation of MetricDistance class for options.
    numshells : int, optional
        Number of shells to use around atoms of `sel1`
    shellwidth : int, optional
        The width of each concentric shell in Angstroms
    pbc : optional
        Deprecated. Passing any value other than None raises a DeprecationWarning; use `periodic` instead.
    gap : int, optional
        Not functional yet
    truncate : float, optional
        Set all distances larger than `truncate` to `truncate`
    """

    def __init__(
        self,
        sel1,
        sel2,
        periodic,
        numshells=4,
        shellwidth=3,
        pbc=None,
        gap=None,
        truncate=None,
    ):
        super().__init__()

        if pbc is not None:
            raise DeprecationWarning(
                "The `pbc` option is deprecated please use the `periodic` option as described in MetricDistance."
            )

        from moleculekit.projections.metricdistance import MetricDistance

        # Identical selection strings mark the projection as symmetrical.
        self.symmetrical = sel1 == sel2
        self.metricdistance = MetricDistance(
            sel1=sel1,
            sel2=sel2,
            periodic=periodic,
            groupsel1=None,
            groupsel2=None,
            metric="distances",
            threshold=8,
            truncate=truncate,
        )
        self.numshells = numshells
        self.shellwidth = shellwidth
        self.description = None
        self.shellcenters = None

    def _calculateMolProp(self, mol, props="all"):
        """Compute the requested per-molecule properties.

        Parameters
        ----------
        mol : Molecule
            Molecule passed to the wrapped distance metric's ``getMapping``.
        props : str or sequence of str
            ``"all"`` or any of ``"map"``, ``"shellcenters"``,
            ``"shelledges"`` and ``"shellvol"``.

        Returns
        -------
        res : dict
            The requested properties keyed by name.
        """
        props = (
            ("map", "shellcenters", "shelledges", "shellvol")
            if props == "all"
            else props
        )
        res = {}

        mapping = np.vstack(self.metricdistance.getMapping(mol).atomIndexes)
        if "map" in props:
            res["map"] = mapping
        if "shellcenters" in props:
            # In the symmetrical case both columns of the mapping are centers.
            res["shellcenters"] = (
                np.unique(mapping[:, 0]) if not self.symmetrical else np.unique(mapping)
            )

        # The shell volumes are derived from the edges, so the edges are
        # computed whenever either property is requested. Previously,
        # asking for "shellvol" alone raised a KeyError on "shelledges".
        if "shelledges" in props or "shellvol" in props:
            shelledges = np.arange(
                self.shellwidth * (self.numshells + 1), step=self.shellwidth
            )
            if "shelledges" in props:
                res["shelledges"] = shelledges
            if "shellvol" in props:
                # Volume between consecutive spheres: 4/3 * pi * (r_out^3 - r_in^3).
                res["shellvol"] = (
                    4 / 3 * np.pi * (shelledges[1:] ** 3 - shelledges[:-1] ** 3)
                )

        return res

    def project(self, mol):
        """Project molecule.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>`
            A :class:`Molecule <moleculekit.molecule.Molecule>` object to project.

        Returns
        -------
        data : np.ndarray
            An array containing the projected data.
        """
        molprops = self._getMolProp(mol, "all")

        distances = self.metricdistance.project(mol)
        # A one-dimensional result is promoted to a single-row 2D array.
        if distances.ndim == 1:
            distances = distances[np.newaxis, :]

        return _shells(
            distances,
            molprops["map"],
            molprops["shellcenters"],
            self.numshells,
            molprops["shelledges"],
            molprops["shellvol"],
            self.symmetrical,
        )

    def getMapping(self, mol):
        """Returns the description of each projected dimension.

        Parameters
        ----------
        mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
            A Molecule object which will be used to calculate the descriptions of the projected dimensions.

        Returns
        -------
        map : :class:`DataFrame <pandas.core.frame.DataFrame>` object
            A DataFrame containing the descriptions of each dimension
        """
        shellcenters = self._getMolProp(mol, "shellcenters")

        from pandas import DataFrame

        types = []
        indexes = []
        description = []
        # One row per (center atom, shell number), shells varying fastest.
        for i in shellcenters:
            for n in range(self.numshells):
                types.append("shell")
                indexes.append(i)
                description.append(
                    "Density of sel2 atoms in shell {}-{} A centered on atom {} {} {}".format(
                        n * self.shellwidth,
                        (n + 1) * self.shellwidth,
                        mol.resname[i],
                        mol.resid[i],
                        mol.name[i],
                    )
                )
        return DataFrame(
            {"type": types, "atomIndexes": indexes, "description": description}
        )