def buildSCAMatrix(msa, turbo=True, **kwargs):
    """Returns SCA matrix calculated for *msa*, which may be an :class:`.MSA`
    instance or a 2D Numpy character array.

    Implementation is case insensitive and handles ambiguous amino acids as
    follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids
      * Joint probability of observing a pair of ambiguous amino acids is
        allocated to all potential combinations, e.g. probability of **XX**
        is allocated to 400 combinations of standard amino acids, similarly
        probability of **XB** is allocated to 40 combinations of *D* and *N*
        with the standard amino acids.

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  When *ambiguity* is set **False**, all alphabet
    characters are considered as distinct types.  All non-alphabet characters
    are considered as gaps.

    *turbo* is converted to :class:`bool` and forwarded to the underlying
    ``msasca`` routine.  Extra keyword arguments are accepted but are not
    used by this function.  A warning is logged when *msa* contains fewer
    than 100 sequences."""

    msa = getMSA(msa)
    n_sequences = msa.shape[0]
    if not n_sequences >= 100:
        LOGGER.warning('SCA performs the best with higher number of '
                       'sequences, and '
                       'minimal number of sequences is recommended as 100.')

    from .msatools import msasca

    LOGGER.timeit('_sca')
    n_columns = msa.shape[1]
    # msasca receives a preallocated square buffer
    result = msasca(msa, zeros((n_columns, n_columns), float),
                    turbo=bool(turbo))
    LOGGER.report('SCA matrix was calculated in %.2fs.', '_sca')
    return result
def pathPDBMirror(path=None, format=None):
    """Returns or specify PDB mirror path to be used by :func:`.fetchPDB`.

    Called without *path*, the stored mirror path is returned (together with
    the stored format as a tuple when a format is stored); **None** is
    returned when no mirror is stored or the stored path is not a directory.

    Called with a *path* that is an existing directory, the absolute path and
    *format* are saved in the settings.  To release the current mirror, pass
    an invalid path, e.g. ``path=''``.  If you are keeping a partial mirror,
    such as PDB files in :file:`/data/structures/divided/pdb/` folder,
    specify *format*, which is ``'pdb'`` in this case.

    :raises IOError: when *path* is not a directory and no mirror was set"""

    if path is None:
        # query mode: report what is stored in the settings
        path = SETTINGS.get('pdb_mirror_path')
        format = SETTINGS.get('pdb_mirror_format', None)
        if not path:
            return None
        if not isdir(path):
            LOGGER.warning(
                'PDB mirror path {0} is not a accessible.'.format(
                    repr(path)))
            return None
        if format is None:
            return path
        return path, format

    if isdir(path):
        # set mode: remember the absolute location and its format
        path = abspath(path)
        LOGGER.info('Local PDB mirror path is set: {0}'.format(repr(path)))
        SETTINGS['pdb_mirror_path'] = path
        SETTINGS['pdb_mirror_format'] = format
        SETTINGS.save()
        return None

    # release mode: an invalid path drops the stored mirror, if any
    released = SETTINGS.pop('pdb_mirror_path')
    if not released:
        raise IOError('{0} is not a valid path.'.format(repr(path)))
    LOGGER.info('PDB mirror {0} is released.'.format(repr(released)))
    SETTINGS.save()
def _resetAxisTicks(values, ticker, name):
    """Relabel the ticks of one axis with entries of *values*.

    *ticker* is :func:`matplotlib.pyplot.xticks` or
    :func:`matplotlib.pyplot.yticks`; *name* (``'xticks'``/``'yticks'``) is
    used in the warning logged when relabeling fails."""

    try:
        ticks = ticker()[0]
        labels = list(ticks.astype(int))
        if labels[-1] > len(values):
            labels.pop()
        if labels:
            labels = list(values[labels])
            # pad with empty labels so every tick position has a label
            ticker(ticks, labels + [''] * (len(ticks) - len(labels)))
    except Exception:
        # best effort: never let a labeling problem break plotting, but do
        # not swallow KeyboardInterrupt/SystemExit as a bare except would
        LOGGER.warning('{0} could not be reset.'.format(name))


def resetTicks(x, y=None):
    """Reset X (and Y) axis ticks using values in given *array*.  Ticks in the
    current figure should not be fractional values for this function to work
    as expected.

    :arg x: values used to label X axis ticks, skipped when **None**
    :arg y: values used to label Y axis ticks, skipped when **None**

    Failures are logged as warnings (naming the affected axis) and are not
    raised."""

    import matplotlib.pyplot as plt

    if x is not None:
        _resetAxisTicks(x, plt.xticks, 'xticks')
    if y is not None:
        _resetAxisTicks(y, plt.yticks, 'yticks')
def alignCoordsets(atoms, weights=None):
    """Returns *atoms* after superposing coordinate sets onto its active
    coordinate set.  Transformations will be calculated for *atoms* and
    applied to its :class:`.AtomGroup`, when applicable.  Optionally, atomic
    *weights* can be passed for weighted superposition.

    **None** is returned (after a warning) when *atoms* has fewer than two
    coordinate sets.

    :raises TypeError: when *atoms* lacks the coordinate set interface"""

    try:
        active = atoms.getACSIndex()
        total = atoms.numCoordsets()
    except AttributeError:
        raise TypeError('atoms must have type Atomic, not {0}'
                        .format(type(atoms)))

    if total < 2:
        LOGGER.warning('{0} contains fewer than two coordinate sets, '
                       'alignment was not performed.'.format(str(atoms)))
        return

    # transformations are applied to the parent atom group when there is one
    try:
        group = atoms.getAtomGroup()
    except AttributeError:
        group = atoms
    group_active = group.getACSIndex()

    target = atoms._getCoords()
    for index in (k for k in range(total) if k != active):
        atoms.setACSIndex(index)
        group.setACSIndex(index)
        calcTransformation(atoms, target, weights).apply(group)

    # restore the coordinate sets that were active on entry
    atoms.setACSIndex(active)
    group.setACSIndex(group_active)
    return atoms
def showContactMap(enm, *args, **kwargs):
    """Show Kirchhoff matrix using :func:`~matplotlib.pyplot.spy`.

    Positional and keyword arguments are passed on to
    :func:`~matplotlib.pyplot.spy`, whose return value is returned.  When the
    Kirchhoff matrix of *enm* is not set, a warning is logged and **None** is
    returned.

    .. plot::
       :context:
       :include-source:

       p38_gnm = GNM('p38')
       p38_gnm.buildKirchhoff( p38_structure )
       plt.figure(figsize=(4,4))
       showContactMap( p38_gnm )

    .. plot::
       :context:
       :nofigs:

       plt.close('all')"""

    import matplotlib.pyplot as plt

    if not isinstance(enm, GNMBase):
        raise TypeError('model argument must be an ENM instance')

    matrix = enm.getKirchhoff()
    if matrix is None:
        LOGGER.warning('kirchhoff matrix is not set')
        return None

    image = plt.spy(matrix, *args, **kwargs)
    plt.title('{0:s} contact map'.format(enm.getTitle()))
    for set_label in (plt.xlabel, plt.ylabel):
        set_label('Residue index')
    return image
def pathVMD(*path):
    """Returns VMD path, or set it to be a user specified *path*.

    Called without arguments, the path stored in the settings is returned
    when it is executable.  Otherwise VMD is looked for (Windows registry on
    Windows, ``which('vmd')`` elsewhere); an executable that is found is
    stored via :func:`setVMDpath` and returned, and **None** is returned when
    nothing is found.

    Called with a single *path*, that path is saved in the settings.

    :raises OSError: when the given *path* is not executable
    :raises ValueError: when more than one argument is given"""

    if not path:
        path = SETTINGS.get('vmd', None)
        if isExecutable(path):
            return path

        LOGGER.warning('VMD path is not set by user, looking for it.')
        vmdbin = None
        if PLATFORM == 'Windows':
            if PY3K:
                import winreg
            else:
                import _winreg as winreg  # PY3K: OK
            for vmdversion in ('1.8.7', '1.9', '1.9.1'):
                # a missing registry key/value raises OSError (WindowsError);
                # it only means this version is not installed
                try:
                    key = winreg.OpenKey(
                        winreg.HKEY_LOCAL_MACHINE,
                        'Software\\University of Illinois\\VMD\\' +
                        vmdversion)
                    vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                    vmdbin = join(vmddir, 'vmd.exe')
                except OSError:
                    pass
                try:
                    key = winreg.OpenKey(
                        winreg.HKEY_LOCAL_MACHINE,
                        'Software\\WOW6432node\\University of Illinois'
                        '\\VMD\\' + vmdversion)
                    vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                    vmdbin = join(vmddir, 'vmd.exe')
                except OSError:
                    pass
        else:
            vmdbin = which('vmd')

        if isExecutable(vmdbin):
            setVMDpath(vmdbin)
            return vmdbin
    elif len(path) == 1:
        path = path[0]
        if isExecutable(path):
            SETTINGS['vmd'] = path
            SETTINGS.save()
            LOGGER.info("VMD path is set to '{0}'.".format(path))
        else:
            raise OSError('{0} is not executable.'.format(str(path)))
    else:
        raise ValueError('specify a single path string')
def loadPDBClusters(sqid=None):
    """Load previously fetched PDB sequence clusters from disk to memory.

    :arg sqid: sequence identity level to load, must be a key of
        ``PDB_CLUSTERS``; all levels are loaded when **None**
    :type sqid: int

    Cluster files that are missing from the package folder are fetched with
    :func:`fetchPDBClusters` first.  A warning is logged, at most once, when
    a cluster file is more than a week old.

    :raises ValueError: when cluster data is not available for *sqid*"""

    global PDB_CLUSTERS_UPDATE_WARNING

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if sqid is None:
        sqid_list = list(PDB_CLUSTERS)
        LOGGER.info('Loading all PDB sequence clusters.')
    else:
        assert isinstance(sqid, int), 'sqid must be an integer'
        if sqid not in PDB_CLUSTERS:
            raise ValueError('PDB cluster data is not available for sequence '
                             'identity {0}%, try one of {1}'.format(
                                 sqid, PDB_CLUSTERS_SQID_STR))
        LOGGER.info('Loading PDB sequence clusters for sequence identity '
                    '{0}.'.format(sqid))
        sqid_list = [sqid]

    # a distinct loop name keeps the *sqid* argument intact
    for level in sqid_list:
        filename = os.path.join(PDB_CLUSTERS_PATH,
                                'bc-{0}.out.gz'.format(level))
        if not os.path.isfile(filename):
            fetchPDBClusters(level)

        if PDB_CLUSTERS_UPDATE_WARNING:
            import time
            # file age in weeks (604800 seconds per week)
            diff = (time.time() - os.path.getmtime(filename)) / 604800.
            if diff > 1.:
                LOGGER.warning(
                    'PDB sequence clusters are {0:.1f} week(s) old,'
                    ' call `fetchPDBClusters` to receive updates.'.format(
                        diff))
                PDB_CLUSTERS_UPDATE_WARNING = False

        inp = openFile(filename)
        try:
            PDB_CLUSTERS[level] = inp.read()
        finally:
            # close the handle even when reading fails
            inp.close()
def __add__(self, other):
    """Returns an :class:`.AtomMap` instance.  Order of pointed atoms are
    preserved.

    Both operands must point to the same :class:`.AtomGroup`.  The result
    uses the active coordinate set index of the left operand; a warning is
    logged when the operands disagree.

    :raises TypeError: when *other* does not provide ``getAtomGroup``
    :raises ValueError: when operands point to different atom groups"""

    try:
        ag = other.getAtomGroup()
    except AttributeError:
        raise TypeError('unsupported operand type(s) for +: {0} and '
                        '{1}'.format(repr(type(self).__name__),
                                     repr(type(other).__name__)))

    if ag != self._ag:
        raise ValueError('AtomPointer instances must point to the same '
                         'AtomGroup instance')

    acsi = self.getACSIndex()
    if acsi != other.getACSIndex():
        LOGGER.warning('Active coordset indices of atoms are not the same.'
                       ' Result will have ACSI {0}.'.format(acsi))

    title = '({0}) + ({1})'.format(str(self), str(other))
    indices = concatenate([self._getIndices(), other._getIndices()])

    # only some pointer types count dummy atoms
    n_dummies = 0
    for operand in (self, other):
        try:
            n_dummies += operand.numDummies()
        except AttributeError:
            pass

    return AtomMap(ag, indices, acsi, title=title, intarrays=True,
                   dummies=n_dummies)
def getCoordsets(self, indices=None):
    """Returns coordinate sets at given *indices*.  *indices* may be an
    integer, a list of integers or **None**.  **None** returns all coordinate
    sets.

    When no atom subset is selected and all frames are requested, the frames
    are read from the file in one pass; any other request is delegated to
    :meth:`TrajFile.getCoordsets`.  If the file holds fewer frames than its
    header announces, the complete frames that are present are returned and
    a warning is logged.

    :raises ValueError: when the file is closed"""

    if self._closed:
        raise ValueError('I/O operation on closed file')
    if (self._indices is None and
            (indices is None or indices == slice(None))):
        # local import: binary mode of numpy.fromstring is deprecated
        from numpy import frombuffer

        nfi = self._nfi
        self.reset()
        # each frame may be preceded by 14 unit cell values
        n_floats = self._n_floats + self._unitcell * 14
        n_atoms = self._n_atoms
        n_csets = self._n_csets

        raw = self._file.read(self._itemsize * n_floats * n_csets)
        # copy, since frombuffer returns a read-only view of the bytes
        data = frombuffer(raw, self._dtype).copy()
        if len(data) < n_floats * n_csets:
            # truncated file: keep whole frames only
            n_csets = len(data) // n_floats
            data = data[:n_csets * n_floats]
            LOGGER.warning('DCD is corrupt, {0} out of {1} frames '
                           'were parsed.'.format(n_csets, self._n_csets))
        data = data.reshape((n_csets, n_floats))
        if self._unitcell:
            data = data[:, 14:]
        # every coordinate axis is stored with one leading and one trailing
        # value that is not a coordinate
        data = data.reshape((n_csets, 3, n_atoms + 2))
        data = data[:, :, 1:-1]
        data = data.transpose(0, 2, 1)
        self.goto(nfi)
        if self._astype is not None and self._astype != data.dtype:
            data = data.astype(self._astype)
        return data
    else:
        return TrajFile.getCoordsets(self, indices)
def calcShannonEntropy(msa, ambiguity=True, omitgaps=True, **kwargs):
    """Returns Shannon entropy array calculated for *msa*, which may be an
    :class:`.MSA` instance or a 2D Numpy character array.

    Implementation is case insensitive and handles ambiguous amino acids as
    follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  When *ambiguity* is set **False**, all alphabet
    characters are considered as distinct types.

    All non-alphabet characters are considered as gaps, and they are handled
    in two ways:

      * non-existent, the probability of observing amino acids in a given
        column is adjusted, by default
      * as a distinct character with its own probability, when *omitgaps* is
        **False**

    A warning is logged when *msa* contains fewer than 100 sequences.  Extra
    keyword arguments are accepted but are not used by this function."""

    msa = getMSA(msa)
    length = msa.shape[1]
    if msa.shape[0] < 100:
        # the message used to name SCA, copied from buildSCAMatrix
        LOGGER.warning(
            "Shannon entropy calculation performs the best with higher "
            "number of sequences, and "
            "minimal number of sequences is recommended as 100."
        )
    entropy = empty(length, float)
    from .msatools import msaentropy

    return msaentropy(msa, entropy, ambiguity=bool(ambiguity),
                      omitgaps=bool(omitgaps))
def alignCoordsets(atoms, weights=None):
    """Return *atoms* after superposing coordinate sets onto its active
    coordinate set.  Transformations will be calculated for *atoms* and
    applied to its :class:`.AtomGroup`, when applicable.  Optionally, atomic
    *weights* can be passed for weighted superposition.

    **None** is returned (after a warning) when *atoms* has fewer than two
    coordinate sets.

    :raises TypeError: when *atoms* lacks the coordinate set interface"""

    try:
        acsi, n_csets = atoms.getACSIndex(), atoms.numCoordsets()
    except AttributeError:
        # '{0}' rather than '{0:s}': type objects reject the 's' format
        # spec, which would replace this message with a formatting error
        raise TypeError('atoms must have type Atomic, not {0}'
                        .format(type(atoms)))
    if n_csets < 2:
        LOGGER.warning('{0:s} contains fewer than two coordinate sets, '
                       'alignment was not performed.'.format(str(atoms)))
        return

    # transformations are applied to the parent atom group when there is one
    try:
        ag = atoms.getAtomGroup()
    except AttributeError:
        ag = atoms
    agacsi = ag.getACSIndex()

    tar = atoms._getCoords()
    for i in range(n_csets):
        if i == acsi:
            continue
        atoms.setACSIndex(i)
        ag.setACSIndex(i)
        calcTransformation(atoms, tar, weights).apply(ag)

    # restore the coordinate sets that were active on entry
    atoms.setACSIndex(acsi)
    ag.setACSIndex(agacsi)
    return atoms
def __and__(self, other):
    """Return a :class:`Selection` of the atoms that both *self* and *other*
    point to.  *self* is returned when both operands are the same object,
    and **None** is returned when the operands share no atoms.

    When active coordinate set indices of the operands differ, a warning is
    logged and index zero is used for the result.

    :raises TypeError: when *other* is not an :class:`AtomPointer`
    :raises ValueError: when operands point to different atom groups"""

    if self is other:
        return self

    if not isinstance(other, AtomPointer):
        raise TypeError('other must be an AtomPointer')

    if self._ag != other.getAtomGroup():
        raise ValueError('both selections must be from the same AtomGroup')

    # checked once; the check used to be duplicated and logged twice
    acsi = self.getACSIndex()
    if acsi != other.getACSIndex():
        LOGGER.warning('Active coordinate set indices do not match, it will '
                       'be set to zero.')
        acsi = 0

    common = set(self._getIndices()).intersection(other.getIndices())
    if common:
        # np.unique needs a sequence; given a set it returns a one-element
        # object array holding the set itself
        indices = np.unique(list(common))
        return Selection(self._ag, indices, '({0:s}) and ({1:s})'.format(
            self.getSelstr(), other.getSelstr()), acsi)
def calcShannonEntropy(msa, ambiguity=True, omitgaps=True, **kwargs):
    """Returns Shannon entropy array calculated for *msa*, which may be an
    :class:`.MSA` instance or a 2D Numpy character array.

    Implementation is case insensitive and handles ambiguous amino acids as
    follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  When *ambiguity* is set **False**, all alphabet
    characters are considered as distinct types.

    All non-alphabet characters are considered as gaps, and they are handled
    in two ways:

      * non-existent, the probability of observing amino acids in a given
        column is adjusted, by default
      * as a distinct character with its own probability, when *omitgaps* is
        **False**

    A warning is logged when *msa* contains fewer than 100 sequences.  Extra
    keyword arguments are accepted but are not used by this function."""

    msa = getMSA(msa)
    length = msa.shape[1]
    if msa.shape[0] < 100:
        # the message used to name SCA, copied from buildSCAMatrix
        LOGGER.warning(
            'Shannon entropy calculation performs the best with higher '
            'number of sequences, and '
            'minimal number of sequences is recommended as 100.')
    entropy = empty(length, float)
    from .msatools import msaentropy
    return msaentropy(msa, entropy,
                      ambiguity=bool(ambiguity), omitgaps=bool(omitgaps))
def loadPDBClusters(sqid=None):
    """Load previously fetched PDB sequence clusters from disk to memory.

    :arg sqid: sequence identity level to load, must be a key of
        ``PDB_CLUSTERS``; all levels are loaded when **None**
    :type sqid: int

    Cluster files that are missing from the package folder are fetched with
    :func:`fetchPDBClusters` first.  A warning is logged, at most once, when
    a cluster file is more than a week old.

    :raises ValueError: when cluster data is not available for *sqid*"""

    global PDB_CLUSTERS_UPDATE_WARNING

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if sqid is None:
        sqid_list = list(PDB_CLUSTERS)
        LOGGER.info('Loading all PDB sequence clusters.')
    else:
        assert isinstance(sqid, int), 'sqid must be an integer'
        if sqid not in PDB_CLUSTERS:
            raise ValueError('PDB cluster data is not available for sequence '
                             'identity {0}%, try one of {1}'
                             .format(sqid, PDB_CLUSTERS_SQID_STR))
        LOGGER.info('Loading PDB sequence clusters for sequence identity '
                    '{0}.'.format(sqid))
        sqid_list = [sqid]

    # a distinct loop name keeps the *sqid* argument intact
    for identity in sqid_list:
        filename = os.path.join(PDB_CLUSTERS_PATH,
                                'bc-{0}.out.gz'.format(identity))
        if not os.path.isfile(filename):
            fetchPDBClusters(identity)

        if PDB_CLUSTERS_UPDATE_WARNING:
            import time
            # file age in weeks (604800 seconds per week)
            diff = (time.time() - os.path.getmtime(filename)) / 604800.
            if diff > 1.:
                LOGGER.warning('PDB sequence clusters are {0:.1f} week(s) old,'
                               ' call `fetchPDBClusters` to receive updates.'
                               .format(diff))
                PDB_CLUSTERS_UPDATE_WARNING = False

        inp = openFile(filename)
        try:
            PDB_CLUSTERS[identity] = inp.read()
        finally:
            # close the handle even when reading fails
            inp.close()
def pathPDBMirror(path=None, format=None):
    """Returns or specify PDB mirror path to be used by :func:`.fetchPDB`.

    Without *path*, the mirror stored in the settings is reported: the path
    alone, or a ``(path, format)`` tuple when a format is stored.  **None**
    is returned when no mirror is stored or when the stored path is not a
    directory (a warning is logged in the latter case).

    With a *path* that is an existing directory, its absolute form and
    *format* are stored.  To release the current mirror, pass an invalid
    path, e.g. ``path=''``.  If you are keeping a partial mirror, such as PDB
    files in :file:`/data/structures/divided/pdb/` folder, specify *format*,
    which is ``'pdb'`` in this case.

    :raises IOError: when *path* is not a directory and no mirror was set"""

    querying = path is None
    if querying:
        path = SETTINGS.get('pdb_mirror_path')
        format = SETTINGS.get('pdb_mirror_format', None)
        if path and isdir(path):
            return path if format is None else (path, format)
        if path:
            LOGGER.warning('PDB mirror path {0} is not a accessible.'
                           .format(repr(path)))
        return None

    if not isdir(path):
        # an invalid path releases whatever mirror is currently stored
        previous = SETTINGS.pop('pdb_mirror_path')
        if not previous:
            raise IOError('{0} is not a valid path.'.format(repr(path)))
        LOGGER.info('PDB mirror {0} is released.'
                    .format(repr(previous)))
        SETTINGS.save()
        return None

    path = abspath(path)
    LOGGER.info('Local PDB mirror path is set: {0}'
                .format(repr(path)))
    SETTINGS['pdb_mirror_path'] = path
    SETTINGS['pdb_mirror_format'] = format
    SETTINGS.save()
def __and__(self, other):
    """Return a :class:`Selection` of the atoms that both *self* and *other*
    point to.  *self* is returned when both operands are the same object,
    and **None** is returned when the operands share no atoms.

    When active coordinate set indices of the operands differ, a warning is
    logged and index zero is used for the result.

    :raises TypeError: when *other* does not provide ``getAtomGroup``
    :raises ValueError: when operands point to different atom groups"""

    if self is other:
        return self

    try:
        ag = other.getAtomGroup()
    except AttributeError:
        raise TypeError('other must be an AtomPointer')

    if self._ag != ag:
        raise ValueError('both selections must be from the same AtomGroup')

    # checked once; the check used to be duplicated and logged twice
    acsi = self.getACSIndex()
    if acsi != other.getACSIndex():
        LOGGER.warning('Active coordinate set indices do not match, it will '
                       'be set to zero.')
        acsi = 0

    common = set(self._getIndices()).intersection(other.getIndices())
    if common:
        # unique needs a sequence; given a set it returns a one-element
        # object array holding the set itself
        indices = unique(list(common))
        # drop the trailing entry when it is the dummy atom marker
        if indices[-1] == atommap.DUMMY:
            indices = indices[:-1]
        return Selection(self._ag, indices, '({0}) and ({1})'
                         .format(self.getSelstr(), other.getSelstr()),
                         acsi)
def getCoordsets(self, indices=None):
    """Returns coordinate sets at given *indices*.  *indices* may be an
    integer, a list of integers or ``None``.  ``None`` returns all coordinate
    sets.

    When no atom subset is selected and all frames are requested, the frames
    are read from the file in one pass; any other request is delegated to
    :meth:`TrajFile.getCoordsets`.  If the file holds fewer frames than its
    header announces, the complete frames that are present are returned and
    a warning is logged.

    :raises ValueError: when the file is closed"""

    if self._closed:
        raise ValueError('I/O operation on closed file')
    if (self._indices is None and
            (indices is None or indices == slice(None))):
        nfi = self._nfi
        self.reset()
        # each frame may be preceded by 14 unit cell values
        n_floats = self._n_floats + self._unitcell * 14
        n_atoms = self._n_atoms
        n_csets = self._n_csets
        # fromfile returns fewer items than requested on a short file
        data = np.fromfile(self._file, self._dtype, n_floats * n_csets)
        if len(data) < n_floats * n_csets:
            # truncated file: keep whole frames only
            n_csets = len(data) // n_floats
            data = data[:n_csets * n_floats]
            LOGGER.warning('DCD is corrupt, {0:d} out of {1:d} frames '
                           'were parsed.'.format(n_csets, self._n_csets))
        data = data.reshape((n_csets, n_floats))
        if self._unitcell:
            data = data[:, 14:]
        # every coordinate axis is stored with one leading and one trailing
        # value that is not a coordinate
        data = data.reshape((n_csets, 3, n_atoms + 2))
        data = data[:, :, 1:-1]
        data = data.transpose(0, 2, 1)
        self.goto(nfi)
        if self._astype is not None and self._astype != data.dtype:
            data = data.astype(self._astype)
        return data
    else:
        return TrajFile.getCoordsets(self, indices)
def buildSCAMatrix(msa, turbo=True, **kwargs):
    """Returns SCA matrix calculated for *msa*, which may be an :class:`.MSA`
    instance or a 2D Numpy character array.

    Implementation is case insensitive and handles ambiguous amino acids as
    follows:

      * **B** (Asx) count is allocated to *D* (Asp) and *N* (Asn)
      * **Z** (Glx) count is allocated to *E* (Glu) and *Q* (Gln)
      * **J** (Xle) count is allocated to *I* (Ile) and *L* (Leu)
      * **X** (Xaa) count is allocated to the twenty standard amino acids
      * Joint probability of observing a pair of ambiguous amino acids is
        allocated to all potential combinations, e.g. probability of **XX**
        is allocated to 400 combinations of standard amino acids, similarly
        probability of **XB** is allocated to 40 combinations of *D* and *N*
        with the standard amino acids.

    Selenocysteine (**U**, Sec) and pyrrolysine (**O**, Pyl) are considered
    as distinct amino acids.  When *ambiguity* is set **False**, all alphabet
    characters are considered as distinct types.  All non-alphabet characters
    are considered as gaps.

    The truth value of *turbo* is handed to the ``msasca`` routine; extra
    keyword arguments are accepted but not used here.  Alignments with fewer
    than 100 sequences trigger a warning."""

    msa = getMSA(msa)
    too_few = msa.shape[0] < 100
    if too_few:
        LOGGER.warning('SCA performs the best with higher number of sequences, and '
                       'minimal number of sequences is recommended as 100.')

    from .msatools import msasca

    LOGGER.timeit('_sca')
    size = msa.shape[1]
    # square output buffer, filled in by msasca
    buffer = zeros((size, size), float)
    sca_matrix = msasca(msa, buffer, turbo=bool(turbo))
    LOGGER.report('SCA matrix was calculated in %.2fs.', '_sca')
    return sca_matrix
def getLigandResidueIndices(self):
    """Returns residue indices of the residues interacting with ligands.

    A warning is logged and **None** is returned when no ligand was
    provided."""

    if not self._lig:
        LOGGER.warning('No ligand provided.')
        return None
    return self._ligres_idx
def getLigandResidueESSAZscores(self):
    """Returns ESSA z-scores of the residues interacting with ligands as a
    dictionary.  The keys are the chain ids and residue numbers of the
    ligands.  Each value comprises the indices of the residue ESSA z-scores
    in the profile and the corresponding scores as separate arrays.

    A warning is logged and **None** is returned when no ligand was
    provided."""

    if not self._lig:
        LOGGER.warning('No ligand provided.')
        return None
    return self._zs_lig
def getLigandResidueCodes(self):
    """Returns chain ids and residue numbers of the residues interacting with
    ligands.

    A warning is logged and **None** is returned when no ligand was
    provided."""

    if not self._lig:
        LOGGER.warning('No ligand provided.')
        return None
    return self._ligres_code
def pathVMD(*path):
    """Return VMD path, or set it to be a user specified *path*.

    Without arguments, the path stored in the settings is returned when it
    is executable.  Otherwise VMD is searched for (Windows registry on
    Windows, ``which('vmd')`` on other platforms); when an executable is
    found it is stored via :func:`setVMDpath` and returned, else **None** is
    returned.

    With one argument, that path is saved in the settings.

    :raises OSError: when the given *path* is not executable
    :raises ValueError: when more than one argument is given"""

    if len(path) > 1:
        raise ValueError('specify a single path string')

    if path:
        path = path[0]
        if not isExecutable(path):
            raise OSError('{0} is not executable.'.format(str(path)))
        SETTINGS['vmd'] = path
        SETTINGS.save()
        LOGGER.info("VMD path is set to '{0}'.".format(path))
        return None

    path = SETTINGS.get('vmd', None)
    if isExecutable(path):
        return path

    LOGGER.warning('VMD path is not set by user, looking for it.')
    vmdbin = None
    if PLATFORM == 'Windows':
        if PY3K:
            import winreg
        else:
            import _winreg as winreg  # PY3K: OK
        # native registry view first, then the 32-bit view on 64-bit Windows
        roots = ('Software\\University of Illinois\\VMD\\',
                 'Software\\WOW6432node\\University of Illinois\\VMD\\')
        for vmdversion in ('1.8.7', '1.9', '1.9.1'):
            for root in roots:
                try:
                    key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
                                         root + vmdversion)
                    vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                    vmdbin = join(vmddir, 'vmd.exe')
                except OSError:
                    # key/value missing: this version is not installed
                    pass
    else:
        vmdbin = which('vmd')

    if isExecutable(vmdbin):
        setVMDpath(vmdbin)
        return vmdbin
def saveLigandResidueESSAZscores(self):
    """Saves the dictionary of ESSA z-scores of the residues interacting with
    ligands to a pickle `.pkl` file.  The keys of the dictionary are the
    chain ids and residue numbers of the ligands.  Each value comprises the
    indices of the residue ESSA z-scores in the profile and the
    corresponding scores as separate arrays.

    The file is named ``<title>_ligres_gnm_zs.pkl``.  A warning is logged
    and nothing is written when no ligand was provided."""

    if self._lig:
        from pickle import dump
        # context manager closes the file even if pickling fails
        with open('{}_ligres_gnm_zs.pkl'.format(self._title), 'wb') as out:
            dump(self._zs_lig, out)
    else:
        LOGGER.warning('No ligand provided.')
def getVMDpath():
    """Return VMD path set by user or one identified automatically.

    The path stored in the settings is returned when it is executable.
    Otherwise VMD is searched for (Windows registry on Windows,
    ``which("vmd")`` on other platforms); when an executable is found it is
    stored via :func:`setVMDpath` and returned, else **None** is returned."""

    path = SETTINGS.get("vmd", None)
    if isExecutable(path):
        return path

    LOGGER.warning("VMD path is not set by user, looking for it.")
    vmdbin = None
    if PLATFORM == "Windows":
        # the registry module is named winreg on Python 3, _winreg on 2
        try:
            import winreg
        except ImportError:
            import _winreg as winreg
        for vmdversion in ("1.8.7", "1.9", "1.9.1"):
            try:
                key = winreg.OpenKey(
                    winreg.HKEY_LOCAL_MACHINE,
                    "Software\\University of Illinois\\VMD\\" + vmdversion
                )
                vmddir = winreg.QueryValueEx(key, "VMDDIR")[0]
                vmdbin = os.path.join(vmddir, "vmd.exe")
            except OSError:
                # key/value missing: this version is not installed
                pass
            try:
                key = winreg.OpenKey(
                    winreg.HKEY_LOCAL_MACHINE,
                    "Software\\WOW6432node\\University of Illinois\\VMD\\"
                    + vmdversion
                )
                vmddir = winreg.QueryValueEx(key, "VMDDIR")[0]
                vmdbin = os.path.join(vmddir, "vmd.exe")
            except OSError:
                pass
    else:
        vmdbin = which("vmd")

    if isExecutable(vmdbin):
        setVMDpath(vmdbin)
        return vmdbin
def getPDBMirrorPath():
    """Return the path to a local PDB mirror, or **None** if a mirror path is
    not set.  **None** is also returned, after logging a warning, when the
    stored path is not a directory."""

    mirror = SETTINGS.get('pdb_mirror_path')
    if not mirror:
        return None
    if isdir(mirror):
        return mirror
    LOGGER.warning('PDB mirror path {0:s} is not a accessible.'
                   .format(repr(mirror)))
    return None
def getPDBLocalFolder():
    """Return the path to a local PDB folder and folder structure specifier.
    If a local folder is not set, **None** will be returned.  **None** is
    also returned, after logging a warning, when the stored folder is not a
    directory.  The specifier is the ``'pdb_local_divided'`` setting, which
    defaults to **True**."""

    local = SETTINGS.get('pdb_local_folder')
    if not local:
        return None
    if not isdir(local):
        LOGGER.warning('PDB local folder {0:s} is not a accessible.'
                       .format(repr(local)))
        return None
    divided = SETTINGS.get('pdb_local_divided', True)
    return local, divided
def _min_sim(self, coords):
    """Energy-minimize *coords* with OpenMM and, when ``self._sim`` is set,
    heat the system up and run additional simulation steps.

    :arg coords: coordset (numAtoms, 3) in Angstrom
    :returns: ``(pot, pos)`` where *pot* is the potential energy expressed
        in kilojoule per mole and *pos* the positions expressed in Angstrom,
        limited to the first ``self._topology.getNumAtoms()`` atoms; on
        failure ``(nan, array of nan shaped like coords)``
    :raises ImportError: when the ``simtk`` packages cannot be imported"""

    # coords: coordset (numAtoms, 3) in Angstrom, which should be converted into nanometer
    try:
        from simtk.openmm.app import StateDataReporter
        from simtk.unit import kelvin, angstrom, kilojoule_per_mole, MOLAR_GAS_CONSTANT_R
    except ImportError:
        raise ImportError(
            'Please install PDBFixer and OpenMM in order to use ClustENM.')

    simulation = self._prep_sim(coords=coords)

    # automatic conversion into nanometer will be carried out.
    # simulation.context.setPositions(coords * angstrom)
    try:
        simulation.minimizeEnergy(tolerance=self._tolerance * kilojoule_per_mole,
                                  maxIterations=self._maxIterations)
        if self._sim:
            # heating-up the system incrementally
            # the reporter is only used for the degrees of freedom (_dof)
            # it computes in _initializeConstants
            sdr = StateDataReporter(stdout, 1, step=True, temperature=True)
            sdr._initializeConstants(simulation)
            temp = 0.0

            # instantaneous temperature could be obtained by openmmtools module
            # but its installation using conda may lead to problem due to repository freezing,
            # therefore, we are here evaluating it by hand.
            # one step at a time until T = 2 * KE / (dof * R) reaches
            # self._temp
            while temp < self._temp:
                simulation.step(1)
                ke = simulation.context.getState(
                    getEnergy=True).getKineticEnergy()
                temp = (2 * ke / (sdr._dof * MOLAR_GAS_CONSTANT_R)
                        ).value_in_unit(kelvin)

            # NOTE(review): assumed to belong to the ``if self._sim`` branch
            # - confirm the nesting against the original file
            simulation.step(self._t_steps[self._cycle])

        pos = simulation.context.getState(getPositions=True).getPositions(
            asNumpy=True).value_in_unit(
            angstrom)[:self._topology.getNumAtoms()]
        pot = simulation.context.getState(
            getEnergy=True).getPotentialEnergy().value_in_unit(
            kilojoule_per_mole)

        return pot, pos

    # NOTE(review): BaseException also catches KeyboardInterrupt and
    # SystemExit - confirm that this is intended
    except BaseException as be:
        LOGGER.warning(
            'OpenMM exception: ' + be.__str__() +
            ' so the corresponding conformer will be discarded!')

        return np.nan, np.full_like(coords, np.nan)
def saveLigandResidueCodes(self):
    """Saves chain ids and residue numbers of the residues interacting with
    ligands.

    Output goes to ``<title>_ligand_rescodes.txt``: each ligand key is
    written on its own line, followed by one line per entry of its value.  A
    warning is logged and nothing is written when no ligand was provided."""

    if not self._lig:
        LOGGER.warning('No ligand provided.')
        return None

    target = '{}_ligand_rescodes.txt'.format(self._title)
    with open(target, 'w') as handle:
        for ligand, residues in self._ligres_code.items():
            handle.write(ligand + '\n')
            handle.writelines(code + '\n' for code in residues)
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of
    the weekly clustering of protein chains in the PDB generated by
    blastclust.  They are available at FTP site:
    ftp://resources.rcsb.org/sequence/clusters/

    This function will download about 10 Mb of data and save it after
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can
    be loaded using :func:`loadPDBClusters` function and be accessed
    using :func:`listPDBCluster`.

    :arg sqid: one sequence identity level or a list of levels, each of
        which must be a key of ``PDB_CLUSTERS``; all levels when **None**

    Levels that cannot be downloaded are reported with a warning and
    skipped.

    :raises ValueError: when *sqid* contains an unavailable level"""

    if sqid is not None:
        if isListLike(sqid):
            for s in sqid:
                if s not in PDB_CLUSTERS:
                    raise ValueError('sqid must be one or more of ' +
                                     PDB_CLUSTERS_SQID_STR)
            keys = list(sqid)
        else:
            if sqid not in PDB_CLUSTERS:
                raise ValueError('sqid must be one or more of ' +
                                 PDB_CLUSTERS_SQID_STR)
            keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(keys),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue

        # close both handles even when the transfer fails midway
        try:
            out = openFile(filename + '.gz', 'w', folder=PDB_CLUSTERS_PATH)
            try:
                out.write(inp.read())
            finally:
                out.close()
        finally:
            inp.close()
        count += 1
        LOGGER.update(i, label='_prody_fetchPDBClusters')
    LOGGER.finish()
    if len(keys) == count:
        LOGGER.info('All selected PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warning('PDB clusters could not be downloaded.')
def getDeviations(self):
    """Returns deviations from reference coordinates for selected atoms.
    Conformations can be aligned using one of :meth:`superpose` or
    :meth:`iterpose` methods prior to calculating deviations.

    A warning is logged and **None** is returned when conformations or
    reference coordinates are not set."""

    requirements = (('_confs', 'Conformations are not set.'),
                    ('_coords', 'Coordinates are not set.'))
    for attribute, message in requirements:
        if not isinstance(getattr(self, attribute), ndarray):
            LOGGER.warning(message)
            return None

    coordsets = self._getCoordsets()
    reference = self._getCoords()
    return coordsets - reference
def getDeviations(self):
    """Return deviations from reference coordinates.  Note that you might
    need to align the conformations using :meth:`superpose` or
    :meth:`iterpose` before calculating deviations.

    A warning is logged and **None** is returned when conformations or
    reference coordinates are not set."""

    requirements = (('_confs', 'Conformations are not set.'),
                    ('_coords', 'Coordinates are not set.'))
    for attribute, message in requirements:
        if not isinstance(getattr(self, attribute), np.ndarray):
            LOGGER.warning(message)
            return None

    coordsets = self._getCoordsets()
    return coordsets - self._coords
def buildDirectInfoMatrix(msa, seqid=.8, pseudo_weight=.5, refine=False,
                          **kwargs):
    """Returns direct information matrix calculated for *msa*, which may be
    an :class:`.MSA` instance or a 2D Numpy character array.

    Sequences sharing sequence identity of *seqid* or more with another
    sequence are regarded as similar sequences for calculating their weights
    using :func:`.calcMeff`.

    *pseudo_weight* is the weight for pseudo count probability.

    Sequences are not refined by default.  When *refine* is set **True**,
    the MSA will be refined by the first sequence and the shape of direct
    information matrix will be smaller.

    A warning is logged when *msa* contains fewer than 250 sequences.  Extra
    keyword arguments are accepted but are not used by this function."""

    msa = getMSA(msa)
    from .msatools import msadipretest, msadirectinfo1, msadirectinfo2
    from numpy import matrix

    LOGGER.timeit('_di')
    if msa.shape[0] < 250:
        LOGGER.warning(
            'DI performs the best with higher number of sequences, and '
            'minimal number of sequences is recommended as 250.')

    # the extension routines receive the flag as 0/1
    refine = 1 if refine else 0

    # msadipretest get some parameter from msa to set matrix size
    length, q = msadipretest(msa, refine=refine)
    side = length * q
    column = matrix(zeros((side, 1), float))
    row = matrix(zeros((1, side), float))
    c = matrix.dot(column, row)
    prob = zeros((length, q + 1), float)

    # msadirectinfo1 return c to be inversed and prob to be used
    meff, n, length, c, prob = msadirectinfo1(
        msa, c, prob, theta=1. - seqid, pseudocount_weight=pseudo_weight,
        refine=refine, q=q + 1)

    c = c.I

    # get final DI
    di = msadirectinfo2(n, length, c, prob,
                        zeros((length, length), float), q + 1)
    del prob, c
    LOGGER.report('DI matrix was calculated in %.2fs.', '_di')
    return di
def calcTempFactors(modes, atoms):
    """Returns temperature (β) factors calculated using *modes* from a
    :class:`.ANM` or :class:`.GNM` instance scaled according to the
    experimental B-factors from *atoms*.

    Square fluctuations of *modes* are rescaled so that their sum equals the
    sum of the experimental B-factors.  A warning is logged when the largest
    experimental B-factor or their standard deviation is below 0.5.

    :raises TypeError: when *modes* does not come from a GNM or ANM model
    :raises ValueError: when *modes* and *atoms* differ in number of nodes"""

    model = modes.getModel()
    if not isinstance(model, GNMBase):
        raise TypeError('modes must come from GNM or ANM')
    if model.numAtoms() != atoms.numAtoms():
        raise ValueError('modes and atoms must have same number of nodes')

    fluctuations = calcSqFlucts(modes)
    betas = atoms.getBetas()

    # add warning message if experimental B-factors are zeros or meaningless
    # (e.g., having same values)?
    if betas.max() < 0.5 or betas.std() < 0.5:
        LOGGER.warning('Experimental B-factors are quite small or meaningless. The calculated B-factors may be incorrect.')

    scale = betas.sum() / fluctuations.sum()
    return fluctuations * scale
def getWWPDBFTPServer():
    """Return a tuple containing name, host, and path of the currently
    set `wwPDB <http://www.wwpdb.org/>`_ FTP server.

    When no server is stored in the settings, a warning is logged and the
    default RCSB PDB server is returned.  A stored path that ends with
    ``data/structures/divided/pdb/`` is returned without that ending."""

    server = SETTINGS.get('wwpdb_ftp', None)
    if server is None:
        LOGGER.warning('A wwPDB FTP server is not set, default FTP server '
                       'RCSB PDB is used. Use `setWWPDBFTPServer` function '
                       'to set a server close to your location.')
        return _WWPDB_RCSB

    divided = 'data/structures/divided/pdb/'
    if not server[2].endswith(divided):
        return server
    # report the path without the divided-folder ending
    return (server[0], server[1], server[2][:-len(divided)])
def showContactMap(enm, *args, **kwargs):
    """Show Kirchhoff matrix using :func:`~matplotlib.pyplot.spy`.

    Positional and keyword arguments are passed on to
    :func:`~matplotlib.pyplot.spy`, whose return value is returned.  When the
    Kirchhoff matrix of *enm* is not set, a warning is logged and **None** is
    returned.  The figure is shown when the ``'auto_show'`` setting is
    enabled.

    :raises TypeError: when *enm* is not a :class:`GNMBase` instance"""

    import matplotlib.pyplot as plt

    if not isinstance(enm, GNMBase):
        raise TypeError('model argument must be an ENM instance')

    matrix = enm.getKirchhoff()
    if matrix is None:
        LOGGER.warning('kirchhoff matrix is not set')
        return None

    image = plt.spy(matrix, *args, **kwargs)
    plt.title('{0} contact map'.format(enm.getTitle()))
    for set_label in (plt.xlabel, plt.ylabel):
        set_label('Residue index')
    if SETTINGS['auto_show']:
        showFigure()
    return image
def alignByEnsemble(atomics, ensemble):
    """Apply the transformations stored in *ensemble* to *atomics*.

    Transformations are matched to atomic objects by position, so *atomics*
    and *ensemble* must have the same number of members.  Items of
    *atomics* that are not :class:`.Atomic` instances produce a warning and
    a **None** entry in the output.  A single result is returned directly,
    otherwise a list of results is returned.

    :arg atomics: a set of :class:`.Atomic` objects to be aligned
    :type atomics: tuple, list, :class:`~numpy.ndarray`

    :arg ensemble: a :class:`.PDBEnsemble` or a :class:`.PDBConformation`
        from which transformations can be extracted
    :type ensemble: :class:`.PDBEnsemble`, :class:`.PDBConformation`
    """

    if not isListLike(atomics):
        raise TypeError('atomics must be list-like')
    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')

    # a lone conformation is handled as a one-member ensemble
    conformations = ([ensemble] if isinstance(ensemble, PDBConformation)
                     else ensemble)
    if len(atomics) != len(conformations):
        raise ValueError('atomics and ensemble must have the same length')

    aligned = []
    for position, conformation in enumerate(conformations):
        transformation = conformation.getTransformation()
        if transformation is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')
        candidate = atomics[position]
        if isinstance(candidate, Atomic):
            aligned.append(transformation.apply(candidate))
        else:
            LOGGER.warning(
                'No atomic object found for conformation {0}.'
                .format(position))
            aligned.append(None)

    return aligned[0] if len(aligned) == 1 else aligned
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of
    the weekly clustering of protein chains in the PDB generated by
    blastclust.  They are available at FTP site:
    ftp://resources.rcsb.org/sequence/clusters/

    This function will download about 10 Mb of data and save it after
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can
    be loaded using :func:`loadPDBClusters` function and be accessed
    using :func:`listPDBCluster`.

    :arg sqid: sequence identity level to download; when **None**, all
        levels listed in ``PDB_CLUSTERS`` are downloaded

    :raises ValueError: when *sqid* is not one of the known levels"""

    if sqid is not None:
        if sqid not in PDB_CLUSTERS:
            raise ValueError('sqid must be one of ' + PDB_CLUSTERS_SQID_STR)
        keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)

    folder = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(folder):
        os.mkdir(folder)

    # progress and the final report are based on the files requested in
    # this call, which is a single file when *sqid* is given
    n_requested = len(keys)
    LOGGER.progress('Downloading sequence clusters', n_requested,
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = 'ftp://resources.rcsb.org/sequence/clusters/' + filename
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        # close both handles even if reading or writing fails
        try:
            out = openFile(filename + '.gz', 'w', folder=folder)
            try:
                out.write(inp.read())
            finally:
                out.close()
        finally:
            inp.close()
        count += 1
        LOGGER.update(i, '_prody_fetchPDBClusters')
    LOGGER.clear()

    if count == n_requested:
        LOGGER.info('All PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warning('PDB clusters could not be downloaded.')
def buildDirectInfoMatrix(msa, seqid=0.8, pseudo_weight=0.5, refine=False, **kwargs):
    """Return the direct information (DI) matrix calculated for *msa*.

    *msa* may be an :class:`.MSA` instance or a 2D Numpy character array.
    Sequences sharing a sequence identity of *seqid* or more with another
    sequence are regarded as similar when their weights are calculated
    (see :func:`.calcMeff`), and *pseudo_weight* is the weight of the pseudo
    count probability.

    No refinement is done by default.  With *refine* set **True**, the MSA
    is refined by the first sequence, which makes the DI matrix smaller.
    Extra keyword arguments are accepted but not used here.
    """

    msa = getMSA(msa)
    from .msatools import msadipretest, msadirectinfo1, msadirectinfo2
    from numpy import matrix

    LOGGER.timeit("_di")
    too_few_sequences = msa.shape[0] < 250
    if too_few_sequences:
        LOGGER.warning(
            "DI performs the best with higher number of sequences, and "
            "minimal number of sequences is recommended as 250."
        )

    # extension functions expect the refine switch as an integer flag
    if refine:
        refine = 1
    else:
        refine = 0

    # sizes of the work arrays come from a pretest of the alignment
    length, q = msadipretest(msa, refine=refine)
    n_states = q + 1
    flat = length * q
    c = matrix.dot(
        matrix(zeros((flat, 1), float)),
        matrix(zeros((1, flat), float)),
    )
    prob = zeros((length, n_states), float)

    # step 1 returns the matrix to invert and the probabilities
    step_one = msadirectinfo1(
        msa, c, prob, theta=1.0 - seqid, pseudocount_weight=pseudo_weight,
        refine=refine, q=n_states
    )
    meff, n, length, c, prob = step_one
    c = c.I

    # step 2 produces the final DI matrix
    di = zeros((length, length), float)
    di = msadirectinfo2(n, length, c, prob, di, n_states)
    del prob, c
    LOGGER.report("DI matrix was calculated in %.2fs.", "_di")
    return di
def _iterDonors(self):
    """Yield pairs of indices for donored atoms that both belong to this
    pointer.  Donors are set with :meth:`setDonors`; a warning is logged
    when the atom group has none."""

    ag = self._ag
    if ag._donors is None:
        LOGGER.warning('donors are not set, use `AtomGroup.setDonors`')

    indices = self._getIndices()
    members = set(indices)
    if len(ag) / 2 < len(self):
        # pointer holds more than half of the atoms: walk the per-atom map
        for a, partners in zip(indices, ag._domap[indices]):
            for b in partners:
                if b <= -1 or b not in members:
                    continue
                yield a, b
            # drop *a* so that the pair is not reported again from *b*
            members.remove(a)
    else:
        # small selection: filter the pairs of the whole atom group
        for a, b in ag._iterDonors():
            if a in members and b in members:
                yield a, b
def _iterBonds(self):
    """Yield pairs of indices for bonded atoms that both belong to this
    pointer.  Bonds are set with :meth:`setBonds`; a warning is logged when
    the atom group has none."""

    ag = self._ag
    if ag._bonds is None:
        LOGGER.warning('bonds are not set, use `setBonds` or `inferBonds`')

    indices = self._getIndices()
    members = set(indices)
    if len(ag) / 2 >= len(self):
        # small selection: filter the bonds of the whole atom group
        for a, b in ag._iterBonds():
            if a in members and b in members:
                yield a, b
    elif any(ag._bmap):
        # large selection: walk the per-atom bond map instead
        for a, partners in zip(indices, ag._bmap[indices]):
            for b in partners:
                if b <= -1 or b not in members:
                    continue
                yield a, b
            # drop *a* so that the bond is not reported again from *b*
            members.remove(a)
def _iterAngles(self):
    """Yield triplets of indices for angled atoms that all belong to this
    pointer.  Angles are set with :meth:`setAngles`; a warning is logged
    when the atom group has none."""

    ag = self._ag
    if ag._angles is None:
        LOGGER.warning('angles are not set, use `AtomGroup.setAngles`')

    indices = self._getIndices()
    members = set(indices)
    if len(ag) / 3 >= len(self):
        # small selection: filter the angles of the whole atom group
        for a, b, c in ag._iterAngles():
            if a in members and b in members and c in members:
                yield a, b, c
    elif any(ag._angmap):
        # large selection: walk the per-atom angle map instead
        for a, partners in zip(indices, ag._angmap[indices]):
            for b, c in partners:
                if b <= -1 or b not in members:
                    continue
                if c <= -1 or c not in members:
                    continue
                yield a, b, c
            # drop *a* so that later atoms do not report it again
            members.remove(a)
def _iterNBExclusions(self):
    """Yield pairs of indices for nbexclusioned atoms that both belong to
    this pointer.  Exclusions are set with :meth:`setNBExclusions`; a
    warning is logged when the atom group has none."""

    ag = self._ag
    if ag._nbexclusions is None:
        LOGGER.warning(
            'nbexclusions are not set, use `AtomGroup.setNBExclusions`')

    indices = self._getIndices()
    members = set(indices)
    if len(ag) / 2 < len(self):
        # pointer holds more than half of the atoms: walk the per-atom map
        for a, partners in zip(indices, ag._nbemap[indices]):
            for b in partners:
                if b <= -1 or b not in members:
                    continue
                yield a, b
            # drop *a* so that the pair is not reported again from *b*
            members.remove(a)
    else:
        # small selection: filter the pairs of the whole atom group
        for a, b in ag._iterNBExclusions():
            if a in members and b in members:
                yield a, b
def runTests(*mods, **kwargs):
    """Run the test modules named in *mods*, or every module listed in
    ``MODULES`` when no name is given.

    When :mod:`nose` cannot be imported, tests are run with
    :mod:`unittest`, honoring the *verbose*, *descriptions*, and *stream*
    keyword arguments.  Otherwise ``numpy.testing.Tester`` is used with the
    *label* and *verbose* keyword arguments.  An unknown module name raises
    :exc:`ValueError`."""

    if mods:
        modules = []
        for name in mods:
            try:
                target = MODULES[name]
            except KeyError:
                raise ValueError(name + ' is not a valid test module name')
            modules.append(target)
    else:
        modules = MODULES.values()  # PY3K: OK

    try:
        import nose
    except ImportError:
        LOGGER.warning('Failed to import nose, using unittest for testing.')
        LOGGER.info('nose is available at http://readthedocs.org/docs/nose/')
        import sys
        verbosity = kwargs.get('verbose', 2)
        descriptions = kwargs.get('descriptions', True)
        stream = kwargs.get('stream', sys.stderr)
        runner = unittest.TextTestRunner(stream, descriptions, verbosity)
        load = unittest.defaultTestLoader.loadTestsFromName
        for module in modules:
            runner.run(load(module))
    else:
        from numpy.testing import Tester
        verbose = kwargs.get('verbose', 1)
        label = kwargs.get('label', 'fast')
        # without explicit names the whole test package is run at once
        targets = modules if mods else ['prody.tests']
        for target in targets:
            Tester(target).test(label=label, verbose=verbose)
def runTests(*mods, **kwargs):
    """Run tests for the modules named in *mods*; with no names, all
    modules found in ``MODULES`` are used.

    :mod:`unittest` is the fallback runner when importing :mod:`nose`
    fails (keywords *verbose*, *descriptions*, *stream*); otherwise
    ``numpy.testing.Tester`` runs the tests (keywords *label*, *verbose*).
    :exc:`ValueError` is raised for a name that is not in ``MODULES``."""

    if not mods:
        modules = MODULES.values()  # PY3K: OK
    else:
        modules = []
        for requested in mods:
            try:
                modules.append(MODULES[requested])
            except KeyError:
                raise ValueError(requested +
                                 ' is not a valid test module name')

    try:
        import nose
    except ImportError:
        LOGGER.warning('Failed to import nose, using unittest for testing.')
        LOGGER.info('nose is available at http://readthedocs.org/docs/nose/')
        from sys import stderr
        testrunner = unittest.TextTestRunner(
            kwargs.get('stream', stderr),
            kwargs.get('descriptions', True),
            kwargs.get('verbose', 2))
        for module in modules:
            suite = unittest.defaultTestLoader.loadTestsFromName(module)
            testrunner.run(suite)
    else:
        from numpy.testing import Tester
        options = {'label': kwargs.get('label', 'fast'),
                   'verbose': kwargs.get('verbose', 1)}
        if not mods:
            # no explicit selection: run the whole test package
            Tester('prody.tests').test(**options)
        else:
            for module in modules:
                Tester(module).test(**options)
def _iterDihedrals(self):
    """Yield quadruples of indices for dihedraled atoms that all belong to
    this pointer.  Dihedrals are set with :meth:`setDihedrals`; a warning
    is logged when the atom group has none."""

    ag = self._ag
    if ag._dihedrals is None:
        LOGGER.warning(
            'dihedrals are not set, use `AtomGroup.setDihedrals`')

    indices = self._getIndices()
    members = set(indices)
    if len(ag) / 4 >= len(self):
        # small selection: filter the dihedrals of the whole atom group
        for a, b, c, d in ag._iterDihedrals():
            if a in members and b in members and c in members \
               and d in members:
                yield a, b, c, d
    elif any(ag._dmap):
        # large selection: walk the per-atom dihedral map instead
        for a, partners in zip(indices, ag._dmap[indices]):
            for b, c, d in partners:
                if all(x > -1 and x in members for x in (b, c, d)):
                    yield a, b, c, d
            # drop *a* so that later atoms do not report it again
            members.remove(a)
def _iterCrossterms(self):
    """Yield quadruplets of indices for crosstermed atoms that all belong
    to this pointer.  Crossterms are set with :meth:`setCrossterms`; a
    warning is logged when the atom group has none."""

    ag = self._ag
    if ag._crossterms is None:
        LOGGER.warning(
            'crossterms are not set, use `AtomGroup.setCrossterms`')

    indices = self._getIndices()
    members = set(indices)
    if len(ag) / 4 >= len(self):
        # small selection: filter the crossterms of the whole atom group
        for a, b, c, d in ag._iterCrossterms():
            if a in members and b in members and c in members \
               and d in members:
                yield a, b, c, d
    elif any(ag._cmap):
        # large selection: walk the per-atom crossterm map instead
        for a, partners in zip(indices, ag._cmap[indices]):
            for b, c, d in partners:
                if all(x > -1 and x in members for x in (b, c, d)):
                    yield a, b, c, d
            # drop *a* so that later atoms do not report it again
            members.remove(a)
def getCoordsets(self, indices=None):
    """Return coordinate sets at *indices* as one array.

    :arg indices: an integer, a slice, a list or array of integers, or
        **None** for all coordinate sets; lists and arrays are reduced to
        their sorted unique values
    :returns: array with one entry per requested frame, each holding the
        selected atoms' x, y, z values; fewer entries are returned, after
        a warning, when the file runs out of frames early
    :raises ValueError: when the file is closed
    :raises TypeError: when *indices* is of an unsupported type

    The frame position held before the call is restored afterwards."""

    if self._closed:
        raise ValueError('I/O operation on closed file')
    if indices is None:
        indices = np.arange(self._n_csets)
    elif isinstance(indices, (int, np.integer)):
        indices = np.array([indices])
    elif isinstance(indices, slice):
        indices = np.arange(*indices.indices(self._n_csets))
        indices.sort()
    elif isinstance(indices, (list, np.ndarray)):
        indices = np.unique(indices)
    else:
        raise TypeError('indices must be an integer or a list of integers')

    nfi = self._nfi
    self.reset()

    n_atoms = self.numSelected()
    coords = np.zeros((len(indices), n_atoms, 3), self._dtype)

    # After the reset no frame has been read yet, so the "previous" frame
    # is -1; starting from 0 would read frame k-1 for a first index k > 0.
    prev = -1
    read_next = self.nextCoordset
    for i, index in enumerate(indices):
        n_skip = index - prev - 1
        if n_skip > 0:
            self.skip(n_skip)
        xyz = read_next()
        if xyz is None:
            LOGGER.warning('Expected {0} frames, but parsed {1}.'
                           .format(len(indices), i))
            self.goto(nfi)
            return coords[:i]
        coords[i] = xyz
        prev = index

    self.goto(nfi)
    return coords
def setWWPDBFTPServer(key):
    """Set the `wwPDB <http://www.wwpdb.org/>`_ FTP server used for
    downloading PDB structures when needed.  The server is selected with
    one of the following keywords; an unknown keyword only logs a warning
    and leaves the settings untouched.

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+
    """

    server = WWPDB_FTP_SERVERS.get(key.lower())
    if server is None:
        LOGGER.warning('{0:s} is not a valid key.'.format(key))
        return

    # remember the choice for later sessions
    SETTINGS['wwpdb_ftp'] = server
    SETTINGS.save()
def __add__(self, other):
    """Return an :class:`~.AtomMap` that joins the atoms of *self* and
    *other*, keeping the order of pointed atoms.

    Both operands must be :class:`AtomPointer` instances for the same atom
    group.  The result takes the active coordinate set index of *self*; a
    warning is logged when *other* has a different one."""

    if not isinstance(other, AtomPointer):
        raise TypeError('unsupported operand type(s) for +: {0:s} and '
                        '{1:s}'.format(repr(type(self).__name__),
                                       repr(type(other).__name__)))

    ag = self._ag
    if ag != other._ag:
        raise ValueError('AtomPointer instances must point to the same '
                         'AtomGroup instance')

    acsi = self.getACSIndex()
    if acsi != other.getACSIndex():
        LOGGER.warning('Active coordset indices of atoms are not the same.'
                       ' Result will have ACSI {0:d}.'.format(acsi))

    title = '({0:s}) + ({1:s})'.format(str(self), str(other))
    indices = np.concatenate([self._getIndices(), other._getIndices()])

    # entries of *other* are placed after those of *self*
    offset = len(self)

    if isinstance(self, AtomMap):
        left_mapping, left_dummies = self._getMapping(), self._dummies
    else:
        left_mapping, left_dummies = np.arange(offset), np.array([])

    if isinstance(other, AtomMap):
        right_mapping = other._getMapping() + offset
        right_dummies = other._dummies + offset
    else:
        right_mapping = np.arange(offset, offset + len(other))
        right_dummies = np.array([])

    mapping = np.concatenate([left_mapping, right_mapping])
    unmapped = np.concatenate([left_dummies, right_dummies])
    return AtomMap(ag, indices, mapping, unmapped, title, acsi)
def _eigh(M, eigvals=None, turbo=True):
    """Return eigenvalues and eigenvectors of the symmetric matrix *M*.

    :arg M: dense or SciPy sparse matrix to decompose
    :arg eigvals: optional ``(low, high)`` index range of the eigenvalues
        to compute; when given, *turbo* is switched off
    :arg turbo: forwarded to the dense SciPy solver
    :returns: tuple of ``(values, vectors)``
    :raises ImportError: when *M* is sparse and ``scipy.sparse.linalg``
        cannot be imported

    NOTE(review): ``linalg``, ``dof``, and ``n_modes`` are not defined in
    this function; they are read from the surrounding scope -- confirm
    they are available wherever this helper is defined."""

    if linalg.__package__.startswith('scipy'):
        from scipy.sparse import issparse

        if eigvals:
            turbo = False
        if not issparse(M):
            values, vectors = linalg.eigh(M, turbo=turbo, eigvals=eigvals)
        else:
            try:
                from scipy.sparse import linalg as scipy_sparse_la
            except ImportError:
                raise ImportError('failed to import scipy.sparse.linalg, '
                                  'which is required for sparse matrix '
                                  'decomposition')
            if eigvals:
                j = eigvals[0]
                k = eigvals[-1] + 1
            else:
                j = 0
                k = dof

            if k >= dof:
                k -= 1
                LOGGER.warning('Cannot calculate all eigenvalues for sparse matrices, thus '
                               'the last eigenvalue is omitted. See scipy.sparse.linalg.eigsh '
                               'for more information')
            values, vectors = scipy_sparse_la.eigsh(M, k=k, which='SA')
            values = values[j:k]
            vectors = vectors[:, j:k]
    else:
        # Only read ``n_modes`` here.  Assigning to it inside this function
        # would make the name local and turn this test into an
        # UnboundLocalError, and the assigned value was never used.
        if n_modes is not None:
            LOGGER.info('Scipy is not found, all modes were calculated.')
        values, vectors = linalg.eigh(M)
    return values, vectors
def alignPDBEnsemble(ensemble, suffix='_aligned', outdir='.', gzip=False):
    """Align PDB files using transformations from *ensemble*, which may be
    a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance. Label of
    the conformation (see :meth:`~.PDBConformation.getLabel`) will be used to
    determine the PDB structure and model number.  First four characters of
    the label is expected to be the PDB identifier and ending numbers to be the
    model number.  For example, the :class:`.Transformation` from conformation
    with label *2k39_ca_selection_'resnum_<_71'_m116* will be applied to 116th
    model of structure **2k39**.  After applicable transformations are made,
    structure will be written into *outdir* as :file:`2k39_aligned.pdb`.
    If *gzip* is **True**, output files will be compressed.  Return value is
    the output filename or list of filenames, in the order files are processed.
    Note that if multiple models from a file are aligned, that filename will
    appear in the list multiple times.  Conformations whose PDB file is not
    found, or whose model number is out of range, contribute **None** to the
    output.

    :raises TypeError: when *ensemble* is of an unsupported type
    :raises ValueError: when a conformation has no transformation"""

    if not isinstance(ensemble, (PDBEnsemble, PDBConformation)):
        raise TypeError('ensemble must be a PDBEnsemble or PDBConformation')
    if isinstance(ensemble, PDBConformation):
        ensemble = [ensemble]

    extension = '.pdb' + ('.gz' if gzip else '')
    output = []
    # multi-model structures already parsed, keyed by PDB identifier; they
    # are written once after all of their models have been transformed
    pdbdict = {}
    for conf in ensemble:
        trans = conf.getTransformation()
        if trans is None:
            raise ValueError('transformations are not calculated, call '
                             '`superpose` or `iterpose`')
        label = conf.getLabel()

        pdb = label[:4]
        # look the file up only when the structure is not cached; calling
        # fetchPDB as a ``dict.get`` default would run it on every pass
        if pdb in pdbdict:
            filename = pdbdict[pdb]
        else:
            filename = fetchPDB(pdb)
        if filename is None:
            LOGGER.warning('PDB file for conformation {0} is not found.'
                           .format(label))
            output.append(None)
            continue
        LOGGER.info('Parsing PDB file {0} for conformation {1}.'
                    .format(pdb, label))

        acsi = None
        model = label.rfind('m')
        if model > 3:
            model = label[model+1:]
            if model.isdigit():
                acsi = int(model) - 1
                LOGGER.info('Applying transformation to model {0}.'
                            .format(model))

        # a cached entry is the parsed structure, not a file name
        if isinstance(filename, str):
            ag = parsePDB(filename)
        else:
            ag = filename

        if acsi is not None:
            if acsi >= ag.numCoordsets():
                LOGGER.warning('Model number {0} for {1} is out of range.'
                               .format(model, pdb))
                output.append(None)
                continue
            ag.setACSIndex(acsi)
        trans.apply(ag)
        outfn = os.path.join(outdir, pdb + suffix + extension)
        if ag.numCoordsets() > 1:
            pdbdict[pdb] = ag
        else:
            writePDB(outfn, ag)
        output.append(os.path.normpath(outfn))

    for pdb, ag in pdbdict.items():  # PY3K: OK
        writePDB(os.path.join(outdir, pdb + suffix + extension), ag)
    if len(output) == 1:
        return output[0]
    else:
        return output
def calcModes(self, n_modes=20, zeros=False, turbo=True, hinges=True):
    """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
    function to diagonalize the Kirchhoff matrix. When Scipy is not found,
    :func:`numpy.linalg.eigh` is used.

    :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
        If ``None`` is given, all modes will be calculated.
    :type n_modes: int or None, default is 20

    :arg zeros: If ``True``, modes with zero eigenvalues will be kept.
    :type zeros: bool, default is ``False``

    :arg turbo: Use a memory intensive, but faster way to calculate modes.
    :type turbo: bool, default is ``True``

    :arg hinges: Identify hinge sites after modes are computed.
    :type hinges: bool, default is ``True``

    :raises ValueError: when the Kirchhoff matrix is not built or set
    """

    if self._kirchhoff is None:
        raise ValueError('Kirchhoff matrix is not built or set')
    assert n_modes is None or isinstance(n_modes, int) and n_modes > 0, \
        'n_modes must be a positive integer'
    assert isinstance(zeros, bool), 'zeros must be a boolean'
    assert isinstance(turbo, bool), 'turbo must be a boolean'
    linalg = importLA()
    start = time.time()
    shift = 0
    if linalg.__package__.startswith('scipy'):
        if n_modes is None:
            eigvals = None
            n_modes = self._dof
        else:
            if n_modes >= self._dof:
                eigvals = None
                n_modes = self._dof
            else:
                eigvals = (0, n_modes + shift)
        if eigvals:
            turbo = False
        if isinstance(self._kirchhoff, np.ndarray):
            values, vectors = linalg.eigh(self._kirchhoff, turbo=turbo,
                                          eigvals=eigvals)
        else:
            try:
                from scipy.sparse import linalg as scipy_sparse_la
            except ImportError:
                raise ImportError('failed to import scipy.sparse.linalg, '
                                  'which is required for sparse matrix '
                                  'decomposition')
            # Fall back to ``eigen_symmetric`` only when ``eigsh`` itself is
            # missing (presumably an older SciPy); a bare ``except`` here
            # would also swallow genuine solver errors and interrupts.
            try:
                solver = scipy_sparse_la.eigsh
            except AttributeError:
                solver = scipy_sparse_la.eigen_symmetric
            values, vectors = solver(self._kirchhoff, k=n_modes + 1,
                                     which='SA')
    else:
        if n_modes is not None:
            LOGGER.info('Scipy is not found, all modes are calculated.')
        values, vectors = linalg.eigh(self._kirchhoff)

    # one zero eigenvalue is expected; adjust the offset when the count
    # differs so that exactly the zero modes are dropped
    n_zeros = sum(values < ZERO)
    if n_zeros < 1:
        LOGGER.warning('Less than 1 zero eigenvalues are calculated.')
        shift = n_zeros - 1
    elif n_zeros > 1:
        LOGGER.warning('More than 1 zero eigenvalues are calculated.')
        shift = n_zeros - 1
    if zeros:
        # keep everything, including the zero modes
        shift = -1
    self._eigvals = values[1+shift:]
    self._vars = 1 / self._eigvals
    self._trace = self._vars.sum()
    self._array = vectors[:, 1+shift:]
    self._n_modes = len(self._eigvals)
    if hinges:
        self.calcHinges()
    LOGGER.debug('{0} modes were calculated in {1:.2f}s.'
                 .format(self._n_modes, time.time()-start))
def calcModes(self, n_modes=20, zeros=False, turbo=True):
    """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
    function to diagonalize the Hessian matrix. When Scipy is not found,
    :func:`numpy.linalg.eigh` is used.

    :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
        If **None** or ``'all'`` is given, all modes will be calculated.
    :type n_modes: int or None, default is 20

    :arg zeros: If **True**, modes with zero eigenvalues will be kept.
    :type zeros: bool, default is **False**

    :arg turbo: Use a memory intensive, but faster way to calculate modes.
    :type turbo: bool, default is **True**

    :raises ValueError: when the Hessian matrix is not built or set
    """

    if self._hessian is None:
        raise ValueError('Hessian matrix is not built or set')
    if str(n_modes).lower() == 'all':
        n_modes = None
    assert n_modes is None or isinstance(n_modes, int) and n_modes > 0, \
        'n_modes must be a positive integer'
    assert isinstance(zeros, bool), 'zeros must be a boolean'
    assert isinstance(turbo, bool), 'turbo must be a boolean'
    self._clear()
    linalg = importLA()
    LOGGER.timeit('_anm_calc_modes')
    shift = 5
    if linalg.__package__.startswith('scipy'):
        if n_modes is None:
            eigvals = None
            n_modes = self._dof
        else:
            if n_modes >= self._dof:
                eigvals = None
                n_modes = self._dof
            else:
                eigvals = (0, n_modes + shift)
        if eigvals:
            turbo = False
        if isinstance(self._hessian, np.ndarray):
            values, vectors = linalg.eigh(self._hessian, turbo=turbo,
                                          eigvals=eigvals)
        else:
            try:
                from scipy.sparse import linalg as scipy_sparse_la
            except ImportError:
                raise ImportError('failed to import scipy.sparse.linalg, '
                                  'which is required for sparse matrix '
                                  'decomposition')
            # Fall back to ``eigen_symmetric`` only when ``eigsh`` itself is
            # missing (presumably an older SciPy); a bare ``except`` here
            # would also swallow genuine solver errors and interrupts.
            try:
                solver = scipy_sparse_la.eigsh
            except AttributeError:
                solver = scipy_sparse_la.eigen_symmetric
            values, vectors = solver(self._hessian, k=n_modes+6,
                                     which='SA')
    else:
        if n_modes is not None:
            LOGGER.info('Scipy is not found, all modes are calculated.')
        values, vectors = np.linalg.eigh(self._hessian)

    # six zero eigenvalues are expected; adjust the offset when the count
    # differs so that exactly the zero modes are dropped
    n_zeros = sum(values < ZERO)
    if n_zeros < 6:
        LOGGER.warning('Less than 6 zero eigenvalues are calculated.')
        shift = n_zeros - 1
    elif n_zeros > 6:
        LOGGER.warning('More than 6 zero eigenvalues are calculated.')
        shift = n_zeros - 1
    if zeros:
        # keep everything, including the zero modes
        shift = -1

    # Eigenvalues and eigenvectors must be cut at the same position.
    # Testing ``shift`` for truth instead would skip the vector slice when
    # shift is 0 and leave one more vector than there are eigenvalues.
    first = 1 + shift
    self._eigvals = values[first:]
    self._vars = 1 / self._eigvals
    self._trace = self._vars.sum()
    if first:
        self._array = vectors[:, first:].copy()
    else:
        self._array = vectors
    self._n_modes = len(self._eigvals)
    LOGGER.report('{0} modes were calculated in %.2fs.'
                  .format(self._n_modes), label='_anm_calc_modes')